/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.apache.hadoop.hbase.io.hfile.HFileBlock.FILL_HEADER;

import java.io.IOException;
import java.lang.reflect.Modifier;
import java.nio.ByteBuffer;
import java.util.HashSet;
import java.util.Map;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.Set;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ConcurrentSkipListSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.metrics.impl.FastLongHistogram;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.GsonUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.gson.Gson;
import org.apache.hbase.thirdparty.com.google.gson.TypeAdapter;
import org.apache.hbase.thirdparty.com.google.gson.stream.JsonReader;
import org.apache.hbase.thirdparty.com.google.gson.stream.JsonWriter;

/**
 * Utility for aggregating counts in CachedBlocks and toString/toJSON CachedBlocks and BlockCaches.
 * No attempt has been made at making this thread safe.
 */
@InterfaceAudience.Private
public class BlockCacheUtil {

  private static final Logger LOG = LoggerFactory.getLogger(BlockCacheUtil.class);

  public static final long NANOS_PER_SECOND = 1000000000;

  /**
   * Needed for generating JSON.
   */
  private static final Gson GSON =
    GsonUtil.createGson().excludeFieldsWithModifiers(Modifier.PRIVATE)
      .registerTypeAdapter(FastLongHistogram.class, new TypeAdapter<FastLongHistogram>() {

        @Override
        public void write(JsonWriter out, FastLongHistogram value) throws IOException {
          AgeSnapshot snapshot = new AgeSnapshot(value);
          out.beginObject();
          out.name("mean").value(snapshot.getMean());
          out.name("min").value(snapshot.getMin());
          out.name("max").value(snapshot.getMax());
          out.name("75thPercentile").value(snapshot.get75thPercentile());
          out.name("95thPercentile").value(snapshot.get95thPercentile());
          out.name("98thPercentile").value(snapshot.get98thPercentile());
          out.name("99thPercentile").value(snapshot.get99thPercentile());
          out.name("999thPercentile").value(snapshot.get999thPercentile());
          out.endObject();
        }

        @Override
        public FastLongHistogram read(JsonReader in) throws IOException {
          throw new UnsupportedOperationException();
        }
      }).setPrettyPrinting().create();

  /** Returns the block content as String. */
  public static String toString(final CachedBlock cb, final long now) {
    return "filename=" + cb.getFilename() + ", " + toStringMinusFileName(cb, now);
  }

  /**
   * Little data structure to hold counts for a file. Used when doing a toJSON.
   */
  static class CachedBlockCountsPerFile {
    private int count = 0;
    private long size = 0;
    private int countData = 0;
    private long sizeData = 0;
    private final String filename;

    CachedBlockCountsPerFile(final String filename) {
      this.filename = filename;
    }

    public int getCount() {
      return count;
    }

    public long getSize() {
      return size;
    }

    public int getCountData() {
      return countData;
    }

    public long getSizeData() {
      return sizeData;
    }

    public String getFilename() {
      return filename;
    }
  }

  /** Returns a JSON String of <code>filename</code> and counts of <code>blocks</code> */
  public static String toJSON(String filename, NavigableSet<CachedBlock> blocks)
    throws IOException {
    CachedBlockCountsPerFile counts = new CachedBlockCountsPerFile(filename);
    for (CachedBlock cb : blocks) {
      counts.count++;
      counts.size += cb.getSize();
      BlockType bt = cb.getBlockType();
      if (bt != null && bt.isData()) {
        counts.countData++;
        counts.sizeData += cb.getSize();
      }
    }
    return GSON.toJson(counts);
  }

  /** Returns JSON string of <code>cbsbf</code> aggregated */
  public static String toJSON(CachedBlocksByFile cbsbf) throws IOException {
    return GSON.toJson(cbsbf);
  }

  /** Returns JSON string of <code>bc</code> content. */
  public static String toJSON(BlockCache bc) throws IOException {
    return GSON.toJson(bc);
  }

  /** Returns the block content of <code>cb</code> as a String minus the filename. */
  public static String toStringMinusFileName(final CachedBlock cb, final long now) {
    return "offset=" + cb.getOffset() + ", size=" + cb.getSize() + ", age="
      + (now - cb.getCachedTime()) + ", type=" + cb.getBlockType() + ", priority="
      + cb.getBlockPriority();
  }
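
  // A minimal usage sketch (illustrative only, not part of this class's API): dumping a block
  // cache's contents as JSON, e.g. from UI or debugging code. The variables "conf" and "bc" are
  // assumed to be an existing Configuration and BlockCache; getLoadedCachedBlocksByFile is
  // defined below.
  //
  //   CachedBlocksByFile cbsbf = BlockCacheUtil.getLoadedCachedBlocksByFile(conf, bc);
  //   String perFileStats = BlockCacheUtil.toJSON(cbsbf);
  //   String rawCacheJson = BlockCacheUtil.toJSON(bc);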

  /**
   * Get a {@link CachedBlocksByFile} instance and load it up by iterating content in
   * {@link BlockCache}.
   * @param conf Used to read configurations
   * @param bc   Block Cache to iterate.
   * @return Loaded up instance of CachedBlocksByFile
   */
  public static CachedBlocksByFile getLoadedCachedBlocksByFile(final Configuration conf,
    final BlockCache bc) {
    CachedBlocksByFile cbsbf = new CachedBlocksByFile(conf);
    for (CachedBlock cb : bc) {
      if (cbsbf.update(cb)) break;
    }
    return cbsbf;
  }

  private static int compareCacheBlock(Cacheable left, Cacheable right,
    boolean includeNextBlockMetadata) {
    ByteBuffer l = ByteBuffer.allocate(left.getSerializedLength());
    left.serialize(l, includeNextBlockMetadata);
    ByteBuffer r = ByteBuffer.allocate(right.getSerializedLength());
    right.serialize(r, includeNextBlockMetadata);
    return Bytes.compareTo(l.array(), l.arrayOffset(), l.limit(), r.array(), r.arrayOffset(),
      r.limit());
  }

  /**
   * Validate that the existing block and newBlock are the same without including the
   * nextBlockMetadata; if not, throw an exception. If they are the same without the
   * nextBlockMetadata, return the comparison.
   * @param existing block that is existing in the cache.
   * @param newBlock block that is trying to be cached.
   * @param cacheKey the cache key of the blocks.
   * @return comparison of the existing block to the newBlock.
   */
  public static int validateBlockAddition(Cacheable existing, Cacheable newBlock,
    BlockCacheKey cacheKey) {
    int comparison = compareCacheBlock(existing, newBlock, false);
    if (comparison != 0) {
      throw new RuntimeException(
        "Cached block contents differ, which should not have happened. cacheKey:" + cacheKey);
    }
    if ((existing instanceof HFileBlock) && (newBlock instanceof HFileBlock)) {
      comparison = ((HFileBlock) existing).getNextBlockOnDiskSize()
        - ((HFileBlock) newBlock).getNextBlockOnDiskSize();
    }
    return comparison;
  }

  /**
   * Because of region splitting, it's possible that the split key is located in the middle of a
   * block. So it's possible that both daughter regions load the same block from their parent
   * HFile. When doing a pread, we don't force the read to include all of the next block header, so
   * when two threads try to cache the same block, it's possible that one thread read all of the
   * next block header but the other one didn't. If the already cached block doesn't have the next
   * block header but the new block to cache does, then we can replace the existing block with the
   * new block for better performance. (HBASE-20447)
   * @param blockCache BlockCache to check
   * @param cacheKey   the block cache key
   * @param newBlock   the new block to put into the block cache.
   * @return true if the existing block should be replaced with the new block for the same block
   *         cache key; false to keep the existing block.
   */
  public static boolean shouldReplaceExistingCacheBlock(BlockCache blockCache,
    BlockCacheKey cacheKey, Cacheable newBlock) {
    // NOTICE: The getBlock has retained the existingBlock inside.
    Cacheable existingBlock = blockCache.getBlock(cacheKey, false, false, false);
    if (existingBlock == null) {
      return true;
    }
    try {
      int comparison = BlockCacheUtil.validateBlockAddition(existingBlock, newBlock, cacheKey);
      if (comparison < 0) {
        LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the new block has "
          + "nextBlockOnDiskSize set. Caching new block.");
        return true;
      } else if (comparison > 0) {
        LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the existing block has "
          + "nextBlockOnDiskSize set. Keeping cached block.");
        return false;
      } else {
        LOG.debug("Caching an already cached block: {}. This is harmless and can happen in rare "
          + "cases (see HBASE-8547)", cacheKey);
        return false;
      }
    } finally {
      // Release this block to decrement the reference count.
      existingBlock.release();
    }
  }
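
  // A minimal sketch of how a BlockCache implementation might consult the helper above before
  // caching (illustrative only; "cacheKey" and "newBlock" are the key and block being cached,
  // while evictBlock/doCache stand in for hypothetical cache internals):
  //
  //   if (BlockCacheUtil.shouldReplaceExistingCacheBlock(this, cacheKey, newBlock)) {
  //     evictBlock(cacheKey);
  //     doCache(cacheKey, newBlock);
  //   }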

  public static Set<String> listAllFilesNames(Map<String, HRegion> onlineRegions) {
    Set<String> files = new HashSet<>();
    onlineRegions.values().forEach(r -> {
      r.getStores().forEach(s -> {
        s.getStorefiles().forEach(f -> files.add(f.getPath().getName()));
      });
    });
    return files;
  }

  private static final int DEFAULT_MAX = 1000000;

  public static int getMaxCachedBlocksByFile(Configuration conf) {
    return conf == null ? DEFAULT_MAX : conf.getInt("hbase.ui.blockcache.by.file.max", DEFAULT_MAX);
  }

  /**
   * Similar to HFileBlock.Writer.getBlockForCaching(), creates an HFileBlock instance without
   * checksum for caching. This is needed when we cache blocks via readers (either prefetch or
   * client read); otherwise we may fail equality comparison when checking against the same block
   * that may already have been cached at write time.
   * @param cacheConf the related CacheConfig object.
   * @param block     the HFileBlock instance to be converted.
   * @return the resulting HFileBlock instance without checksum.
   */
  public static HFileBlock getBlockForCaching(CacheConfig cacheConf, HFileBlock block) {
    // Calculate how many bytes we need for checksum on the tail of the block.
    int numBytes = cacheConf.shouldCacheCompressed(block.getBlockType().getCategory())
      ? 0
      : (int) ChecksumUtil.numBytes(block.getOnDiskDataSizeWithHeader(),
        block.getHFileContext().getBytesPerChecksum());
    ByteBuff buff = block.getBufferReadOnly();
    HFileBlockBuilder builder = new HFileBlockBuilder();
    return builder.withBlockType(block.getBlockType())
      .withOnDiskSizeWithoutHeader(block.getOnDiskSizeWithoutHeader())
      .withUncompressedSizeWithoutHeader(block.getUncompressedSizeWithoutHeader())
      .withPrevBlockOffset(block.getPrevBlockOffset()).withByteBuff(buff)
      .withFillHeader(FILL_HEADER).withOffset(block.getOffset()).withNextBlockOnDiskSize(-1)
      .withOnDiskDataSizeWithHeader(block.getOnDiskDataSizeWithHeader() + numBytes)
      .withNextBlockOnDiskSize(block.getNextBlockOnDiskSize())
      .withHFileContext(cloneContext(block.getHFileContext()))
      .withByteBuffAllocator(cacheConf.getByteBuffAllocator()).withShared(!buff.hasArray()).build();
  }

  public static HFileContext cloneContext(HFileContext context) {
    HFileContext newContext = new HFileContextBuilder().withBlockSize(context.getBlocksize())
      .withBytesPerCheckSum(0).withChecksumType(ChecksumType.NULL) // no checksums in cached data
      .withCompression(context.getCompression())
      .withDataBlockEncoding(context.getDataBlockEncoding())
      .withHBaseCheckSum(context.isUseHBaseChecksum()).withCompressTags(context.isCompressTags())
      .withIncludesMvcc(context.isIncludesMvcc()).withIncludesTags(context.isIncludesTags())
      .withColumnFamily(context.getColumnFamily()).withTableName(context.getTableName()).build();
    return newContext;
  }
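
  // A minimal sketch of the intended read-path usage (illustrative only; "cacheConf", "cacheKey"
  // and "blockRead" are assumed to be the reader's CacheConfig, the block's cache key and the
  // block just read from disk):
  //
  //   HFileBlock toCache = BlockCacheUtil.getBlockForCaching(cacheConf, blockRead);
  //   cacheConf.getBlockCache().ifPresent(cache -> cache.cacheBlock(cacheKey, toCache));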

  /**
   * Use one of these to keep a running account of cached blocks by file. Throw it away when done.
   * This is different from metrics in that it is stats on the current state of a cache. See
   * getLoadedCachedBlocksByFile
   */
  public static class CachedBlocksByFile {
    private int count;
    private int dataBlockCount;
    private long size;
    private long dataSize;
    private final long now = System.nanoTime();
    /**
     * How many blocks to look at before we give up. There could be many millions of blocks. We
     * don't want the ui to freeze while we run through 1B blocks... users will think hbase is
     * dead. UI displays warning in red when stats are incomplete.
     */
    private final int max;

    CachedBlocksByFile() {
      this(null);
    }

    CachedBlocksByFile(final Configuration c) {
      this.max = getMaxCachedBlocksByFile(c);
    }

    /**
     * Map by filename. Use concurrent utils because we want our Map and contained blocks sorted.
     */
    private transient NavigableMap<String, NavigableSet<CachedBlock>> cachedBlockByFile =
      new ConcurrentSkipListMap<>();
    FastLongHistogram hist = new FastLongHistogram();

    /** Returns true if full... i.e. if we won't be adding any more. */
    public boolean update(final CachedBlock cb) {
      if (isFull()) return true;
      NavigableSet<CachedBlock> set = this.cachedBlockByFile.get(cb.getFilename());
      if (set == null) {
        set = new ConcurrentSkipListSet<>();
        this.cachedBlockByFile.put(cb.getFilename(), set);
      }
      set.add(cb);
      this.size += cb.getSize();
      this.count++;
      BlockType bt = cb.getBlockType();
      if (bt != null && bt.isData()) {
        this.dataBlockCount++;
        this.dataSize += cb.getSize();
      }
      long age = (this.now - cb.getCachedTime()) / NANOS_PER_SECOND;
      this.hist.add(age, 1);
      return false;
    }

    /**
     * @return True if full; i.e. there are more items in the cache but we only loaded up the
     *         maximum set in configuration <code>hbase.ui.blockcache.by.file.max</code> (Default:
     *         DEFAULT_MAX).
     */
    public boolean isFull() {
      return this.count >= this.max;
    }

    public NavigableMap<String, NavigableSet<CachedBlock>> getCachedBlockStatsByFile() {
      return this.cachedBlockByFile;
    }

    /** Returns count of blocks in the cache */
    public int getCount() {
      return count;
    }

    public int getDataCount() {
      return dataBlockCount;
    }

    /** Returns size of blocks in the cache */
    public long getSize() {
      return size;
    }

    /** Returns size of data. */
    public long getDataSize() {
      return dataSize;
    }

    public AgeSnapshot getAgeInCacheSnapshot() {
      return new AgeSnapshot(this.hist);
    }

    @Override
    public String toString() {
      AgeSnapshot snapshot = getAgeInCacheSnapshot();
      return "count=" + count + ", dataBlockCount=" + dataBlockCount + ", size=" + size
        + ", dataSize=" + getDataSize() + ", mean age=" + snapshot.getMean() + ", min age="
        + snapshot.getMin() + ", max age=" + snapshot.getMax() + ", 75th percentile age="
        + snapshot.get75thPercentile() + ", 95th percentile age=" + snapshot.get95thPercentile()
        + ", 98th percentile age=" + snapshot.get98thPercentile() + ", 99th percentile age="
        + snapshot.get99thPercentile() + ", 99.9th percentile age="
        + snapshot.get999thPercentile();
    }
  }
}