/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import static org.apache.hadoop.hbase.io.hfile.HFileBlock.FILL_HEADER;

import java.io.IOException;
import java.lang.reflect.Modifier;
import java.nio.ByteBuffer;
import java.util.HashSet;
import java.util.Map;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.Set;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ConcurrentSkipListSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.metrics.impl.FastLongHistogram;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.hadoop.hbase.util.GsonUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.gson.Gson;
import org.apache.hbase.thirdparty.com.google.gson.TypeAdapter;
import org.apache.hbase.thirdparty.com.google.gson.stream.JsonReader;
import org.apache.hbase.thirdparty.com.google.gson.stream.JsonWriter;

/**
 * Utility for aggregating counts in CachedBlocks, and for toString/toJSON of CachedBlocks and
 * BlockCaches. No attempt has been made at making this thread safe.
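 * <p>
 * Typical usage (an illustrative sketch only; {@code conf} and {@code blockCache} are assumed to
 * be an existing {@link Configuration} and {@link BlockCache} in the caller's scope):
 *
 * <pre>{@code
 * BlockCacheUtil.CachedBlocksByFile cbsbf =
 *   BlockCacheUtil.getLoadedCachedBlocksByFile(conf, blockCache);
 * String json = BlockCacheUtil.toJSON(cbsbf);
 * }</pre>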
 */
@InterfaceAudience.Private
public class BlockCacheUtil {

  private static final Logger LOG = LoggerFactory.getLogger(BlockCacheUtil.class);

  public static final long NANOS_PER_SECOND = 1000000000;

  /**
   * Needed for generating JSON.
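   * <p>
   * The registered {@link FastLongHistogram} adapter writes the histogram as an
   * {@link AgeSnapshot}, so a histogram serializes to an object shaped roughly like the following
   * (values are illustrative only):
   *
   * <pre>{@code
   * { "mean": 10, "min": 1, "max": 100, "75thPercentile": 20, "95thPercentile": 50,
   *   "98thPercentile": 70, "99thPercentile": 90, "999thPercentile": 99 }
   * }</pre>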
   */
  private static final Gson GSON =
    GsonUtil.createGson().excludeFieldsWithModifiers(Modifier.PRIVATE)
      .registerTypeAdapter(FastLongHistogram.class, new TypeAdapter<FastLongHistogram>() {

        @Override
        public void write(JsonWriter out, FastLongHistogram value) throws IOException {
          AgeSnapshot snapshot = new AgeSnapshot(value);
          out.beginObject();
          out.name("mean").value(snapshot.getMean());
          out.name("min").value(snapshot.getMin());
          out.name("max").value(snapshot.getMax());
          out.name("75thPercentile").value(snapshot.get75thPercentile());
          out.name("95thPercentile").value(snapshot.get95thPercentile());
          out.name("98thPercentile").value(snapshot.get98thPercentile());
          out.name("99thPercentile").value(snapshot.get99thPercentile());
          out.name("999thPercentile").value(snapshot.get999thPercentile());
          out.endObject();
        }

        @Override
        public FastLongHistogram read(JsonReader in) throws IOException {
          throw new UnsupportedOperationException();
        }
      }).setPrettyPrinting().create();

  /** Returns the block content as a String. */
  public static String toString(final CachedBlock cb, final long now) {
    return "filename=" + cb.getFilename() + ", " + toStringMinusFileName(cb, now);
  }

  /**
   * Little data structure to hold counts for a file. Used when doing a toJSON.
   */
  static class CachedBlockCountsPerFile {
    private int count = 0;
    private long size = 0;
    private int countData = 0;
    private long sizeData = 0;
    private final String filename;

    CachedBlockCountsPerFile(final String filename) {
      this.filename = filename;
    }

    public int getCount() {
      return count;
    }

    public long getSize() {
      return size;
    }

    public int getCountData() {
      return countData;
    }

    public long getSizeData() {
      return sizeData;
    }

    public String getFilename() {
      return filename;
    }
  }

  /** Returns a JSON String of <code>filename</code> and counts of <code>blocks</code>. */
  public static String toJSON(String filename, NavigableSet<CachedBlock> blocks)
    throws IOException {
    CachedBlockCountsPerFile counts = new CachedBlockCountsPerFile(filename);
    for (CachedBlock cb : blocks) {
      counts.count++;
      counts.size += cb.getSize();
      BlockType bt = cb.getBlockType();
      if (bt != null && bt.isData()) {
        counts.countData++;
        counts.sizeData += cb.getSize();
      }
    }
    return GSON.toJson(counts);
  }

  /** Returns a JSON string of <code>cbsbf</code> aggregated. */
  public static String toJSON(CachedBlocksByFile cbsbf) throws IOException {
    return GSON.toJson(cbsbf);
  }

  /** Returns JSON string of <code>bc</code> content. */
  public static String toJSON(BlockCache bc) throws IOException {
    return GSON.toJson(bc);
  }

  /** Returns the block content of <code>cb</code> as a String, minus the filename. */
  public static String toStringMinusFileName(final CachedBlock cb, final long now) {
    return "offset=" + cb.getOffset() + ", size=" + cb.getSize() + ", age="
      + (now - cb.getCachedTime()) + ", type=" + cb.getBlockType() + ", priority="
      + cb.getBlockPriority();
  }

  /**
   * Get a {@link CachedBlocksByFile} instance and load it up by iterating content in
   * {@link BlockCache}.
   * @param conf Used to read configurations
   * @param bc   Block Cache to iterate.
   * @return Loaded up instance of CachedBlocksByFile
   */
  public static CachedBlocksByFile getLoadedCachedBlocksByFile(final Configuration conf,
    final BlockCache bc) {
    CachedBlocksByFile cbsbf = new CachedBlocksByFile(conf);
    for (CachedBlock cb : bc) {
      if (cbsbf.update(cb)) break;
    }
    return cbsbf;
  }

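  /**
   * Serializes both blocks into temporary heap buffers and byte-compares the results. When
   * <code>includeNextBlockMetadata</code> is false, the next block's metadata (e.g. an
   * HFileBlock's nextBlockOnDiskSize) is left out of the serialized form, so blocks that differ
   * only in that metadata compare as equal.
   */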
  private static int compareCacheBlock(Cacheable left, Cacheable right,
    boolean includeNextBlockMetadata) {
    ByteBuffer l = ByteBuffer.allocate(left.getSerializedLength());
    left.serialize(l, includeNextBlockMetadata);
    ByteBuffer r = ByteBuffer.allocate(right.getSerializedLength());
    right.serialize(r, includeNextBlockMetadata);
    return Bytes.compareTo(l.array(), l.arrayOffset(), l.limit(), r.array(), r.arrayOffset(),
      r.limit());
  }

  /**
   * Validate that the existing block and newBlock are identical when the nextBlockMetadata is
   * excluded; if they are not, throw an exception. If they are identical without the
   * nextBlockMetadata, return the comparison of their nextBlockOnDiskSize values.
   * @param existing block that is existing in the cache.
   * @param newBlock block that is trying to be cached.
   * @param cacheKey the cache key of the blocks.
   * @return the difference between the existing block's and the new block's nextBlockOnDiskSize
   *         when both are HFileBlocks, otherwise zero.
   */
  public static int validateBlockAddition(Cacheable existing, Cacheable newBlock,
    BlockCacheKey cacheKey) {
    int comparison = compareCacheBlock(existing, newBlock, false);
    if (comparison != 0) {
      throw new RuntimeException(
        "Cached block contents differ, which should not have happened. cacheKey:" + cacheKey);
    }
    if ((existing instanceof HFileBlock) && (newBlock instanceof HFileBlock)) {
      comparison = ((HFileBlock) existing).getNextBlockOnDiskSize()
        - ((HFileBlock) newBlock).getNextBlockOnDiskSize();
    }
    return comparison;
  }

  /**
   * Because of region splitting, the split key may fall in the middle of a block, so both
   * daughter regions can end up loading the same block from their parent HFile. With pread we do
   * not force the read to include the whole next block header, so when two threads try to cache
   * the same block, one of them may have read the next block header while the other did not. If
   * the already cached block lacks the next block header but the new block to cache has it, we
   * can replace the existing block with the new block for better performance. (HBASE-20447)
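   * <p>
   * An illustrative caller sketch (not taken from any specific cache implementation;
   * {@code cache}, {@code key} and {@code block} are assumed to exist in the caller's scope):
   *
   * <pre>{@code
   * if (BlockCacheUtil.shouldReplaceExistingCacheBlock(cache, key, block)) {
   *   cache.cacheBlock(key, block);
   * }
   * }</pre>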
   * @param blockCache BlockCache to check
   * @param cacheKey   the block cache key
   * @param newBlock   the new block to put into the block cache.
   * @return true if the existing block should be replaced with the new block for the same block
   *         cache key, false if the existing block should be kept.
   */
  public static boolean shouldReplaceExistingCacheBlock(BlockCache blockCache,
    BlockCacheKey cacheKey, Cacheable newBlock) {
    // NOTICE: getBlock retains (reference counts) the returned existingBlock; it is released in
    // the finally block below.
    Cacheable existingBlock = blockCache.getBlock(cacheKey, false, false, false);
    if (existingBlock == null) {
      return true;
    }
    try {
      int comparison = BlockCacheUtil.validateBlockAddition(existingBlock, newBlock, cacheKey);
      if (comparison < 0) {
        LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the new block has "
          + "nextBlockOnDiskSize set. Caching new block.");
        return true;
      } else if (comparison > 0) {
        LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the existing block has "
          + "nextBlockOnDiskSize set. Keeping cached block.");
        return false;
      } else {
        LOG.debug("Caching an already cached block: {}. This is harmless and can happen in rare "
          + "cases (see HBASE-8547)", cacheKey);
        return false;
      }
    } finally {
      // Release this block to decrement the reference count.
      existingBlock.release();
    }
  }

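  /**
   * Returns the store file names of all stores of the given online regions.
   */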
  public static Set<String> listAllFilesNames(Map<String, HRegion> onlineRegions) {
    Set<String> files = new HashSet<>();
    onlineRegions.values().forEach(r -> {
      r.getStores().forEach(s -> {
        s.getStorefiles().forEach(f -> files.add(f.getPath().getName()));
      });
    });
    return files;
  }

  private static final int DEFAULT_MAX = 1000000;

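  /**
   * Returns the maximum number of blocks to examine while building {@link CachedBlocksByFile}
   * stats, as configured by <code>hbase.ui.blockcache.by.file.max</code> (default 1000000).
   */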
  public static int getMaxCachedBlocksByFile(Configuration conf) {
    return conf == null ? DEFAULT_MAX : conf.getInt("hbase.ui.blockcache.by.file.max", DEFAULT_MAX);
  }

  /**
   * Similar to HFileBlock.Writer.getBlockForCaching(), creates an HFileBlock instance without
   * checksums for caching. This is needed when we cache blocks via readers (either prefetch or
   * client read); otherwise we may fail the equality comparison when checking against the same
   * block that may already have been cached at write time.
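   * <p>
   * An illustrative usage sketch (assumes {@code cacheConf}, {@code cacheKey} and {@code block}
   * already exist in the caller's scope):
   *
   * <pre>{@code
   * HFileBlock blockForCaching = BlockCacheUtil.getBlockForCaching(cacheConf, block);
   * cacheConf.getBlockCache().ifPresent(cache -> cache.cacheBlock(cacheKey, blockForCaching));
   * }</pre>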
   * @param cacheConf the related CacheConfig object.
   * @param block     the HFileBlock instance to be converted.
   * @return the resulting HFileBlock instance without checksum.
   */
  public static HFileBlock getBlockForCaching(CacheConfig cacheConf, HFileBlock block) {
    // Calculate how many bytes we need for checksum on the tail of the block.
    int numBytes = cacheConf.shouldCacheCompressed(block.getBlockType().getCategory())
      ? 0
      : (int) ChecksumUtil.numBytes(block.getOnDiskDataSizeWithHeader(),
        block.getHFileContext().getBytesPerChecksum());
    ByteBuff buff = block.getBufferReadOnly();
    HFileBlockBuilder builder = new HFileBlockBuilder();
    return builder.withBlockType(block.getBlockType())
      .withOnDiskSizeWithoutHeader(block.getOnDiskSizeWithoutHeader())
      .withUncompressedSizeWithoutHeader(block.getUncompressedSizeWithoutHeader())
      .withPrevBlockOffset(block.getPrevBlockOffset()).withByteBuff(buff)
      .withFillHeader(FILL_HEADER).withOffset(block.getOffset())
      .withOnDiskDataSizeWithHeader(block.getOnDiskDataSizeWithHeader() + numBytes)
      .withNextBlockOnDiskSize(block.getNextBlockOnDiskSize())
      .withHFileContext(cloneContext(block.getHFileContext()))
      .withByteBuffAllocator(cacheConf.getByteBuffAllocator()).withShared(!buff.hasArray()).build();
  }

  public static HFileContext cloneContext(HFileContext context) {
    HFileContext newContext = new HFileContextBuilder().withBlockSize(context.getBlocksize())
      .withBytesPerCheckSum(0).withChecksumType(ChecksumType.NULL) // no checksums in cached data
      .withCompression(context.getCompression())
      .withDataBlockEncoding(context.getDataBlockEncoding())
      .withHBaseCheckSum(context.isUseHBaseChecksum()).withCompressTags(context.isCompressTags())
      .withIncludesMvcc(context.isIncludesMvcc()).withIncludesTags(context.isIncludesTags())
      .withColumnFamily(context.getColumnFamily()).withTableName(context.getTableName()).build();
    return newContext;
  }

  /**
   * Use one of these to keep a running account of cached blocks by file. Throw it away when done.
   * This is different from metrics in that it is stats on the current state of a cache. See
   * {@link BlockCacheUtil#getLoadedCachedBlocksByFile(Configuration, BlockCache)}.
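   * <p>
   * An illustrative sketch of manual use (the same loop that getLoadedCachedBlocksByFile runs;
   * the constructors are package-private, and {@code conf} and {@code blockCache} are assumed to
   * be in scope):
   *
   * <pre>{@code
   * CachedBlocksByFile cbsbf = new CachedBlocksByFile(conf);
   * for (CachedBlock cb : blockCache) {
   *   if (cbsbf.update(cb)) {
   *     break; // gave up after hbase.ui.blockcache.by.file.max blocks
   *   }
   * }
   * String summary = cbsbf.toString();
   * }</pre>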
   */
  public static class CachedBlocksByFile {
    private int count;
    private int dataBlockCount;
    private long size;
    private long dataSize;
    private final long now = System.nanoTime();
    /**
     * How many blocks to look at before we give up. There could be many millions of blocks; we
     * don't want the UI to freeze while we run through a billion of them, or users will think
     * HBase is dead. The UI displays a warning in red when stats are incomplete.
     */
    private final int max;

    CachedBlocksByFile() {
      this(null);
    }

    CachedBlocksByFile(final Configuration c) {
      this.max = getMaxCachedBlocksByFile(c);
    }

    /**
     * Map by filename. Use concurrent utils because we want our Map and contained blocks sorted.
     */
    private transient NavigableMap<String, NavigableSet<CachedBlock>> cachedBlockByFile =
      new ConcurrentSkipListMap<>();
    FastLongHistogram hist = new FastLongHistogram();

    /** Returns true if full, i.e. if we won't be adding any more. */
    public boolean update(final CachedBlock cb) {
      if (isFull()) return true;
      NavigableSet<CachedBlock> set = this.cachedBlockByFile.get(cb.getFilename());
      if (set == null) {
        set = new ConcurrentSkipListSet<>();
        this.cachedBlockByFile.put(cb.getFilename(), set);
      }
      set.add(cb);
      this.size += cb.getSize();
      this.count++;
      BlockType bt = cb.getBlockType();
      if (bt != null && bt.isData()) {
        this.dataBlockCount++;
        this.dataSize += cb.getSize();
      }
      long age = (this.now - cb.getCachedTime()) / NANOS_PER_SECOND;
      this.hist.add(age, 1);
      return false;
    }

    /**
     * @return True if full; i.e. there are more items in the cache but we only loaded up to the
     *         maximum set in configuration <code>hbase.ui.blockcache.by.file.max</code> (default
     *         1000000).
     */
    public boolean isFull() {
      return this.count >= this.max;
    }

    public NavigableMap<String, NavigableSet<CachedBlock>> getCachedBlockStatsByFile() {
      return this.cachedBlockByFile;
    }

    /** Returns count of blocks in the cache */
    public int getCount() {
      return count;
    }

    public int getDataCount() {
      return dataBlockCount;
    }

    /** Returns size of blocks in the cache */
    public long getSize() {
      return size;
    }

    /** Returns Size of data. */
    public long getDataSize() {
      return dataSize;
    }

    public AgeSnapshot getAgeInCacheSnapshot() {
      return new AgeSnapshot(this.hist);
    }

    @Override
    public String toString() {
      AgeSnapshot snapshot = getAgeInCacheSnapshot();
      return "count=" + count + ", dataBlockCount=" + dataBlockCount + ", size=" + size
        + ", dataSize=" + getDataSize() + ", mean age=" + snapshot.getMean() + ", min age="
        + snapshot.getMin() + ", max age=" + snapshot.getMax() + ", 75th percentile age="
        + snapshot.get75thPercentile() + ", 95th percentile age=" + snapshot.get95thPercentile()
        + ", 98th percentile age=" + snapshot.get98thPercentile() + ", 99th percentile age="
        + snapshot.get99thPercentile() + ", 99.9th percentile age="
        + snapshot.get999thPercentile();
    }
  }
}