/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ConcurrentSkipListSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.metrics.impl.FastLongHistogram;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.GsonUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.gson.Gson;
import org.apache.hbase.thirdparty.com.google.gson.TypeAdapter;
import org.apache.hbase.thirdparty.com.google.gson.stream.JsonReader;
import org.apache.hbase.thirdparty.com.google.gson.stream.JsonWriter;

/**
 * Utility for aggregating counts in CachedBlocks and for toString/toJSON of CachedBlocks and
 * BlockCaches. No attempt has been made at making this thread safe.
 */
@InterfaceAudience.Private
public class BlockCacheUtil {

  private static final Logger LOG = LoggerFactory.getLogger(BlockCacheUtil.class);

  public static final long NANOS_PER_SECOND = 1000000000;

  /**
   * Needed for generating JSON.
   */
  private static final Gson GSON = GsonUtil.createGson()
    .registerTypeAdapter(FastLongHistogram.class, new TypeAdapter<FastLongHistogram>() {

      @Override
      public void write(JsonWriter out, FastLongHistogram value) throws IOException {
        AgeSnapshot snapshot = new AgeSnapshot(value);
        out.beginObject();
        out.name("mean").value(snapshot.getMean());
        out.name("min").value(snapshot.getMin());
        out.name("max").value(snapshot.getMax());
        out.name("75thPercentile").value(snapshot.get75thPercentile());
        out.name("95thPercentile").value(snapshot.get95thPercentile());
        out.name("98thPercentile").value(snapshot.get98thPercentile());
        out.name("99thPercentile").value(snapshot.get99thPercentile());
        out.name("999thPercentile").value(snapshot.get999thPercentile());
        out.endObject();
      }

      @Override
      public FastLongHistogram read(JsonReader in) throws IOException {
        throw new UnsupportedOperationException();
      }
    }).setPrettyPrinting().create();
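
  /*
   * A note on output shape (illustrative, values elided): the adapter above renders a
   * FastLongHistogram through an AgeSnapshot, so each histogram appears in the JSON as
   *
   *   { "mean": ..., "min": ..., "max": ..., "75thPercentile": ...,
   *     "95thPercentile": ..., "98thPercentile": ..., "99thPercentile": ...,
   *     "999thPercentile": ... }
   */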

  /** Returns the block content as a String. */
  public static String toString(final CachedBlock cb, final long now) {
    return "filename=" + cb.getFilename() + ", " + toStringMinusFileName(cb, now);
  }

  /**
   * Little data structure to hold counts for a file. Used when doing a toJSON.
   */
  static class CachedBlockCountsPerFile {
    private int count = 0;
    private long size = 0;
    private int countData = 0;
    private long sizeData = 0;
    private final String filename;

    CachedBlockCountsPerFile(final String filename) {
      this.filename = filename;
    }

    public int getCount() {
      return count;
    }

    public long getSize() {
      return size;
    }

    public int getCountData() {
      return countData;
    }

    public long getSizeData() {
      return sizeData;
    }

    public String getFilename() {
      return filename;
    }
  }

  /** Returns a JSON String of <code>filename</code> and counts of <code>blocks</code>. */
  public static String toJSON(String filename, NavigableSet<CachedBlock> blocks)
    throws IOException {
    CachedBlockCountsPerFile counts = new CachedBlockCountsPerFile(filename);
    for (CachedBlock cb : blocks) {
      counts.count++;
      counts.size += cb.getSize();
      BlockType bt = cb.getBlockType();
      if (bt != null && bt.isData()) {
        counts.countData++;
        counts.sizeData += cb.getSize();
      }
    }
    return GSON.toJson(counts);
  }
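
  /*
   * Illustrative call (a sketch): render per-file JSON for every file tracked by a
   * CachedBlocksByFile, where "cbsbf" is assumed to have been populated via
   * getLoadedCachedBlocksByFile below:
   *
   *   for (Map.Entry<String, NavigableSet<CachedBlock>> e : cbsbf.getCachedBlockStatsByFile()
   *     .entrySet()) {
   *     String json = BlockCacheUtil.toJSON(e.getKey(), e.getValue());
   *   }
   */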

  /** Returns a JSON string of <code>cbsbf</code> aggregated. */
  public static String toJSON(CachedBlocksByFile cbsbf) throws IOException {
    return GSON.toJson(cbsbf);
  }

  /** Returns a JSON string of <code>bc</code> content. */
  public static String toJSON(BlockCache bc) throws IOException {
    return GSON.toJson(bc);
  }

  /** Returns the block content of <code>cb</code> as a String, minus the filename. */
  public static String toStringMinusFileName(final CachedBlock cb, final long now) {
    return "offset=" + cb.getOffset() + ", size=" + cb.getSize() + ", age="
      + (now - cb.getCachedTime()) + ", type=" + cb.getBlockType() + ", priority="
      + cb.getBlockPriority();
  }

  /**
   * Get a {@link CachedBlocksByFile} instance and load it up by iterating content in
   * {@link BlockCache}.
   * @param conf Used to read configurations
   * @param bc   Block Cache to iterate.
   * @return Loaded up instance of CachedBlocksByFile
   */
  public static CachedBlocksByFile getLoadedCachedBlocksByFile(final Configuration conf,
    final BlockCache bc) {
    CachedBlocksByFile cbsbf = new CachedBlocksByFile(conf);
    for (CachedBlock cb : bc) {
      if (cbsbf.update(cb)) break;
    }
    return cbsbf;
  }
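
  /*
   * Illustrative usage (a sketch; "cache" is an assumed, populated BlockCache): load per-file
   * stats and dump them as JSON, e.g. for the UI:
   *
   *   CachedBlocksByFile cbsbf = BlockCacheUtil.getLoadedCachedBlocksByFile(conf, cache);
   *   String json = BlockCacheUtil.toJSON(cbsbf);
   *   if (cbsbf.isFull()) {
   *     // Stats are incomplete; we stopped scanning at hbase.ui.blockcache.by.file.max blocks.
   *   }
   */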

  private static int compareCacheBlock(Cacheable left, Cacheable right,
    boolean includeNextBlockMetadata) {
    ByteBuffer l = ByteBuffer.allocate(left.getSerializedLength());
    left.serialize(l, includeNextBlockMetadata);
    ByteBuffer r = ByteBuffer.allocate(right.getSerializedLength());
    right.serialize(r, includeNextBlockMetadata);
    return Bytes.compareTo(l.array(), l.arrayOffset(), l.limit(), r.array(), r.arrayOffset(),
      r.limit());
  }

  /**
   * Validate that the existing block and newBlock are the same, excluding the nextBlockMetadata;
   * if not, throw an exception. If they are the same excluding the nextBlockMetadata, return the
   * comparison.
   * @param existing block that already exists in the cache.
   * @param newBlock block that we are trying to cache.
   * @param cacheKey the cache key of the blocks.
   * @return comparison of the existing block to the newBlock.
   */
  public static int validateBlockAddition(Cacheable existing, Cacheable newBlock,
    BlockCacheKey cacheKey) {
    int comparison = compareCacheBlock(existing, newBlock, false);
    if (comparison != 0) {
      throw new RuntimeException(
        "Cached block contents differ, which should not have happened. cacheKey:" + cacheKey);
    }
    if ((existing instanceof HFileBlock) && (newBlock instanceof HFileBlock)) {
      comparison = ((HFileBlock) existing).getNextBlockOnDiskSize()
        - ((HFileBlock) newBlock).getNextBlockOnDiskSize();
    }
    return comparison;
  }
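
  /*
   * Illustrative semantics (a sketch): for two blocks whose serialized contents match when
   * nextBlockMetadata is excluded, the return value compares their nextBlockOnDiskSize:
   *
   *   int cmp = BlockCacheUtil.validateBlockAddition(existing, newBlock, cacheKey);
   *   // cmp == 0 -> identical, including nextBlockOnDiskSize
   *   // cmp > 0  -> existing has the larger nextBlockOnDiskSize; keep it
   *   // cmp < 0  -> newBlock has the larger nextBlockOnDiskSize; prefer it
   */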

  /**
   * Because of region splitting, it's possible that the split key is located in the middle of a
   * block, so both daughter regions may load the same block from their parent HFile. With pread,
   * we don't force the read to include all of the next block's header, so when two threads try to
   * cache the same block, it's possible that one thread read all of the next block header while
   * the other did not. If the already cached block lacks the next block header but the new block
   * to cache has it, we can replace the existing block with the new block for better performance.
   * (HBASE-20447)
   * @param blockCache BlockCache to check
   * @param cacheKey   the block cache key
   * @param newBlock   the new block to try to put into the block cache
   * @return true if the existing block should be replaced with the new block for the same block
   *         cache key; false if the existing block should be kept.
   */
  public static boolean shouldReplaceExistingCacheBlock(BlockCache blockCache,
    BlockCacheKey cacheKey, Cacheable newBlock) {
    // NOTICE: getBlock retains (increments the reference count of) the existing block.
    Cacheable existingBlock = blockCache.getBlock(cacheKey, false, false, false);
    if (existingBlock == null) {
      return true;
    }
    try {
      int comparison = BlockCacheUtil.validateBlockAddition(existingBlock, newBlock, cacheKey);
      if (comparison < 0) {
        LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the new block has "
          + "nextBlockOnDiskSize set. Caching new block.");
        return true;
      } else if (comparison > 0) {
        LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the existing block has "
228          + "nextBlockOnDiskSize set, Keeping cached block.");
        return false;
      } else {
        LOG.debug("Caching an already cached block: {}. This is harmless and can happen in rare "
          + "cases (see HBASE-8547)", cacheKey);
        return false;
      }
    } finally {
      // Release this block to decrement the reference count.
      existingBlock.release();
    }
  }
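
  /*
   * Illustrative caller pattern (a sketch): a cache implementation consulting this helper before
   * overwriting an existing entry, using the standard BlockCache#cacheBlock:
   *
   *   if (BlockCacheUtil.shouldReplaceExistingCacheBlock(cache, cacheKey, newBlock)) {
   *     cache.cacheBlock(cacheKey, newBlock);
   *   }
   */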

  private static final int DEFAULT_MAX = 1000000;

  public static int getMaxCachedBlocksByFile(Configuration conf) {
    return conf == null ? DEFAULT_MAX : conf.getInt("hbase.ui.blockcache.by.file.max", DEFAULT_MAX);
  }
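
  /*
   * The cap is configurable; for example (illustrative), doubling the number of blocks the UI
   * will account before giving up:
   *
   *   conf.setInt("hbase.ui.blockcache.by.file.max", 2000000);
   *   int max = BlockCacheUtil.getMaxCachedBlocksByFile(conf); // 2000000
   */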

  /**
   * Use one of these to keep a running account of cached blocks by file. Throw it away when done.
   * This is different from metrics in that these are stats on the current state of a cache. See
   * getLoadedCachedBlocksByFile.
   */
  public static class CachedBlocksByFile {
    private int count;
    private int dataBlockCount;
    private long size;
    private long dataSize;
    private final long now = System.nanoTime();
    /**
     * How many blocks to look at before we give up. There could be many millions of blocks. We
     * don't want the UI to freeze while we run through a billion blocks; users will think HBase
     * is dead. The UI displays a warning in red when stats are incomplete.
     */
    private final int max;

    CachedBlocksByFile() {
      this(null);
    }

    CachedBlocksByFile(final Configuration c) {
      this.max = getMaxCachedBlocksByFile(c);
    }

    /**
     * Map by filename. Use concurrent utils because we want our Map and contained blocks sorted.
     */
    private transient NavigableMap<String, NavigableSet<CachedBlock>> cachedBlockByFile =
      new ConcurrentSkipListMap<>();
    FastLongHistogram hist = new FastLongHistogram();

    /** Returns true if full, i.e. if we won't be adding any more. */
    public boolean update(final CachedBlock cb) {
      if (isFull()) return true;
      NavigableSet<CachedBlock> set = this.cachedBlockByFile.computeIfAbsent(cb.getFilename(),
        k -> new ConcurrentSkipListSet<>());
      set.add(cb);
      this.size += cb.getSize();
      this.count++;
      BlockType bt = cb.getBlockType();
      if (bt != null && bt.isData()) {
        this.dataBlockCount++;
        this.dataSize += cb.getSize();
      }
      long age = (this.now - cb.getCachedTime()) / NANOS_PER_SECOND;
      this.hist.add(age, 1);
      return false;
    }
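
    /*
     * Illustrative driver loop (a sketch; this mirrors what getLoadedCachedBlocksByFile in the
     * enclosing class does), where "cache" is an assumed, populated BlockCache:
     *
     *   CachedBlocksByFile cbsbf = new CachedBlocksByFile(conf);
     *   for (CachedBlock cb : cache) {
     *     if (cbsbf.update(cb)) break; // true means we hit the max; stop scanning
     *   }
     */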

    /**
     * @return True if full; i.e. there are more items in the cache but we only loaded up to the
     *         maximum set in configuration <code>hbase.ui.blockcache.by.file.max</code> (default
     *         1,000,000).
     */
    public boolean isFull() {
      return this.count >= this.max;
    }

    public NavigableMap<String, NavigableSet<CachedBlock>> getCachedBlockStatsByFile() {
      return this.cachedBlockByFile;
    }

    /** Returns count of blocks in the cache */
    public int getCount() {
      return count;
    }

    public int getDataCount() {
      return dataBlockCount;
    }

    /** Returns size of blocks in the cache */
    public long getSize() {
      return size;
    }

    /** Returns size of data. */
    public long getDataSize() {
      return dataSize;
    }

    public AgeSnapshot getAgeInCacheSnapshot() {
      return new AgeSnapshot(this.hist);
    }

    @Override
    public String toString() {
      AgeSnapshot snapshot = getAgeInCacheSnapshot();
      return "count=" + count + ", dataBlockCount=" + dataBlockCount + ", size=" + size
        + ", dataSize=" + getDataSize() + ", mean age=" + snapshot.getMean() + ", min age="
        + snapshot.getMin() + ", max age=" + snapshot.getMax() + ", 75th percentile age="
        + snapshot.get75thPercentile() + ", 95th percentile age=" + snapshot.get95thPercentile()
        + ", 98th percentile age=" + snapshot.get98thPercentile() + ", 99th percentile age="
        + snapshot.get99thPercentile() + ", 99.9th percentile age=" + snapshot.get999thPercentile();
    }
  }
}