/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.hfile;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.ConcurrentSkipListSet;

import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.metrics.impl.FastLongHistogram;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility for aggregating counts in CachedBlocks and toString/toJSON CachedBlocks and BlockCaches.
 * No attempt has been made at making this thread safe.
 */
@InterfaceAudience.Private
public class BlockCacheUtil {

  private static final Logger LOG = LoggerFactory.getLogger(BlockCacheUtil.class);

  public static final long NANOS_PER_SECOND = 1000000000;

  /**
   * Needed for generating JSON.
   */
  private static final ObjectMapper MAPPER = new ObjectMapper();
  static {
    MAPPER.configure(SerializationFeature.FAIL_ON_EMPTY_BEANS, false);
    MAPPER.configure(SerializationFeature.FLUSH_AFTER_WRITE_VALUE, true);
    MAPPER.configure(SerializationFeature.INDENT_OUTPUT, true);
  }

  /**
   * @param cb the cached block to render
   * @param now the current time (same time base as {@link CachedBlock#getCachedTime()}), used to
   *          compute the block's age
   * @return The block content as String.
   */
  public static String toString(final CachedBlock cb, final long now) {
    return "filename=" + cb.getFilename() + ", " + toStringMinusFileName(cb, now);
  }

  /**
   * Little data structure to hold counts for a file.
   * Used when doing a toJSON.
   */
  static class CachedBlockCountsPerFile {
    private int count = 0;
    private long size = 0;
    private int countData = 0;
    private long sizeData = 0;
    private final String filename;

    CachedBlockCountsPerFile(final String filename) {
      this.filename = filename;
    }

    public int getCount() {
      return count;
    }

    public long getSize() {
      return size;
    }

    public int getCountData() {
      return countData;
    }

    public long getSizeData() {
      return sizeData;
    }

    public String getFilename() {
      return filename;
    }
  }

  /**
   * @param filename the name of the file the blocks belong to
   * @param blocks the set of cached blocks to aggregate
   * @return A JSON String of <code>filename</code> and counts of <code>blocks</code>
   * @throws JsonGenerationException
   * @throws JsonMappingException
   * @throws IOException
   */
  public static String toJSON(final String filename, final NavigableSet<CachedBlock> blocks)
  throws JsonGenerationException, JsonMappingException, IOException {
    CachedBlockCountsPerFile counts = new CachedBlockCountsPerFile(filename);
    for (CachedBlock cb: blocks) {
      counts.count++;
      counts.size += cb.getSize();
      BlockType bt = cb.getBlockType();
      if (bt != null && bt.isData()) {
        counts.countData++;
        counts.sizeData += cb.getSize();
      }
    }
    return MAPPER.writeValueAsString(counts);
  }

  /**
   * @param cbsbf the aggregated per-file counts to render
   * @return JSON string of <code>cbsbf</code> aggregated
   * @throws JsonGenerationException
   * @throws JsonMappingException
   * @throws IOException
   */
  public static String toJSON(final CachedBlocksByFile cbsbf)
  throws JsonGenerationException, JsonMappingException, IOException {
    return MAPPER.writeValueAsString(cbsbf);
  }

  /**
   * @param bc the block cache to render
   * @return JSON string of <code>bc</code> content.
   * @throws JsonGenerationException
   * @throws JsonMappingException
   * @throws IOException
   */
  public static String toJSON(final BlockCache bc)
  throws JsonGenerationException, JsonMappingException, IOException {
    return MAPPER.writeValueAsString(bc);
  }

  /**
   * @param cb the cached block to render
   * @param now the current time (same time base as {@link CachedBlock#getCachedTime()})
   * @return The block content of <code>cb</code> as a String minus the filename.
   */
  public static String toStringMinusFileName(final CachedBlock cb, final long now) {
    return "offset=" + cb.getOffset() +
      ", size=" + cb.getSize() +
      ", age=" + (now - cb.getCachedTime()) +
      ", type=" + cb.getBlockType() +
      ", priority=" + cb.getBlockPriority();
  }

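  /*
   * Illustrative usage sketch for the two render helpers above; assumes a BlockCache instance
   * 'bc' is in scope (BlockCache is Iterable over its CachedBlocks):
   *
   *   long now = System.nanoTime();
   *   for (CachedBlock cb : bc) {
   *     LOG.debug(toString(cb, now));
   *   }
   */
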
  /**
   * Get a {@link CachedBlocksByFile} instance and load it up by iterating content in
   * {@link BlockCache}.
   * @param conf Used to read configurations
   * @param bc Block Cache to iterate.
   * @return Loaded up instance of CachedBlocksByFile
   */
  public static CachedBlocksByFile getLoadedCachedBlocksByFile(final Configuration conf,
      final BlockCache bc) {
    CachedBlocksByFile cbsbf = new CachedBlocksByFile(conf);
    for (CachedBlock cb: bc) {
      if (cbsbf.update(cb)) break;
    }
    return cbsbf;
  }

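  /*
   * Illustrative usage sketch for the loader above; assumes a Configuration 'conf' and a
   * BlockCache 'bc' are in scope:
   *
   *   CachedBlocksByFile cbsbf = BlockCacheUtil.getLoadedCachedBlocksByFile(conf, bc);
   *   String summary = cbsbf.toString();          // counts, sizes and age percentiles
   *   String json = BlockCacheUtil.toJSON(cbsbf); // the same aggregates rendered as JSON
   */
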
  /**
   * Compares the serialized contents of two {@link Cacheable}s.
   * @param includeNextBlockMetadata whether the next block's on-disk metadata is included in the
   *          serialized form being compared
   */
  private static int compareCacheBlock(Cacheable left, Cacheable right,
                                       boolean includeNextBlockMetadata) {
    ByteBuffer l = ByteBuffer.allocate(left.getSerializedLength());
    left.serialize(l, includeNextBlockMetadata);
    ByteBuffer r = ByteBuffer.allocate(right.getSerializedLength());
    right.serialize(r, includeNextBlockMetadata);
    return Bytes.compareTo(l.array(), l.arrayOffset(), l.limit(),
        r.array(), r.arrayOffset(), r.limit());
  }

  /**
   * Validate that the existing and newBlock are the same without including the nextBlockMetadata;
   * if not, throw an exception. If they are the same without the nextBlockMetadata,
   * return the comparison.
   *
   * @param existing block that already exists in the cache.
   * @param newBlock block that is trying to be cached.
   * @param cacheKey the cache key of the blocks.
   * @return comparison of the existing block to the newBlock.
   */
  public static int validateBlockAddition(Cacheable existing, Cacheable newBlock,
                                          BlockCacheKey cacheKey) {
    int comparison = compareCacheBlock(existing, newBlock, false);
    if (comparison != 0) {
      throw new RuntimeException("Cached block contents differ, which should not have happened. "
                                 + "cacheKey:" + cacheKey);
    }
    if ((existing instanceof HFileBlock) && (newBlock instanceof HFileBlock)) {
      comparison = ((HFileBlock) existing).getNextBlockOnDiskSize()
          - ((HFileBlock) newBlock).getNextBlockOnDiskSize();
    }
    return comparison;
  }

  /**
   * Because of region splitting, it's possible that the split key lies in the middle of a block,
   * so both daughter regions may load the same block from their parent HFile. When doing a pread,
   * we don't force the read to read all of the next block header, so when two threads try to
   * cache the same block, it's possible that one thread read all of the next block header but the
   * other one didn't. If the already cached block doesn't have the next block header but the new
   * block to cache does, then we can replace the existing block with the new block for better
   * performance (HBASE-20447).
   * @param blockCache BlockCache to check
   * @param cacheKey the block cache key
   * @param newBlock the new block to try to put into the block cache
   * @return true if the existing block should be replaced with the new block for the same block
   *         cache key; false if the existing block should be kept.
   */
  public static boolean shouldReplaceExistingCacheBlock(BlockCache blockCache,
      BlockCacheKey cacheKey, Cacheable newBlock) {
    Cacheable existingBlock = blockCache.getBlock(cacheKey, false, false, false);
    if (null == existingBlock) {
      // Nothing is cached for this key yet, so cache the new block.
      return true;
    }
    try {
      int comparison = BlockCacheUtil.validateBlockAddition(existingBlock, newBlock, cacheKey);
      if (comparison < 0) {
        LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the new block has "
            + "nextBlockOnDiskSize set. Caching new block.");
        return true;
      } else if (comparison > 0) {
        LOG.warn("Cached block contents differ by nextBlockOnDiskSize, the existing block has "
            + "nextBlockOnDiskSize set. Keeping cached block.");
        return false;
      } else {
        LOG.warn("Caching an already cached block: {}. This is harmless and can happen in rare "
            + "cases (see HBASE-8547)",
          cacheKey);
        return false;
      }
    } finally {
      // Return the block since we need to decrement the count.
      blockCache.returnBlock(cacheKey, existingBlock);
    }
  }

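  /*
   * Illustrative usage sketch from the point of view of a BlockCache implementation; assumes
   * 'cacheKey' and 'newBlock' are the arguments passed to its cacheBlock(...) method:
   *
   *   if (!BlockCacheUtil.shouldReplaceExistingCacheBlock(this, cacheKey, newBlock)) {
   *     return; // keep the block that is already cached
   *   }
   *   // otherwise evict the stale entry (if any) and cache 'newBlock' under 'cacheKey'
   */
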
  /**
   * Use one of these to keep a running account of cached blocks by file.  Throw it away when done.
   * This is different from metrics in that it is stats on the current state of a cache.
   * See getLoadedCachedBlocksByFile.
   */
  @JsonIgnoreProperties({"cachedBlockStatsByFile"})
  public static class CachedBlocksByFile {
    private int count;
    private int dataBlockCount;
    private long size;
    private long dataSize;
    private final long now = System.nanoTime();
    /**
     * How many blocks to look at before we give up.
     * There could be many millions of blocks. We don't want the
     * ui to freeze while we run through 1B blocks... users will
     * think hbase is dead. UI displays a warning in red when stats
     * are incomplete.
     */
    private final int max;
    public static final int DEFAULT_MAX = 1000000;

    CachedBlocksByFile() {
      this(null);
    }

    CachedBlocksByFile(final Configuration c) {
      this.max = c == null? DEFAULT_MAX: c.getInt("hbase.ui.blockcache.by.file.max", DEFAULT_MAX);
    }

    /**
     * Map by filename. Use concurrent utils because we want our Map and contained blocks sorted.
     */
    private NavigableMap<String, NavigableSet<CachedBlock>> cachedBlockByFile = new ConcurrentSkipListMap<>();
    FastLongHistogram hist = new FastLongHistogram();

    /**
     * @param cb the cached block to fold into the running totals
     * @return True if full; i.e. we won't be adding any more.
     */
    public boolean update(final CachedBlock cb) {
      if (isFull()) return true;
      NavigableSet<CachedBlock> set = this.cachedBlockByFile.get(cb.getFilename());
      if (set == null) {
        set = new ConcurrentSkipListSet<>();
        this.cachedBlockByFile.put(cb.getFilename(), set);
      }
      set.add(cb);
      this.size += cb.getSize();
      this.count++;
      BlockType bt = cb.getBlockType();
      if (bt != null && bt.isData()) {
        this.dataBlockCount++;
        this.dataSize += cb.getSize();
      }
      long age = (this.now - cb.getCachedTime())/NANOS_PER_SECOND;
      this.hist.add(age, 1);
      return false;
    }


    /**
     * @return True if full; i.e. there are more items in the cache but we only loaded up
     * the maximum set in configuration <code>hbase.ui.blockcache.by.file.max</code>
     * (Default: DEFAULT_MAX).
     */
    public boolean isFull() {
      return this.count >= this.max;
    }

    public NavigableMap<String, NavigableSet<CachedBlock>> getCachedBlockStatsByFile() {
      return this.cachedBlockByFile;
    }

    /**
     * @return count of blocks in the cache
     */
    public int getCount() {
      return count;
    }

    public int getDataCount() {
      return dataBlockCount;
    }

    /**
     * @return size of blocks in the cache
     */
    public long getSize() {
      return size;
    }

    /**
     * @return Size of data.
     */
    public long getDataSize() {
      return dataSize;
    }

    public AgeSnapshot getAgeInCacheSnapshot() {
      return new AgeSnapshot(this.hist);
    }

    @Override
    public String toString() {
      AgeSnapshot snapshot = getAgeInCacheSnapshot();
      return "count=" + count + ", dataBlockCount=" + dataBlockCount + ", size=" + size +
          ", dataSize=" + getDataSize() +
          ", mean age=" + snapshot.getMean() +
          ", min age=" + snapshot.getMin() +
          ", max age=" + snapshot.getMax() +
          ", 75th percentile age="   + snapshot.get75thPercentile() +
          ", 95th percentile age="   + snapshot.get95thPercentile() +
          ", 98th percentile age="   + snapshot.get98thPercentile() +
          ", 99th percentile age="   + snapshot.get99thPercentile() +
          ", 99.9th percentile age=" + snapshot.get999thPercentile();
    }
  }
}