1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver;
20  
21  import java.io.DataInput;
22  import java.io.IOException;
23  import java.net.InetSocketAddress;
24  import java.util.Arrays;
25  import java.util.Collection;
26  import java.util.Collections;
27  import java.util.Comparator;
28  import java.util.Map;
29  import java.util.SortedSet;
30  import java.util.UUID;
31  import java.util.concurrent.atomic.AtomicBoolean;
32  import java.util.concurrent.atomic.AtomicInteger;
33  
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.conf.Configuration;
37  import org.apache.hadoop.fs.FileSystem;
38  import org.apache.hadoop.fs.Path;
39  import org.apache.hadoop.hbase.Cell;
40  import org.apache.hadoop.hbase.CellComparator;
41  import org.apache.hadoop.hbase.CellUtil;
42  import org.apache.hadoop.hbase.HConstants;
43  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
44  import org.apache.hadoop.hbase.KeyValue;
45  import org.apache.hadoop.hbase.KeyValueUtil;
46  import org.apache.hadoop.hbase.classification.InterfaceAudience;
47  import org.apache.hadoop.hbase.client.Scan;
48  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
49  import org.apache.hadoop.hbase.io.TimeRange;
50  import org.apache.hadoop.hbase.io.hfile.BlockType;
51  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
52  import org.apache.hadoop.hbase.io.hfile.HFile;
53  import org.apache.hadoop.hbase.io.hfile.HFileBlock;
54  import org.apache.hadoop.hbase.io.hfile.HFileContext;
55  import org.apache.hadoop.hbase.io.hfile.HFileScanner;
56  import org.apache.hadoop.hbase.nio.ByteBuff;
57  import org.apache.hadoop.hbase.regionserver.compactions.Compactor;
58  import org.apache.hadoop.hbase.util.BloomFilter;
59  import org.apache.hadoop.hbase.util.BloomFilterFactory;
60  import org.apache.hadoop.hbase.util.BloomFilterWriter;
61  import org.apache.hadoop.hbase.util.Bytes;
62  import org.apache.hadoop.hbase.util.Writables;
63  import org.apache.hadoop.io.WritableUtils;
64  
65  import com.google.common.annotations.VisibleForTesting;
66  import com.google.common.base.Function;
67  import com.google.common.base.Preconditions;
68  import com.google.common.collect.ImmutableList;
69  import com.google.common.collect.Ordering;
70  
71  /**
72   * A Store data file.  Stores usually have one or more of these files.  They
73   * are produced by flushing the memstore to disk.  To create one,
74   * instantiate a writer using {@link StoreFile.WriterBuilder}
75   * and append data. Be sure to add any metadata before calling close on the
76   * Writer (use the appendMetadata convenience methods). On close, the StoreFile
77   * resides in the Filesystem.  To refer to it, create a StoreFile instance
78   * passing the filesystem and path.  To read, call {@link #createReader()}.
79   * <p>StoreFiles may also reference store files in another Store.
80   *
81   * The reason for this pattern, where a different instance is used for the
82   * writer and the reader, is that we write once but read many times.
83   */
84  @InterfaceAudience.LimitedPrivate("Coprocessor")
85  public class StoreFile {
86    private static final Log LOG = LogFactory.getLog(StoreFile.class.getName());
87  
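  // A hedged usage sketch of the write-once / read-many flow described in the
  // class comment above; not part of the original file. Assumes fs, conf,
  // cacheConf, dir, fileContext, cells and maxSeqId are initialized elsewhere.
  //
  //   StoreFile.Writer w = new StoreFile.WriterBuilder(conf, cacheConf, fs)
  //       .withOutputDir(dir)
  //       .withBloomType(BloomType.ROW)
  //       .withFileContext(fileContext)
  //       .build();
  //   for (Cell cell : cells) {
  //     w.append(cell);
  //   }
  //   w.appendMetadata(maxSeqId, false); // add metadata BEFORE close()
  //   w.close();
  //
  //   StoreFile sf = new StoreFile(fs, w.getPath(), conf, cacheConf, BloomType.ROW);
  //   StoreFile.Reader reader = sf.createReader();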
88    // Keys for fileinfo values in HFile
89  
90    /** Max Sequence ID in FileInfo */
91    public static final byte [] MAX_SEQ_ID_KEY = Bytes.toBytes("MAX_SEQ_ID_KEY");
92  
93    /** Major compaction flag in FileInfo */
94    public static final byte[] MAJOR_COMPACTION_KEY =
95        Bytes.toBytes("MAJOR_COMPACTION_KEY");
96  
97   /** Exclude-from-minor-compaction flag in FileInfo */
98    public static final byte[] EXCLUDE_FROM_MINOR_COMPACTION_KEY =
99        Bytes.toBytes("EXCLUDE_FROM_MINOR_COMPACTION");
100 
101   /** Bloom filter Type in FileInfo */
102   public static final byte[] BLOOM_FILTER_TYPE_KEY =
103       Bytes.toBytes("BLOOM_FILTER_TYPE");
104 
105   /** Delete Family Count in FileInfo */
106   public static final byte[] DELETE_FAMILY_COUNT =
107       Bytes.toBytes("DELETE_FAMILY_COUNT");
108 
109   /** Last Bloom filter key in FileInfo */
110   private static final byte[] LAST_BLOOM_KEY = Bytes.toBytes("LAST_BLOOM_KEY");
111 
112   /** Key for Timerange information in metadata*/
113   public static final byte[] TIMERANGE_KEY = Bytes.toBytes("TIMERANGE");
114 
115   /** Key for timestamp of earliest-put in metadata*/
116   public static final byte[] EARLIEST_PUT_TS = Bytes.toBytes("EARLIEST_PUT_TS");
117 
118   /** Key for the number of mob cells in metadata*/
119   public static final byte[] MOB_CELLS_COUNT = Bytes.toBytes("MOB_CELLS_COUNT");
120 
121   private final StoreFileInfo fileInfo;
122   private final FileSystem fs;
123 
124   // Block cache configuration and reference.
125   private final CacheConfig cacheConf;
126 
127   // Metadata read from the backing HFile.
128   // Set when we obtain a Reader.
129   private long sequenceid = -1;
130 
131   // max of the MemstoreTS in the KVs in this store file
132   // Set when we obtain a Reader.
133   private long maxMemstoreTS = -1;
134 
135   // firstKey, lastKey and comparator are set when the reader is opened.
136   private Cell firstKey;
137 
138   private Cell lastKey;
139 
140   private Comparator comparator;
141 
142   CacheConfig getCacheConf() {
143     return cacheConf;
144   }
145 
146   public Cell getFirstKey() {
147     return firstKey;
148   }
149 
150   public Cell getLastKey() {
151     return lastKey;
152   }
153 
154   public Comparator getComparator() {
155     return comparator;
156   }
157 
158   public long getMaxMemstoreTS() {
159     return maxMemstoreTS;
160   }
161 
162   public void setMaxMemstoreTS(long maxMemstoreTS) {
163     this.maxMemstoreTS = maxMemstoreTS;
164   }
165 
166   // If true, this file was the product of a major compaction.  It's set
167   // whenever you get a Reader.
168   private AtomicBoolean majorCompaction = null;
169 
170   // If true, this file should not be included in minor compactions.
171   // It's set whenever you get a Reader.
172   private boolean excludeFromMinorCompaction = false;
173 
174   /** Meta key set when store file is a result of a bulk load */
175   public static final byte[] BULKLOAD_TASK_KEY =
176     Bytes.toBytes("BULKLOAD_SOURCE_TASK");
177   public static final byte[] BULKLOAD_TIME_KEY =
178     Bytes.toBytes("BULKLOAD_TIMESTAMP");
179 
180   /**
181    * Map of the metadata entries in the corresponding HFile
182    */
183   private Map<byte[], byte[]> metadataMap;
184 
185   // StoreFile.Reader
186   private volatile Reader reader;
187 
188   /**
189    * Bloom filter type specified in column family configuration. Does not
190    * necessarily correspond to the Bloom filter type present in the HFile.
191    */
192   private final BloomType cfBloomType;
193 
194   /**
195    * Key for skipping resetting sequence id in metadata.
196    * For bulk loaded hfiles, the scanner resets the cell seqId to the latest one;
197    * if this metadata is set to true, the reset is skipped.
198    */
199   public static final byte[] SKIP_RESET_SEQ_ID = Bytes.toBytes("SKIP_RESET_SEQ_ID");
200 
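  // Hedged sketch: a bulk-load writer opting out of seqId resetting via the
  // flag above; `w` is a hypothetical StoreFile.Writer.
  //
  //   w.appendFileInfo(StoreFile.SKIP_RESET_SEQ_ID, Bytes.toBytes(true));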
201   /**
202    * Constructor, loads a reader and its indices, etc. May allocate a
203    * substantial amount of ram depending on the underlying files (10-20MB?).
204    *
205    * @param fs  The current file system to use.
206    * @param p  The path of the file.
207    * @param conf  The current configuration.
208    * @param cacheConf  The cache configuration and block cache reference.
209    * @param cfBloomType The bloom type to use for this store file as specified
210    *          by column family configuration. This may or may not be the same
211    *          as the Bloom filter type actually present in the HFile, because
212    *          column family configuration might change. If this is
213    *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
214    * @throws IOException When opening the reader fails.
215    */
216   public StoreFile(final FileSystem fs, final Path p, final Configuration conf,
217         final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException {
218     this(fs, new StoreFileInfo(conf, fs, p), conf, cacheConf, cfBloomType);
219   }
220 
221 
222   /**
223    * Constructor, loads a reader and its indices, etc. May allocate a
224    * substantial amount of ram depending on the underlying files (10-20MB?).
225    *
226    * @param fs  The current file system to use.
227    * @param fileInfo  The store file information.
228    * @param conf  The current configuration.
229    * @param cacheConf  The cache configuration and block cache reference.
230    * @param cfBloomType The bloom type to use for this store file as specified
231    *          by column family configuration. This may or may not be the same
232    *          as the Bloom filter type actually present in the HFile, because
233    *          column family configuration might change. If this is
234    *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
235    * @throws IOException When opening the reader fails.
236    */
237   public StoreFile(final FileSystem fs, final StoreFileInfo fileInfo, final Configuration conf,
238       final CacheConfig cacheConf,  final BloomType cfBloomType) throws IOException {
239     this.fs = fs;
240     this.fileInfo = fileInfo;
241     this.cacheConf = cacheConf;
242 
243     if (BloomFilterFactory.isGeneralBloomEnabled(conf)) {
244       this.cfBloomType = cfBloomType;
245     } else {
246       LOG.info("Ignoring bloom filter check for file " + this.getPath() + ": " +
247           "cfBloomType=" + cfBloomType + " (disabled in config)");
248       this.cfBloomType = BloomType.NONE;
249     }
250   }
251 
252   /**
253    * Clone constructor; copies another StoreFile's fields.
254    * @param other The StoreFile to clone from
255    */
256   public StoreFile(final StoreFile other) {
257     this.fs = other.fs;
258     this.fileInfo = other.fileInfo;
259     this.cacheConf = other.cacheConf;
260     this.cfBloomType = other.cfBloomType;
261   }
262 
263   /**
264    * @return the StoreFileInfo associated with this StoreFile; it also
265    *         indicates whether this StoreFile is a reference.
266    */
267   public StoreFileInfo getFileInfo() {
268     return this.fileInfo;
269   }
270 
271   /**
272    * @return Path or null if this StoreFile was made with a Stream.
273    */
274   public Path getPath() {
275     return this.fileInfo.getPath();
276   }
277 
278   /**
279    * @return the qualified path of this StoreFile
280    */
281   public Path getQualifiedPath() {
282     return this.fileInfo.getPath().makeQualified(fs);
283   }
284 
285   /**
286    * @return True if this is a StoreFile Reference; call
287    * after {@link #open(boolean canUseDropBehind)}, otherwise the answer may be wrong.
288    */
289   public boolean isReference() {
290     return this.fileInfo.isReference();
291   }
292 
293   /**
294    * @return True if this file was made by a major compaction.
295    */
296   public boolean isMajorCompaction() {
297     if (this.majorCompaction == null) {
298       throw new NullPointerException("This has not been set yet");
299     }
300     return this.majorCompaction.get();
301   }
302 
303   /**
304    * @return True if this file should not be part of a minor compaction.
305    */
306   public boolean excludeFromMinorCompaction() {
307     return this.excludeFromMinorCompaction;
308   }
309 
310   /**
311    * @return This file's maximum edit sequence id.
312    */
313   public long getMaxSequenceId() {
314     return this.sequenceid;
315   }
316 
317   public long getModificationTimeStamp() throws IOException {
318     return (fileInfo == null) ? 0 : fileInfo.getModificationTime();
319   }
320 
321   /**
322    * Only used by the Striped Compaction Policy.
323    * @param key the metadata key to look up
324    * @return value associated with the metadata key
325    */
326   public byte[] getMetadataValue(byte[] key) {
327     return metadataMap.get(key);
328   }
329 
330   /**
331    * Return the largest memstoreTS found across all storefiles in
332    * the given list. Store files that were created by a mapreduce
333    * bulk load are ignored, as they do not correspond to any specific
334    * put operation, and thus do not have a memstoreTS associated with them.
335    * @return 0 if no non-bulk-load files are provided or this is a Store that
336    * does not yet have any store files.
337    */
338   public static long getMaxMemstoreTSInList(Collection<StoreFile> sfs) {
339     long max = 0;
340     for (StoreFile sf : sfs) {
341       if (!sf.isBulkLoadResult()) {
342         max = Math.max(max, sf.getMaxMemstoreTS());
343       }
344     }
345     return max;
346   }
347 
348   /**
349    * Return the highest sequence ID found across all storefiles in
350    * the given list.
351    * @param sfs the store files to examine
352    * @return 0 if no files are provided or this is a Store that
353    * does not yet have any store files.
354    */
355   public static long getMaxSequenceIdInList(Collection<StoreFile> sfs) {
356     long max = 0;
357     for (StoreFile sf : sfs) {
358       max = Math.max(max, sf.getMaxSequenceId());
359     }
360     return max;
361   }
362 
363   /**
364    * Check if this storefile was created by bulk load.
365    * When a hfile is bulk loaded into HBase, we append
366    * {@code '_SeqId_<id-when-loaded>'} to the hfile name, unless
367    * "hbase.mapreduce.bulkload.assign.sequenceNumbers" is
368    * explicitly turned off.
369    * If "hbase.mapreduce.bulkload.assign.sequenceNumbers"
370    * is turned off, fall back to BULKLOAD_TIME_KEY.
371    * @return true if this storefile was created by bulk load.
372    */
373   boolean isBulkLoadResult() {
374     String fileName = this.getPath().getName();
375     boolean bulkLoadedHFile = fileName.indexOf("SeqId_") != -1;
376     return bulkLoadedHFile || metadataMap.containsKey(BULKLOAD_TIME_KEY);
381   }
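  // Hedged example of the naming convention checked above (hypothetical name):
  //   "3f2c9a..._SeqId_42_"  -> isBulkLoadResult() is true; open() later
  //   parses 42 out of the name as this file's sequence id.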
382 
383   @VisibleForTesting
384   public boolean isCompactedAway() {
385     if (this.reader != null) {
386       return this.reader.isCompactedAway();
387     }
388     return true;
389   }
390 
391   @VisibleForTesting
392   public int getRefCount() {
393     return this.reader.refCount.get();
394   }
395 
396   /**
397    * Return the timestamp at which this bulk load file was generated.
398    */
399   public long getBulkLoadTimestamp() {
400     byte[] bulkLoadTimestamp = metadataMap.get(BULKLOAD_TIME_KEY);
401     return (bulkLoadTimestamp == null) ? 0 : Bytes.toLong(bulkLoadTimestamp);
402   }
403 
404   /**
405    * @return the cached value of HDFS blocks distribution. The cached value is
406    * calculated when store file is opened.
407    */
408   public HDFSBlocksDistribution getHDFSBlockDistribution() {
409     return this.fileInfo.getHDFSBlockDistribution();
410   }
411 
412   /**
413    * Opens reader on this store file.  Called by Constructor.
414    * @return Reader for the store file.
415    * @throws IOException
416    * @see #closeReader(boolean)
417    */
418   private Reader open(boolean canUseDropBehind) throws IOException {
419     if (this.reader != null) {
420       throw new IllegalAccessError("Already open");
421     }
422 
423     // Open the StoreFile.Reader
424     this.reader = fileInfo.open(this.fs, this.cacheConf, canUseDropBehind);
425 
426     // Load up indices and fileinfo. This also loads Bloom filter type.
427     metadataMap = Collections.unmodifiableMap(this.reader.loadFileInfo());
428 
429     // Read in our metadata.
430     byte [] b = metadataMap.get(MAX_SEQ_ID_KEY);
431     if (b != null) {
432       // By convention, if this is a half hfile, the top half has a sequence
433       // number > the bottom half's. That's why we add one below. It's done in
434       // case the two halves are ever merged back together --rare.  Without it,
435       // on open of the store, since store files are distinguished by sequence
436       // id, the one half would subsume the other.
437       this.sequenceid = Bytes.toLong(b);
438       if (fileInfo.isTopReference()) {
439         this.sequenceid += 1;
440       }
441     }
442 
443     if (isBulkLoadResult()) {
444       // generate the sequenceId from the fileName
445       // fileName is of the form <randomName>_SeqId_<id-when-loaded>_
446       String fileName = this.getPath().getName();
447       // Use lastIndexOf() to get the last, most recent bulk load seqId.
448       int startPos = fileName.lastIndexOf("SeqId_");
449       if (startPos != -1) {
450         this.sequenceid = Long.parseLong(fileName.substring(startPos + 6,
451             fileName.indexOf('_', startPos + 6)));
452         // Handle reference files as done above.
453         if (fileInfo.isTopReference()) {
454           this.sequenceid += 1;
455         }
456       }
457       // SKIP_RESET_SEQ_ID only applies to bulk loaded files.
458       // In mob compaction, the hfile whose cells contain the path of a new mob file is bulk
459       // loaded into hbase; these cells have the same seqIds as the old ones. We do not want
460       // to reset their seqIds, since doing so might make a mess of the visibility of cells that
461       // have the same row key but different seqIds.
462       this.reader.setSkipResetSeqId(isSkipResetSeqId(metadataMap.get(SKIP_RESET_SEQ_ID)));
463       this.reader.setBulkLoaded(true);
464     }
465     this.reader.setSequenceID(this.sequenceid);
466 
467     b = metadataMap.get(HFile.Writer.MAX_MEMSTORE_TS_KEY);
468     if (b != null) {
469       this.maxMemstoreTS = Bytes.toLong(b);
470     }
471 
472     b = metadataMap.get(MAJOR_COMPACTION_KEY);
473     if (b != null) {
474       boolean mc = Bytes.toBoolean(b);
475       if (this.majorCompaction == null) {
476         this.majorCompaction = new AtomicBoolean(mc);
477       } else {
478         this.majorCompaction.set(mc);
479       }
480     } else {
481       // Presume it is not major compacted if it doesn't explicitly say so;
482       // HFileOutputFormat explicitly sets the major compacted key.
483       this.majorCompaction = new AtomicBoolean(false);
484     }
485 
486     b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY);
487     this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b));
488 
489     BloomType hfileBloomType = reader.getBloomFilterType();
490     if (cfBloomType != BloomType.NONE) {
491       reader.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
492       if (hfileBloomType != cfBloomType) {
493         LOG.info("HFile Bloom filter type for "
494             + reader.getHFileReader().getName() + ": " + hfileBloomType
495             + ", but " + cfBloomType + " specified in column family "
496             + "configuration");
497       }
498     } else if (hfileBloomType != BloomType.NONE) {
499       LOG.info("Bloom filter turned off by CF config for "
500           + reader.getHFileReader().getName());
501     }
502 
503     // load delete family bloom filter
504     reader.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);
505 
506     try {
507       byte [] timerangeBytes = metadataMap.get(TIMERANGE_KEY);
508       if (timerangeBytes != null) {
509         this.reader.timeRangeTracker = new TimeRangeTracker();
510         Writables.copyWritable(timerangeBytes, this.reader.timeRangeTracker);
511       }
512     } catch (IllegalArgumentException e) {
513       LOG.error("Error reading timestamp range data from meta -- " +
514           "proceeding without", e);
515       this.reader.timeRangeTracker = null;
516     }
517     // initialize so we can reuse them after reader closed.
518     firstKey = reader.getFirstKey();
519     lastKey = reader.getLastKey();
520     comparator = reader.getComparator();
521     return this.reader;
522   }
523 
524   public Reader createReader() throws IOException {
525     return createReader(false);
526   }
527 
528   /**
529    * @return Reader for the StoreFile; creates one if necessary
530    * @throws IOException
531    */
532   public Reader createReader(boolean canUseDropBehind) throws IOException {
533     if (this.reader == null) {
534       try {
535         this.reader = open(canUseDropBehind);
536       } catch (IOException e) {
537         try {
538           boolean evictOnClose =
539               cacheConf != null? cacheConf.shouldEvictOnClose(): true; 
540           this.closeReader(evictOnClose);
541         } catch (IOException ee) {
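          // Swallowed: the original open failure (rethrown below) is the
          // more informative exception to surface.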
542         }
543         throw e;
544       }
545 
546     }
547     return this.reader;
548   }
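  // Hedged usage sketch: pair createReader() with closeReader() so blocks can
  // be evicted from the cache; the evictOnClose choice mirrors the code above.
  //
  //   StoreFile.Reader r = sf.createReader();
  //   try {
  //     // ... read through r ...
  //   } finally {
  //     sf.closeReader(cacheConf != null ? cacheConf.shouldEvictOnClose() : true);
  //   }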
549 
550   /**
551    * @return Current reader.  Must call createReader first, otherwise returns null.
552    * @see #createReader()
553    */
554   public Reader getReader() {
555     return this.reader;
556   }
557 
558   /**
559    * @param evictOnClose whether to evict blocks belonging to this file
560    * @throws IOException
561    */
562   public synchronized void closeReader(boolean evictOnClose)
563       throws IOException {
564     if (this.reader != null) {
565       this.reader.close(evictOnClose);
566       this.reader = null;
567     }
568   }
569 
570   /**
571    * Marks the status of the file as compactedAway.
572    */
573   public void markCompactedAway() {
574     if (this.reader != null) {
575       this.reader.markCompactedAway();
576     }
577   }
578 
579   /**
580    * Delete this file
581    * @throws IOException
582    */
583   public void deleteReader() throws IOException {
584     boolean evictOnClose =
585         cacheConf != null? cacheConf.shouldEvictOnClose(): true; 
586     closeReader(evictOnClose);
587     this.fs.delete(getPath(), true);
588   }
589 
590   @Override
591   public String toString() {
592     return this.fileInfo.toString();
593   }
594 
595   /**
596    * @return a lengthy description of this StoreFile, suitable for debug output
597    */
598   public String toStringDetailed() {
599     StringBuilder sb = new StringBuilder();
600     sb.append(this.getPath().toString());
601     sb.append(", isReference=").append(isReference());
602     sb.append(", isBulkLoadResult=").append(isBulkLoadResult());
603     if (isBulkLoadResult()) {
604       sb.append(", bulkLoadTS=").append(getBulkLoadTimestamp());
605     } else {
606       sb.append(", seqid=").append(getMaxSequenceId());
607     }
608     sb.append(", majorCompaction=").append(isMajorCompaction());
609 
610     return sb.toString();
611   }
612 
613   /**
614    * Gets whether to skip resetting the sequence id for cells.
615    * @param skipResetSeqId The serialized boolean flag read from metadata.
616    * @return Whether to skip resetting the sequence id.
617    */
618   private boolean isSkipResetSeqId(byte[] skipResetSeqId) {
619     if (skipResetSeqId != null && skipResetSeqId.length == 1) {
620       return Bytes.toBoolean(skipResetSeqId);
621     }
622     return false;
623   }
624 
625   @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="ICAST_INTEGER_MULTIPLY_CAST_TO_LONG",
626       justification="Will not overflow")
627   public static class WriterBuilder {
628     private final Configuration conf;
629     private final CacheConfig cacheConf;
630     private final FileSystem fs;
631 
632     private CellComparator comparator = CellComparator.COMPARATOR;
633     private BloomType bloomType = BloomType.NONE;
634     private long maxKeyCount = 0;
635     private Path dir;
636     private Path filePath;
637     private InetSocketAddress[] favoredNodes;
638     private HFileContext fileContext;
639 
640     public WriterBuilder(Configuration conf, CacheConfig cacheConf,
641         FileSystem fs) {
642       this.conf = conf;
643       this.cacheConf = cacheConf;
644       this.fs = fs;
645     }
646 
647     /**
648      * Use either this method or {@link #withFilePath}, but not both.
649      * @param dir Path to column family directory. The directory is created if
650      *          it does not exist. The file is given a unique name within this
651      *          directory.
652      * @return this (for chained invocation)
653      */
654     public WriterBuilder withOutputDir(Path dir) {
655       Preconditions.checkNotNull(dir);
656       this.dir = dir;
657       return this;
658     }
659 
660     /**
661      * Use either this method or {@link #withOutputDir}, but not both.
662      * @param filePath the StoreFile path to write
663      * @return this (for chained invocation)
664      */
665     public WriterBuilder withFilePath(Path filePath) {
666       Preconditions.checkNotNull(filePath);
667       this.filePath = filePath;
668       return this;
669     }
670 
671     /**
672      * @param favoredNodes an array of favored nodes or possibly null
673      * @return this (for chained invocation)
674      */
675     public WriterBuilder withFavoredNodes(InetSocketAddress[] favoredNodes) {
676       this.favoredNodes = favoredNodes;
677       return this;
678     }
679 
680     public WriterBuilder withComparator(CellComparator comparator) {
681       Preconditions.checkNotNull(comparator);
682       this.comparator = comparator;
683       return this;
684     }
685 
686     public WriterBuilder withBloomType(BloomType bloomType) {
687       Preconditions.checkNotNull(bloomType);
688       this.bloomType = bloomType;
689       return this;
690     }
691 
692     /**
693      * @param maxKeyCount estimated maximum number of keys we expect to add
694      * @return this (for chained invocation)
695      */
696     public WriterBuilder withMaxKeyCount(long maxKeyCount) {
697       this.maxKeyCount = maxKeyCount;
698       return this;
699     }
700 
701     public WriterBuilder withFileContext(HFileContext fileContext) {
702       this.fileContext = fileContext;
703       return this;
704     }
705 
706     public WriterBuilder withShouldDropCacheBehind(boolean shouldDropCacheBehind/*NOT USED!!*/) {
707       // TODO: HAS NO EFFECT!!! FIX!!
708       return this;
709     }
710     /**
711      * Create a store file writer. The client is responsible for closing the
712      * file when done. If adding metadata, do so BEFORE closing, using
713      * {@link Writer#appendMetadata}.
714      */
715     public Writer build() throws IOException {
716       if ((dir == null ? 0 : 1) + (filePath == null ? 0 : 1) != 1) {
717         throw new IllegalArgumentException("Either specify parent directory " +
718             "or file path");
719       }
720 
721       if (dir == null) {
722         dir = filePath.getParent();
723       }
724 
725       if (!fs.exists(dir)) {
726         fs.mkdirs(dir);
727       }
728 
729       if (filePath == null) {
730         filePath = getUniqueFile(fs, dir);
731         if (!BloomFilterFactory.isGeneralBloomEnabled(conf)) {
732           bloomType = BloomType.NONE;
733         }
734       }
735 
736       if (comparator == null) {
737         comparator = CellComparator.COMPARATOR;
738       }
739       return new Writer(fs, filePath,
740           conf, cacheConf, comparator, bloomType, maxKeyCount, favoredNodes, fileContext);
741     }
742   }
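  // Hedged alternative to the builder sketch near the top of this file:
  // writing to an explicit path instead of a generated name in a directory.
  // The "explicit-name" path and estimatedKeys are hypothetical.
  //
  //   StoreFile.Writer w = new StoreFile.WriterBuilder(conf, cacheConf, fs)
  //       .withFilePath(new Path(dir, "explicit-name"))
  //       .withComparator(CellComparator.COMPARATOR)
  //       .withMaxKeyCount(estimatedKeys)
  //       .withFileContext(fileContext)
  //       .build();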
743 
744   /**
745    * @param fs the filesystem the file will be written to
746    * @param dir Directory to create file in.
747    * @return random filename inside passed <code>dir</code>
748    */
749   public static Path getUniqueFile(final FileSystem fs, final Path dir)
750       throws IOException {
751     if (!fs.getFileStatus(dir).isDirectory()) {
752       throw new IOException("Expecting " + dir.toString() +
753         " to be a directory");
754     }
755     return new Path(dir, UUID.randomUUID().toString().replaceAll("-", ""));
756   }
757 
758   public Long getMinimumTimestamp() {
759     return (getReader().timeRangeTracker == null) ?
760         null :
761         getReader().timeRangeTracker.getMinimumTimestamp();
762   }
763 
764   /**
765    * Gets the approximate mid-point of this file that is optimal for use in splitting it.
766    * @param comparator Comparator used to compare KVs.
767    * @return The split point row, or null if splitting is not possible, or reader is null.
768    */
769   @SuppressWarnings("deprecation")
770   byte[] getFileSplitPoint(CellComparator comparator) throws IOException {
771     if (this.reader == null) {
772       LOG.warn("Storefile " + this + " Reader is null; cannot get split point");
773       return null;
774     }
775     // Get first, last, and mid keys.  The midkey is the key that starts the
776     // block in the middle of the hfile; it has a column and timestamp.  We
777     // need to return just the row we want to split on as the midkey.
778     Cell midkey = this.reader.midkey();
779     if (midkey != null) {
780       Cell firstKey = this.reader.getFirstKey();
781       Cell lastKey = this.reader.getLastKey();
782       // if the midkey is the same as the first or last keys, we cannot (ever) split this region.
783       if (comparator.compareRows(midkey, firstKey) == 0
784           || comparator.compareRows(midkey, lastKey) == 0) {
785         if (LOG.isDebugEnabled()) {
786           LOG.debug("cannot split because midkey is the same as first or last row");
787         }
788         return null;
789       }
790       return CellUtil.cloneRow(midkey);
791     }
792     return null;
793   }
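  // Hedged usage sketch: a split policy asking this file for a split row.
  //
  //   byte[] splitRow = sf.getFileSplitPoint(CellComparator.COMPARATOR);
  //   if (splitRow != null) {
  //     // the enclosing region may be split at splitRow
  //   }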
794 
795   /**
796    * A StoreFile writer.  Use this to write HBase Store Files. It is package
797    * local because it is an implementation detail of the HBase regionserver.
798    */
799   public static class Writer implements Compactor.CellSink {
800     private final BloomFilterWriter generalBloomFilterWriter;
801     private final BloomFilterWriter deleteFamilyBloomFilterWriter;
802     private final BloomType bloomType;
803     private byte[] lastBloomKey;
804     private int lastBloomKeyOffset, lastBloomKeyLen;
805     private Cell lastCell = null;
806     private long earliestPutTs = HConstants.LATEST_TIMESTAMP;
807     private Cell lastDeleteFamilyCell = null;
808     private long deleteFamilyCnt = 0;
809 
810     TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
811     /* isTimeRangeTrackerSet keeps track of whether the timeRange has already
812      * been set. When flushing a memstore, we set the TimeRange and use this
813      * variable to indicate that it doesn't need to be calculated again while
814      * appending KeyValues.
815      * It is not set in the case of compactions, when it is recalculated using
816      * only the appended KeyValues. */
817     boolean isTimeRangeTrackerSet = false;
818 
819     protected HFile.Writer writer;
820     private KeyValue.KeyOnlyKeyValue lastBloomKeyOnlyKV = null;
821 
822     /**
823      * Creates an HFile.Writer that also writes helpful meta data.
824      * @param fs file system to write to
825      * @param path file name to create
826      * @param conf user configuration
827      * @param comparator key comparator
828      * @param bloomType bloom filter setting
829      * @param maxKeys the expected maximum number of keys to be added. Was used
830      *        for Bloom filter size in {@link HFile} format version 1.
831      * @param favoredNodes an array of favored nodes or possibly null
832      * @param fileContext - The HFile context
833      * @throws IOException problem writing to FS
834      */
835     private Writer(FileSystem fs, Path path,
836         final Configuration conf,
837         CacheConfig cacheConf,
838         final CellComparator comparator, BloomType bloomType, long maxKeys,
839         InetSocketAddress[] favoredNodes, HFileContext fileContext)
840             throws IOException {
841       writer = HFile.getWriterFactory(conf, cacheConf)
842           .withPath(fs, path)
843           .withComparator(comparator)
844           .withFavoredNodes(favoredNodes)
845           .withFileContext(fileContext)
846           .create();
847 
848       generalBloomFilterWriter = BloomFilterFactory.createGeneralBloomAtWrite(
849           conf, cacheConf, bloomType,
850           (int) Math.min(maxKeys, Integer.MAX_VALUE), writer);
851 
852       if (generalBloomFilterWriter != null) {
853         this.bloomType = bloomType;
854         if(this.bloomType ==  BloomType.ROWCOL) {
855           lastBloomKeyOnlyKV = new KeyValue.KeyOnlyKeyValue();
856         }
857         if (LOG.isTraceEnabled()) LOG.trace("Bloom filter type for " + path + ": " +
858           this.bloomType + ", " + generalBloomFilterWriter.getClass().getSimpleName());
859       } else {
860         // Not using Bloom filters.
861         this.bloomType = BloomType.NONE;
862       }
863 
864       // initialize delete family Bloom filter when there is NO RowCol Bloom
865       // filter
866       if (this.bloomType != BloomType.ROWCOL) {
867         this.deleteFamilyBloomFilterWriter = BloomFilterFactory
868             .createDeleteBloomAtWrite(conf, cacheConf,
869                 (int) Math.min(maxKeys, Integer.MAX_VALUE), writer);
870       } else {
871         deleteFamilyBloomFilterWriter = null;
872       }
873       if (deleteFamilyBloomFilterWriter != null) {
874         if (LOG.isTraceEnabled()) LOG.trace("Delete Family Bloom filter type for " + path + ": "
875             + deleteFamilyBloomFilterWriter.getClass().getSimpleName());
876       }
877     }
878 
879     /**
880      * Writes meta data.
881      * Call before {@link #close()} since it's written as meta data to this file.
882      * @param maxSequenceId Maximum sequence id.
883      * @param majorCompaction True if this file is product of a major compaction
884      * @throws IOException problem writing to FS
885      */
886     public void appendMetadata(final long maxSequenceId, final boolean majorCompaction)
887     throws IOException {
888       writer.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId));
889       writer.appendFileInfo(MAJOR_COMPACTION_KEY,
890           Bytes.toBytes(majorCompaction));
891       appendTrackedTimestampsToMetadata();
892     }
893 
894     /**
895      * Writes meta data.
896      * Call before {@link #close()} since it's written as meta data to this file.
897      * @param maxSequenceId Maximum sequence id.
898      * @param majorCompaction True if this file is product of a major compaction
899      * @param mobCellsCount The number of mob cells.
900      * @throws IOException problem writing to FS
901      */
902     public void appendMetadata(final long maxSequenceId, final boolean majorCompaction,
903         final long mobCellsCount) throws IOException {
904       writer.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId));
905       writer.appendFileInfo(MAJOR_COMPACTION_KEY, Bytes.toBytes(majorCompaction));
906       writer.appendFileInfo(MOB_CELLS_COUNT, Bytes.toBytes(mobCellsCount));
907       appendTrackedTimestampsToMetadata();
908     }
909 
910     /**
911      * Add the TimeRange and earliest put timestamp to the metadata
912      */
913     public void appendTrackedTimestampsToMetadata() throws IOException {
914       appendFileInfo(TIMERANGE_KEY,WritableUtils.toByteArray(timeRangeTracker));
915       appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs));
916     }
917 
918     /**
919      * Set TimeRangeTracker
920      * @param trt the TimeRangeTracker to use
921      */
922     public void setTimeRangeTracker(final TimeRangeTracker trt) {
923       this.timeRangeTracker = trt;
924       isTimeRangeTrackerSet = true;
925     }
926 
927     /**
928      * Record the earliest Put timestamp.
929      *
930      * If the timeRangeTracker is not set,
931      * update the TimeRangeTracker to include the timestamp of this key.
932      * @param cell the cell whose timestamp is tracked
933      */
934     public void trackTimestamps(final Cell cell) {
935       if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) {
936         earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp());
937       }
938       if (!isTimeRangeTrackerSet) {
939         timeRangeTracker.includeTimestamp(cell);
940       }
941     }
942 
943     private void appendGeneralBloomfilter(final Cell cell) throws IOException {
944       if (this.generalBloomFilterWriter != null) {
945         // only add to the bloom filter on a new, unique key
946         boolean newKey = true;
947         if (this.lastCell != null) {
948           switch(bloomType) {
949           case ROW:
950             newKey = ! CellUtil.matchingRows(cell, lastCell);
951             break;
952           case ROWCOL:
953             newKey = ! CellUtil.matchingRowColumn(cell, lastCell);
954             break;
955           case NONE:
956             newKey = false;
957             break;
958           default:
959             throw new IOException("Invalid Bloom filter type: " + bloomType +
960                 " (ROW or ROWCOL expected)");
961           }
962         }
963         if (newKey) {
964           /*
965            * http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue.png
966            * Key = RowLen + Row + FamilyLen + Column [Family + Qualifier] + TimeStamp
967            *
968            * 2 Types of Filtering:
969            *  1. Row = Row
970            *  2. RowCol = Row + Qualifier
971            */
972           byte[] bloomKey = null;
973           // Used with ROW_COL bloom
974           KeyValue bloomKeyKV = null;
975           int bloomKeyOffset, bloomKeyLen;
976 
977           switch (bloomType) {
978           case ROW:
979             bloomKey = cell.getRowArray();
980             bloomKeyOffset = cell.getRowOffset();
981             bloomKeyLen = cell.getRowLength();
982             break;
983           case ROWCOL:
984             // merge(row, qualifier)
985             // TODO: could save one buffer copy in case of compound Bloom
986             // filters when this involves creating a KeyValue
987             // TODO : Handle while writes also
988             bloomKeyKV = KeyValueUtil.createFirstOnRow(cell.getRowArray(), cell.getRowOffset(),
989                 cell.getRowLength(), 
990                 HConstants.EMPTY_BYTE_ARRAY, 0, 0, cell.getQualifierArray(),
991                 cell.getQualifierOffset(),
992                 cell.getQualifierLength());
993             bloomKey = bloomKeyKV.getBuffer();
994             bloomKeyOffset = bloomKeyKV.getKeyOffset();
995             bloomKeyLen = bloomKeyKV.getKeyLength();
996             break;
997           default:
998             throw new IOException("Invalid Bloom filter type: " + bloomType +
999                 " (ROW or ROWCOL expected)");
1000           }
1001           generalBloomFilterWriter.add(bloomKey, bloomKeyOffset, bloomKeyLen);
1002           if (lastBloomKey != null) {
1003             int res = 0;
1004             // hbase:meta does not have blooms. So we need not have special interpretation
1005             // of the hbase:meta cells.  We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom
1006             if (bloomType == BloomType.ROW) {
1007               res = Bytes.BYTES_RAWCOMPARATOR.compare(bloomKey, bloomKeyOffset, bloomKeyLen,
1008                   lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen);
1009             } else {
1010               // TODO : Caching of kv components becomes important in these cases
1011               res = CellComparator.COMPARATOR.compare(bloomKeyKV, lastBloomKeyOnlyKV);
1012             }
1013             if (res <= 0) {
1014               throw new IOException("Non-increasing Bloom keys: "
1015                   + Bytes.toStringBinary(bloomKey, bloomKeyOffset, bloomKeyLen) + " after "
1016                   + Bytes.toStringBinary(lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen));
1017             }
1018           }
1019           lastBloomKey = bloomKey;
1020           lastBloomKeyOffset = bloomKeyOffset;
1021           lastBloomKeyLen = bloomKeyLen;
1022           if (bloomType == BloomType.ROWCOL) {
1023             lastBloomKeyOnlyKV.setKey(bloomKey, bloomKeyOffset, bloomKeyLen);
1024           }
1025           this.lastCell = cell;
1026         }
1027       }
1028     }
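    // Hedged illustration of the two Bloom key shapes built above, reusing the
    // same KeyValueUtil call as the ROWCOL branch; `cell` is hypothetical.
    //
    //   // ROW: the key is just the row bytes.
    //   byte[] rowKey = cell.getRowArray();  // at getRowOffset(), getRowLength()
    //
    //   // ROWCOL: the key is firstOnRow(row, <empty family>, qualifier).
    //   KeyValue rowColKey = KeyValueUtil.createFirstOnRow(
    //       cell.getRowArray(), cell.getRowOffset(), cell.getRowLength(),
    //       HConstants.EMPTY_BYTE_ARRAY, 0, 0,
    //       cell.getQualifierArray(), cell.getQualifierOffset(),
    //       cell.getQualifierLength());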
1029 
1030     private void appendDeleteFamilyBloomFilter(final Cell cell)
1031         throws IOException {
1032       if (!CellUtil.isDeleteFamily(cell) && !CellUtil.isDeleteFamilyVersion(cell)) {
1033         return;
1034       }
1035 
1036       // increase the count of delete family cells in the store file
1037       deleteFamilyCnt++;
1038       if (null != this.deleteFamilyBloomFilterWriter) {
1039         boolean newKey = true;
1040         if (lastDeleteFamilyCell != null) {
1041           // hbase:meta does not have blooms. So we need not have special interpretation
1042           // of the hbase:meta cells
1043           newKey = !CellUtil.matchingRows(cell, lastDeleteFamilyCell);
1044         }
1045         if (newKey) {
1046           this.deleteFamilyBloomFilterWriter.add(cell.getRowArray(),
1047               cell.getRowOffset(), cell.getRowLength());
1048           this.lastDeleteFamilyCell = cell;
1049         }
1050       }
1051     }
1052 
1053     public void append(final Cell cell) throws IOException {
1054       appendGeneralBloomfilter(cell);
1055       appendDeleteFamilyBloomFilter(cell);
1056       writer.append(cell);
1057       trackTimestamps(cell);
1058     }
1059 
1060     public Path getPath() {
1061       return this.writer.getPath();
1062     }
1063 
1064     public boolean hasGeneralBloom() {
1065       return this.generalBloomFilterWriter != null;
1066     }
1067 
1068     /**
1069      * For unit testing only.
1070      *
1071      * @return the Bloom filter used by this writer.
1072      */
1073     BloomFilterWriter getGeneralBloomWriter() {
1074       return generalBloomFilterWriter;
1075     }
1076 
1077     private boolean closeBloomFilter(BloomFilterWriter bfw) throws IOException {
1078       boolean haveBloom = (bfw != null && bfw.getKeyCount() > 0);
1079       if (haveBloom) {
1080         bfw.compactBloom();
1081       }
1082       return haveBloom;
1083     }
1084 
1085     private boolean closeGeneralBloomFilter() throws IOException {
1086       boolean hasGeneralBloom = closeBloomFilter(generalBloomFilterWriter);
1087 
1088       // add the general Bloom filter writer and append file info
1089       if (hasGeneralBloom) {
1090         writer.addGeneralBloomFilter(generalBloomFilterWriter);
1091         writer.appendFileInfo(BLOOM_FILTER_TYPE_KEY,
1092             Bytes.toBytes(bloomType.toString()));
1093         if (lastBloomKey != null) {
1094           writer.appendFileInfo(LAST_BLOOM_KEY, Arrays.copyOfRange(
1095               lastBloomKey, lastBloomKeyOffset, lastBloomKeyOffset
1096                   + lastBloomKeyLen));
1097         }
1098       }
1099       return hasGeneralBloom;
1100     }
1101 
1102     private boolean closeDeleteFamilyBloomFilter() throws IOException {
1103       boolean hasDeleteFamilyBloom = closeBloomFilter(deleteFamilyBloomFilterWriter);
1104 
1105       // add the delete family Bloom filter writer
1106       if (hasDeleteFamilyBloom) {
1107         writer.addDeleteFamilyBloomFilter(deleteFamilyBloomFilterWriter);
1108       }
1109 
1110       // append file info about the number of delete family kvs
1111       // even if there is no delete family Bloom.
1112       writer.appendFileInfo(DELETE_FAMILY_COUNT,
1113           Bytes.toBytes(this.deleteFamilyCnt));
1114 
1115       return hasDeleteFamilyBloom;
1116     }
1117 
1118     public void close() throws IOException {
1119       boolean hasGeneralBloom = this.closeGeneralBloomFilter();
1120       boolean hasDeleteFamilyBloom = this.closeDeleteFamilyBloomFilter();
1121 
1122       writer.close();
1123 
1124       // Log final Bloom filter statistics. This needs to be done after close()
1125       // because compound Bloom filters might be finalized as part of closing.
1126       if (StoreFile.LOG.isTraceEnabled()) {
1127         StoreFile.LOG.trace((hasGeneralBloom ? "" : "NO ") + "General Bloom and " +
1128           (hasDeleteFamilyBloom ? "" : "NO ") + "DeleteFamily" + " was added to HFile " +
1129           getPath());
1130       }
1131 
1132     }
1133 
1134     public void appendFileInfo(byte[] key, byte[] value) throws IOException {
1135       writer.appendFileInfo(key, value);
1136     }
1137 
1138     /** For use in testing, e.g. {@link org.apache.hadoop.hbase.regionserver.CreateRandomStoreFile}
1139      */
1140     HFile.Writer getHFileWriter() {
1141       return writer;
1142     }
1143   }
1144 
1145   /**
1146    * Reader for a StoreFile.
1147    */
1148   public static class Reader {
1149     private static final Log LOG = LogFactory.getLog(Reader.class.getName());
1150 
1151     protected BloomFilter generalBloomFilter = null;
1152     protected BloomFilter deleteFamilyBloomFilter = null;
1153     protected BloomType bloomFilterType;
1154     private final HFile.Reader reader;
1155     protected TimeRangeTracker timeRangeTracker = null;
1156     protected long sequenceID = -1;
1157     private byte[] lastBloomKey;
1158     private long deleteFamilyCnt = -1;
1159     private boolean bulkLoadResult = false;
1160     private KeyValue.KeyOnlyKeyValue lastBloomKeyOnlyKV = null;
1161     private boolean skipResetSeqId = true;
1162     // Counter that is incremented every time a scanner is created on the
1163     // store file.  It is decremented when the scan on the store file is
1164     // done.
1165     private AtomicInteger refCount = new AtomicInteger(0);
1166     // Indicates if the file got compacted
1167     private volatile boolean compactedAway = false;
1168 
1169     public Reader(FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf)
1170         throws IOException {
1171       reader = HFile.createReader(fs, path, cacheConf, conf);
1172       bloomFilterType = BloomType.NONE;
1173     }
1174 
1175     void markCompactedAway() {
1176       this.compactedAway = true;
1177     }
1178 
1179     public Reader(FileSystem fs, Path path, FSDataInputStreamWrapper in, long size,
1180         CacheConfig cacheConf, Configuration conf) throws IOException {
1181       reader = HFile.createReader(fs, path, in, size, cacheConf, conf);
1182       bloomFilterType = BloomType.NONE;
1183     }
1184 
1185     public void setReplicaStoreFile(boolean isPrimaryReplicaStoreFile) {
1186       reader.setPrimaryReplicaReader(isPrimaryReplicaStoreFile);
1187     }
1188     public boolean isPrimaryReplicaReader() {
1189       return reader.isPrimaryReplicaReader();
1190     }
1191 
1192     /**
1193      * ONLY USE DEFAULT CONSTRUCTOR FOR UNIT TESTS
1194      */
1195     Reader() {
1196       this.reader = null;
1197     }
1198 
1199     public CellComparator getComparator() {
1200       return reader.getComparator();
1201     }
1202 
1203     /**
1204      * Get a scanner to scan over this StoreFile. Do not use
1205      * this overload if using this scanner for compactions.
1206      *
1207      * @param cacheBlocks should this scanner cache blocks?
1208      * @param pread use pread (for highly concurrent small readers)
1209      * @return a scanner
1210      */
1211     public StoreFileScanner getStoreFileScanner(boolean cacheBlocks,
1212                                                boolean pread) {
1213       return getStoreFileScanner(cacheBlocks, pread, false,
1214         // 0 is passed as readpoint because this method is only used by tests
1215         // where the StoreFile is directly operated upon
1216         0);
1217     }
1218 
1219     /**
1220      * Get a scanner to scan over this StoreFile.
1221      *
1222      * @param cacheBlocks should this scanner cache blocks?
1223      * @param pread use pread (for highly concurrent small readers)
1224      * @param isCompaction is scanner being used for compaction?
1225      * @return a scanner
1226      */
1227     public StoreFileScanner getStoreFileScanner(boolean cacheBlocks,
1228                                                boolean pread,
1229                                                boolean isCompaction, long readPt) {
1230       // Increment the ref count
1231       refCount.incrementAndGet();
1232       return new StoreFileScanner(this,
1233                                  getScanner(cacheBlocks, pread, isCompaction),
1234                                  !isCompaction, reader.hasMVCCInfo(), readPt);
1235     }
1236 
1237     /**
1238      * Decrement the ref count associated with the reader whenever a scanner
1239      * associated with the reader is closed
1240      */
1241     void decrementRefCount() {
1242       refCount.decrementAndGet();
1243     }
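    // Hedged sketch of the ref-count contract above: creating a scanner
    // increments refCount, and closing the scanner is expected to call
    // decrementRefCount(); readPt is hypothetical.
    //
    //   StoreFileScanner s = reader.getStoreFileScanner(true, false, false, readPt);
    //   try {
    //     // ... scan ...
    //   } finally {
    //     s.close(); // decrements the ref count
    //   }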
1244 
1245     /**
1246      * @return true if the file is still used in reads
1247      */
1248     public boolean isReferencedInReads() {
1249       return refCount.get() != 0;
1250     }
1251  
1252     /**
1253      * @return true if the file is compacted
1254      */
1255     public boolean isCompactedAway() {
1256       return this.compactedAway;
1257     }
1258 
1259     /**
1260      * Warning: Do not write further code which depends on this call. Instead
1261      * use getStoreFileScanner() which uses the StoreFileScanner class/interface
1262      * which is the preferred way to scan a store with higher level concepts.
1263      *
1264      * @param cacheBlocks should we cache the blocks?
1265      * @param pread use pread (for concurrent small readers)
1266      * @return the underlying HFileScanner
1267      */
1268     @Deprecated
1269     public HFileScanner getScanner(boolean cacheBlocks, boolean pread) {
1270       return getScanner(cacheBlocks, pread, false);
1271     }
1272 
1273     /**
1274      * Warning: Do not write further code which depends on this call. Instead
1275      * use getStoreFileScanner() which uses the StoreFileScanner class/interface
1276      * which is the preferred way to scan a store with higher level concepts.
1277      *
1278      * @param cacheBlocks
1279      *          should we cache the blocks?
1280      * @param pread
1281      *          use pread (for concurrent small readers)
1282      * @param isCompaction
1283      *          is scanner being used for compaction?
1284      * @return the underlying HFileScanner
1285      */
1286     @Deprecated
1287     public HFileScanner getScanner(boolean cacheBlocks, boolean pread,
1288         boolean isCompaction) {
1289       return reader.getScanner(cacheBlocks, pread, isCompaction);
1290     }
1291 
1292     public void close(boolean evictOnClose) throws IOException {
1293       reader.close(evictOnClose);
1294     }
1295 
1296     /**
1297      * Check if this storeFile may contain keys within the TimeRange that
1298      * have not expired (i.e. not older than oldestUnexpiredTS).
1299      * @param timeRange the timeRange to restrict
1300      * @param oldestUnexpiredTS the oldest timestamp that is not expired, as
1301      *          determined by the column family's TTL
1302      * @return false if queried keys definitely don't exist in this StoreFile
1303      */
1304     boolean passesTimerangeFilter(TimeRange timeRange, long oldestUnexpiredTS) {
1305       if (timeRangeTracker == null) {
1306         return true;
1307       } else {
1308         return timeRangeTracker.includesTimeRange(timeRange) &&
1309             timeRangeTracker.getMaximumTimestamp() >= oldestUnexpiredTS;
1310       }
1311     }
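    // Hedged usage sketch: pruning a file by time range before scanning;
    // minTs, maxTs and oldestUnexpiredTS are hypothetical values.
    //
    //   TimeRange tr = new TimeRange(minTs, maxTs);
    //   if (!reader.passesTimerangeFilter(tr, oldestUnexpiredTS)) {
    //     // skip this file: it cannot contain unexpired cells in the range
    //   }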
1312 
1313     /**
1314      * Checks whether the given scan passes the Bloom filter (if present). Only
1315      * checks Bloom filters for single-row or single-row-column scans. Bloom
1316      * filter checking for multi-gets is implemented as part of the store
1317      * scanner system (see {@link StoreFileScanner#seekExactly}) and uses
1318      * the lower-level API {@link #passesGeneralRowBloomFilter(byte[], int, int)}
1319      * and {@link #passesGeneralRowColBloomFilter(Cell)}.
1320      *
1321      * @param scan the scan specification. Used to determine the row, and to
1322      *          check whether this is a single-row ("get") scan.
1323      * @param columns the set of columns. Only used for row-column Bloom
1324      *          filters.
1325      * @return true if the scan with the given column set passes the Bloom
1326      *         filter, or if the Bloom filter is not applicable for the scan.
1327      *         False if the Bloom filter is applicable and the scan fails it.
1328      */
1329      boolean passesBloomFilter(Scan scan,
1330         final SortedSet<byte[]> columns) {
1331       // Multi-column non-get scans will use Bloom filters through the
1332       // lower-level API function that this function calls.
1333       if (!scan.isGetScan()) {
1334         return true;
1335       }
1336 
1337       byte[] row = scan.getStartRow();
1338       switch (this.bloomFilterType) {
1339         case ROW:
1340           return passesGeneralRowBloomFilter(row, 0, row.length);
1341 
1342         case ROWCOL:
1343           if (columns != null && columns.size() == 1) {
1344             byte[] column = columns.first();
1345             // create the required fake key
1346             Cell kvKey = KeyValueUtil.createFirstOnRow(row, 0, row.length,
1347               HConstants.EMPTY_BYTE_ARRAY, 0, 0, column, 0,
1348               column.length);
1349             return passesGeneralRowColBloomFilter(kvKey);
1350           }
1351 
1352           // For multi-column queries the Bloom filter is checked from the
1353           // seekExact operation.
1354           return true;
1355 
1356         default:
1357           return true;
1358       }
1359     }
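    // Hedged usage sketch: a single-row ("get") scan checked against the
    // Bloom filter; row and cols are hypothetical.
    //
    //   Scan scan = new Scan(row, row); // startRow == stopRow => get scan
    //   if (!reader.passesBloomFilter(scan, cols)) {
    //     // this file definitely contains nothing for the get; skip it
    //   }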
1360 
1361     public boolean passesDeleteFamilyBloomFilter(byte[] row, int rowOffset,
1362         int rowLen) {
1363       // Cache Bloom filter as a local variable in case it is set to null by
1364       // another thread on an IO error.
1365       BloomFilter bloomFilter = this.deleteFamilyBloomFilter;
1366 
1367       // Empty file or there is no delete family at all
1368       if (reader.getTrailer().getEntryCount() == 0 || deleteFamilyCnt == 0) {
1369         return false;
1370       }
1371 
1372       if (bloomFilter == null) {
1373         return true;
1374       }
1375 
1376       try {
1377         if (!bloomFilter.supportsAutoLoading()) {
1378           return true;
1379         }
1380         return bloomFilter.contains(row, rowOffset, rowLen, null);
1381       } catch (IllegalArgumentException e) {
1382         LOG.error("Bad Delete Family bloom filter data -- proceeding without",
1383             e);
1384         setDeleteFamilyBloomFilterFaulty();
1385       }
1386 
1387       return true;
1388     }
1389 
1390     /**
1391      * A method for checking Bloom filters. Called directly from
1392      * StoreFileScanner in case of a multi-column query.
1393      *
1394      * @param row the row to check
1395      * @param rowOffset offset of the row within the array
1396      * @param rowLen length of the row
1397      * @return True if passes
1398      */
1399     public boolean passesGeneralRowBloomFilter(byte[] row, int rowOffset, int rowLen) {
1400       BloomFilter bloomFilter = this.generalBloomFilter;
1401       if (bloomFilter == null) {
1402         return true;
1403       }
1404 
1405       // Used in ROW bloom
1406       byte[] key = null;
1407       if (rowOffset != 0 || rowLen != row.length) {
1408         throw new AssertionError(
1409             "For row-only Bloom filters the row " + "must occupy the whole array");
1410       }
1411       key = row;
1412       return checkGeneralBloomFilter(key, null, bloomFilter);
1413     }
1414 
1415     /**
1416      * A method for checking Bloom filters. Called directly from
1417      * StoreFileScanner in case of a multi-column query.
1418      *
1419      * @param cell
1420      *          the cell to check if present in BloomFilter
1421      * @return True if passes
1422      */
1423     public boolean passesGeneralRowColBloomFilter(Cell cell) {
1424       BloomFilter bloomFilter = this.generalBloomFilter;
1425       if (bloomFilter == null) {
1426         return true;
1427       }
1428       // Used in ROW_COL bloom
1429       Cell kvKey = null;
1430       // If the incoming key is already a fake rowcol key, use it as is.
1431       if (cell.getTypeByte() == KeyValue.Type.Maximum.getCode() && cell.getFamilyLength() == 0) {
1432         kvKey = cell;
1433       } else {
1434         kvKey = CellUtil.createFirstOnRowCol(cell);
1435       }
1436       return checkGeneralBloomFilter(null, kvKey, bloomFilter);
1437     }
1438 
    private boolean checkGeneralBloomFilter(byte[] key, Cell kvKey, BloomFilter bloomFilter) {
      // Empty file
      if (reader.getTrailer().getEntryCount() == 0) {
        return false;
      }
      HFileBlock bloomBlock = null;
      try {
        boolean shouldCheckBloom;
        ByteBuff bloom;
        if (bloomFilter.supportsAutoLoading()) {
          bloom = null;
          shouldCheckBloom = true;
        } else {
          bloomBlock = reader.getMetaBlock(HFile.BLOOM_FILTER_DATA_KEY, true);
          bloom = bloomBlock.getBufferWithoutHeader();
          shouldCheckBloom = bloom != null;
        }

        if (shouldCheckBloom) {
          boolean exists;

          // Whether the primary Bloom key is greater than the last Bloom key
          // from the file info. For row-column Bloom filters this is not yet
          // a sufficient condition to return false.
          boolean keyIsAfterLast = (lastBloomKey != null);
          // hbase:meta does not have blooms. So we need not have special interpretation
          // of the hbase:meta cells.  We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom
          if (keyIsAfterLast) {
            if (bloomFilterType == BloomType.ROW) {
              keyIsAfterLast = (Bytes.BYTES_RAWCOMPARATOR.compare(key, lastBloomKey) > 0);
            } else {
              keyIsAfterLast = (CellComparator.COMPARATOR.compare(kvKey, lastBloomKeyOnlyKV)) > 0;
            }
          }

          if (bloomFilterType == BloomType.ROWCOL) {
            // Since a Row Delete is essentially a DeleteFamily applied to all
            // columns, a file might be skipped when using a row+col Bloom filter.
            // To ensure such a file is included, an additional check looking
            // only for a row bloom is required.
            Cell rowBloomKey = CellUtil.createFirstOnRow(kvKey);
            // hbase:meta does not have blooms. So we need not have special interpretation
            // of the hbase:meta cells.  We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom
            if (keyIsAfterLast
                && (CellComparator.COMPARATOR.compare(rowBloomKey, lastBloomKeyOnlyKV)) > 0) {
              exists = false;
            } else {
              exists =
                  bloomFilter.contains(kvKey, bloom) ||
                  bloomFilter.contains(rowBloomKey, bloom);
            }
          } else {
            exists = !keyIsAfterLast
                && bloomFilter.contains(key, 0, key.length, bloom);
          }

          return exists;
        }
      } catch (IOException e) {
        LOG.error("Error reading bloom filter data -- proceeding without",
            e);
        setGeneralBloomFilterFaulty();
      } catch (IllegalArgumentException e) {
        LOG.error("Bad bloom filter data -- proceeding without", e);
        setGeneralBloomFilterFaulty();
      } finally {
        // Return the bloom block so that its ref count can be decremented.
        reader.returnBlock(bloomBlock);
      }
      return true;
    }

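    // Editor's note (illustrative addition): the keyIsAfterLast test above can
    // answer "no" without probing any Bloom bits. A worked example with assumed
    // keys: if the LAST_BLOOM_KEY recorded in the file info is "m", a ROW
    // lookup for "q" compares greater and the file is skipped outright, while
    // a lookup for "d" falls through to bloomFilter.contains(), which may
    // still yield a false positive but never a false negative:
    //
    //   byte[] lastKey = Bytes.toBytes("m");  // assumed last Bloom key
    //   Bytes.BYTES_RAWCOMPARATOR.compare(Bytes.toBytes("q"), lastKey); // > 0: skip
    //   Bytes.BYTES_RAWCOMPARATOR.compare(Bytes.toBytes("d"), lastKey); // < 0: probe
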
    /**
     * Checks whether the given scan rowkey range overlaps with the current
     * storefile's key range.
     * @param scan the scan specification. Used to determine the rowkey range.
     * @return true if there is overlap, false otherwise
     */
    public boolean passesKeyRangeFilter(Scan scan) {
      if (this.getFirstKey() == null || this.getLastKey() == null) {
        // the file is empty
        return false;
      }
      if (Bytes.equals(scan.getStartRow(), HConstants.EMPTY_START_ROW)
          && Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) {
        return true;
      }
      byte[] smallestScanRow = scan.isReversed() ? scan.getStopRow() : scan.getStartRow();
      byte[] largestScanRow = scan.isReversed() ? scan.getStartRow() : scan.getStopRow();
      Cell firstKeyKV = this.getFirstKey();
      Cell lastKeyKV = this.getLastKey();
      boolean nonOverLapping =
          (getComparator().compareRows(firstKeyKV, largestScanRow, 0, largestScanRow.length) > 0
              && !Bytes.equals(scan.isReversed() ? scan.getStartRow() : scan.getStopRow(),
                  HConstants.EMPTY_END_ROW))
          || getComparator().compareRows(lastKeyKV, smallestScanRow, 0,
              smallestScanRow.length) < 0;
      return !nonOverLapping;
    }

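    // Editor's note (illustrative addition): a worked example of the overlap
    // test with assumed keys. If this file covers rows "b".."f", a scan over
    // ["c", "z") overlaps (firstKey "b" does not sort after the stop row "z",
    // and lastKey "f" does not sort before the start row "c"), so the file is
    // kept; for ["g", "z") the lastKey "f" sorts before the start row "g",
    // nonOverLapping becomes true, and the file is pruned without reading any
    // of its blocks:
    //
    //   Scan scan = new Scan(Bytes.toBytes("c"), Bytes.toBytes("z"));
    //   storeFileReader.passesKeyRangeFilter(scan);  // -> true for "b".."f"
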
    public Map<byte[], byte[]> loadFileInfo() throws IOException {
      Map<byte[], byte[]> fi = reader.loadFileInfo();

      byte[] b = fi.get(BLOOM_FILTER_TYPE_KEY);
      if (b != null) {
        bloomFilterType = BloomType.valueOf(Bytes.toString(b));
      }

      lastBloomKey = fi.get(LAST_BLOOM_KEY);
      // Guard against a missing last Bloom key entry in the file info.
      if (bloomFilterType == BloomType.ROWCOL && lastBloomKey != null) {
        lastBloomKeyOnlyKV = new KeyValue.KeyOnlyKeyValue(lastBloomKey, 0, lastBloomKey.length);
      }
      byte[] cnt = fi.get(DELETE_FAMILY_COUNT);
      if (cnt != null) {
        deleteFamilyCnt = Bytes.toLong(cnt);
      }

      return fi;
    }

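    // Editor's note (illustrative addition): the file-info block consulted
    // above is plain key/value metadata written when the file is created. A
    // minimal sketch of reading one of the same entries directly; the
    // storeFileReader variable is an assumption:
    //
    //   Map<byte[], byte[]> fi = storeFileReader.loadFileInfo();
    //   byte[] raw = fi.get(DELETE_FAMILY_COUNT);    // key defined in StoreFile
    //   long deleteFamilies = raw == null ? 0L : Bytes.toLong(raw);
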
    public void loadBloomfilter() {
      this.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
      this.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);
    }

    private void loadBloomfilter(BlockType blockType) {
      try {
        if (blockType == BlockType.GENERAL_BLOOM_META) {
          if (this.generalBloomFilter != null) {
            return; // Bloom has been loaded
          }

          DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
          if (bloomMeta != null) {
            // sanity check for NONE Bloom filter
            if (bloomFilterType == BloomType.NONE) {
              throw new IOException(
                  "valid bloom filter type not found in FileInfo");
            } else {
              generalBloomFilter = BloomFilterFactory.createFromMeta(bloomMeta,
                  reader);
              if (LOG.isTraceEnabled()) {
                LOG.trace("Loaded " + bloomFilterType.toString() + " "
                  + generalBloomFilter.getClass().getSimpleName()
                  + " metadata for " + reader.getName());
              }
            }
          }
        } else if (blockType == BlockType.DELETE_FAMILY_BLOOM_META) {
          if (this.deleteFamilyBloomFilter != null) {
            return; // Bloom has been loaded
          }

          DataInput bloomMeta = reader.getDeleteBloomFilterMetadata();
          if (bloomMeta != null) {
            deleteFamilyBloomFilter = BloomFilterFactory.createFromMeta(
                bloomMeta, reader);
            LOG.info("Loaded Delete Family Bloom ("
                + deleteFamilyBloomFilter.getClass().getSimpleName()
                + ") metadata for " + reader.getName());
          }
        } else {
          throw new RuntimeException("Block Type: " + blockType.toString()
              + " is not supported for Bloom filter");
        }
      } catch (IOException e) {
        LOG.error("Error reading bloom filter meta for " + blockType
            + " -- proceeding without", e);
        setBloomFilterFaulty(blockType);
      } catch (IllegalArgumentException e) {
        LOG.error("Bad bloom filter meta " + blockType
            + " -- proceeding without", e);
        setBloomFilterFaulty(blockType);
      }
    }

    private void setBloomFilterFaulty(BlockType blockType) {
      if (blockType == BlockType.GENERAL_BLOOM_META) {
        setGeneralBloomFilterFaulty();
      } else if (blockType == BlockType.DELETE_FAMILY_BLOOM_META) {
        setDeleteFamilyBloomFilterFaulty();
      }
    }

    /**
     * The number of Bloom filter entries in this store file, or an estimate
     * thereof, if the Bloom filter is not loaded. This always returns an upper
     * bound of the number of Bloom filter entries.
     *
     * @return an estimate of the number of Bloom filter entries in this file
     */
    public long getFilterEntries() {
      return generalBloomFilter != null ? generalBloomFilter.getKeyCount()
          : reader.getEntries();
    }

    public void setGeneralBloomFilterFaulty() {
      generalBloomFilter = null;
    }

    public void setDeleteFamilyBloomFilterFaulty() {
      this.deleteFamilyBloomFilter = null;
    }

    public Cell getLastKey() {
      return reader.getLastKey();
    }

    public byte[] getLastRowKey() {
      return reader.getLastRowKey();
    }

    public Cell midkey() throws IOException {
      return reader.midkey();
    }

    public long length() {
      return reader.length();
    }

    public long getTotalUncompressedBytes() {
      return reader.getTrailer().getTotalUncompressedBytes();
    }

    public long getEntries() {
      return reader.getEntries();
    }

    public long getDeleteFamilyCnt() {
      return deleteFamilyCnt;
    }

    public Cell getFirstKey() {
      return reader.getFirstKey();
    }

    public long indexSize() {
      return reader.indexSize();
    }

    public BloomType getBloomFilterType() {
      return this.bloomFilterType;
    }

    public long getSequenceID() {
      return sequenceID;
    }

    public void setSequenceID(long sequenceID) {
      this.sequenceID = sequenceID;
    }

    public void setBulkLoaded(boolean bulkLoadResult) {
      this.bulkLoadResult = bulkLoadResult;
    }

    public boolean isBulkLoaded() {
      return this.bulkLoadResult;
    }

    BloomFilter getGeneralBloomFilter() {
      return generalBloomFilter;
    }

    long getUncompressedDataIndexSize() {
      return reader.getTrailer().getUncompressedDataIndexSize();
    }

    public long getTotalBloomSize() {
      if (generalBloomFilter == null) {
        return 0;
      }
      return generalBloomFilter.getByteSize();
    }

    public int getHFileVersion() {
      return reader.getTrailer().getMajorVersion();
    }

    public int getHFileMinorVersion() {
      return reader.getTrailer().getMinorVersion();
    }

    public HFile.Reader getHFileReader() {
      return reader;
    }

    void disableBloomFilterForTesting() {
      generalBloomFilter = null;
      this.deleteFamilyBloomFilter = null;
    }

    public long getMaxTimestamp() {
      return timeRangeTracker == null ? Long.MAX_VALUE : timeRangeTracker.getMaximumTimestamp();
    }

    boolean isSkipResetSeqId() {
      return skipResetSeqId;
    }

    void setSkipResetSeqId(boolean skipResetSeqId) {
      this.skipResetSeqId = skipResetSeqId;
    }
  }

  /**
   * Useful comparators for comparing StoreFiles.
   */
  public abstract static class Comparators {
    /**
     * Comparator that compares based on the Sequence Ids of the
     * StoreFiles. Bulk loads that did not request a seq ID
     * are given a seq id of -1; thus, they are placed before all non-bulk
     * loads and before bulk loads that have a sequence Id. Among files with
     * the same sequence id, the file size (largest first) determines the
     * ordering, then the bulk load time. If there are still ties, the path
     * name is used as a tie-breaker.
     */
    public static final Comparator<StoreFile> SEQ_ID =
      Ordering.compound(ImmutableList.of(
          Ordering.natural().onResultOf(new GetSeqId()),
          Ordering.natural().onResultOf(new GetFileSize()).reverse(),
          Ordering.natural().onResultOf(new GetBulkTime()),
          Ordering.natural().onResultOf(new GetPathName())
      ));

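    // Editor's note (illustrative addition): a minimal sketch of applying this
    // comparator; "files" is an assumed, already-populated list of StoreFiles.
    //
    //   List<StoreFile> files = ...;                  // hypothetical candidates
    //   Collections.sort(files, Comparators.SEQ_ID);  // lowest seq id first
    //   StoreFile newest = files.get(files.size() - 1);
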
    private static class GetSeqId implements Function<StoreFile, Long> {
      @Override
      public Long apply(StoreFile sf) {
        return sf.getMaxSequenceId();
      }
    }

    private static class GetFileSize implements Function<StoreFile, Long> {
      @Override
      public Long apply(StoreFile sf) {
        if (sf.getReader() != null) {
          return sf.getReader().length();
        } else {
          // the reader may be null for compacted files if the archiving
          // has failed.
          return -1L;
        }
      }
    }

    private static class GetBulkTime implements Function<StoreFile, Long> {
      @Override
      public Long apply(StoreFile sf) {
        if (!sf.isBulkLoadResult()) {
          return Long.MAX_VALUE;
        }
        return sf.getBulkLoadTimestamp();
      }
    }

    private static class GetPathName implements Function<StoreFile, String> {
      @Override
      public String apply(StoreFile sf) {
        return sf.getPath().getName();
      }
    }
  }
}