/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.DataInput;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Map;
import java.util.SortedSet;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.hfile.BlockType;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.regionserver.compactions.Compactor;
import org.apache.hadoop.hbase.util.BloomFilter;
import org.apache.hadoop.hbase.util.BloomFilterFactory;
import org.apache.hadoop.hbase.util.BloomFilterWriter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Writables;
import org.apache.hadoop.io.WritableUtils;

import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Ordering;

/**
 * A Store data file.  Stores usually have one or more of these files.  They
 * are produced by flushing the memstore to disk.  To
 * create, instantiate a writer using {@link StoreFile.WriterBuilder}
 * and append data. Be sure to add any metadata before calling close on the
 * Writer (use the appendMetadata convenience methods). On close, a StoreFile
 * is sitting in the Filesystem.  To refer to it, create a StoreFile instance
 * passing filesystem and path.  To read, call {@link #createReader()}.
 * <p>StoreFiles may also reference store files in another Store.
 *
 * The reason for this somewhat awkward pattern, where a different instance is
 * used for the writer and the reader, is that we write once but read a lot more.
 */
@InterfaceAudience.LimitedPrivate("Coprocessor")
public class StoreFile {
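  /*
   * Editorial usage sketch (not part of the original source): the
   * write-then-read cycle described in the class comment above. All names
   * here -- conf, cacheConf, fs, cfDir, hFileContext, cells, maxSeqId -- are
   * hypothetical stand-ins for objects the caller already has:
   *
   *   StoreFile.Writer w = new StoreFile.WriterBuilder(conf, cacheConf, fs)
   *       .withOutputDir(cfDir)
   *       .withBloomType(BloomType.ROW)
   *       .withFileContext(hFileContext)
   *       .build();
   *   for (Cell cell : cells) {
   *     w.append(cell);
   *   }
   *   w.appendMetadata(maxSeqId, false); // metadata must go in before close()
   *   w.close();
   *
   *   StoreFile sf = new StoreFile(fs, w.getPath(), conf, cacheConf, BloomType.ROW);
   *   StoreFile.Reader r = sf.createReader();
   */
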
  private static final Log LOG = LogFactory.getLog(StoreFile.class.getName());

  // Keys for fileinfo values in HFile

  /** Max Sequence ID in FileInfo */
  public static final byte [] MAX_SEQ_ID_KEY = Bytes.toBytes("MAX_SEQ_ID_KEY");

  /** Major compaction flag in FileInfo */
  public static final byte[] MAJOR_COMPACTION_KEY =
      Bytes.toBytes("MAJOR_COMPACTION_KEY");

  /** Flag in FileInfo to exclude the file from minor compactions */
  public static final byte[] EXCLUDE_FROM_MINOR_COMPACTION_KEY =
      Bytes.toBytes("EXCLUDE_FROM_MINOR_COMPACTION");

  /** Bloom filter Type in FileInfo */
  public static final byte[] BLOOM_FILTER_TYPE_KEY =
      Bytes.toBytes("BLOOM_FILTER_TYPE");

  /** Delete Family Count in FileInfo */
  public static final byte[] DELETE_FAMILY_COUNT =
      Bytes.toBytes("DELETE_FAMILY_COUNT");

  /** Last Bloom filter key in FileInfo */
  private static final byte[] LAST_BLOOM_KEY = Bytes.toBytes("LAST_BLOOM_KEY");

  /** Key for Timerange information in metadata */
  public static final byte[] TIMERANGE_KEY = Bytes.toBytes("TIMERANGE");

  /** Key for timestamp of earliest-put in metadata */
  public static final byte[] EARLIEST_PUT_TS = Bytes.toBytes("EARLIEST_PUT_TS");

  /** Key for the number of mob cells in metadata */
  public static final byte[] MOB_CELLS_COUNT = Bytes.toBytes("MOB_CELLS_COUNT");

  private final StoreFileInfo fileInfo;
  private final FileSystem fs;

  // Block cache configuration and reference.
  private final CacheConfig cacheConf;

  // Max sequence id (from the MAX_SEQ_ID_KEY entry in the backing HFile's
  // metadata). Set when we obtain a Reader.
  private long sequenceid = -1;

  // max of the MemstoreTS in the KVs in this store
  // Set when we obtain a Reader.
  private long maxMemstoreTS = -1;

  public long getMaxMemstoreTS() {
    return maxMemstoreTS;
  }

  public void setMaxMemstoreTS(long maxMemstoreTS) {
    this.maxMemstoreTS = maxMemstoreTS;
  }

  // If true, this file was product of a major compaction.  It's then set
  // whenever you get a Reader.
  private AtomicBoolean majorCompaction = null;

  // If true, this file should not be included in minor compactions.
  // It's set whenever you get a Reader.
  private boolean excludeFromMinorCompaction = false;

  /** Meta key set when store file is a result of a bulk load */
  public static final byte[] BULKLOAD_TASK_KEY =
    Bytes.toBytes("BULKLOAD_SOURCE_TASK");
  public static final byte[] BULKLOAD_TIME_KEY =
    Bytes.toBytes("BULKLOAD_TIMESTAMP");

  /**
   * Map of the metadata entries in the corresponding HFile
   */
  private Map<byte[], byte[]> metadataMap;

  // StoreFile.Reader
  private volatile Reader reader;

  /**
   * Bloom filter type specified in column family configuration. Does not
   * necessarily correspond to the Bloom filter type present in the HFile.
   */
  private final BloomType cfBloomType;

  /**
   * Key for skipping resetting sequence id in metadata.
   * For bulk loaded hfiles, the scanner resets the cell seqId to the latest one;
   * if this metadata is set to true, the reset is skipped.
   */
  public static final byte[] SKIP_RESET_SEQ_ID = Bytes.toBytes("SKIP_RESET_SEQ_ID");

  /**
   * Constructor, loads a reader and its indices, etc. May allocate a
   * substantial amount of RAM depending on the underlying files (10-20MB?).
   *
   * @param fs  The current file system to use.
   * @param p  The path of the file.
   * @param conf  The current configuration.
   * @param cacheConf  The cache configuration and block cache reference.
   * @param cfBloomType The bloom type to use for this store file as specified
   *          by column family configuration. This may or may not be the same
   *          as the Bloom filter type actually present in the HFile, because
   *          column family configuration might change. If this is
   *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
   * @throws IOException When opening the reader fails.
   */
  public StoreFile(final FileSystem fs, final Path p, final Configuration conf,
        final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException {
    this(fs, new StoreFileInfo(conf, fs, p), conf, cacheConf, cfBloomType);
  }

  /**
   * Constructor, loads a reader and its indices, etc. May allocate a
   * substantial amount of RAM depending on the underlying files (10-20MB?).
   *
   * @param fs  The current file system to use.
   * @param fileInfo  The store file information.
   * @param conf  The current configuration.
   * @param cacheConf  The cache configuration and block cache reference.
   * @param cfBloomType The bloom type to use for this store file as specified
   *          by column family configuration. This may or may not be the same
   *          as the Bloom filter type actually present in the HFile, because
   *          column family configuration might change. If this is
   *          {@link BloomType#NONE}, the existing Bloom filter is ignored.
   * @throws IOException When opening the reader fails.
   */
  public StoreFile(final FileSystem fs, final StoreFileInfo fileInfo, final Configuration conf,
      final CacheConfig cacheConf, final BloomType cfBloomType) throws IOException {
    this.fs = fs;
    this.fileInfo = fileInfo;
    this.cacheConf = cacheConf;

    if (BloomFilterFactory.isGeneralBloomEnabled(conf)) {
      this.cfBloomType = cfBloomType;
    } else {
      LOG.info("Ignoring bloom filter check for file " + this.getPath() + ": " +
          "cfBloomType=" + cfBloomType + " (disabled in config)");
      this.cfBloomType = BloomType.NONE;
    }
  }

  /**
   * Clone constructor.
   * @param other The StoreFile to clone from
   */
  public StoreFile(final StoreFile other) {
    this.fs = other.fs;
    this.fileInfo = other.fileInfo;
    this.cacheConf = other.cacheConf;
    this.cfBloomType = other.cfBloomType;
  }

  /**
   * @return the StoreFileInfo this StoreFile was constructed with.
   */
  public StoreFileInfo getFileInfo() {
    return this.fileInfo;
  }

  /**
   * @return Path or null if this StoreFile was made with a Stream.
   */
  public Path getPath() {
    return this.fileInfo.getPath();
  }

  /**
   * @return The qualified path of this StoreFile
   */
  public Path getQualifiedPath() {
    return this.fileInfo.getPath().makeQualified(fs);
  }

  /**
   * @return True if this is a StoreFile reference; call
   * after {@link #open(boolean)} or you may get a wrong answer.
   */
  public boolean isReference() {
    return this.fileInfo.isReference();
  }

  /**
   * @return True if this file was made by a major compaction.
   */
  public boolean isMajorCompaction() {
    if (this.majorCompaction == null) {
      throw new NullPointerException("This has not been set yet");
    }
    return this.majorCompaction.get();
  }

  /**
   * @return True if this file should not be part of a minor compaction.
   */
  public boolean excludeFromMinorCompaction() {
    return this.excludeFromMinorCompaction;
  }

  /**
   * @return This file's maximum edit sequence id.
   */
  public long getMaxSequenceId() {
    return this.sequenceid;
  }

  public long getModificationTimeStamp() throws IOException {
    return (fileInfo == null) ? 0 : fileInfo.getModificationTime();
  }

  /**
   * Only used by the Striped Compaction Policy
   * @param key the metadata key to look up
   * @return the value associated with the metadata key
   */
  public byte[] getMetadataValue(byte[] key) {
    return metadataMap.get(key);
  }

  /**
   * Return the largest memstoreTS found across all storefiles in
   * the given list. Store files that were created by a mapreduce
   * bulk load are ignored, as they do not correspond to any specific
   * put operation, and thus do not have a memstoreTS associated with them.
   * @return 0 if no non-bulk-load files are provided or this is a Store
   * that does not yet have any store files.
   */
  public static long getMaxMemstoreTSInList(Collection<StoreFile> sfs) {
    long max = 0;
    for (StoreFile sf : sfs) {
      if (!sf.isBulkLoadResult()) {
        max = Math.max(max, sf.getMaxMemstoreTS());
      }
    }
    return max;
  }

  /**
   * Return the highest sequence ID found across all storefiles in
   * the given list.
   * @param sfs the store files to inspect
   * @return 0 if no store files are provided or this is a Store that
   * does not yet have any store files.
   */
  public static long getMaxSequenceIdInList(Collection<StoreFile> sfs) {
    long max = 0;
    for (StoreFile sf : sfs) {
      max = Math.max(max, sf.getMaxSequenceId());
    }
    return max;
  }

  /**
   * Check if this storefile was created by bulk load.
   * When an hfile is bulk loaded into HBase, we append
   * '_SeqId_&lt;id-when-loaded&gt;' to the hfile name, unless
   * "hbase.mapreduce.bulkload.assign.sequenceNumbers" is
   * explicitly turned off.
   * If "hbase.mapreduce.bulkload.assign.sequenceNumbers"
   * is turned off, fall back to BULKLOAD_TIME_KEY.
   * @return true if this storefile was created by bulk load.
   */
  boolean isBulkLoadResult() {
    boolean bulkLoadedHFile = false;
    String fileName = this.getPath().getName();
    int startPos = fileName.indexOf("SeqId_");
    if (startPos != -1) {
      bulkLoadedHFile = true;
    }
    return bulkLoadedHFile || metadataMap.containsKey(BULKLOAD_TIME_KEY);
  }
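
  /*
   * Illustrative note (not part of the original source): a bulk-loaded hfile
   * name carries its assigned sequence id, e.g. the hypothetical name
   *
   *   41d3a1f0a5c24a0f9e8c1f2b3d4e5f6a_SeqId_4_
   *
   * isBulkLoadResult() keys off the "SeqId_" marker, and open() below parses
   * the digits between "SeqId_" and the next '_' to recover the sequence id.
   */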

  /**
   * Return the timestamp at which this bulk load file was generated.
   */
  public long getBulkLoadTimestamp() {
    byte[] bulkLoadTimestamp = metadataMap.get(BULKLOAD_TIME_KEY);
    return (bulkLoadTimestamp == null) ? 0 : Bytes.toLong(bulkLoadTimestamp);
  }

  /**
   * @return the cached value of HDFS blocks distribution. The cached value is
   * calculated when store file is opened.
   */
  public HDFSBlocksDistribution getHDFSBlockDistribution() {
    return this.fileInfo.getHDFSBlockDistribution();
  }

  /**
   * Opens reader on this store file.  Called by {@link #createReader(boolean)}.
   * @return Reader for the store file.
   * @throws IOException
   * @see #closeReader(boolean)
   */
  private Reader open(boolean canUseDropBehind) throws IOException {
    if (this.reader != null) {
      throw new IllegalAccessError("Already open");
    }

    // Open the StoreFile.Reader
    this.reader = fileInfo.open(this.fs, this.cacheConf, canUseDropBehind);

    // Load up indices and fileinfo. This also loads Bloom filter type.
    metadataMap = Collections.unmodifiableMap(this.reader.loadFileInfo());

    // Read in our metadata.
    byte [] b = metadataMap.get(MAX_SEQ_ID_KEY);
    if (b != null) {
      // By convention, if this is a half hfile, the top half has a sequence
      // number > the bottom half. That's why we add one below. It's done in
      // case the two halves are ever merged back together (rare). Without it,
      // on open of store, since store files are distinguished by sequence id,
      // the one half would subsume the other.
      this.sequenceid = Bytes.toLong(b);
      if (fileInfo.isTopReference()) {
        this.sequenceid += 1;
      }
    }

    if (isBulkLoadResult()) {
      // generate the sequenceId from the fileName
      // fileName is of the form <randomName>_SeqId_<id-when-loaded>_
      String fileName = this.getPath().getName();
      // Use lastIndexOf() to get the last, most recent bulk load seqId.
      int startPos = fileName.lastIndexOf("SeqId_");
      if (startPos != -1) {
        this.sequenceid = Long.parseLong(fileName.substring(startPos + 6,
            fileName.indexOf('_', startPos + 6)));
        // Handle reference files as done above.
        if (fileInfo.isTopReference()) {
          this.sequenceid += 1;
        }
      }
      // SKIP_RESET_SEQ_ID only applies to bulk loaded files. In mob compaction,
      // the hfile whose cells contain the paths of new mob files is bulk loaded
      // into hbase; those cells keep the same seqIds as the old ones. We do not
      // want to assign them new seqIds, since that could garble the visibility
      // of cells that share a row key but have different seqIds.
      this.reader.setSkipResetSeqId(isSkipResetSeqId(metadataMap.get(SKIP_RESET_SEQ_ID)));
      this.reader.setBulkLoaded(true);
    }
    this.reader.setSequenceID(this.sequenceid);

    b = metadataMap.get(HFile.Writer.MAX_MEMSTORE_TS_KEY);
    if (b != null) {
      this.maxMemstoreTS = Bytes.toLong(b);
    }

    b = metadataMap.get(MAJOR_COMPACTION_KEY);
    if (b != null) {
      boolean mc = Bytes.toBoolean(b);
      if (this.majorCompaction == null) {
        this.majorCompaction = new AtomicBoolean(mc);
      } else {
        this.majorCompaction.set(mc);
      }
    } else {
      // Presume it is not major compacted if it doesn't explicitly say so.
      // HFileOutputFormat explicitly sets the major compacted key.
      this.majorCompaction = new AtomicBoolean(false);
    }

    b = metadataMap.get(EXCLUDE_FROM_MINOR_COMPACTION_KEY);
    this.excludeFromMinorCompaction = (b != null && Bytes.toBoolean(b));

    BloomType hfileBloomType = reader.getBloomFilterType();
    if (cfBloomType != BloomType.NONE) {
      reader.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
      if (hfileBloomType != cfBloomType) {
        LOG.info("HFile Bloom filter type for "
            + reader.getHFileReader().getName() + ": " + hfileBloomType
            + ", but " + cfBloomType + " specified in column family "
            + "configuration");
      }
    } else if (hfileBloomType != BloomType.NONE) {
      LOG.info("Bloom filter turned off by CF config for "
          + reader.getHFileReader().getName());
    }

    // load delete family bloom filter
    reader.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);

    try {
      byte [] timerangeBytes = metadataMap.get(TIMERANGE_KEY);
      if (timerangeBytes != null) {
        this.reader.timeRangeTracker = new TimeRangeTracker();
        Writables.copyWritable(timerangeBytes, this.reader.timeRangeTracker);
      }
    } catch (IllegalArgumentException e) {
      LOG.error("Error reading timestamp range data from meta -- " +
          "proceeding without", e);
      this.reader.timeRangeTracker = null;
    }
    return this.reader;
  }

  public Reader createReader() throws IOException {
    return createReader(false);
  }

  /**
   * @return Reader for StoreFile. Creates the Reader if necessary.
   * @throws IOException
   */
  public Reader createReader(boolean canUseDropBehind) throws IOException {
    if (this.reader == null) {
      try {
        this.reader = open(canUseDropBehind);
      } catch (IOException e) {
        try {
          this.closeReader(true);
        } catch (IOException ee) {
          // Ignored; the original exception is rethrown below.
        }
        throw e;
      }
    }
    return this.reader;
  }

  /**
   * @return Current reader.  Must call createReader first, otherwise returns null.
   * @see #createReader()
   */
  public Reader getReader() {
    return this.reader;
  }

  /**
   * @param evictOnClose whether to evict blocks belonging to this file
   * @throws IOException
   */
  public synchronized void closeReader(boolean evictOnClose)
      throws IOException {
    if (this.reader != null) {
      this.reader.close(evictOnClose);
      this.reader = null;
    }
  }

  /**
   * Delete this file
   * @throws IOException
   */
  public void deleteReader() throws IOException {
    closeReader(true);
    this.fs.delete(getPath(), true);
  }

  @Override
  public String toString() {
    return this.fileInfo.toString();
  }

  /**
   * @return a lengthy description of this StoreFile, suitable for debug output
   */
  public String toStringDetailed() {
    StringBuilder sb = new StringBuilder();
    sb.append(this.getPath().toString());
    sb.append(", isReference=").append(isReference());
    sb.append(", isBulkLoadResult=").append(isBulkLoadResult());
    if (isBulkLoadResult()) {
      sb.append(", bulkLoadTS=").append(getBulkLoadTimestamp());
    } else {
      sb.append(", seqid=").append(getMaxSequenceId());
    }
    sb.append(", majorCompaction=").append(isMajorCompaction());

    return sb.toString();
  }

  /**
   * Gets whether to skip resetting the sequence id for cells.
   * @param skipResetSeqId The serialized boolean flag as a byte array.
   * @return Whether to skip resetting the sequence id.
   */
  private boolean isSkipResetSeqId(byte[] skipResetSeqId) {
    if (skipResetSeqId != null && skipResetSeqId.length == 1) {
      return Bytes.toBoolean(skipResetSeqId);
    }
    return false;
  }
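
  /*
   * Editorial sketch (not part of the original source): a tool that prepares
   * hfiles for bulk load can opt out of seqId resetting by writing this flag
   * into the file info before closing its writer (hypothetical `writer`):
   *
   *   writer.appendFileInfo(StoreFile.SKIP_RESET_SEQ_ID, Bytes.toBytes(true));
   *
   * open() reads the flag back through isSkipResetSeqId() and propagates it
   * to the Reader via setSkipResetSeqId().
   */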

  public static class WriterBuilder {
    private final Configuration conf;
    private final CacheConfig cacheConf;
    private final FileSystem fs;

    private CellComparator comparator = CellComparator.COMPARATOR;
    private BloomType bloomType = BloomType.NONE;
    private long maxKeyCount = 0;
    private Path dir;
    private Path filePath;
    private InetSocketAddress[] favoredNodes;
    private HFileContext fileContext;
    private boolean shouldDropCacheBehind = false;

    public WriterBuilder(Configuration conf, CacheConfig cacheConf,
        FileSystem fs) {
      this.conf = conf;
      this.cacheConf = cacheConf;
      this.fs = fs;
    }

    /**
     * Use either this method or {@link #withFilePath}, but not both.
     * @param dir Path to column family directory. The directory is created if
     *          it does not exist. The file is given a unique name within this
     *          directory.
     * @return this (for chained invocation)
     */
    public WriterBuilder withOutputDir(Path dir) {
      Preconditions.checkNotNull(dir);
      this.dir = dir;
      return this;
    }

    /**
     * Use either this method or {@link #withOutputDir}, but not both.
     * @param filePath the StoreFile path to write
     * @return this (for chained invocation)
     */
    public WriterBuilder withFilePath(Path filePath) {
      Preconditions.checkNotNull(filePath);
      this.filePath = filePath;
      return this;
    }

    /**
     * @param favoredNodes an array of favored nodes or possibly null
     * @return this (for chained invocation)
     */
    public WriterBuilder withFavoredNodes(InetSocketAddress[] favoredNodes) {
      this.favoredNodes = favoredNodes;
      return this;
    }

    public WriterBuilder withComparator(CellComparator comparator) {
      Preconditions.checkNotNull(comparator);
      this.comparator = comparator;
      return this;
    }

    public WriterBuilder withBloomType(BloomType bloomType) {
      Preconditions.checkNotNull(bloomType);
      this.bloomType = bloomType;
      return this;
    }

    /**
     * @param maxKeyCount estimated maximum number of keys we expect to add
     * @return this (for chained invocation)
     */
    public WriterBuilder withMaxKeyCount(long maxKeyCount) {
      this.maxKeyCount = maxKeyCount;
      return this;
    }

    public WriterBuilder withFileContext(HFileContext fileContext) {
      this.fileContext = fileContext;
      return this;
    }

    public WriterBuilder withShouldDropCacheBehind(boolean shouldDropCacheBehind) {
      this.shouldDropCacheBehind = shouldDropCacheBehind;
      return this;
    }

    /**
     * Create a store file writer. The client is responsible for closing the
     * file when done. Any metadata must be added BEFORE closing, using
     * {@link Writer#appendMetadata}.
     */
    public Writer build() throws IOException {
      if ((dir == null ? 0 : 1) + (filePath == null ? 0 : 1) != 1) {
        throw new IllegalArgumentException("Either specify parent directory " +
            "or file path");
      }

      if (dir == null) {
        dir = filePath.getParent();
      }

      if (!fs.exists(dir)) {
        fs.mkdirs(dir);
      }

      if (filePath == null) {
        filePath = getUniqueFile(fs, dir);
        if (!BloomFilterFactory.isGeneralBloomEnabled(conf)) {
          bloomType = BloomType.NONE;
        }
      }

      if (comparator == null) {
        comparator = CellComparator.COMPARATOR;
      }
      return new Writer(fs, filePath,
          conf, cacheConf, comparator, bloomType, maxKeyCount, favoredNodes, fileContext);
    }
  }
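
  /*
   * Editorial sketch (not part of the original source): build() accepts
   * exactly one of withOutputDir()/withFilePath(). A compaction-style writer
   * that targets an explicit file (hypothetical names throughout):
   *
   *   StoreFile.Writer w = new StoreFile.WriterBuilder(conf, cacheConf, fs)
   *       .withFilePath(tmpCompactedFile)     // instead of withOutputDir(dir)
   *       .withComparator(CellComparator.COMPARATOR)
   *       .withBloomType(BloomType.ROWCOL)
   *       .withMaxKeyCount(estimatedKeys)
   *       .withFileContext(hFileContext)
   *       .build();
   *
   * Supplying both paths, or neither, makes build() throw
   * IllegalArgumentException.
   */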

  /**
   * @param fs the filesystem to create the file in
   * @param dir Directory to create file in.
   * @return random filename inside passed <code>dir</code>
   */
  public static Path getUniqueFile(final FileSystem fs, final Path dir)
      throws IOException {
    if (!fs.getFileStatus(dir).isDirectory()) {
      throw new IOException("Expecting " + dir.toString() +
        " to be a directory");
    }
    return new Path(dir, UUID.randomUUID().toString().replaceAll("-", ""));
  }

  public Long getMinimumTimestamp() {
    return (getReader().timeRangeTracker == null) ?
        null :
        getReader().timeRangeTracker.getMinimumTimestamp();
  }

  /**
   * Gets the approximate mid-point of this file that is optimal for use in splitting it.
   * @param comparator Comparator used to compare KVs.
   * @return The split point row, or null if splitting is not possible, or reader is null.
   */
  @SuppressWarnings("deprecation")
  byte[] getFileSplitPoint(CellComparator comparator) throws IOException {
    if (this.reader == null) {
      LOG.warn("Storefile " + this + " Reader is null; cannot get split point");
      return null;
    }
    // Get the first, last, and mid keys. The midkey is the key that starts a
    // block in the middle of the hfile; it has a column and timestamp, but we
    // need to return just the row we want to split on.
    Cell midkey = this.reader.midkey();
    if (midkey != null) {
      Cell firstKey = this.reader.getFirstKey();
      Cell lastKey = this.reader.getLastKey();
      // if the midkey is the same as the first or last keys, we cannot (ever) split this region.
      if (comparator.compareRows(midkey, firstKey) == 0
          || comparator.compareRows(midkey, lastKey) == 0) {
        if (LOG.isDebugEnabled()) {
          LOG.debug("cannot split because midkey is the same as first or last row");
        }
        return null;
      }
      return CellUtil.cloneRow(midkey);
    }
    return null;
  }

  /**
   * A StoreFile writer.  Use this to write HBase Store Files. It is package
   * local because it is an implementation detail of the HBase regionserver.
   */
  public static class Writer implements Compactor.CellSink {
    private final BloomFilterWriter generalBloomFilterWriter;
    private final BloomFilterWriter deleteFamilyBloomFilterWriter;
    private final BloomType bloomType;
    private byte[] lastBloomKey;
    private int lastBloomKeyOffset, lastBloomKeyLen;
    private Cell lastCell = null;
    private long earliestPutTs = HConstants.LATEST_TIMESTAMP;
    private Cell lastDeleteFamilyCell = null;
    private long deleteFamilyCnt = 0;

    /** Bytes per Checksum */
    protected int bytesPerChecksum;

    TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
    /* isTimeRangeTrackerSet keeps track of whether the timeRange has already
     * been set. When flushing a memstore, we set the TimeRange and use this
     * variable to indicate that it doesn't need to be calculated again while
     * appending KeyValues.
     * It is not set in the compaction case, where it is recalculated using
     * only the appended KeyValues. */
    boolean isTimeRangeTrackerSet = false;

    protected HFile.Writer writer;
    private KeyValue.KeyOnlyKeyValue lastBloomKeyOnlyKV = null;

    /**
     * Creates an HFile.Writer that also writes helpful meta data.
     * @param fs file system to write to
     * @param path file name to create
     * @param conf user configuration
     * @param comparator key comparator
     * @param bloomType bloom filter setting
     * @param maxKeys the expected maximum number of keys to be added. Was used
     *        for Bloom filter size in {@link HFile} format version 1.
     * @param favoredNodes an array of favored nodes or possibly null
     * @param fileContext the HFile context
     * @throws IOException problem writing to FS
     */
    private Writer(FileSystem fs, Path path,
        final Configuration conf,
        CacheConfig cacheConf,
        final CellComparator comparator, BloomType bloomType, long maxKeys,
        InetSocketAddress[] favoredNodes, HFileContext fileContext)
            throws IOException {
      writer = HFile.getWriterFactory(conf, cacheConf)
          .withPath(fs, path)
          .withComparator(comparator)
          .withFavoredNodes(favoredNodes)
          .withFileContext(fileContext)
          .create();

      generalBloomFilterWriter = BloomFilterFactory.createGeneralBloomAtWrite(
          conf, cacheConf, bloomType,
          (int) Math.min(maxKeys, Integer.MAX_VALUE), writer);

      if (generalBloomFilterWriter != null) {
        this.bloomType = bloomType;
        if (this.bloomType == BloomType.ROWCOL) {
          lastBloomKeyOnlyKV = new KeyValue.KeyOnlyKeyValue();
        }
        if (LOG.isTraceEnabled()) LOG.trace("Bloom filter type for " + path + ": " +
          this.bloomType + ", " + generalBloomFilterWriter.getClass().getSimpleName());
      } else {
        // Not using Bloom filters.
        this.bloomType = BloomType.NONE;
      }

      // initialize delete family Bloom filter when there is NO RowCol Bloom
      // filter
      if (this.bloomType != BloomType.ROWCOL) {
        this.deleteFamilyBloomFilterWriter = BloomFilterFactory
            .createDeleteBloomAtWrite(conf, cacheConf,
                (int) Math.min(maxKeys, Integer.MAX_VALUE), writer);
      } else {
        deleteFamilyBloomFilterWriter = null;
      }
      if (deleteFamilyBloomFilterWriter != null) {
        if (LOG.isTraceEnabled()) LOG.trace("Delete Family Bloom filter type for " + path + ": "
            + deleteFamilyBloomFilterWriter.getClass().getSimpleName());
      }
    }

    /**
     * Writes meta data.
     * Call before {@link #close()} since it is written as meta data to this file.
     * @param maxSequenceId Maximum sequence id.
     * @param majorCompaction True if this file is product of a major compaction
     * @throws IOException problem writing to FS
     */
    public void appendMetadata(final long maxSequenceId, final boolean majorCompaction)
    throws IOException {
      writer.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId));
      writer.appendFileInfo(MAJOR_COMPACTION_KEY,
          Bytes.toBytes(majorCompaction));
      appendTrackedTimestampsToMetadata();
    }

    /**
     * Writes meta data.
     * Call before {@link #close()} since it is written as meta data to this file.
     * @param maxSequenceId Maximum sequence id.
     * @param majorCompaction True if this file is product of a major compaction
     * @param mobCellsCount The number of mob cells.
     * @throws IOException problem writing to FS
     */
    public void appendMetadata(final long maxSequenceId, final boolean majorCompaction,
        final long mobCellsCount) throws IOException {
      writer.appendFileInfo(MAX_SEQ_ID_KEY, Bytes.toBytes(maxSequenceId));
      writer.appendFileInfo(MAJOR_COMPACTION_KEY, Bytes.toBytes(majorCompaction));
      writer.appendFileInfo(MOB_CELLS_COUNT, Bytes.toBytes(mobCellsCount));
      appendTrackedTimestampsToMetadata();
    }

    /**
     * Add the timestamp range and the earliest put timestamp to the metadata.
     */
    public void appendTrackedTimestampsToMetadata() throws IOException {
      appendFileInfo(TIMERANGE_KEY, WritableUtils.toByteArray(timeRangeTracker));
      appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs));
    }

    /**
     * Set the TimeRangeTracker to use.
     * @param trt an externally computed TimeRangeTracker
     */
    public void setTimeRangeTracker(final TimeRangeTracker trt) {
      this.timeRangeTracker = trt;
      isTimeRangeTrackerSet = true;
    }

    /**
     * Record the earliest Put timestamp.
     *
     * If the timeRangeTracker is not set,
     * update TimeRangeTracker to include the timestamp of this key
     * @param cell the cell whose timestamp is tracked
     */
    public void trackTimestamps(final Cell cell) {
      if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) {
        earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp());
      }
      if (!isTimeRangeTrackerSet) {
        timeRangeTracker.includeTimestamp(cell);
      }
    }

    private void appendGeneralBloomfilter(final Cell cell) throws IOException {
      if (this.generalBloomFilterWriter != null) {
        // only add to the bloom filter on a new, unique key
        boolean newKey = true;
        if (this.lastCell != null) {
          switch (bloomType) {
          case ROW:
            newKey = !CellUtil.matchingRows(cell, lastCell);
            break;
          case ROWCOL:
            newKey = !CellUtil.matchingRowColumn(cell, lastCell);
            break;
          case NONE:
            newKey = false;
            break;
          default:
            throw new IOException("Invalid Bloom filter type: " + bloomType +
                " (ROW or ROWCOL expected)");
          }
        }
        if (newKey) {
          /*
           * http://2.bp.blogspot.com/_Cib_A77V54U/StZMrzaKufI/AAAAAAAAADo/ZhK7bGoJdMQ/s400/KeyValue.png
           * Key = RowLen + Row + FamilyLen + Column [Family + Qualifier] + TimeStamp
           *
           * 2 Types of Filtering:
           *  1. Row = Row
           *  2. RowCol = Row + Qualifier
           */
          byte[] bloomKey = null;
          // Used with ROW_COL bloom
          KeyValue bloomKeyKV = null;
          int bloomKeyOffset, bloomKeyLen;

          switch (bloomType) {
          case ROW:
            bloomKey = cell.getRowArray();
            bloomKeyOffset = cell.getRowOffset();
            bloomKeyLen = cell.getRowLength();
            break;
          case ROWCOL:
            // merge(row, qualifier)
            // TODO: could save one buffer copy in case of compound Bloom
            // filters when this involves creating a KeyValue
            // TODO : Handle while writes also
            bloomKeyKV = KeyValueUtil.createFirstOnRow(cell.getRowArray(), cell.getRowOffset(),
                cell.getRowLength(),
                HConstants.EMPTY_BYTE_ARRAY, 0, 0, cell.getQualifierArray(),
                cell.getQualifierOffset(),
                cell.getQualifierLength());
            bloomKey = bloomKeyKV.getBuffer();
            bloomKeyOffset = bloomKeyKV.getKeyOffset();
            bloomKeyLen = bloomKeyKV.getKeyLength();
            break;
          default:
            throw new IOException("Invalid Bloom filter type: " + bloomType +
                " (ROW or ROWCOL expected)");
          }
          generalBloomFilterWriter.add(bloomKey, bloomKeyOffset, bloomKeyLen);
          if (lastBloomKey != null) {
            int res = 0;
            // hbase:meta does not have blooms. So we need not have special interpretation
            // of the hbase:meta cells.  We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom
            if (bloomType == BloomType.ROW) {
              res = Bytes.BYTES_RAWCOMPARATOR.compare(bloomKey, bloomKeyOffset, bloomKeyLen,
                  lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen);
            } else {
              // TODO : Caching of kv components becomes important in these cases
              res = CellComparator.COMPARATOR.compare(bloomKeyKV, lastBloomKeyOnlyKV);
            }
            if (res <= 0) {
              throw new IOException("Non-increasing Bloom keys: "
                  + Bytes.toStringBinary(bloomKey, bloomKeyOffset, bloomKeyLen) + " after "
                  + Bytes.toStringBinary(lastBloomKey, lastBloomKeyOffset, lastBloomKeyLen));
            }
          }
          lastBloomKey = bloomKey;
          lastBloomKeyOffset = bloomKeyOffset;
          lastBloomKeyLen = bloomKeyLen;
          if (bloomType == BloomType.ROWCOL) {
            lastBloomKeyOnlyKV.setKey(bloomKey, bloomKeyOffset, bloomKeyLen);
          }
          this.lastCell = cell;
        }
      }
    }

    private void appendDeleteFamilyBloomFilter(final Cell cell)
        throws IOException {
      if (!CellUtil.isDeleteFamily(cell) && !CellUtil.isDeleteFamilyVersion(cell)) {
        return;
      }

      // increase the number of delete family cells in the store file
      deleteFamilyCnt++;
      if (null != this.deleteFamilyBloomFilterWriter) {
        boolean newKey = true;
        if (lastDeleteFamilyCell != null) {
          // hbase:meta does not have blooms. So we need not have special interpretation
          // of the hbase:meta cells
          newKey = !CellUtil.matchingRows(cell, lastDeleteFamilyCell);
        }
        if (newKey) {
          this.deleteFamilyBloomFilterWriter.add(cell.getRowArray(),
              cell.getRowOffset(), cell.getRowLength());
          this.lastDeleteFamilyCell = cell;
        }
      }
    }

    public void append(final Cell cell) throws IOException {
      appendGeneralBloomfilter(cell);
      appendDeleteFamilyBloomFilter(cell);
      writer.append(cell);
      trackTimestamps(cell);
    }

    public Path getPath() {
      return this.writer.getPath();
    }

    public boolean hasGeneralBloom() {
      return this.generalBloomFilterWriter != null;
    }

    /**
     * For unit testing only.
     *
     * @return the Bloom filter used by this writer.
     */
    BloomFilterWriter getGeneralBloomWriter() {
      return generalBloomFilterWriter;
    }

    private boolean closeBloomFilter(BloomFilterWriter bfw) throws IOException {
      boolean haveBloom = (bfw != null && bfw.getKeyCount() > 0);
      if (haveBloom) {
        bfw.compactBloom();
      }
      return haveBloom;
    }

    private boolean closeGeneralBloomFilter() throws IOException {
      boolean hasGeneralBloom = closeBloomFilter(generalBloomFilterWriter);

      // add the general Bloom filter writer and append file info
      if (hasGeneralBloom) {
        writer.addGeneralBloomFilter(generalBloomFilterWriter);
        writer.appendFileInfo(BLOOM_FILTER_TYPE_KEY,
            Bytes.toBytes(bloomType.toString()));
        if (lastBloomKey != null) {
          writer.appendFileInfo(LAST_BLOOM_KEY, Arrays.copyOfRange(
              lastBloomKey, lastBloomKeyOffset, lastBloomKeyOffset
                  + lastBloomKeyLen));
        }
      }
      return hasGeneralBloom;
    }

    private boolean closeDeleteFamilyBloomFilter() throws IOException {
      boolean hasDeleteFamilyBloom = closeBloomFilter(deleteFamilyBloomFilterWriter);

      // add the delete family Bloom filter writer
      if (hasDeleteFamilyBloom) {
        writer.addDeleteFamilyBloomFilter(deleteFamilyBloomFilterWriter);
      }

      // append file info about the number of delete family kvs
      // even if there is no delete family Bloom.
      writer.appendFileInfo(DELETE_FAMILY_COUNT,
          Bytes.toBytes(this.deleteFamilyCnt));

      return hasDeleteFamilyBloom;
    }

    public void close() throws IOException {
      boolean hasGeneralBloom = this.closeGeneralBloomFilter();
      boolean hasDeleteFamilyBloom = this.closeDeleteFamilyBloomFilter();

      writer.close();

      // Log final Bloom filter statistics. This needs to be done after close()
      // because compound Bloom filters might be finalized as part of closing.
      if (StoreFile.LOG.isTraceEnabled()) {
        StoreFile.LOG.trace((hasGeneralBloom ? "" : "NO ") + "General Bloom and " +
          (hasDeleteFamilyBloom ? "" : "NO ") + "DeleteFamily" + " was added to HFile " +
          getPath());
      }
    }

    public void appendFileInfo(byte[] key, byte[] value) throws IOException {
      writer.appendFileInfo(key, value);
    }

    /** For use in testing, e.g. {@link org.apache.hadoop.hbase.regionserver.CreateRandomStoreFile}
     */
    HFile.Writer getHFileWriter() {
      return writer;
    }
  }

  /**
   * Reader for a StoreFile.
   */
  public static class Reader {
    private static final Log LOG = LogFactory.getLog(Reader.class.getName());

    protected BloomFilter generalBloomFilter = null;
    protected BloomFilter deleteFamilyBloomFilter = null;
    protected BloomType bloomFilterType;
    private final HFile.Reader reader;
    protected TimeRangeTracker timeRangeTracker = null;
    protected long sequenceID = -1;
    private byte[] lastBloomKey;
    private long deleteFamilyCnt = -1;
    private boolean bulkLoadResult = false;
    private KeyValue.KeyOnlyKeyValue lastBloomKeyOnlyKV = null;
    private boolean skipResetSeqId = true;

    public Reader(FileSystem fs, Path path, CacheConfig cacheConf, Configuration conf)
        throws IOException {
      reader = HFile.createReader(fs, path, cacheConf, conf);
      bloomFilterType = BloomType.NONE;
    }

    public Reader(FileSystem fs, Path path, FSDataInputStreamWrapper in, long size,
        CacheConfig cacheConf, Configuration conf) throws IOException {
      reader = HFile.createReader(fs, path, in, size, cacheConf, conf);
      bloomFilterType = BloomType.NONE;
    }

    /**
     * ONLY USE DEFAULT CONSTRUCTOR FOR UNIT TESTS
     */
    Reader() {
      this.reader = null;
    }

    public CellComparator getComparator() {
      return reader.getComparator();
    }

    /**
     * Get a scanner to scan over this StoreFile. Do not use
     * this overload if using this scanner for compactions.
     *
     * @param cacheBlocks should this scanner cache blocks?
     * @param pread use pread (for highly concurrent small readers)
     * @return a scanner
     */
    public StoreFileScanner getStoreFileScanner(boolean cacheBlocks,
                                                boolean pread) {
      return getStoreFileScanner(cacheBlocks, pread, false,
        // 0 is passed as readpoint because this method is only used by tests
        // where the StoreFile is directly operated upon
        0);
    }

    /**
     * Get a scanner to scan over this StoreFile.
     *
     * @param cacheBlocks should this scanner cache blocks?
     * @param pread use pread (for highly concurrent small readers)
     * @param isCompaction is scanner being used for compaction?
     * @param readPt the read point to enforce on the scanner
     * @return a scanner
     */
    public StoreFileScanner getStoreFileScanner(boolean cacheBlocks,
                                                boolean pread,
                                                boolean isCompaction, long readPt) {
      return new StoreFileScanner(this,
                                  getScanner(cacheBlocks, pread, isCompaction),
                                  !isCompaction, reader.hasMVCCInfo(), readPt);
    }
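
    /*
     * Editorial sketch (not part of the original source): the typical read
     * path once a StoreFile has been opened. `sf` and `readPoint` are
     * hypothetical; close() is assumed to come from the KeyValueScanner
     * interface that StoreFileScanner implements:
     *
     *   StoreFile.Reader r = sf.createReader();
     *   StoreFileScanner s = r.getStoreFileScanner(
     *       true,    // cacheBlocks
     *       false,   // pread: false for long sequential scans
     *       false,   // isCompaction
     *       readPoint);
     *   try {
     *     // seek/next over the file's cells
     *   } finally {
     *     s.close();
     *   }
     */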

    /**
     * Warning: Do not write further code which depends on this call. Instead
     * use getStoreFileScanner() which uses the StoreFileScanner class/interface
     * which is the preferred way to scan a store with higher level concepts.
     *
     * @param cacheBlocks should we cache the blocks?
     * @param pread use pread (for concurrent small readers)
     * @return the underlying HFileScanner
     */
    @Deprecated
    public HFileScanner getScanner(boolean cacheBlocks, boolean pread) {
      return getScanner(cacheBlocks, pread, false);
    }

    /**
     * Warning: Do not write further code which depends on this call. Instead
     * use getStoreFileScanner() which uses the StoreFileScanner class/interface
     * which is the preferred way to scan a store with higher level concepts.
     *
     * @param cacheBlocks
     *          should we cache the blocks?
     * @param pread
     *          use pread (for concurrent small readers)
     * @param isCompaction
     *          is scanner being used for compaction?
     * @return the underlying HFileScanner
     */
    @Deprecated
    public HFileScanner getScanner(boolean cacheBlocks, boolean pread,
        boolean isCompaction) {
      return reader.getScanner(cacheBlocks, pread, isCompaction);
    }

    public void close(boolean evictOnClose) throws IOException {
      reader.close(evictOnClose);
    }

    /**
     * Check if this storeFile may contain keys within the TimeRange that
     * have not expired (i.e. not older than oldestUnexpiredTS).
     * @param scan the current scan
     * @param oldestUnexpiredTS the oldest timestamp that is not expired, as
     *          determined by the column family's TTL
     * @return false if queried keys definitely don't exist in this StoreFile
     */
    boolean passesTimerangeFilter(Scan scan, long oldestUnexpiredTS) {
      if (timeRangeTracker == null) {
        return true;
      } else {
        return timeRangeTracker.includesTimeRange(scan.getTimeRange()) &&
            timeRangeTracker.getMaximumTimestamp() >= oldestUnexpiredTS;
      }
    }

    /**
     * Checks whether the given scan passes the Bloom filter (if present). Only
     * checks Bloom filters for single-row or single-row-column scans. Bloom
     * filter checking for multi-gets is implemented as part of the store
     * scanner system (see {@link StoreFileScanner#seekExactly}) and uses
     * the lower-level API {@link #passesGeneralRowBloomFilter(byte[], int, int)}
     * and {@link #passesGeneralRowColBloomFilter(Cell)}.
     *
     * @param scan the scan specification. Used to determine the row, and to
     *          check whether this is a single-row ("get") scan.
     * @param columns the set of columns. Only used for row-column Bloom
     *          filters.
     * @return true if the scan with the given column set passes the Bloom
     *         filter, or if the Bloom filter is not applicable for the scan.
     *         False if the Bloom filter is applicable and the scan fails it.
     */
    boolean passesBloomFilter(Scan scan,
        final SortedSet<byte[]> columns) {
      // Multi-column non-get scans will use Bloom filters through the
      // lower-level API function that this function calls.
      if (!scan.isGetScan()) {
        return true;
      }

      byte[] row = scan.getStartRow();
      switch (this.bloomFilterType) {
        case ROW:
          return passesGeneralRowBloomFilter(row, 0, row.length);

        case ROWCOL:
          if (columns != null && columns.size() == 1) {
            byte[] column = columns.first();
            // create the required fake key
            Cell kvKey = KeyValueUtil.createFirstOnRow(row, 0, row.length,
              HConstants.EMPTY_BYTE_ARRAY, 0, 0, column, 0,
              column.length);
            return passesGeneralRowColBloomFilter(kvKey);
          }

          // For multi-column queries the Bloom filter is checked from the
          // seekExact operation.
          return true;

        default:
          return true;
      }
    }
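
    /*
     * Editorial sketch (not part of the original source): only single-row
     * "get" scans are checked here; everything else passes trivially. `row`,
     * `columns` and `reader` are hypothetical, and the Scan(Get) constructor
     * is assumed to be available in this HBase version:
     *
     *   Scan scan = new Scan(new Get(row));      // isGetScan() == true
     *   if (!reader.passesBloomFilter(scan, columns)) {
     *     // the row (or row/column) is definitely absent from this file
     *   }
     */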

    public boolean passesDeleteFamilyBloomFilter(byte[] row, int rowOffset,
        int rowLen) {
      // Cache Bloom filter as a local variable in case it is set to null by
      // another thread on an IO error.
      BloomFilter bloomFilter = this.deleteFamilyBloomFilter;

      // Empty file or there is no delete family at all
      if (reader.getTrailer().getEntryCount() == 0 || deleteFamilyCnt == 0) {
        return false;
      }

      if (bloomFilter == null) {
        return true;
      }

      try {
        if (!bloomFilter.supportsAutoLoading()) {
          return true;
        }
        return bloomFilter.contains(row, rowOffset, rowLen, null);
      } catch (IllegalArgumentException e) {
        LOG.error("Bad Delete Family bloom filter data -- proceeding without",
            e);
        setDeleteFamilyBloomFilterFaulty();
      }

      return true;
    }

    /**
     * A method for checking Bloom filters. Called directly from
     * StoreFileScanner in case of a multi-column query.
     *
     * @param row the row to check
     * @param rowOffset offset of the row within the given array
     * @param rowLen length of the row
     * @return True if passes
     */
    public boolean passesGeneralRowBloomFilter(byte[] row, int rowOffset, int rowLen) {
      BloomFilter bloomFilter = this.generalBloomFilter;
      if (bloomFilter == null) {
        return true;
      }

      // Used in ROW bloom
      byte[] key = null;
      if (rowOffset != 0 || rowLen != row.length) {
        throw new AssertionError(
            "For row-only Bloom filters the row must occupy the whole array");
      }
      key = row;
      return checkGeneralBloomFilter(key, null, bloomFilter);
    }

    /**
     * A method for checking Bloom filters. Called directly from
     * StoreFileScanner in case of a multi-column query.
     *
     * @param cell
     *          the cell to check if present in BloomFilter
     * @return True if passes
     */
    public boolean passesGeneralRowColBloomFilter(Cell cell) {
      BloomFilter bloomFilter = this.generalBloomFilter;
      if (bloomFilter == null) {
        return true;
      }
      // Used in ROW_COL bloom
      Cell kvKey = null;
      // If the incoming key is already a fake rowcol key, use it as is
      if (cell.getTypeByte() == KeyValue.Type.Maximum.getCode() && cell.getFamilyLength() == 0) {
        kvKey = cell;
      } else {
        kvKey = CellUtil.createFirstOnRowCol(cell);
      }
      return checkGeneralBloomFilter(null, kvKey, bloomFilter);
    }

    private boolean checkGeneralBloomFilter(byte[] key, Cell kvKey, BloomFilter bloomFilter) {
      // Empty file
      if (reader.getTrailer().getEntryCount() == 0)
        return false;
      HFileBlock bloomBlock = null;
      try {
        boolean shouldCheckBloom;
        ByteBuff bloom;
        if (bloomFilter.supportsAutoLoading()) {
          bloom = null;
          shouldCheckBloom = true;
        } else {
          bloomBlock = reader.getMetaBlock(HFile.BLOOM_FILTER_DATA_KEY, true);
          bloom = bloomBlock.getBufferWithoutHeader();
          shouldCheckBloom = bloom != null;
        }

        if (shouldCheckBloom) {
          boolean exists;

          // Whether the primary Bloom key is greater than the last Bloom key
          // from the file info. For row-column Bloom filters this is not yet
          // a sufficient condition to return false.
          boolean keyIsAfterLast = (lastBloomKey != null);
          // hbase:meta does not have blooms. So we need not have special interpretation
          // of the hbase:meta cells.  We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom
          if (keyIsAfterLast) {
            if (bloomFilterType == BloomType.ROW) {
              keyIsAfterLast = (Bytes.BYTES_RAWCOMPARATOR.compare(key, lastBloomKey) > 0);
            } else {
              keyIsAfterLast = (CellComparator.COMPARATOR.compare(kvKey, lastBloomKeyOnlyKV)) > 0;
            }
          }

          if (bloomFilterType == BloomType.ROWCOL) {
            // Since a Row Delete is essentially a DeleteFamily applied to all
            // columns, a file might be skipped if using row+col Bloom filter.
            // In order to ensure this file is included an additional check is
            // required looking only for a row bloom.
            Cell rowBloomKey = CellUtil.createFirstOnRow(kvKey);
            // hbase:meta does not have blooms. So we need not have special interpretation
            // of the hbase:meta cells.  We can safely use Bytes.BYTES_RAWCOMPARATOR for ROW Bloom
            if (keyIsAfterLast
                && (CellComparator.COMPARATOR.compare(rowBloomKey, lastBloomKeyOnlyKV)) > 0) {
              exists = false;
            } else {
              exists =
                  bloomFilter.contains(kvKey, bloom) ||
                  bloomFilter.contains(rowBloomKey, bloom);
            }
          } else {
            exists = !keyIsAfterLast
                && bloomFilter.contains(key, 0, key.length, bloom);
          }

          return exists;
        }
      } catch (IOException e) {
        LOG.error("Error reading bloom filter data -- proceeding without",
            e);
        setGeneralBloomFilterFaulty();
      } catch (IllegalArgumentException e) {
        LOG.error("Bad bloom filter data -- proceeding without", e);
        setGeneralBloomFilterFaulty();
      } finally {
        // Return the bloom block so that its ref count can be decremented.
        reader.returnBlock(bloomBlock);
      }
      return true;
    }
1414 
1415     /**
1416     * Checks whether the given scan rowkey range overlaps with the current storefile's key range
1417      * @param scan the scan specification. Used to determine the rowkey range.
1418      * @return true if there is overlap, false otherwise
1419      */
1420     public boolean passesKeyRangeFilter(Scan scan) {
1421       if (this.getFirstKey() == null || this.getLastKey() == null) {
1422         // the file is empty
1423         return false;
1424       }
1425       if (Bytes.equals(scan.getStartRow(), HConstants.EMPTY_START_ROW)
1426           && Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) {
1427         return true;
1428       }
1429       byte[] smallestScanRow = scan.isReversed() ? scan.getStopRow() : scan.getStartRow();
1430       byte[] largestScanRow = scan.isReversed() ? scan.getStartRow() : scan.getStopRow();
1431       Cell firstKeyKV = this.getFirstKey();
1432       Cell lastKeyKV = this.getLastKey();
1433      boolean nonOverLapping =
1434          (getComparator().compareRows(firstKeyKV, largestScanRow, 0, largestScanRow.length) > 0
1435              && !Bytes.equals(
1436                  scan.isReversed() ? scan.getStartRow() : scan.getStopRow(),
1437                  HConstants.EMPTY_END_ROW))
1438          || getComparator().compareRows(lastKeyKV, smallestScanRow, 0, smallestScanRow.length) < 0;
1439       return !nonOverLapping;
1440     }
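
    // Editor's sketch (not part of the original source): a worked example of
    // the overlap test above. If this file spans rows ["bbb", "ggg"] and a
    // forward scan covers ["hhh", "zzz"), then the file's last row "ggg"
    // sorts before the smallest scan row "hhh", so nonOverLapping is true
    // and the method returns false, letting the scan skip this file.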
1441 
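    /**
     * Reads the file info block and caches the pieces this reader needs
     * later: the Bloom filter type, the last Bloom key (plus its
     * key-only-KeyValue form for ROWCOL blooms), and the delete family
     * count.
     *
     * @return the raw file info map
     * @throws IOException if the file info block cannot be read
     */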
1442     public Map<byte[], byte[]> loadFileInfo() throws IOException {
1443       Map<byte [], byte []> fi = reader.loadFileInfo();
1444 
1445       byte[] b = fi.get(BLOOM_FILTER_TYPE_KEY);
1446       if (b != null) {
1447         bloomFilterType = BloomType.valueOf(Bytes.toString(b));
1448       }
1449 
1450       lastBloomKey = fi.get(LAST_BLOOM_KEY);
1451      if (bloomFilterType == BloomType.ROWCOL && lastBloomKey != null) {
1452         lastBloomKeyOnlyKV = new KeyValue.KeyOnlyKeyValue(lastBloomKey, 0, lastBloomKey.length);
1453       }
1454       byte[] cnt = fi.get(DELETE_FAMILY_COUNT);
1455       if (cnt != null) {
1456         deleteFamilyCnt = Bytes.toLong(cnt);
1457       }
1458 
1459       return fi;
1460     }
1461 
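    /**
     * Loads both the general and the delete-family Bloom filters, if
     * present. Load failures are logged and the offending filter is marked
     * faulty instead of being propagated to the caller.
     */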
1462     public void loadBloomfilter() {
1463       this.loadBloomfilter(BlockType.GENERAL_BLOOM_META);
1464       this.loadBloomfilter(BlockType.DELETE_FAMILY_BLOOM_META);
1465     }
1466 
1467     private void loadBloomfilter(BlockType blockType) {
1468       try {
1469         if (blockType == BlockType.GENERAL_BLOOM_META) {
1470           if (this.generalBloomFilter != null)
1471             return; // Bloom has been loaded
1472 
1473           DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
1474           if (bloomMeta != null) {
1475             // sanity check for NONE Bloom filter
1476             if (bloomFilterType == BloomType.NONE) {
1477               throw new IOException(
1478                   "valid bloom filter type not found in FileInfo");
1479             } else {
1480               generalBloomFilter = BloomFilterFactory.createFromMeta(bloomMeta,
1481                   reader);
1482               if (LOG.isTraceEnabled()) {
1483                 LOG.trace("Loaded " + bloomFilterType.toString() + " "
1484                   + generalBloomFilter.getClass().getSimpleName()
1485                   + " metadata for " + reader.getName());
1486               }
1487             }
1488           }
1489         } else if (blockType == BlockType.DELETE_FAMILY_BLOOM_META) {
1490           if (this.deleteFamilyBloomFilter != null)
1491             return; // Bloom has been loaded
1492 
1493           DataInput bloomMeta = reader.getDeleteBloomFilterMetadata();
1494           if (bloomMeta != null) {
1495             deleteFamilyBloomFilter = BloomFilterFactory.createFromMeta(
1496                 bloomMeta, reader);
1497             LOG.info("Loaded Delete Family Bloom ("
1498                 + deleteFamilyBloomFilter.getClass().getSimpleName()
1499                 + ") metadata for " + reader.getName());
1500           }
1501         } else {
1502          throw new RuntimeException("Block Type: " + blockType.toString()
1503              + " is not supported for Bloom filter");
1504         }
1505       } catch (IOException e) {
1506         LOG.error("Error reading bloom filter meta for " + blockType
1507             + " -- proceeding without", e);
1508         setBloomFilterFaulty(blockType);
1509       } catch (IllegalArgumentException e) {
1510         LOG.error("Bad bloom filter meta " + blockType
1511             + " -- proceeding without", e);
1512         setBloomFilterFaulty(blockType);
1513       }
1514     }
1515 
1516     private void setBloomFilterFaulty(BlockType blockType) {
1517       if (blockType == BlockType.GENERAL_BLOOM_META) {
1518         setGeneralBloomFilterFaulty();
1519       } else if (blockType == BlockType.DELETE_FAMILY_BLOOM_META) {
1520         setDeleteFamilyBloomFilterFaulty();
1521       }
1522     }
1523 
1524     /**
1525      * The number of Bloom filter entries in this store file, or an estimate
1526     * thereof, if the Bloom filter is not loaded. This always returns an upper
1527     * bound on the number of Bloom filter entries.
1528      *
1529      * @return an estimate of the number of Bloom filter entries in this file
1530      */
1531     public long getFilterEntries() {
1532       return generalBloomFilter != null ? generalBloomFilter.getKeyCount()
1533           : reader.getEntries();
1534     }
1535 
1536     public void setGeneralBloomFilterFaulty() {
1537       generalBloomFilter = null;
1538     }
1539 
1540     public void setDeleteFamilyBloomFilterFaulty() {
1541       this.deleteFamilyBloomFilter = null;
1542     }
1543 
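    // The accessors below largely delegate to the underlying HFile reader
    // and its fixed file trailer.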
1544     public Cell getLastKey() {
1545       return reader.getLastKey();
1546     }
1547 
1548     public byte[] getLastRowKey() {
1549       return reader.getLastRowKey();
1550     }
1551 
1552     public Cell midkey() throws IOException {
1553       return reader.midkey();
1554     }
1555 
1556     public long length() {
1557       return reader.length();
1558     }
1559 
1560     public long getTotalUncompressedBytes() {
1561       return reader.getTrailer().getTotalUncompressedBytes();
1562     }
1563 
1564     public long getEntries() {
1565       return reader.getEntries();
1566     }
1567 
1568     public long getDeleteFamilyCnt() {
1569       return deleteFamilyCnt;
1570     }
1571 
1572     public Cell getFirstKey() {
1573       return reader.getFirstKey();
1574     }
1575 
1576     public long indexSize() {
1577       return reader.indexSize();
1578     }
1579 
1580     public BloomType getBloomFilterType() {
1581       return this.bloomFilterType;
1582     }
1583 
1584     public long getSequenceID() {
1585       return sequenceID;
1586     }
1587 
1588     public void setSequenceID(long sequenceID) {
1589       this.sequenceID = sequenceID;
1590     }
1591 
1592     public void setBulkLoaded(boolean bulkLoadResult) {
1593       this.bulkLoadResult = bulkLoadResult;
1594     }
1595 
1596     public boolean isBulkLoaded() {
1597       return this.bulkLoadResult;
1598     }
1599 
1600     BloomFilter getGeneralBloomFilter() {
1601       return generalBloomFilter;
1602     }
1603 
1604     long getUncompressedDataIndexSize() {
1605       return reader.getTrailer().getUncompressedDataIndexSize();
1606     }
1607 
1608     public long getTotalBloomSize() {
1609       if (generalBloomFilter == null)
1610         return 0;
1611       return generalBloomFilter.getByteSize();
1612     }
1613 
1614     public int getHFileVersion() {
1615       return reader.getTrailer().getMajorVersion();
1616     }
1617 
1618     public int getHFileMinorVersion() {
1619       return reader.getTrailer().getMinorVersion();
1620     }
1621 
1622     public HFile.Reader getHFileReader() {
1623       return reader;
1624     }
1625 
1626     void disableBloomFilterForTesting() {
1627       generalBloomFilter = null;
1628       this.deleteFamilyBloomFilter = null;
1629     }
1630 
1631     public long getMaxTimestamp() {
1632       return timeRangeTracker == null ? Long.MAX_VALUE : timeRangeTracker.getMaximumTimestamp();
1633     }
1634 
1635     boolean isSkipResetSeqId() {
1636       return skipResetSeqId;
1637     }
1638 
1639     void setSkipResetSeqId(boolean skipResetSeqId) {
1640       this.skipResetSeqId = skipResetSeqId;
1641     }
1642   }
1643 
1644   /**
1645    * Useful comparators for comparing StoreFiles.
1646    */
1647   public abstract static class Comparators {
1648     /**
1649     * Comparator that orders StoreFiles by their sequence ids. Bulk loads
1650     * that did not request a seq ID are given a seq id of -1; thus, they
1651     * are placed before all non-bulk loads and before bulk loads that do
1652     * have a sequence Id. Among files with equal sequence ids, file size
1653     * (descending) is used to determine the ordering, then bulkLoadTime.
1654     * If there are still ties, the path name is used as a tie-breaker.
1655      */
1656     public static final Comparator<StoreFile> SEQ_ID =
1657       Ordering.compound(ImmutableList.of(
1658           Ordering.natural().onResultOf(new GetSeqId()),
1659           Ordering.natural().onResultOf(new GetFileSize()).reverse(),
1660           Ordering.natural().onResultOf(new GetBulkTime()),
1661           Ordering.natural().onResultOf(new GetPathName())
1662       ));
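
    // Editor's sketch (not part of the original source): sorting a list of
    // store files with this comparator. The "storefiles" collection is
    // hypothetical.
    //
    //   List<StoreFile> files = new ArrayList<StoreFile>(storefiles);
    //   Collections.sort(files, SEQ_ID);
    //   // files is now ordered oldest-first: lowest max sequence id, and
    //   // among ties, largest file first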
1663 
1664     private static class GetSeqId implements Function<StoreFile, Long> {
1665       @Override
1666       public Long apply(StoreFile sf) {
1667         return sf.getMaxSequenceId();
1668       }
1669     }
1670 
1671     private static class GetFileSize implements Function<StoreFile, Long> {
1672       @Override
1673       public Long apply(StoreFile sf) {
1674         return sf.getReader().length();
1675       }
1676     }
1677 
1678     private static class GetBulkTime implements Function<StoreFile, Long> {
1679       @Override
1680       public Long apply(StoreFile sf) {
1681         if (!sf.isBulkLoadResult()) return Long.MAX_VALUE;
1682         return sf.getBulkLoadTimestamp();
1683       }
1684     }
1685 
1686     private static class GetPathName implements Function<StoreFile, String> {
1687       @Override
1688       public String apply(StoreFile sf) {
1689         return sf.getPath().getName();
1690       }
1691     }
1692   }
1693 }