/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.NavigableSet;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.conf.PropagatingConfigurationObserver;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController;
import org.apache.hadoop.hbase.security.User;
/**
 * Interface for objects that hold a column family in a Region. It's a memstore and a set of zero
 * or more StoreFiles, which stretch backwards over time.
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
@InterfaceStability.Evolving
public interface Store extends HeapSize, StoreConfigInformation, PropagatingConfigurationObserver {

  /* The default priority for user-specified compaction requests.
   * The user gets top priority unless we have blocking compactions. (Pri <= 0)
   */
  int PRIORITY_USER = 1;
  int NO_PRIORITY = Integer.MIN_VALUE;

  // General Accessors
  CellComparator getComparator();

  Collection<StoreFile> getStorefiles();

  /**
   * Close all the readers. We don't need to worry about subsequent requests because the Region
   * holds a write lock that will prevent any more reads or writes.
   * @return the {@link StoreFile StoreFiles} that were previously being used.
   * @throws IOException on failure
   */
  Collection<StoreFile> close() throws IOException;

  /**
   * Return a scanner for both the memstore and the HStore files. Assumes we are not in a
   * compaction.
   * @param scan Scan to apply when scanning the stores
   * @param targetCols columns to scan
   * @param readPt the read point of the current scan
   * @return a scanner over the current key values
   * @throws IOException on failure
   */
  KeyValueScanner getScanner(Scan scan, final NavigableSet<byte[]> targetCols, long readPt)
      throws IOException;
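
  /*
   * Illustrative sketch only (not part of the interface): one way a caller holding a Store
   * reference might open a scanner for a single column. The variable "store", the row keys and
   * the qualifier are hypothetical; Bytes and TreeSet come from org.apache.hadoop.hbase.util
   * and java.util respectively.
   *
   *   Scan scan = new Scan(Bytes.toBytes("row-000"), Bytes.toBytes("row-100"));
   *   NavigableSet<byte[]> cols = new TreeSet<>(Bytes.BYTES_COMPARATOR);
   *   cols.add(Bytes.toBytes("qual"));
   *   KeyValueScanner scanner = store.getScanner(scan, cols, store.getSmallestReadPoint());
   *   try {
   *     Cell cell;
   *     while ((cell = scanner.next()) != null) {
   *       // consume cells
   *     }
   *   } finally {
   *     scanner.close();
   *   }
   */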

  /**
   * Get all scanners with no filtering based on TTL (that happens further down
   * the line).
   * @param cacheBlocks cache the blocks or not
   * @param isGet true if it is a get, false if not
   * @param usePread true to use pread, false if not
   * @param isCompaction true if the scanner is created for compaction
   * @param matcher the scan query matcher
   * @param startRow the start row
   * @param stopRow the stop row
   * @param readPt the read point of the current scan
   * @return all scanners for this store
   */
  List<KeyValueScanner> getScanners(
    boolean cacheBlocks,
    boolean isGet,
    boolean usePread,
    boolean isCompaction,
    ScanQueryMatcher matcher,
    byte[] startRow,
    byte[] stopRow,
    long readPt
  ) throws IOException;

  /**
   * Create scanners on the given files and if needed on the memstore with no filtering based on
   * TTL (that happens further down the line).
   * @param files the list of files on which the scanners have to be created
   * @param cacheBlocks cache the blocks or not
   * @param isGet true if it is a get, false if not
   * @param usePread true to use pread, false if not
   * @param isCompaction true if the scanner is created for compaction
   * @param matcher the scan query matcher
   * @param startRow the start row
   * @param stopRow the stop row
   * @param readPt the read point of the current scan
   * @param includeMemstoreScanner true if memstore has to be included
   * @return scanners on the given files and on the memstore if specified
   */
  List<KeyValueScanner> getScanners(List<StoreFile> files, boolean cacheBlocks, boolean isGet,
      boolean usePread, boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow,
      byte[] stopRow, long readPt, boolean includeMemstoreScanner) throws IOException;

  ScanInfo getScanInfo();

  /**
   * Adds or replaces the specified KeyValues.
   * <p>
   * For each KeyValue specified, if a cell with the same row, family, and qualifier exists in
   * MemStore, it will be replaced. Otherwise, it will just be inserted to MemStore.
   * <p>
   * This operation is atomic on each KeyValue (row/family/qualifier) but not necessarily atomic
   * across all of them.
   * @param cells the cells to add or replace
   * @param readpoint readpoint below which we can safely remove duplicate KVs
   * @return memstore size delta
   * @throws IOException on failure
   */
  long upsert(Iterable<Cell> cells, long readpoint) throws IOException;
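
  /*
   * Illustrative sketch only: upsert() replaces any matching row/family/qualifier cell in the
   * MemStore instead of adding another version, which is how increment/append style updates keep
   * a single current cell. The variables and helper below are hypothetical.
   *
   *   List<Cell> updated = buildIncrementedCells();    // hypothetical helper producing new cells
   *   long readpoint = store.getSmallestReadPoint();   // duplicates below this point can be dropped
   *   long memstoreDelta = store.upsert(updated, readpoint);
   */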

  /**
   * Adds a value to the memstore
   * @param cell the cell to add
   * @return memstore size delta
   */
  long add(Cell cell);

  /**
   * When was the last edit done in the memstore
   */
  long timeOfOldestEdit();

  FileSystem getFileSystem();

  /**
   * @param maxKeyCount estimated maximum number of keys that will be written
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether tags will be written to the file
   * @return Writer for a new StoreFile in the tmp dir.
   */
  StoreFileWriter createWriterInTmp(
      long maxKeyCount,
      Compression.Algorithm compression,
      boolean isCompaction,
      boolean includeMVCCReadpoint,
      boolean includesTags
  ) throws IOException;
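
  /*
   * Illustrative sketch only: flush and compaction code uses this factory so the new file is
   * written under the region's temporary directory and only moved into the store directory once
   * complete. The variables are hypothetical; a flush-side call might look roughly like:
   *
   *   StoreFileWriter writer = store.createWriterInTmp(
   *       cellCount,                                // estimated number of cells to write
   *       store.getFamily().getCompressionType(),   // compression; real flushes may use a
   *                                                 // dedicated flush compression setting
   *       false,                                    // not a compaction
   *       true,                                     // write the MVCC readpoint
   *       true);                                    // write tags
   */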

  /**
   * @param maxKeyCount estimated maximum number of keys that will be written
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether tags will be written to the file
   * @param shouldDropBehind should the writer drop caches behind writes
   * @return Writer for a new StoreFile in the tmp dir.
   */
  StoreFileWriter createWriterInTmp(
      long maxKeyCount,
      Compression.Algorithm compression,
      boolean isCompaction,
      boolean includeMVCCReadpoint,
      boolean includesTags,
      boolean shouldDropBehind
  ) throws IOException;

  /**
   * @param maxKeyCount estimated maximum number of keys that will be written
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether tags will be written to the file
   * @param shouldDropBehind should the writer drop caches behind writes
   * @param trt Ready-made timetracker to use.
   * @return Writer for a new StoreFile in the tmp dir.
   */
  StoreFileWriter createWriterInTmp(
      long maxKeyCount,
      Compression.Algorithm compression,
      boolean isCompaction,
      boolean includeMVCCReadpoint,
      boolean includesTags,
      boolean shouldDropBehind,
      final TimeRangeTracker trt
  ) throws IOException;

  // Compaction oriented methods

  boolean throttleCompaction(long compactionSize);

  /**
   * Getter for the CompactionProgress object.
   * @return CompactionProgress object; can be null
   */
  CompactionProgress getCompactionProgress();

  CompactionContext requestCompaction() throws IOException;

  /**
   * @deprecated see requestCompaction(int, CompactionRequest, User)
   */
  @Deprecated
  CompactionContext requestCompaction(int priority, CompactionRequest baseRequest)
      throws IOException;

  CompactionContext requestCompaction(int priority, CompactionRequest baseRequest, User user)
      throws IOException;

  void cancelRequestedCompaction(CompactionContext compaction);

  /**
   * @deprecated see compact(CompactionContext, ThroughputController, User)
   */
  @Deprecated
  List<StoreFile> compact(CompactionContext compaction,
      ThroughputController throughputController) throws IOException;

  List<StoreFile> compact(CompactionContext compaction,
      ThroughputController throughputController, User user) throws IOException;
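
  /*
   * Illustrative sketch only: the request/compact pair is how callers drive a compaction. A
   * context is obtained first (null means there is nothing to compact), then either executed or
   * cancelled so the selected files are released. "throughputController" and "user" are
   * hypothetical variables assumed to be in scope.
   *
   *   CompactionContext ctx = store.requestCompaction(Store.PRIORITY_USER, null, user);
   *   if (ctx != null) {
   *     try {
   *       List<StoreFile> newFiles = store.compact(ctx, throughputController, user);
   *     } catch (IOException ioe) {
   *       store.cancelRequestedCompaction(ctx);
   *       throw ioe;
   *     }
   *   }
   */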

  /**
   * @return true if we should run a major compaction.
   */
  boolean isMajorCompaction() throws IOException;

  void triggerMajorCompaction();

  /**
   * See if there are too many store files in this store
   * @return true if number of store files is greater than the number defined in minFilesToCompact
   */
  boolean needsCompaction();

  int getCompactPriority();

  StoreFlushContext createFlushContext(long cacheFlushId);

  /**
   * Call to complete a compaction. It's for the case where we find in the WAL a compaction
   * that was not finished. We could find one recovering a WAL after a regionserver crash.
   * See HBASE-2331.
   * @param compaction the descriptor for compaction
   * @param pickCompactionFiles whether or not to pick up the new compaction output files and
   * add them to the store
   * @param removeFiles whether to remove/archive files from filesystem
   */
  void replayCompactionMarker(CompactionDescriptor compaction, boolean pickCompactionFiles,
      boolean removeFiles)
      throws IOException;

  // Split oriented methods

  boolean canSplit();

  /**
   * Determines if Store should be split.
   * @return byte[] split point if store should be split, null otherwise.
   */
  byte[] getSplitPoint();
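
  /*
   * Illustrative sketch only: split checks usually pair these two calls.
   *
   *   if (store.canSplit()) {
   *     byte[] splitRow = store.getSplitPoint();   // null means no usable split point was found
   *     // hand splitRow to the region split machinery
   *   }
   */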

  // Bulk Load methods

  /**
   * This throws a WrongRegionException if the HFile does not fit in this region, or an
   * InvalidHFileException if the HFile is not valid.
   */
  void assertBulkLoadHFileOk(Path srcPath) throws IOException;

  /**
   * This method should only be called from Region. It is assumed that the ranges of values in the
   * HFile fit within the store's assigned region. (assertBulkLoadHFileOk checks this)
   *
   * @param srcPathStr path of the HFile to bulk load
   * @param sequenceId sequence Id associated with the HFile
   */
  Path bulkLoadHFile(String srcPathStr, long sequenceId) throws IOException;
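
  /*
   * Illustrative sketch only: Region validates the file before handing it to the store, roughly
   * in this order (the path and sequence id are hypothetical).
   *
   *   Path src = new Path("/staging/family/hfile");
   *   store.assertBulkLoadHFileOk(src);                        // key-range and HFile format checks
   *   Path dst = store.bulkLoadHFile(src.toString(), seqId);   // file is moved into the store dir
   */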

  // General accessors into the state of the store
  // TODO abstract some of this out into a metrics class

  /**
   * @return <tt>true</tt> if the store has any underlying reference files to older HFiles
   */
  boolean hasReferences();

  /**
   * @return The size of this store's memstore, in bytes
   */
  long getMemStoreSize();

  /**
   * @return The amount of memory we could flush from this memstore; usually this is equal to
   * {@link #getMemStoreSize()} unless we are carrying snapshots and then it will be the size of
   * outstanding snapshots.
   */
  long getFlushableSize();

  /**
   * Returns the memstore snapshot size
   * @return size of the memstore snapshot
   */
  long getSnapshotSize();

  HColumnDescriptor getFamily();

  /**
   * @return The maximum sequence id in all store files.
   */
  long getMaxSequenceId();

  /**
   * @return The maximum memstoreTS in all store files.
   */
  long getMaxMemstoreTS();

  /**
   * @return the data block encoder
   */
  HFileDataBlockEncoder getDataBlockEncoder();

  /** @return aggregate size of all HStores used in the last compaction */
  long getLastCompactSize();

  /** @return aggregate size of HStore */
  long getSize();

  /**
   * @return Count of store files
   */
  int getStorefilesCount();

  /**
   * @return Max age of store files in this store
   */
  long getMaxStoreFileAge();

  /**
   * @return Min age of store files in this store
   */
  long getMinStoreFileAge();

  /**
   * @return Average age of store files in this store, 0 if no store files
   */
  long getAvgStoreFileAge();

  /**
   * @return Number of reference files in this store
   */
  long getNumReferenceFiles();

  /**
   * @return Number of HFiles in this store
   */
  long getNumHFiles();

  /**
   * @return The size of the store files, in bytes, uncompressed.
   */
  long getStoreSizeUncompressed();

  /**
   * @return The size of the store files, in bytes.
   */
  long getStorefilesSize();

  /**
   * @return The size of the store file indexes, in bytes.
   */
  long getStorefilesIndexSize();

  /**
   * Returns the total size of all index blocks in the data block indexes, including the root
   * level, intermediate levels, and the leaf level for multi-level indexes, or just the root
   * level for single-level indexes.
   * @return the total size of block indexes in the store
   */
  long getTotalStaticIndexSize();

  /**
   * Returns the total byte size of all Bloom filter bit arrays. For compound Bloom filters even
   * the Bloom blocks currently not loaded into the block cache are counted.
   * @return the total size of all Bloom filters in the store
   */
  long getTotalStaticBloomSize();

  // Test-helper methods

  /**
   * Used for tests.
   * @return cache configuration for this Store.
   */
  CacheConfig getCacheConfig();

  /**
   * @return the parent region info hosting this store
   */
  HRegionInfo getRegionInfo();

  RegionCoprocessorHost getCoprocessorHost();

  boolean areWritesEnabled();

  /**
   * @return The smallest mvcc readPoint across all the scanners in this
   * region. Writes older than this readPoint are included in every
   * read operation.
   */
  long getSmallestReadPoint();

  String getColumnFamilyName();

  TableName getTableName();

  /**
   * @return The number of cells flushed to disk
   */
  long getFlushedCellsCount();

  /**
   * @return The total size of data flushed to disk, in bytes
   */
  long getFlushedCellsSize();

  /**
   * @return The total size of the output files on disk, in bytes
   */
  long getFlushedOutputFileSize();

  /**
   * @return The number of cells processed during minor compactions
   */
  long getCompactedCellsCount();

  /**
   * @return The total amount of data processed during minor compactions, in bytes
   */
  long getCompactedCellsSize();

  /**
   * @return The number of cells processed during major compactions
   */
  long getMajorCompactedCellsCount();

  /**
   * @return The total amount of data processed during major compactions, in bytes
   */
  long getMajorCompactedCellsSize();

  /**
   * @param o Observer who wants to know about changes in set of Readers
   */
  void addChangedReaderObserver(ChangedReadersObserver o);

  /**
   * @param o Observer no longer interested in changes in set of Readers.
   */
  void deleteChangedReaderObserver(ChangedReadersObserver o);

  /**
   * @return Whether this store has too many store files.
   */
  boolean hasTooManyStoreFiles();

  /**
   * Checks the underlying store files, and opens the files that have not
   * been opened, and removes the store file readers for store files no longer
   * available. Mainly used by secondary region replicas to keep up to date with
   * the primary region files.
   * @throws IOException on failure
   */
  void refreshStoreFiles() throws IOException;

  /**
   * This value can represent the degree of emergency of compaction for this store. It should be
   * greater than or equal to 0.0, any value greater than 1.0 means we have too many store files.
   * <ul>
   * <li>if getStorefilesCount &lt;= getMinFilesToCompact, return 0.0</li>
   * <li>return (getStorefilesCount - getMinFilesToCompact) / (blockingFileCount -
   * getMinFilesToCompact)</li>
   * </ul>
   * <p>
   * And for striped stores, we should calculate this value by the files in each stripe separately
   * and return the maximum value.
   * <p>
   * It is similar to {@link #getCompactPriority()} except that it is more suitable to use in a
   * linear formula.
   */
  double getCompactionPressure();
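
  /*
   * Worked example with hypothetical numbers: with minFilesToCompact = 3 and
   * blockingFileCount = 10, a store holding 7 files has
   *
   *   pressure = (7 - 3) / (10 - 3) = 4 / 7 ≈ 0.57
   *
   * while a store holding 12 files has (12 - 3) / (10 - 3) ≈ 1.29, i.e. above 1.0, meaning the
   * store already has too many files.
   */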

  /**
   * Replaces the store files that the store has with the given files. Mainly used by
   * secondary region replicas to keep up to date with
   * the primary region files.
   * @param newFiles the new store files (by name) to use
   * @throws IOException on failure
   */
  void refreshStoreFiles(Collection<String> newFiles) throws IOException;

  void bulkLoadHFile(StoreFileInfo fileInfo) throws IOException;

  boolean isPrimaryReplicaStore();

  /**
   * Closes and archives the compacted files under this store
   */
  void closeAndArchiveCompactedFiles() throws IOException;

  /**
   * This method is called when it is clear that the flush to disk is completed.
   * The store may do any post-flush actions at this point.
   * One example is to update the WAL with the sequence number that is known only at the store
   * level.
   */
  void finalizeFlush();

  MemStore getMemStore();
}