/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.NavigableSet;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.conf.PropagatingConfigurationObserver;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher;
import org.apache.hadoop.hbase.regionserver.throttle.ThroughputController;
import org.apache.hadoop.hbase.security.User;

/**
 * Interface for objects that hold a column family in a Region. It's a memstore and a set of zero
 * or more StoreFiles, which stretch backwards over time.
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
@InterfaceStability.Evolving
public interface Store extends HeapSize, StoreConfigInformation, PropagatingConfigurationObserver {

  /* The default priority for user-specified compaction requests.
   * The user gets top priority unless we have blocking compactions. (Pri <= 0)
   */
  int PRIORITY_USER = 1;
  int NO_PRIORITY = Integer.MIN_VALUE;

  // General Accessors
  CellComparator getComparator();

  Collection<StoreFile> getStorefiles();

  /**
   * Close all the readers. We don't need to worry about subsequent requests because the Region
   * holds a write lock that will prevent any more reads or writes.
   * @return the {@link StoreFile StoreFiles} that were previously being used.
   * @throws IOException on failure
   */
  Collection<StoreFile> close() throws IOException;

  /**
   * Return a scanner for both the memstore and the HStore files. Assumes we are not in a
   * compaction.
   * @param scan Scan to apply when scanning the stores
   * @param targetCols columns to scan
   * @param readPt the read point of the current scan
   * @return a scanner over the current key values
   * @throws IOException on failure
   */
  KeyValueScanner getScanner(Scan scan, final NavigableSet<byte[]> targetCols, long readPt)
      throws IOException;
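
  /*
   * Illustrative sketch (not part of the interface): how a caller holding a Store reference
   * might obtain and drain a scanner. The store, family, qualifier and readPt variables are
   * hypothetical, and imports and error handling are omitted.
   *
   *   NavigableSet<byte[]> cols = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
   *   cols.add(qualifier);
   *   Scan scan = new Scan();
   *   scan.addColumn(family, qualifier);
   *   KeyValueScanner scanner = store.getScanner(scan, cols, readPt);
   *   try {
   *     for (Cell cell = scanner.next(); cell != null; cell = scanner.next()) {
   *       // process the cell
   *     }
   *   } finally {
   *     scanner.close();
   *   }
   */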

  /**
   * Get all scanners with no filtering based on TTL (that happens further down
   * the line).
   * @param cacheBlocks cache the blocks or not
   * @param isGet true if it is a get, false if not
   * @param usePread true to use pread, false if not
   * @param isCompaction true if the scanner is created for compaction
   * @param matcher the scan query matcher
   * @param startRow the start row
   * @param stopRow the stop row
   * @param readPt the read point of the current scan
   * @return all scanners for this store
   */
  List<KeyValueScanner> getScanners(
      boolean cacheBlocks,
      boolean isGet,
      boolean usePread,
      boolean isCompaction,
      ScanQueryMatcher matcher,
      byte[] startRow,
      byte[] stopRow,
      long readPt
  ) throws IOException;

  /**
   * Create scanners on the given files and if needed on the memstore with no filtering based on
   * TTL (that happens further down the line).
   * @param files the list of files on which the scanners have to be created
   * @param cacheBlocks cache the blocks or not
   * @param isGet true if it is a get, false if not
   * @param usePread true to use pread, false if not
   * @param isCompaction true if the scanner is created for compaction
   * @param matcher the scan query matcher
   * @param startRow the start row
   * @param stopRow the stop row
   * @param readPt the read point of the current scan
   * @param includeMemstoreScanner true if memstore has to be included
   * @return scanners on the given files and on the memstore if specified
   */
  List<KeyValueScanner> getScanners(List<StoreFile> files, boolean cacheBlocks, boolean isGet,
      boolean usePread, boolean isCompaction, ScanQueryMatcher matcher, byte[] startRow,
      byte[] stopRow, long readPt, boolean includeMemstoreScanner) throws IOException;

  ScanInfo getScanInfo();

  /**
   * Adds or replaces the specified KeyValues.
   * <p>
   * For each KeyValue specified, if a cell with the same row, family, and qualifier exists in
   * MemStore, it will be replaced. Otherwise, it will just be inserted into the MemStore.
   * <p>
   * This operation is atomic on each KeyValue (row/family/qualifier) but not necessarily atomic
   * across all of them.
   * @param cells the cells to add or replace
   * @param readpoint readpoint below which we can safely remove duplicate KVs
   * @return memstore size delta
   * @throws IOException on failure
   */
  long upsert(Iterable<Cell> cells, long readpoint) throws IOException;
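
  /*
   * Illustrative sketch (not part of the interface): building a cell and upserting it. The row,
   * family, qualifier, value and readPoint variables are hypothetical; the real caller is the
   * Region, which also accounts for the returned memstore size delta.
   *
   *   Cell cell = new KeyValue(row, family, qualifier, value);
   *   long delta = store.upsert(Collections.singletonList(cell), readPoint);
   *   // or, for a plain insert without the replacement semantics of upsert:
   *   delta = store.add(cell);
   */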

  /**
   * Adds a value to the memstore
   * @param cell the cell to add
   * @return memstore size delta
   */
  long add(Cell cell);

  /**
   * @return the time of the oldest edit in the memstore
   */
  long timeOfOldestEdit();

  FileSystem getFileSystem();

  /**
   * @param maxKeyCount estimated maximum number of KeyValues to be written
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether tags should be included in the file
   * @return Writer for a new StoreFile in the tmp dir.
   */
  StoreFileWriter createWriterInTmp(
      long maxKeyCount,
      Compression.Algorithm compression,
      boolean isCompaction,
      boolean includeMVCCReadpoint,
      boolean includesTags
  ) throws IOException;
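
  /*
   * Illustrative sketch (not part of the interface): how a flush or compaction might obtain a
   * writer for its output file. The key-count estimate, compression choice and cells variable
   * are hypothetical; the real callers are the store's flusher and compactor.
   *
   *   StoreFileWriter writer = store.createWriterInTmp(
   *       10000, Compression.Algorithm.NONE, false, true, false);
   *   try {
   *     for (Cell cell : cells) {
   *       writer.append(cell);
   *     }
   *   } finally {
   *     writer.close();
   *   }
   */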

  /**
   * @param maxKeyCount estimated maximum number of KeyValues to be written
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether tags should be included in the file
   * @param shouldDropBehind should the writer drop caches behind writes
   * @return Writer for a new StoreFile in the tmp dir.
   */
  StoreFileWriter createWriterInTmp(
      long maxKeyCount,
      Compression.Algorithm compression,
      boolean isCompaction,
      boolean includeMVCCReadpoint,
      boolean includesTags,
      boolean shouldDropBehind
  ) throws IOException;

  /**
   * @param maxKeyCount estimated maximum number of KeyValues to be written
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether tags should be included in the file
   * @param shouldDropBehind should the writer drop caches behind writes
   * @param trt Ready-made timetracker to use.
   * @return Writer for a new StoreFile in the tmp dir.
   */
  StoreFileWriter createWriterInTmp(
      long maxKeyCount,
      Compression.Algorithm compression,
      boolean isCompaction,
      boolean includeMVCCReadpoint,
      boolean includesTags,
      boolean shouldDropBehind,
      final TimeRangeTracker trt
  ) throws IOException;

  // Compaction oriented methods

  boolean throttleCompaction(long compactionSize);

  /**
   * Getter for the CompactionProgress object.
   * @return CompactionProgress object; can be null
   */
  CompactionProgress getCompactionProgress();

  CompactionContext requestCompaction() throws IOException;

  /**
   * @deprecated see {@link #requestCompaction(int, CompactionRequest, User)}
   */
  @Deprecated
  CompactionContext requestCompaction(int priority, CompactionRequest baseRequest)
      throws IOException;

  CompactionContext requestCompaction(int priority, CompactionRequest baseRequest, User user)
      throws IOException;

  void cancelRequestedCompaction(CompactionContext compaction);

  /**
   * @deprecated see {@link #compact(CompactionContext, ThroughputController, User)}
   */
  @Deprecated
  List<StoreFile> compact(CompactionContext compaction,
      ThroughputController throughputController) throws IOException;

  List<StoreFile> compact(CompactionContext compaction,
      ThroughputController throughputController, User user) throws IOException;
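
  /*
   * Illustrative sketch (not part of the interface): the request/compact/cancel life cycle a
   * caller might drive. The controller and user variables are hypothetical, passing null as the
   * base request is assumed to mean "no pre-selected files", and a null context is assumed to
   * mean there is nothing to compact.
   *
   *   CompactionContext context = store.requestCompaction(Store.PRIORITY_USER, null, user);
   *   if (context != null) {
   *     try {
   *       List<StoreFile> newFiles = store.compact(context, controller, user);
   *     } catch (IOException e) {
   *       store.cancelRequestedCompaction(context);
   *       throw e;
   *     }
   *   }
   */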

  /**
   * @return true if we should run a major compaction.
   */
  boolean isMajorCompaction() throws IOException;

  void triggerMajorCompaction();

  /**
   * See if there are too many store files in this store
   * @return true if number of store files is greater than the number defined in minFilesToCompact
   */
  boolean needsCompaction();

  int getCompactPriority();

  StoreFlushContext createFlushContext(long cacheFlushId);

  /**
   * Call to complete a compaction. It's for the case where we find in the WAL a compaction
   * that was not finished. We could find one recovering a WAL after a regionserver crash.
   * See HBASE-2331.
   * @param compaction the descriptor for compaction
   * @param pickCompactionFiles whether or not to pick up the new compaction output files and
   * add them to the store
   * @param removeFiles whether to remove/archive files from filesystem
   */
  void replayCompactionMarker(CompactionDescriptor compaction, boolean pickCompactionFiles,
      boolean removeFiles)
      throws IOException;

  // Split oriented methods

  boolean canSplit();

  /**
   * Determines if this Store should be split.
   * @return the split point if the store should be split, null otherwise.
   */
  byte[] getSplitPoint();
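
  /*
   * Illustrative sketch (not part of the interface): how split-decision code might consult a
   * store. The surrounding policy and region-level checks are omitted.
   *
   *   if (store.canSplit()) {
   *     byte[] splitPoint = store.getSplitPoint();
   *     if (splitPoint != null) {
   *       // hand the split point to the region split machinery
   *     }
   *   }
   */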

  // Bulk Load methods

  /**
   * This throws a WrongRegionException if the HFile does not fit in this region, or an
   * InvalidHFileException if the HFile is not valid.
   */
  void assertBulkLoadHFileOk(Path srcPath) throws IOException;

  /**
   * This method should only be called from Region. It is assumed that the ranges of values in the
   * HFile fit within the store's assigned region. (assertBulkLoadHFileOk checks this)
   *
   * @param srcPathStr path of the HFile to be bulk loaded
   * @param sequenceId sequence Id associated with the HFile
   */
  Path bulkLoadHFile(String srcPathStr, long sequenceId) throws IOException;
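
  /*
   * Illustrative sketch (not part of the interface): the validate-then-load sequence a Region
   * might follow for a single HFile. The srcPath and seqId variables are hypothetical.
   *
   *   store.assertBulkLoadHFileOk(srcPath);
   *   Path committedPath = store.bulkLoadHFile(srcPath.toString(), seqId);
   */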

  // General accessors into the state of the store
  // TODO abstract some of this out into a metrics class

  /**
   * @return <tt>true</tt> if the store has any underlying reference files to older HFiles
   */
  boolean hasReferences();

  /**
   * @return The size of this store's memstore, in bytes
   */
  long getMemStoreSize();

  /**
   * @return The amount of memory we could flush from this memstore; usually this is equal to
   * {@link #getMemStoreSize()} unless we are carrying snapshots and then it will be the size of
   * outstanding snapshots.
   */
  long getFlushableSize();

  /**
   * Returns the memstore snapshot size
   * @return size of the memstore snapshot
   */
  long getSnapshotSize();
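
  /*
   * Illustrative sketch (not part of the interface): how flush-decision code might use the
   * memstore sizing accessors. The flushSizeLimit threshold is hypothetical.
   *
   *   if (store.getFlushableSize() > flushSizeLimit) {
   *     // ask the region to flush this store
   *   }
   */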

  HColumnDescriptor getFamily();

  /**
   * @return The maximum sequence id in all store files.
   */
  long getMaxSequenceId();

  /**
   * @return The maximum memstoreTS in all store files.
   */
  long getMaxMemstoreTS();

  /**
   * @return the data block encoder
   */
  HFileDataBlockEncoder getDataBlockEncoder();

  /** @return aggregate size of all HStores used in the last compaction */
  long getLastCompactSize();

  /** @return aggregate size of HStore */
  long getSize();

  /**
   * @return Count of store files
   */
  int getStorefilesCount();

  /**
   * @return Max age of store files in this store
   */
  long getMaxStoreFileAge();

  /**
   * @return Min age of store files in this store
   */
  long getMinStoreFileAge();

  /**
   * @return Average age of store files in this store, 0 if no store files
   */
  long getAvgStoreFileAge();

  /**
   * @return Number of reference files in this store
   */
  long getNumReferenceFiles();

  /**
   * @return Number of HFiles in this store
   */
  long getNumHFiles();

  /**
   * @return The size of the store files, in bytes, uncompressed.
   */
  long getStoreSizeUncompressed();

  /**
   * @return The size of the store files, in bytes.
   */
  long getStorefilesSize();

  /**
   * @return The size of the store file indexes, in bytes.
   */
  long getStorefilesIndexSize();

  /**
   * Returns the total size of all index blocks in the data block indexes, including the root
   * level, intermediate levels, and the leaf level for multi-level indexes, or just the root
   * level for single-level indexes.
   * @return the total size of block indexes in the store
   */
  long getTotalStaticIndexSize();

  /**
   * Returns the total byte size of all Bloom filter bit arrays. For compound Bloom filters even
   * the Bloom blocks currently not loaded into the block cache are counted.
   * @return the total size of all Bloom filters in the store
   */
  long getTotalStaticBloomSize();

  // Test-helper methods

  /**
   * Used for tests.
   * @return cache configuration for this Store.
   */
  CacheConfig getCacheConfig();

  /**
   * @return the parent region info hosting this store
   */
  HRegionInfo getRegionInfo();

  RegionCoprocessorHost getCoprocessorHost();

  boolean areWritesEnabled();

  /**
   * @return The smallest mvcc readPoint across all the scanners in this region. Writes older
   * than this readPoint are included in every read operation.
   */
  long getSmallestReadPoint();

  String getColumnFamilyName();

  TableName getTableName();

  /**
   * @return The number of cells flushed to disk
   */
  long getFlushedCellsCount();

  /**
   * @return The total size of data flushed to disk, in bytes
   */
  long getFlushedCellsSize();

  /**
   * @return The total size of flush output files on disk, in bytes
   */
  long getFlushedOutputFileSize();

  /**
   * @return The number of cells processed during minor compactions
   */
  long getCompactedCellsCount();

  /**
   * @return The total amount of data processed during minor compactions, in bytes
   */
  long getCompactedCellsSize();

  /**
   * @return The number of cells processed during major compactions
   */
  long getMajorCompactedCellsCount();

  /**
   * @return The total amount of data processed during major compactions, in bytes
   */
  long getMajorCompactedCellsSize();

  /**
   * @param o Observer who wants to know about changes in set of Readers
   */
  void addChangedReaderObserver(ChangedReadersObserver o);

  /**
   * @param o Observer no longer interested in changes in set of Readers.
   */
  void deleteChangedReaderObserver(ChangedReadersObserver o);

  /**
   * @return Whether this store has too many store files.
   */
  boolean hasTooManyStoreFiles();

  /**
   * Checks the underlying store files, opens the files that have not been opened, and removes
   * the store file readers for store files no longer available. Mainly used by secondary region
   * replicas to keep up to date with the primary region files.
   * @throws IOException on failure
   */
  void refreshStoreFiles() throws IOException;

  /**
   * This value can represent the degree of emergency of compaction for this store. It should be
   * greater than or equal to 0.0, and any value greater than 1.0 means we have too many store
   * files.
   * <ul>
   * <li>if getStorefilesCount &lt;= getMinFilesToCompact, return 0.0</li>
   * <li>return (getStorefilesCount - getMinFilesToCompact) / (blockingFileCount -
   * getMinFilesToCompact)</li>
   * </ul>
   * <p>
   * For striped stores, we should calculate this value by the files in each stripe separately and
   * return the maximum value.
   * <p>
   * It is similar to {@link #getCompactPriority()} except that it is more suitable to use in a
   * linear formula.
   */
  double getCompactionPressure();
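
  /*
   * Worked example of the formula above, with hypothetical configuration values: with
   * getMinFilesToCompact = 3 and blockingFileCount = 10, a store holding 7 files has a pressure
   * of (7 - 3) / (10 - 3) = 4.0 / 7.0, roughly 0.57; at 10 or more files the value reaches 1.0
   * or above, signalling that the store has too many store files.
   */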

  /**
   * Replaces the store files that the store has with the given files. Mainly used by secondary
   * region replicas to keep up to date with the primary region files.
   * @throws IOException on failure
   */
  void refreshStoreFiles(Collection<String> newFiles) throws IOException;

  void bulkLoadHFile(StoreFileInfo fileInfo) throws IOException;

  boolean isPrimaryReplicaStore();

  /**
   * Closes and archives the compacted files under this store
   */
  void closeAndArchiveCompactedFiles() throws IOException;

  /**
   * This method is called when it is clear that the flush to disk is completed.
   * The store may do any post-flush actions at this point.
   * One example is to update the WAL with the sequence number that is known only at the store
   * level.
   */
  void finalizeFlush();

  MemStore getMemStore();
}