/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.NavigableSet;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
import org.apache.hadoop.hbase.util.Pair;

/**
 * Interface for objects that hold a column family in a Region. It is a memstore and a set of zero
 * or more StoreFiles, which stretch backwards over time.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface Store extends HeapSize, StoreConfigInformation {

  /*
   * The default priority for user-specified compaction requests.
   * The user gets top priority unless we have blocking compactions (priority <= 0).
   */
  int PRIORITY_USER = 1;
  int NO_PRIORITY = Integer.MIN_VALUE;

  // General Accessors
  KeyValue.KVComparator getComparator();

  Collection<StoreFile> getStorefiles();

  /**
   * Close all the readers. We don't need to worry about subsequent requests because the HRegion
   * holds a write lock that will prevent any more reads or writes.
   * @return the {@link StoreFile StoreFiles} that were previously being used.
   * @throws IOException on failure
   */
  Collection<StoreFile> close() throws IOException;

  /**
   * Return a scanner for both the memstore and the HStore files. Assumes we are not in a
   * compaction.
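   * <p>
   * A minimal usage sketch (illustrative only; assumes a {@code Store} reference named
   * {@code store} and a read point {@code readPt} obtained from the region's MVCC):
   * <pre>{@code
   * Scan scan = new Scan(Bytes.toBytes("startRow"));
   * NavigableSet<byte[]> cols = new TreeSet<byte[]>(Bytes.BYTES_COMPARATOR);
   * cols.add(Bytes.toBytes("qual"));
   * KeyValueScanner scanner = store.getScanner(scan, cols, readPt);
   * }</pre>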
   * @param scan Scan to apply when scanning the stores
   * @param targetCols columns to scan
   * @param readPt the read point of the current scan
   * @return a scanner over the current key values
   * @throws IOException on failure
   */
  KeyValueScanner getScanner(Scan scan, final NavigableSet<byte[]> targetCols, long readPt)
      throws IOException;

  /**
   * Get all scanners with no filtering based on TTL (that happens further down
   * the line).
   * @param cacheBlocks whether blocks read by these scanners should be cached
   * @param isGet whether the scanners serve a Get (single-row) operation
   * @param usePread whether to use positional read rather than streaming read
   * @param isCompaction whether the scanners are being opened for a compaction
   * @param matcher the ScanQueryMatcher to apply, if any
   * @param startRow the first row to include
   * @param stopRow the row at which to stop scanning
   * @param readPt the MVCC read point to use
   * @return all scanners for this store
   */
  List<KeyValueScanner> getScanners(
    boolean cacheBlocks,
    boolean isGet,
    boolean usePread,
    boolean isCompaction,
    ScanQueryMatcher matcher,
    byte[] startRow,
    byte[] stopRow,
    long readPt
  ) throws IOException;

  ScanInfo getScanInfo();

  /**
   * Adds or replaces the specified KeyValues.
   * <p>
   * For each KeyValue specified, if a cell with the same row, family, and qualifier exists in
   * the MemStore, it will be replaced. Otherwise, it will just be inserted into the MemStore.
   * <p>
   * This operation is atomic on each KeyValue (row/family/qualifier) but not necessarily atomic
   * across all of them.
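   * <p>
   * A hedged usage sketch (illustrative only; {@code cells} and {@code smallestReadPoint}
   * are assumed to be supplied by the caller):
   * <pre>{@code
   * long delta = store.upsert(cells, smallestReadPoint);
   * // the caller is responsible for accounting the returned memstore size delta
   * }</pre>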
   * @param cells the cells to add or replace
   * @param readpoint readpoint below which we can safely remove duplicate KVs
   * @return memstore size delta
   * @throws IOException on failure
   */
  long upsert(Iterable<Cell> cells, long readpoint) throws IOException;

  /**
   * Adds a value to the memstore.
   * @param kv the KeyValue to add
   * @return memstore size delta and the newly added KV, which may differ from the passed-in KV
   */
  Pair<Long, Cell> add(KeyValue kv);

  /**
   * @return The time of the oldest edit in the memstore
   */
  long timeOfOldestEdit();

  /**
   * Removes a kv from the memstore. The KeyValue is removed only if its key and memstoreTS match
   * the key and memstoreTS of the kv parameter.
   * @param kv the KeyValue to roll back
   */
  void rollback(final KeyValue kv);

  /**
   * Find the key that matches <i>row</i> exactly, or the one that immediately precedes it.
   * WARNING: Only use this method on a table where writes occur with strictly increasing
   * timestamps. This method assumes that pattern of writes in order to make it reasonably
   * performant. The search also depends on the axiom that deletes are for cells in the container
   * that follows, whether a memstore snapshot or a storefile, not for the current container:
   * i.e. we'll see deletes before we come across the cells we are to delete. The presumption is
   * that the memstore#kvset is processed before memstore#snapshot and so on.
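   * <p>
   * An illustrative sketch (the row key is an assumption, not part of this API):
   * <pre>{@code
   * KeyValue kv = store.getRowKeyAtOrBefore(Bytes.toBytes("row-0042"));
   * if (kv != null) {
   *   // kv belongs to "row-0042", or to the closest row sorting before it
   * }
   * }</pre>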
   * @param row The row key of the targeted row.
   * @return Found keyvalue or null if none found.
   * @throws IOException on failure
   */
  KeyValue getRowKeyAtOrBefore(final byte[] row) throws IOException;

  FileSystem getFileSystem();

  /*
   * @param maxKeyCount the expected maximum number of keys to be written
   * @param compression Compression algorithm to use
   * @param isCompaction whether we are creating a new file in a compaction
   * @param includeMVCCReadpoint whether we should write out the MVCC readpoint
   * @param includesTags whether the new file should include cell tags
   * @return Writer for a new StoreFile in the tmp dir.
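   *
   * A hedged sketch of flush-style use (illustrative only; argument values are assumptions):
   *
   *   StoreFile.Writer writer = store.createWriterInTmp(
   *       cellCount, Compression.Algorithm.NONE,
   *       false,  // not a compaction
   *       true,   // include the MVCC readpoint
   *       false); // no tags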
   */
  StoreFile.Writer createWriterInTmp(
    long maxKeyCount,
    Compression.Algorithm compression,
    boolean isCompaction,
    boolean includeMVCCReadpoint,
    boolean includesTags
  ) throws IOException;

  // Compaction oriented methods

  boolean throttleCompaction(long compactionSize);

  /**
   * Getter for the CompactionProgress object.
   * @return CompactionProgress object; can be null
   */
  CompactionProgress getCompactionProgress();

  CompactionContext requestCompaction() throws IOException;

  CompactionContext requestCompaction(int priority, CompactionRequest baseRequest)
      throws IOException;

  void cancelRequestedCompaction(CompactionContext compaction);

  List<StoreFile> compact(CompactionContext compaction) throws IOException;
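
  /*
   * A hedged sketch of the usual compaction round trip (illustrative only; locking and
   * error handling elided):
   *
   *   CompactionContext compaction = store.requestCompaction();
   *   if (compaction != null) {
   *     List<StoreFile> newFiles = store.compact(compaction);
   *   }
   *
   * A request that will not be run is released via cancelRequestedCompaction(compaction).
   */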

  /**
   * @return true if we should run a major compaction.
   */
  boolean isMajorCompaction() throws IOException;

  void triggerMajorCompaction();

  /**
   * See if there are too many store files in this store.
   * @return true if the number of store files is greater than the number defined in
   * minFilesToCompact
   */
  boolean needsCompaction();

  int getCompactPriority();

  StoreFlushContext createFlushContext(long cacheFlushId);

  /**
   * Call to complete a compaction. It's for the case where we find in the WAL a compaction
   * that was not finished. We could find one recovering a WAL after a regionserver crash.
   * See HBASE-2331.
   * @param compaction the descriptor of the compaction recovered from the WAL
   */
  void completeCompactionMarker(CompactionDescriptor compaction)
      throws IOException;

  // Split oriented methods

  boolean canSplit();

  /**
   * Determines if the Store should be split.
   * @return the split point (a byte[] row key) if the store should be split, null otherwise.
   */
  byte[] getSplitPoint();

  // Bulk Load methods

  /**
   * This throws a WrongRegionException if the HFile does not fit in this region, or an
   * InvalidHFileException if the HFile is not valid.
   */
  void assertBulkLoadHFileOk(Path srcPath) throws IOException;

  /**
   * This method should only be called from HRegion. It is assumed that the ranges of values in
   * the HFile fit within the store's assigned region. (assertBulkLoadHFileOk checks this.)
   *
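   * A hedged usage sketch (illustrative only; the path and sequence id are assumptions):
   * <pre>{@code
   * Path hfile = new Path("/staging/cf/myhfile");
   * store.assertBulkLoadHFileOk(hfile);           // validate first
   * store.bulkLoadHFile(hfile.toString(), seqId); // then load
   * }</pre>
   *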
   * @param srcPathStr path of the HFile to load
   * @param sequenceId sequence Id associated with the HFile
   */
  void bulkLoadHFile(String srcPathStr, long sequenceId) throws IOException;

  // General accessors into the state of the store
  // TODO abstract some of this out into a metrics class

  /**
   * @return <tt>true</tt> if the store has any underlying reference files to older HFiles
   */
  boolean hasReferences();

  /**
   * @return The size of this store's memstore, in bytes
   */
  long getMemStoreSize();

  /**
   * @return The amount of memory we could flush from this memstore; usually this is equal to
   * {@link #getMemStoreSize()} unless we are carrying snapshots and then it will be the size of
   * outstanding snapshots.
   */
  long getFlushableSize();

  HColumnDescriptor getFamily();

  /**
   * @return The maximum memstoreTS in all store files.
   */
  long getMaxMemstoreTS();

  /**
   * @return the data block encoder
   */
  HFileDataBlockEncoder getDataBlockEncoder();

  /** @return aggregate size of the store files used in the last compaction */
  long getLastCompactSize();

  /** @return aggregate size of HStore */
  long getSize();

  /**
   * @return Count of store files
   */
  int getStorefilesCount();

  /**
   * @return The size of the store files, in bytes, uncompressed.
   */
  long getStoreSizeUncompressed();

  /**
   * @return The size of the store files, in bytes.
   */
  long getStorefilesSize();

  /**
   * @return The size of the store file indexes, in bytes.
   */
  long getStorefilesIndexSize();

  /**
   * Returns the total size of all index blocks in the data block indexes, including the root
   * level, intermediate levels, and the leaf level for multi-level indexes, or just the root
   * level for single-level indexes.
   * @return the total size of block indexes in the store
   */
  long getTotalStaticIndexSize();

  /**
   * Returns the total byte size of all Bloom filter bit arrays. For compound Bloom filters even
   * the Bloom blocks currently not loaded into the block cache are counted.
   * @return the total size of all Bloom filters in the store
   */
  long getTotalStaticBloomSize();

  // Test-helper methods

  /**
   * Used for tests.
   * @return cache configuration for this Store.
   */
  CacheConfig getCacheConfig();

  /**
   * @return the parent region info hosting this store
   */
  HRegionInfo getRegionInfo();

  RegionCoprocessorHost getCoprocessorHost();

  boolean areWritesEnabled();

  /**
   * @return The smallest mvcc readPoint across all the scanners in this region. Writes older
   * than this readPoint are included in every read operation.
   */
  long getSmallestReadPoint();

  String getColumnFamilyName();

  TableName getTableName();

  /*
   * @param o Observer who wants to know about changes in set of Readers
   */
  void addChangedReaderObserver(ChangedReadersObserver o);

  /*
   * @param o Observer no longer interested in changes in set of Readers.
   */
  void deleteChangedReaderObserver(ChangedReadersObserver o);

  /**
   * @return Whether this store has too many store files.
   */
  boolean hasTooManyStoreFiles();
}