1 /** 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 package org.apache.hadoop.hbase.regionserver; 19 20 import java.io.IOException; 21 import java.util.Collection; 22 import java.util.List; 23 import java.util.NavigableSet; 24 25 import org.apache.hadoop.hbase.classification.InterfaceAudience; 26 import org.apache.hadoop.hbase.classification.InterfaceStability; 27 import org.apache.hadoop.fs.FileSystem; 28 import org.apache.hadoop.fs.Path; 29 import org.apache.hadoop.hbase.Cell; 30 import org.apache.hadoop.hbase.HBaseInterfaceAudience; 31 import org.apache.hadoop.hbase.HColumnDescriptor; 32 import org.apache.hadoop.hbase.HRegionInfo; 33 import org.apache.hadoop.hbase.KeyValue; 34 import org.apache.hadoop.hbase.TableName; 35 import org.apache.hadoop.hbase.client.Scan; 36 import org.apache.hadoop.hbase.conf.PropagatingConfigurationObserver; 37 import org.apache.hadoop.hbase.io.HeapSize; 38 import org.apache.hadoop.hbase.io.compress.Compression; 39 import org.apache.hadoop.hbase.io.hfile.CacheConfig; 40 import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder; 41 import org.apache.hadoop.hbase.protobuf.generated.WALProtos.CompactionDescriptor; 42 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; 43 import org.apache.hadoop.hbase.regionserver.compactions.CompactionProgress; 44 import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest; 45 import org.apache.hadoop.hbase.regionserver.compactions.CompactionThroughputController; 46 import org.apache.hadoop.hbase.security.User; 47 import org.apache.hadoop.hbase.util.Pair; 48 49 /** 50 * Interface for objects that hold a column family in a Region. Its a memstore and a set of zero or 51 * more StoreFiles, which stretch backwards over time. 52 */ 53 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC) 54 @InterfaceStability.Evolving 55 public interface Store extends HeapSize, StoreConfigInformation, PropagatingConfigurationObserver { 56 57 /* The default priority for user-specified compaction requests. 58 * The user gets top priority unless we have blocking compactions. (Pri <= 0) 59 */ int PRIORITY_USER = 1; 60 int NO_PRIORITY = Integer.MIN_VALUE; 61 62 // General Accessors 63 KeyValue.KVComparator getComparator(); 64 65 Collection<StoreFile> getStorefiles(); 66 67 /** 68 * Close all the readers We don't need to worry about subsequent requests because the Region 69 * holds a write lock that will prevent any more reads or writes. 70 * @return the {@link StoreFile StoreFiles} that were previously being used. 71 * @throws IOException on failure 72 */ 73 Collection<StoreFile> close() throws IOException; 74 75 /** 76 * Return a scanner for both the memstore and the HStore files. Assumes we are not in a 77 * compaction. 78 * @param scan Scan to apply when scanning the stores 79 * @param targetCols columns to scan 80 * @return a scanner over the current key values 81 * @throws IOException on failure 82 */ 83 KeyValueScanner getScanner(Scan scan, final NavigableSet<byte[]> targetCols, long readPt) 84 throws IOException; 85 86 /** 87 * Get all scanners with no filtering based on TTL (that happens further down 88 * the line). 89 * @param cacheBlocks 90 * @param isGet 91 * @param usePread 92 * @param isCompaction 93 * @param matcher 94 * @param startRow 95 * @param stopRow 96 * @param readPt 97 * @return all scanners for this store 98 */ 99 List<KeyValueScanner> getScanners( 100 boolean cacheBlocks, 101 boolean isGet, 102 boolean usePread, 103 boolean isCompaction, 104 ScanQueryMatcher matcher, 105 byte[] startRow, 106 byte[] stopRow, 107 long readPt 108 ) throws IOException; 109 110 ScanInfo getScanInfo(); 111 112 /** 113 * Adds or replaces the specified KeyValues. 114 * <p> 115 * For each KeyValue specified, if a cell with the same row, family, and qualifier exists in 116 * MemStore, it will be replaced. Otherwise, it will just be inserted to MemStore. 117 * <p> 118 * This operation is atomic on each KeyValue (row/family/qualifier) but not necessarily atomic 119 * across all of them. 120 * @param cells 121 * @param readpoint readpoint below which we can safely remove duplicate KVs 122 * @return memstore size delta 123 * @throws IOException 124 */ 125 long upsert(Iterable<Cell> cells, long readpoint) throws IOException; 126 127 /** 128 * Adds a value to the memstore 129 * @param cell 130 * @return memstore size delta & newly added KV which maybe different than the passed in KV 131 */ 132 Pair<Long, Cell> add(Cell cell); 133 134 /** 135 * When was the last edit done in the memstore 136 */ 137 long timeOfOldestEdit(); 138 139 /** 140 * Removes a Cell from the memstore. The Cell is removed only if its key & memstoreTS match the 141 * key & memstoreTS value of the cell parameter. 142 * @param cell 143 */ 144 void rollback(final Cell cell); 145 146 /** 147 * Find the key that matches <i>row</i> exactly, or the one that immediately precedes it. WARNING: 148 * Only use this method on a table where writes occur with strictly increasing timestamps. This 149 * method assumes this pattern of writes in order to make it reasonably performant. Also our 150 * search is dependent on the axiom that deletes are for cells that are in the container that 151 * follows whether a memstore snapshot or a storefile, not for the current container: i.e. we'll 152 * see deletes before we come across cells we are to delete. Presumption is that the 153 * memstore#kvset is processed before memstore#snapshot and so on. 154 * @param row The row key of the targeted row. 155 * @return Found Cell or null if none found. 156 * @throws IOException 157 */ 158 Cell getRowKeyAtOrBefore(final byte[] row) throws IOException; 159 160 FileSystem getFileSystem(); 161 162 /* 163 * @param maxKeyCount 164 * @param compression Compression algorithm to use 165 * @param isCompaction whether we are creating a new file in a compaction 166 * @param includeMVCCReadpoint whether we should out the MVCC readpoint 167 * @return Writer for a new StoreFile in the tmp dir. 168 */ 169 StoreFile.Writer createWriterInTmp( 170 long maxKeyCount, 171 Compression.Algorithm compression, 172 boolean isCompaction, 173 boolean includeMVCCReadpoint, 174 boolean includesTags 175 ) throws IOException; 176 177 // Compaction oriented methods 178 179 boolean throttleCompaction(long compactionSize); 180 181 /** 182 * getter for CompactionProgress object 183 * @return CompactionProgress object; can be null 184 */ 185 CompactionProgress getCompactionProgress(); 186 187 CompactionContext requestCompaction() throws IOException; 188 189 /** 190 * @deprecated see requestCompaction(int, CompactionRequest, User) 191 */ 192 @Deprecated 193 CompactionContext requestCompaction(int priority, CompactionRequest baseRequest) 194 throws IOException; 195 196 CompactionContext requestCompaction(int priority, CompactionRequest baseRequest, User user) 197 throws IOException; 198 199 void cancelRequestedCompaction(CompactionContext compaction); 200 201 /** 202 * @deprecated see compact(CompactionContext, CompactionThroughputController, User) 203 */ 204 @Deprecated 205 List<StoreFile> compact(CompactionContext compaction, 206 CompactionThroughputController throughputController) throws IOException; 207 208 List<StoreFile> compact(CompactionContext compaction, 209 CompactionThroughputController throughputController, User user) throws IOException; 210 211 /** 212 * @return true if we should run a major compaction. 213 */ 214 boolean isMajorCompaction() throws IOException; 215 216 void triggerMajorCompaction(); 217 218 /** 219 * See if there's too much store files in this store 220 * @return true if number of store files is greater than the number defined in minFilesToCompact 221 */ 222 boolean needsCompaction(); 223 224 int getCompactPriority(); 225 226 StoreFlushContext createFlushContext(long cacheFlushId); 227 228 /** 229 * Call to complete a compaction. Its for the case where we find in the WAL a compaction 230 * that was not finished. We could find one recovering a WAL after a regionserver crash. 231 * See HBASE-2331. 232 * @param compaction the descriptor for compaction 233 * @param pickCompactionFiles whether or not pick up the new compaction output files and 234 * add it to the store 235 * @param removeFiles whether to remove/archive files from filesystem 236 */ 237 void replayCompactionMarker(CompactionDescriptor compaction, boolean pickCompactionFiles, 238 boolean removeFiles) 239 throws IOException; 240 241 // Split oriented methods 242 243 boolean canSplit(); 244 245 /** 246 * Determines if Store should be split 247 * @return byte[] if store should be split, null otherwise. 248 */ 249 byte[] getSplitPoint(); 250 251 // Bulk Load methods 252 253 /** 254 * This throws a WrongRegionException if the HFile does not fit in this region, or an 255 * InvalidHFileException if the HFile is not valid. 256 */ 257 void assertBulkLoadHFileOk(Path srcPath) throws IOException; 258 259 /** 260 * This method should only be called from Region. It is assumed that the ranges of values in the 261 * HFile fit within the stores assigned region. (assertBulkLoadHFileOk checks this) 262 * 263 * @param srcPathStr 264 * @param sequenceId sequence Id associated with the HFile 265 */ 266 Path bulkLoadHFile(String srcPathStr, long sequenceId) throws IOException; 267 268 // General accessors into the state of the store 269 // TODO abstract some of this out into a metrics class 270 271 /** 272 * @return <tt>true</tt> if the store has any underlying reference files to older HFiles 273 */ 274 boolean hasReferences(); 275 276 /** 277 * @return The size of this store's memstore, in bytes 278 */ 279 long getMemStoreSize(); 280 281 /** 282 * @return The amount of memory we could flush from this memstore; usually this is equal to 283 * {@link #getMemStoreSize()} unless we are carrying snapshots and then it will be the size of 284 * outstanding snapshots. 285 */ 286 long getFlushableSize(); 287 288 /** 289 * Returns the memstore snapshot size 290 * @return size of the memstore snapshot 291 */ 292 long getSnapshotSize(); 293 294 HColumnDescriptor getFamily(); 295 296 /** 297 * @return The maximum sequence id in all store files. 298 */ 299 long getMaxSequenceId(); 300 301 /** 302 * @return The maximum memstoreTS in all store files. 303 */ 304 long getMaxMemstoreTS(); 305 306 /** 307 * @return the data block encoder 308 */ 309 HFileDataBlockEncoder getDataBlockEncoder(); 310 311 /** @return aggregate size of all HStores used in the last compaction */ 312 long getLastCompactSize(); 313 314 /** @return aggregate size of HStore */ 315 long getSize(); 316 317 /** 318 * @return Count of store files 319 */ 320 int getStorefilesCount(); 321 322 /** 323 * @return The size of the store files, in bytes, uncompressed. 324 */ 325 long getStoreSizeUncompressed(); 326 327 /** 328 * @return The size of the store files, in bytes. 329 */ 330 long getStorefilesSize(); 331 332 /** 333 * @return The size of the store file indexes, in bytes. 334 */ 335 long getStorefilesIndexSize(); 336 337 /** 338 * Returns the total size of all index blocks in the data block indexes, including the root level, 339 * intermediate levels, and the leaf level for multi-level indexes, or just the root level for 340 * single-level indexes. 341 * @return the total size of block indexes in the store 342 */ 343 long getTotalStaticIndexSize(); 344 345 /** 346 * Returns the total byte size of all Bloom filter bit arrays. For compound Bloom filters even the 347 * Bloom blocks currently not loaded into the block cache are counted. 348 * @return the total size of all Bloom filters in the store 349 */ 350 long getTotalStaticBloomSize(); 351 352 // Test-helper methods 353 354 /** 355 * Used for tests. 356 * @return cache configuration for this Store. 357 */ 358 CacheConfig getCacheConfig(); 359 360 /** 361 * @return the parent region info hosting this store 362 */ 363 HRegionInfo getRegionInfo(); 364 365 RegionCoprocessorHost getCoprocessorHost(); 366 367 boolean areWritesEnabled(); 368 369 /** 370 * @return The smallest mvcc readPoint across all the scanners in this 371 * region. Writes older than this readPoint, are included in every 372 * read operation. 373 */ 374 long getSmallestReadPoint(); 375 376 String getColumnFamilyName(); 377 378 TableName getTableName(); 379 380 /** 381 * @return The number of cells flushed to disk 382 */ 383 long getFlushedCellsCount(); 384 385 /** 386 * @return The total size of data flushed to disk, in bytes 387 */ 388 long getFlushedCellsSize(); 389 390 /** 391 * @return The number of cells processed during minor compactions 392 */ 393 long getCompactedCellsCount(); 394 395 /** 396 * @return The total amount of data processed during minor compactions, in bytes 397 */ 398 long getCompactedCellsSize(); 399 400 /** 401 * @return The number of cells processed during major compactions 402 */ 403 long getMajorCompactedCellsCount(); 404 405 /** 406 * @return The total amount of data processed during major compactions, in bytes 407 */ 408 long getMajorCompactedCellsSize(); 409 410 /* 411 * @param o Observer who wants to know about changes in set of Readers 412 */ 413 void addChangedReaderObserver(ChangedReadersObserver o); 414 415 /* 416 * @param o Observer no longer interested in changes in set of Readers. 417 */ 418 void deleteChangedReaderObserver(ChangedReadersObserver o); 419 420 /** 421 * @return Whether this store has too many store files. 422 */ 423 boolean hasTooManyStoreFiles(); 424 425 /** 426 * Checks the underlying store files, and opens the files that have not 427 * been opened, and removes the store file readers for store files no longer 428 * available. Mainly used by secondary region replicas to keep up to date with 429 * the primary region files. 430 * @throws IOException 431 */ 432 void refreshStoreFiles() throws IOException; 433 434 /** 435 * This value can represent the degree of emergency of compaction for this store. It should be 436 * greater than or equal to 0.0, any value greater than 1.0 means we have too many store files. 437 * <ul> 438 * <li>if getStorefilesCount <= getMinFilesToCompact, return 0.0</li> 439 * <li>return (getStorefilesCount - getMinFilesToCompact) / (blockingFileCount - 440 * getMinFilesToCompact)</li> 441 * </ul> 442 * <p> 443 * And for striped stores, we should calculate this value by the files in each stripe separately 444 * and return the maximum value. 445 * <p> 446 * It is similar to {@link #getCompactPriority()} except that it is more suitable to use in a 447 * linear formula. 448 */ 449 double getCompactionPressure(); 450 451 /** 452 * Replaces the store files that the store has with the given files. Mainly used by 453 * secondary region replicas to keep up to date with 454 * the primary region files. 455 * @throws IOException 456 */ 457 void refreshStoreFiles(Collection<String> newFiles) throws IOException; 458 459 void bulkLoadHFile(StoreFileInfo fileInfo) throws IOException; 460 }