1 /* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 package org.apache.hadoop.hbase.regionserver; 19 20 import java.io.IOException; 21 import java.util.Collection; 22 import java.util.List; 23 import java.util.Map; 24 25 import org.apache.hadoop.hbase.Cell; 26 import org.apache.hadoop.hbase.HBaseInterfaceAudience; 27 import org.apache.hadoop.hbase.HDFSBlocksDistribution; 28 import org.apache.hadoop.hbase.HRegionInfo; 29 import org.apache.hadoop.hbase.HTableDescriptor; 30 import org.apache.hadoop.hbase.classification.InterfaceAudience; 31 import org.apache.hadoop.hbase.classification.InterfaceStability; 32 import org.apache.hadoop.hbase.client.Append; 33 import org.apache.hadoop.hbase.client.Delete; 34 import org.apache.hadoop.hbase.client.Get; 35 import org.apache.hadoop.hbase.client.Increment; 36 import org.apache.hadoop.hbase.client.IsolationLevel; 37 import org.apache.hadoop.hbase.client.Mutation; 38 import org.apache.hadoop.hbase.client.Put; 39 import org.apache.hadoop.hbase.client.Result; 40 import org.apache.hadoop.hbase.client.RowMutations; 41 import org.apache.hadoop.hbase.client.Scan; 42 import org.apache.hadoop.hbase.conf.ConfigurationObserver; 43 import 
org.apache.hadoop.hbase.exceptions.FailedSanityCheckException; 44 import org.apache.hadoop.hbase.filter.ByteArrayComparable; 45 import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp; 46 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState; 47 import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall; 48 import org.apache.hadoop.hbase.util.Pair; 49 import org.apache.hadoop.hbase.wal.WALSplitter.MutationReplay; 50 51 import com.google.protobuf.Message; 52 import com.google.protobuf.RpcController; 53 import com.google.protobuf.Service; 54 55 /** 56 * Regions store data for a certain region of a table. It stores all columns 57 * for each row. A given table consists of one or more Regions. 58 * 59 * <p>A Region is defined by its table and its key extent. 60 * 61 * <p>Locking at the Region level serves only one purpose: preventing the 62 * region from being closed (and consequently split) while other operations 63 * are ongoing. Each row level operation obtains both a row lock and a region 64 * read lock for the duration of the operation. While a scanner is being 65 * constructed, getScanner holds a read lock. If the scanner is successfully 66 * constructed, it holds a read lock until it is closed. A close takes out a 67 * write lock and consequently will block for ongoing operations and will block 68 * new operations from starting while the close is in progress.
69 */ 70 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC) 71 @InterfaceStability.Evolving 72 public interface Region extends ConfigurationObserver { 73 74 /////////////////////////////////////////////////////////////////////////// 75 // Region state 76 77 /** @return region information for this region */ 78 HRegionInfo getRegionInfo(); 79 80 /** @return table descriptor for this region */ 81 HTableDescriptor getTableDesc(); 82 83 /** @return true if region is available (not closed and not closing) */ 84 boolean isAvailable(); 85 86 /** @return true if region is closed */ 87 boolean isClosed(); 88 89 /** @return True if closing process has started */ 90 boolean isClosing(); 91 92 /** @return True if region is in recovering state */ 93 boolean isRecovering(); 94 95 /** @return True if region is read only */ 96 boolean isReadOnly(); 97 98 /** 99 * Return the list of Stores managed by this region 100 * <p>Use with caution. Exposed for use of fixup utilities. 101 * @return a list of the Stores managed by this region 102 */ 103 List<Store> getStores(); 104 105 /** 106 * Return the Store for the given family 107 * <p>Use with caution. Exposed for use of fixup utilities. 108 * @return the Store for the given family 109 */ 110 Store getStore(byte[] family); 111 112 /** @return list of store file names for the given families */ 113 List<String> getStoreFileList(byte [][] columns); 114 115 /** 116 * Check the region's underlying store files, open the files that have not 117 * been opened yet, and remove the store file readers for store files no 118 * longer available. 
119 * @throws IOException 120 */ 121 boolean refreshStoreFiles() throws IOException; 122 123 /** @return the latest sequence number that was read from storage when this region was opened */ 124 long getOpenSeqNum(); 125 126 /** @return the max sequence id of flushed data on this region */ 127 long getMaxFlushedSeqId(); 128 129 /** @return the oldest sequence id found in the store for the given family */ 130 public long getOldestSeqIdOfStore(byte[] familyName); 131 132 /** 133 * This can be used to determine the last time all files of this region were major compacted. 134 * @param majorCompactioOnly Only consider HFile that are the result of major compaction 135 * @return the timestamp of the oldest HFile for all stores of this region 136 */ 137 long getOldestHfileTs(boolean majorCompactioOnly) throws IOException; 138 139 /** 140 * @return map of column family names to max sequence id that was read from storage when this 141 * region was opened 142 */ 143 public Map<byte[], Long> getMaxStoreSeqId(); 144 145 /** @return true if loading column families on demand by default */ 146 boolean isLoadingCfsOnDemandDefault(); 147 148 /** @return readpoint considering given IsolationLevel */ 149 long getReadpoint(IsolationLevel isolationLevel); 150 151 /** 152 * @return The earliest time a store in the region was flushed. All 153 * other stores in the region would have been flushed either at, or 154 * after this time. 
155 */ 156 long getEarliestFlushTimeForAllStores(); 157 158 /////////////////////////////////////////////////////////////////////////// 159 // Metrics 160 161 /** @return read requests count for this region */ 162 long getReadRequestsCount(); 163 164 /** 165 * Update the read request count for this region 166 * @param i increment 167 */ 168 void updateReadRequestsCount(long i); 169 170 /** @return write request count for this region */ 171 long getWriteRequestsCount(); 172 173 /** 174 * Update the write request count for this region 175 * @param i increment 176 */ 177 void updateWriteRequestsCount(long i); 178 179 /** @return memstore size for this region, in bytes */ 180 long getMemstoreSize(); 181 182 /** @return the number of mutations processed bypassing the WAL */ 183 long getNumMutationsWithoutWAL(); 184 185 /** @return the size of data processed bypassing the WAL, in bytes */ 186 long getDataInMemoryWithoutWAL(); 187 188 /** @return the number of blocked requests */ 189 long getBlockedRequestsCount(); 190 191 /** @return the number of checkAndMutate guards that passed */ 192 long getCheckAndMutateChecksPassed(); 193 194 /** @return the number of failed checkAndMutate guards */ 195 long getCheckAndMutateChecksFailed(); 196 197 /** @return the MetricsRegion for this region */ 198 MetricsRegion getMetrics(); 199 200 /** @return the block distribution for all Stores managed by this region */ 201 HDFSBlocksDistribution getHDFSBlocksDistribution(); 202 203 /////////////////////////////////////////////////////////////////////////// 204 // Locking 205 206 // Region read locks 207 208 /** 209 * Operation enum is used in {@link Region#startRegionOperation} to provide context for 210 * various checks before any region operation begins. 
211 */ 212 enum Operation { 213 ANY, GET, PUT, DELETE, SCAN, APPEND, INCREMENT, SPLIT_REGION, MERGE_REGION, BATCH_MUTATE, 214 REPLAY_BATCH_MUTATE, COMPACT_REGION, REPLAY_EVENT 215 } 216 217 /** 218 * This method needs to be called before any public call that reads or 219 * modifies data. 220 * Acquires a read lock and checks if the region is closing or closed. 221 * <p>{@link #closeRegionOperation} MUST then always be called after 222 * the operation has completed, whether it succeeded or failed. 223 * @throws IOException 224 */ 225 void startRegionOperation() throws IOException; 226 227 /** 228 * This method needs to be called before any public call that reads or 229 * modifies data. 230 * Acquires a read lock and checks if the region is closing or closed. 231 * <p>{@link #closeRegionOperation} MUST then always be called after 232 * the operation has completed, whether it succeeded or failed. 233 * @param op The operation is about to be taken on the region 234 * @throws IOException 235 */ 236 void startRegionOperation(Operation op) throws IOException; 237 238 /** 239 * Closes the region operation lock. 240 * @throws IOException 241 */ 242 void closeRegionOperation() throws IOException; 243 244 // Row write locks 245 246 /** 247 * Row lock held by a given thread. 248 * One thread may acquire multiple locks on the same row simultaneously. 249 * The locks must be released by calling release() from the same thread. 250 */ 251 public interface RowLock { 252 /** 253 * Release the given lock. If there are no remaining locks held by the current thread 254 * then unlock the row and allow other threads to acquire the lock. 255 * @throws IllegalArgumentException if called by a different thread than the lock owning 256 * thread 257 */ 258 void release(); 259 } 260 261 /** 262 * Tries to acquire a lock on the given row. 263 * @param waitForLock if true, will block until the lock is available. 
264 * Otherwise, just tries to obtain the lock and returns 265 * false if unavailable. 266 * @return the row lock if acquired, 267 * null if waitForLock was false and the lock was not acquired 268 * @throws IOException if waitForLock was true and the lock could not be acquired after waiting 269 */ 270 RowLock getRowLock(byte[] row, boolean waitForLock) throws IOException; 271 272 /** 273 * If the given list of row locks is not null, releases all locks. 274 */ 275 void releaseRowLocks(List<RowLock> rowLocks); 276 277 /////////////////////////////////////////////////////////////////////////// 278 // Region operations 279 280 /** 281 * Perform one or more append operations on a row. 282 * @param append 283 * @param nonceGroup 284 * @param nonce 285 * @return result of the operation 286 * @throws IOException 287 */ 288 Result append(Append append, long nonceGroup, long nonce) throws IOException; 289 290 /** 291 * Perform a batch of mutations. 292 * <p> 293 * Note this supports only Put and Delete mutations and will ignore other types passed. 294 * @param mutations the list of mutations 295 * @param nonceGroup 296 * @param nonce 297 * @return an array of OperationStatus which internally contains the 298 * OperationStatusCode and the exceptionMessage if any. 299 * @throws IOException 300 */ 301 OperationStatus[] batchMutate(Mutation[] mutations, long nonceGroup, long nonce) 302 throws IOException; 303 304 /** 305 * Replay a batch of mutations. 306 * @param mutations mutations to replay. 307 * @param replaySeqId 308 * @return an array of OperationStatus which internally contains the 309 * OperationStatusCode and the exceptionMessage if any. 310 * @throws IOException 311 */ 312 OperationStatus[] batchReplay(MutationReplay[] mutations, long replaySeqId) throws IOException; 313 314 /** 315 * Atomically checks if a row/family/qualifier value matches the expected val 316 * If it does, it performs the row mutations. 
If the passed value is null, t 317 * is for the lack of column (ie: non-existence) 318 * @param row to check 319 * @param family column family to check 320 * @param qualifier column qualifier to check 321 * @param compareOp the comparison operator 322 * @param comparator 323 * @param mutation 324 * @param writeToWAL 325 * @return true if mutation was applied, false otherwise 326 * @throws IOException 327 */ 328 boolean checkAndMutate(byte [] row, byte [] family, byte [] qualifier, CompareOp compareOp, 329 ByteArrayComparable comparator, Mutation mutation, boolean writeToWAL) throws IOException; 330 331 /** 332 * Atomically checks if a row/family/qualifier value matches the expected val 333 * If it does, it performs the row mutations. If the passed value is null, t 334 * is for the lack of column (ie: non-existence) 335 * @param row to check 336 * @param family column family to check 337 * @param qualifier column qualifier to check 338 * @param compareOp the comparison operator 339 * @param comparator 340 * @param mutations 341 * @param writeToWAL 342 * @return true if mutation was applied, false otherwise 343 * @throws IOException 344 */ 345 boolean checkAndRowMutate(byte [] row, byte [] family, byte [] qualifier, CompareOp compareOp, 346 ByteArrayComparable comparator, RowMutations mutations, boolean writeToWAL) 347 throws IOException; 348 349 /** 350 * Deletes the specified cells/row. 351 * @param delete 352 * @throws IOException 353 */ 354 void delete(Delete delete) throws IOException; 355 356 /** 357 * Do a get based on the get parameter. 358 * @param get query parameters 359 * @return result of the operation 360 */ 361 Result get(Get get) throws IOException; 362 363 /** 364 * Do a get based on the get parameter. 365 * @param get query parameters 366 * @param withCoprocessor invoke coprocessor or not. We don't want to 367 * always invoke cp. 
368 * @return list of cells resulting from the operation 369 */ 370 List<Cell> get(Get get, boolean withCoprocessor) throws IOException; 371 372 /** 373 * Return all the data for the row that matches <i>row</i> exactly, 374 * or the one that immediately preceeds it, at or immediately before 375 * <i>ts</i>. 376 * @param row 377 * @param family 378 * @return result of the operation 379 * @throws IOException 380 */ 381 Result getClosestRowBefore(byte[] row, byte[] family) throws IOException; 382 383 /** 384 * Return an iterator that scans over the HRegion, returning the indicated 385 * columns and rows specified by the {@link Scan}. 386 * <p> 387 * This Iterator must be closed by the caller. 388 * 389 * @param scan configured {@link Scan} 390 * @return RegionScanner 391 * @throws IOException read exceptions 392 */ 393 RegionScanner getScanner(Scan scan) throws IOException; 394 395 /** 396 * Perform one or more increment operations on a row. 397 * @param increment 398 * @param nonceGroup 399 * @param nonce 400 * @return result of the operation 401 * @throws IOException 402 */ 403 Result increment(Increment increment, long nonceGroup, long nonce) throws IOException; 404 405 /** 406 * Performs multiple mutations atomically on a single row. Currently 407 * {@link Put} and {@link Delete} are supported. 408 * 409 * @param mutations object that specifies the set of mutations to perform atomically 410 * @throws IOException 411 */ 412 void mutateRow(RowMutations mutations) throws IOException; 413 414 /** 415 * Perform atomic mutations within the region. 416 * 417 * @param mutations The list of mutations to perform. 418 * <code>mutations</code> can contain operations for multiple rows. 419 * Caller has to ensure that all rows are contained in this region. 
420 * @param rowsToLock Rows to lock 421 * @param nonceGroup Optional nonce group of the operation (client Id) 422 * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence") 423 * If multiple rows are locked care should be taken that 424 * <code>rowsToLock</code> is sorted in order to avoid deadlocks. 425 * @throws IOException 426 */ 427 void mutateRowsWithLocks(Collection<Mutation> mutations, Collection<byte[]> rowsToLock, 428 long nonceGroup, long nonce) throws IOException; 429 430 /** 431 * Performs atomic multiple reads and writes on a given row. 432 * 433 * @param processor The object defines the reads and writes to a row. 434 */ 435 void processRowsWithLocks(RowProcessor<?,?> processor) throws IOException; 436 437 /** 438 * Performs atomic multiple reads and writes on a given row. 439 * 440 * @param processor The object defines the reads and writes to a row. 441 * @param nonceGroup Optional nonce group of the operation (client Id) 442 * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence") 443 */ 444 void processRowsWithLocks(RowProcessor<?,?> processor, long nonceGroup, long nonce) 445 throws IOException; 446 447 /** 448 * Performs atomic multiple reads and writes on a given row. 449 * 450 * @param processor The object defines the reads and writes to a row. 451 * @param timeout The timeout of the processor.process() execution 452 * Use a negative number to switch off the time bound 453 * @param nonceGroup Optional nonce group of the operation (client Id) 454 * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence") 455 */ 456 void processRowsWithLocks(RowProcessor<?,?> processor, long timeout, long nonceGroup, long nonce) 457 throws IOException; 458 459 /** 460 * Puts some data in the table. 
461 * @param put 462 * @throws IOException 463 */ 464 void put(Put put) throws IOException; 465 466 /** 467 * Listener class to enable callers of 468 * bulkLoadHFile() to perform any necessary 469 * pre/post processing of a given bulkload call 470 */ 471 interface BulkLoadListener { 472 473 /** 474 * Called before an HFile is actually loaded 475 * @param family family being loaded to 476 * @param srcPath path of HFile 477 * @return final path to be used for actual loading 478 * @throws IOException 479 */ 480 String prepareBulkLoad(byte[] family, String srcPath) throws IOException; 481 482 /** 483 * Called after a successful HFile load 484 * @param family family being loaded to 485 * @param srcPath path of HFile 486 * @throws IOException 487 */ 488 void doneBulkLoad(byte[] family, String srcPath) throws IOException; 489 490 /** 491 * Called after a failed HFile load 492 * @param family family being loaded to 493 * @param srcPath path of HFile 494 * @throws IOException 495 */ 496 void failedBulkLoad(byte[] family, String srcPath) throws IOException; 497 } 498 499 /** 500 * Attempts to atomically load a group of hfiles. This is critical for loading 501 * rows with multiple column families atomically. 502 * 503 * @param familyPaths List of Pair<byte[] column family, String hfilePath> 504 * @param bulkLoadListener Internal hooks enabling massaging/preparation of a 505 * file about to be bulk loaded 506 * @param assignSeqId 507 * @return true if successful, false if failed recoverably 508 * @throws IOException if failed unrecoverably. 
509 */ 510 boolean bulkLoadHFiles(Collection<Pair<byte[], String>> familyPaths, boolean assignSeqId, 511 BulkLoadListener bulkLoadListener) throws IOException; 512 513 /////////////////////////////////////////////////////////////////////////// 514 // Coprocessors 515 516 /** @return the coprocessor host */ 517 RegionCoprocessorHost getCoprocessorHost(); 518 519 /** 520 * Executes a single protocol buffer coprocessor endpoint {@link Service} method using 521 * the registered protocol handlers. {@link Service} implementations must be registered via the 522 * {@link Region#registerService(com.google.protobuf.Service)} 523 * method before they are available. 524 * 525 * @param controller an {@code RpcContoller} implementation to pass to the invoked service 526 * @param call a {@code CoprocessorServiceCall} instance identifying the service, method, 527 * and parameters for the method invocation 528 * @return a protocol buffer {@code Message} instance containing the method's result 529 * @throws IOException if no registered service handler is found or an error 530 * occurs during the invocation 531 * @see org.apache.hadoop.hbase.regionserver.Region#registerService(com.google.protobuf.Service) 532 */ 533 Message execService(RpcController controller, CoprocessorServiceCall call) throws IOException; 534 535 /** 536 * Registers a new protocol buffer {@link Service} subclass as a coprocessor endpoint to 537 * be available for handling 538 * {@link Region#execService(com.google.protobuf.RpcController, 539 * org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall)}} calls. 540 * 541 * <p> 542 * Only a single instance may be registered per region for a given {@link Service} subclass (the 543 * instances are keyed on {@link com.google.protobuf.Descriptors.ServiceDescriptor#getFullName()}. 544 * After the first registration, subsequent calls with the same service name will fail with 545 * a return value of {@code false}. 
546 * </p> 547 * @param instance the {@code Service} subclass instance to expose as a coprocessor endpoint 548 * @return {@code true} if the registration was successful, {@code false} 549 * otherwise 550 */ 551 boolean registerService(Service instance); 552 553 /////////////////////////////////////////////////////////////////////////// 554 // RowMutation processor support 555 556 /** 557 * Check the collection of families for validity. 558 * @param families 559 * @throws NoSuchColumnFamilyException 560 */ 561 void checkFamilies(Collection<byte[]> families) throws NoSuchColumnFamilyException; 562 563 /** 564 * Check the collection of families for valid timestamps 565 * @param familyMap 566 * @param now current timestamp 567 * @throws FailedSanityCheckException 568 */ 569 void checkTimestamps(Map<byte[], List<Cell>> familyMap, long now) 570 throws FailedSanityCheckException; 571 572 /** 573 * Prepare a delete for a row mutation processor 574 * @param delete The passed delete is modified by this method. WARNING! 575 * @throws IOException 576 */ 577 void prepareDelete(Delete delete) throws IOException; 578 579 /** 580 * Set up correct timestamps in the KVs in Delete object. 581 * <p>Caller should have the row and region locks. 582 * @param mutation 583 * @param familyCellMap 584 * @param now 585 * @throws IOException 586 */ 587 void prepareDeleteTimestamps(Mutation mutation, Map<byte[], List<Cell>> familyCellMap, 588 byte[] now) throws IOException; 589 590 /** 591 * Replace any cell timestamps set to HConstants#LATEST_TIMESTAMP with the 592 * provided current timestamp. 593 * @param values 594 * @param now 595 */ 596 void updateCellTimestamps(final Iterable<List<Cell>> values, final byte[] now) 597 throws IOException; 598 599 /////////////////////////////////////////////////////////////////////////// 600 // Flushes, compactions, splits, etc. 
601 // Wizards only, please 602 603 interface FlushResult { 604 enum Result { 605 FLUSHED_NO_COMPACTION_NEEDED, 606 FLUSHED_COMPACTION_NEEDED, 607 // Special case where a flush didn't run because there's nothing in the memstores. Used when 608 // bulk loading to know when we can still load even if a flush didn't happen. 609 CANNOT_FLUSH_MEMSTORE_EMPTY, 610 CANNOT_FLUSH 611 } 612 613 /** @return the detailed result code */ 614 Result getResult(); 615 616 /** @return true if the memstores were flushed, else false */ 617 boolean isFlushSucceeded(); 618 619 /** @return True if the flush requested a compaction, else false */ 620 boolean isCompactionNeeded(); 621 } 622 623 /** 624 * Flush the cache. 625 * 626 * <p>When this method is called the cache will be flushed unless: 627 * <ol> 628 * <li>the cache is empty</li> 629 * <li>the region is closed.</li> 630 * <li>a flush is already in progress</li> 631 * <li>writes are disabled</li> 632 * </ol> 633 * 634 * <p>This method may block for some time, so it should not be called from a 635 * time-sensitive thread. 636 * @param force whether we want to force a flush of all stores 637 * @return FlushResult indicating whether the flush was successful or not and if 638 * the region needs compacting 639 * 640 * @throws IOException general io exceptions 641 * @throws DroppedSnapshotException Thrown when abort is required. The caller MUST catch this 642 * exception and MUST abort. Any further operation to the region may cause data loss. 643 * because a snapshot was not properly persisted. 644 */ 645 FlushResult flush(boolean force) throws IOException; 646 647 /** 648 * Synchronously compact all stores in the region. 649 * <p>This operation could block for a long time, so don't call it from a 650 * time-sensitive thread. 651 * <p>Note that no locks are taken to prevent possible conflicts between 652 * compaction and splitting activities. The regionserver does not normally compact 653 * and split in parallel. 
However by calling this method you may introduce 654 * unexpected and unhandled concurrency. Don't do this unless you know what 655 * you are doing. 656 * 657 * @param majorCompaction True to force a major compaction regardless of thresholds 658 * @throws IOException 659 */ 660 void compact(final boolean majorCompaction) throws IOException; 661 662 /** 663 * Trigger major compaction on all stores in the region. 664 * <p> 665 * Compaction will be performed asynchronously to this call by the RegionServer's 666 * CompactSplitThread. See also {@link Store#triggerMajorCompaction()} 667 * @throws IOException 668 */ 669 void triggerMajorCompaction() throws IOException; 670 671 /** 672 * @return if a given region is in compaction now. 673 */ 674 CompactionState getCompactionState(); 675 676 /** Wait for all current flushes and compactions of the region to complete */ 677 void waitForFlushesAndCompactions(); 678 679 }