View Javadoc

1   /*
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import java.io.IOException;
21  import java.util.Collection;
22  import java.util.List;
23  import java.util.Map;
24  
25  import org.apache.hadoop.hbase.Cell;
26  import org.apache.hadoop.hbase.HBaseInterfaceAudience;
27  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
28  import org.apache.hadoop.hbase.HRegionInfo;
29  import org.apache.hadoop.hbase.HTableDescriptor;
30  import org.apache.hadoop.hbase.classification.InterfaceAudience;
31  import org.apache.hadoop.hbase.classification.InterfaceStability;
32  import org.apache.hadoop.hbase.client.Append;
33  import org.apache.hadoop.hbase.client.Delete;
34  import org.apache.hadoop.hbase.client.Get;
35  import org.apache.hadoop.hbase.client.Increment;
36  import org.apache.hadoop.hbase.client.IsolationLevel;
37  import org.apache.hadoop.hbase.client.Mutation;
38  import org.apache.hadoop.hbase.client.Put;
39  import org.apache.hadoop.hbase.client.Result;
40  import org.apache.hadoop.hbase.client.RowMutations;
41  import org.apache.hadoop.hbase.client.Scan;
42  import org.apache.hadoop.hbase.conf.ConfigurationObserver;
43  import org.apache.hadoop.hbase.exceptions.FailedSanityCheckException;
44  import org.apache.hadoop.hbase.filter.ByteArrayComparable;
45  import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
46  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.GetRegionInfoResponse.CompactionState;
47  import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall;
48  import org.apache.hadoop.hbase.util.Pair;
49  import org.apache.hadoop.hbase.wal.WALSplitter.MutationReplay;
50  
51  import com.google.protobuf.Message;
52  import com.google.protobuf.RpcController;
53  import com.google.protobuf.Service;
54  
55  /**
56   * Regions store data for a certain region of a table.  It stores all columns
57   * for each row. A given table consists of one or more Regions.
58   *
59   * <p>A Region is defined by its table and its key extent.
60   *
61   * <p>Locking at the Region level serves only one purpose: preventing the
62   * region from being closed (and consequently split) while other operations
63   * are ongoing. Each row level operation obtains both a row lock and a region
64   * read lock for the duration of the operation. While a scanner is being
65   * constructed, getScanner holds a read lock. If the scanner is successfully
66   * constructed, it holds a read lock until it is closed. A close takes out a
67   * write lock and consequently will block for ongoing operations and will block
68   * new operations from starting while the close is in progress.
69   */
70  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.COPROC)
71  @InterfaceStability.Evolving
72  public interface Region extends ConfigurationObserver {
73  
74    ///////////////////////////////////////////////////////////////////////////
75    // Region state
76  
77    /** @return region information for this region */
78    HRegionInfo getRegionInfo();
79  
80    /** @return table descriptor for this region */
81    HTableDescriptor getTableDesc();
82  
83    /** @return true if region is available (not closed and not closing) */
84    boolean isAvailable();
85  
86    /** @return true if region is closed */
87    boolean isClosed();
88  
89    /** @return True if closing process has started */
90    boolean isClosing();
91  
92    /** @return True if region is in recovering state */
93    boolean isRecovering();
94  
95    /** @return True if region is read only */
96    boolean isReadOnly();
97  
98    /**
99     * Return the list of Stores managed by this region
100    * <p>Use with caution.  Exposed for use of fixup utilities.
101    * @return a list of the Stores managed by this region
102    */
103   List<Store> getStores();
104 
105   /**
106    * Return the Store for the given family
107    * <p>Use with caution.  Exposed for use of fixup utilities.
108    * @return the Store for the given family
109    */
110   Store getStore(byte[] family);
111 
112   /** @return list of store file names for the given families */
113   List<String> getStoreFileList(byte [][] columns);
114 
115   /**
116    * Check the region's underlying store files, open the files that have not
117    * been opened yet, and remove the store file readers for store files no
118    * longer available.
119    * @throws IOException
120    */
121   boolean refreshStoreFiles() throws IOException;
122 
123   /** @return the latest sequence number that was read from storage when this region was opened */
124   long getOpenSeqNum();
125 
126   /** @return the max sequence id of flushed data on this region */
127   long getMaxFlushedSeqId();
128 
129   /** @return the oldest sequence id found in the store for the given family */
130   long getOldestSeqIdOfStore(byte[] familyName);
131 
132   /**
133    * This can be used to determine the last time all files of this region were major compacted.
134    * @param majorCompactioOnly Only consider HFiles that are the result of a major compaction
135    * @return the timestamp of the oldest HFile for all stores of this region
136    */
137   long getOldestHfileTs(boolean majorCompactioOnly) throws IOException;
138 
139   /**
140    * @return map of column family names to max sequence id that was read from storage when this
141    * region was opened
142    */
143   Map<byte[], Long> getMaxStoreSeqId();
144 
145   /** @return true if loading column families on demand by default */
146   boolean isLoadingCfsOnDemandDefault();
147 
148   /** @return readpoint considering given IsolationLevel */
149   long getReadpoint(IsolationLevel isolationLevel);
150 
151   /**
152    * @return The earliest time a store in the region was flushed. All
153    *         other stores in the region would have been flushed either at, or
154    *         after this time.
155    */
156   long getEarliestFlushTimeForAllStores();
157 
158   ///////////////////////////////////////////////////////////////////////////
159   // Metrics
160 
161   /** @return read requests count for this region */
162   long getReadRequestsCount();
163 
164   /**
165    * Update the read request count for this region
166    * @param i increment
167    */
168   void updateReadRequestsCount(long i);
169 
170   /** @return write request count for this region */
171   long getWriteRequestsCount();
172 
173   /**
174    * Update the write request count for this region
175    * @param i increment
176    */
177   void updateWriteRequestsCount(long i);
178 
179   /** @return memstore size for this region, in bytes */
180   long getMemstoreSize();
181 
182   /** @return the number of mutations processed bypassing the WAL */
183   long getNumMutationsWithoutWAL();
184 
185   /** @return the size of data processed bypassing the WAL, in bytes */
186   long getDataInMemoryWithoutWAL();
187 
188   /** @return the number of blocked requests */
189   long getBlockedRequestsCount();
190 
191   /** @return the number of checkAndMutate guards that passed */
192   long getCheckAndMutateChecksPassed();
193 
194   /** @return the number of failed checkAndMutate guards */
195   long getCheckAndMutateChecksFailed();
196 
197   /** @return the MetricsRegion for this region */
198   MetricsRegion getMetrics();
199 
200   /** @return the block distribution for all Stores managed by this region */
201   HDFSBlocksDistribution getHDFSBlocksDistribution();
202 
203   ///////////////////////////////////////////////////////////////////////////
204   // Locking
205 
206   // Region read locks
207 
208   /**
209    * Operation enum is used in {@link Region#startRegionOperation} to provide context for
210    * various checks before any region operation begins.
211    */
212   enum Operation {
213     ANY, GET, PUT, DELETE, SCAN, APPEND, INCREMENT, SPLIT_REGION, MERGE_REGION, BATCH_MUTATE,
214     REPLAY_BATCH_MUTATE, COMPACT_REGION, REPLAY_EVENT
215   }
216 
217   /**
218    * This method needs to be called before any public call that reads or
219    * modifies data.
220    * Acquires a read lock and checks if the region is closing or closed.
221    * <p>{@link #closeRegionOperation} MUST then always be called after
222    * the operation has completed, whether it succeeded or failed.
223    * @throws IOException
224    */
225   void startRegionOperation() throws IOException;
226 
227   /**
228    * This method needs to be called before any public call that reads or
229    * modifies data.
230    * Acquires a read lock and checks if the region is closing or closed.
231    * <p>{@link #closeRegionOperation} MUST then always be called after
232    * the operation has completed, whether it succeeded or failed.
233    * @param op The operation that is about to be taken on the region
234    * @throws IOException
235    */
236   void startRegionOperation(Operation op) throws IOException;
237 
238   /**
239    * Closes the region operation lock.
240    * @throws IOException
241    */
242   void closeRegionOperation() throws IOException;
243 
244   // Row write locks
245 
246   /**
247    * Row lock held by a given thread.
248    * One thread may acquire multiple locks on the same row simultaneously.
249    * The locks must be released by calling release() from the same thread.
250    */
251   interface RowLock {
252     /**
253      * Release the given lock.  If there are no remaining locks held by the current thread
254      * then unlock the row and allow other threads to acquire the lock.
255      * @throws IllegalArgumentException if called by a different thread than the lock owning
256      *     thread
257      */
258     void release();
259   }
260 
261   /**
262    * Tries to acquire a lock on the given row.
263    * @param row the row to lock
264    * @param waitForLock if true, will block until the lock is available; otherwise just tries
265    *        to obtain the lock and returns null if unavailable
266    * @return the row lock if acquired,
267    *   null if waitForLock was false and the lock was not acquired
268    * @throws IOException if waitForLock was true and the lock could not be acquired after waiting
269    */
270   RowLock getRowLock(byte[] row, boolean waitForLock) throws IOException;
271 
272   /**
273    * If the given list of row locks is not null, releases all locks.
274    */
275   void releaseRowLocks(List<RowLock> rowLocks);
276 
277   ///////////////////////////////////////////////////////////////////////////
278   // Region operations
279 
280   /**
281    * Perform one or more append operations on a row.
282    * @param append the append operation(s) to perform
283    * @param nonceGroup Optional nonce group of the operation (client Id)
284    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
285    * @return result of the operation
286    * @throws IOException
287    */
288   Result append(Append append, long nonceGroup, long nonce) throws IOException;
289 
290   /**
291    * Perform a batch of mutations.
292    * <p>
293    * Note this supports only Put and Delete mutations and will ignore other types passed.
294    * @param mutations the list of mutations
295    * @param nonceGroup Optional nonce group of the operation (client Id)
296    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
297    * @return an array of OperationStatus which internally contains the
298    *         OperationStatusCode and the exceptionMessage if any.
299    * @throws IOException
300    */
301   OperationStatus[] batchMutate(Mutation[] mutations, long nonceGroup, long nonce)
302       throws IOException;
303 
304   /**
305    * Replay a batch of mutations.
306    * @param mutations mutations to replay.
307    * @param replaySeqId
308    * @return an array of OperationStatus which internally contains the
309    *         OperationStatusCode and the exceptionMessage if any.
310    * @throws IOException
311    */
312    OperationStatus[] batchReplay(MutationReplay[] mutations, long replaySeqId) throws IOException;
313 
314   /**
315    * Atomically checks if a row/family/qualifier value matches the expected value.
316    * If it does, it performs the mutation.  If the passed value is null, the check
317    * is for the lack of column (ie: non-existence)
318    * @param row to check
319    * @param family column family to check
320    * @param qualifier column qualifier to check
321    * @param compareOp the comparison operator
322    * @param comparator
323    * @param mutation the mutation to apply if the check passes
324    * @param writeToWAL
325    * @return true if mutation was applied, false otherwise
326    * @throws IOException
327    */
328   boolean checkAndMutate(byte [] row, byte [] family, byte [] qualifier, CompareOp compareOp,
329       ByteArrayComparable comparator, Mutation mutation, boolean writeToWAL) throws IOException;
330 
331   /**
332    * Atomically checks if a row/family/qualifier value matches the expected value.
333    * If it does, it performs the row mutations.  If the passed value is null, the check
334    * is for the lack of column (ie: non-existence)
335    * @param row to check
336    * @param family column family to check
337    * @param qualifier column qualifier to check
338    * @param compareOp the comparison operator
339    * @param comparator
340    * @param mutations the row mutations to apply if the check passes
341    * @param writeToWAL
342    * @return true if the mutations were applied, false otherwise
343    * @throws IOException
344    */
345   boolean checkAndRowMutate(byte [] row, byte [] family, byte [] qualifier, CompareOp compareOp,
346       ByteArrayComparable comparator, RowMutations mutations, boolean writeToWAL)
347       throws IOException;
348 
349   /**
350    * Deletes the specified cells/row.
351    * @param delete specifies the cells/row to delete
352    * @throws IOException
353    */
354   void delete(Delete delete) throws IOException;
355 
356   /**
357    * Do a get based on the get parameter.
358    * @param get query parameters
359    * @return result of the operation
360    */
361   Result get(Get get) throws IOException;
362 
363   /**
364    * Do a get based on the get parameter.
365    * @param get query parameters
366    * @param withCoprocessor invoke coprocessor or not. We don't want to
367    * always invoke cp.
368    * @return list of cells resulting from the operation
369    */
370   List<Cell> get(Get get, boolean withCoprocessor) throws IOException;
371 
372   /**
373    * Return all the data for the row that matches <i>row</i> exactly,
374    * or the one that immediately precedes it, at or immediately before
375    * <i>ts</i>.
376    * @param row
377    * @param family
378    * @return result of the operation
379    * @throws IOException
380    */
381   Result getClosestRowBefore(byte[] row, byte[] family) throws IOException;
382 
383   /**
384    * Return an iterator that scans over the HRegion, returning the indicated
385    * columns and rows specified by the {@link Scan}.
386    * <p>
387    * This Iterator must be closed by the caller.
388    *
389    * @param scan configured {@link Scan}
390    * @return RegionScanner
391    * @throws IOException read exceptions
392    */
393   RegionScanner getScanner(Scan scan) throws IOException;
394 
395   /**
396    * Perform one or more increment operations on a row.
397    * @param increment the increment operation(s) to perform
398    * @param nonceGroup Optional nonce group of the operation (client Id)
399    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
400    * @return result of the operation
401    * @throws IOException
402    */
403   Result increment(Increment increment, long nonceGroup, long nonce) throws IOException;
404 
405   /**
406    * Performs multiple mutations atomically on a single row. Currently
407    * {@link Put} and {@link Delete} are supported.
408    *
409    * @param mutations object that specifies the set of mutations to perform atomically
410    * @throws IOException
411    */
412   void mutateRow(RowMutations mutations) throws IOException;
413 
414   /**
415    * Perform atomic mutations within the region.
416    *
417    * @param mutations The list of mutations to perform.
418    * <code>mutations</code> can contain operations for multiple rows.
419    * Caller has to ensure that all rows are contained in this region.
420    * @param rowsToLock Rows to lock.
421    * If multiple rows are locked care should be taken that
422    * <code>rowsToLock</code> is sorted in order to avoid deadlocks.
423    * @param nonceGroup Optional nonce group of the operation (client Id)
424    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
425    * @throws IOException
426    */
427   void mutateRowsWithLocks(Collection<Mutation> mutations, Collection<byte[]> rowsToLock,
428       long nonceGroup, long nonce) throws IOException;
429 
430   /**
431    * Performs atomic multiple reads and writes on a given row.
432    *
433    * @param processor The object that defines the reads and writes to a row.
434    */
435   void processRowsWithLocks(RowProcessor<?,?> processor) throws IOException;
436 
437   /**
438    * Performs atomic multiple reads and writes on a given row.
439    *
440    * @param processor The object that defines the reads and writes to a row.
441    * @param nonceGroup Optional nonce group of the operation (client Id)
442    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
443    */
444   void processRowsWithLocks(RowProcessor<?,?> processor, long nonceGroup, long nonce)
445       throws IOException;
446 
447   /**
448    * Performs atomic multiple reads and writes on a given row.
449    *
450    * @param processor The object that defines the reads and writes to a row.
451    * @param timeout The timeout of the processor.process() execution
452    *                Use a negative number to switch off the time bound
453    * @param nonceGroup Optional nonce group of the operation (client Id)
454    * @param nonce Optional nonce of the operation (unique random id to ensure "more idempotence")
455    */
456   void processRowsWithLocks(RowProcessor<?,?> processor, long timeout, long nonceGroup, long nonce)
457       throws IOException;
458 
459   /**
460    * Puts some data in the table.
461    * @param put the data to put
462    * @throws IOException
463    */
464   void put(Put put) throws IOException;
465 
466   /**
467    * Listener class to enable callers of
468    * bulkLoadHFile() to perform any necessary
469    * pre/post processing of a given bulkload call
470    */
471   interface BulkLoadListener {
472 
473     /**
474      * Called before an HFile is actually loaded
475      * @param family family being loaded to
476      * @param srcPath path of HFile
477      * @return final path to be used for actual loading
478      * @throws IOException
479      */
480     String prepareBulkLoad(byte[] family, String srcPath) throws IOException;
481 
482     /**
483      * Called after a successful HFile load
484      * @param family family being loaded to
485      * @param srcPath path of HFile
486      * @throws IOException
487      */
488     void doneBulkLoad(byte[] family, String srcPath) throws IOException;
489 
490     /**
491      * Called after a failed HFile load
492      * @param family family being loaded to
493      * @param srcPath path of HFile
494      * @throws IOException
495      */
496     void failedBulkLoad(byte[] family, String srcPath) throws IOException;
497   }
498 
499   /**
500    * Attempts to atomically load a group of hfiles.  This is critical for loading
501    * rows with multiple column families atomically.
502    *
503    * @param familyPaths List of {@code Pair<byte[] column family, String hfilePath>}
504    * @param assignSeqId
505    * @param bulkLoadListener Internal hooks enabling massaging/preparation of a
506    * file about to be bulk loaded
507    * @return true if successful, false if failed recoverably
508    * @throws IOException if failed unrecoverably.
509    */
510   boolean bulkLoadHFiles(Collection<Pair<byte[], String>> familyPaths, boolean assignSeqId,
511       BulkLoadListener bulkLoadListener) throws IOException;
512 
513   ///////////////////////////////////////////////////////////////////////////
514   // Coprocessors
515 
516   /** @return the coprocessor host */
517   RegionCoprocessorHost getCoprocessorHost();
518 
519   /**
520    * Executes a single protocol buffer coprocessor endpoint {@link Service} method using
521    * the registered protocol handlers.  {@link Service} implementations must be registered via the
522    * {@link Region#registerService(com.google.protobuf.Service)}
523    * method before they are available.
524    *
525    * @param controller an {@code RpcController} implementation to pass to the invoked service
526    * @param call a {@code CoprocessorServiceCall} instance identifying the service, method,
527    *     and parameters for the method invocation
528    * @return a protocol buffer {@code Message} instance containing the method's result
529    * @throws IOException if no registered service handler is found or an error
530    *     occurs during the invocation
531    * @see org.apache.hadoop.hbase.regionserver.Region#registerService(com.google.protobuf.Service)
532    */
533   Message execService(RpcController controller, CoprocessorServiceCall call) throws IOException;
534 
535   /**
536    * Registers a new protocol buffer {@link Service} subclass as a coprocessor endpoint to
537    * be available for handling
538    * {@link Region#execService(com.google.protobuf.RpcController,
539    *    org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceCall)} calls.
540    *
541    * <p>
542    * Only a single instance may be registered per region for a given {@link Service} subclass (the
543    * instances are keyed on {@link com.google.protobuf.Descriptors.ServiceDescriptor#getFullName()}).
544    * After the first registration, subsequent calls with the same service name will fail with
545    * a return value of {@code false}.
546    * </p>
547    * @param instance the {@code Service} subclass instance to expose as a coprocessor endpoint
548    * @return {@code true} if the registration was successful, {@code false}
549    * otherwise
550    */
551   boolean registerService(Service instance);
552 
553   ///////////////////////////////////////////////////////////////////////////
554   // RowMutation processor support
555 
556   /**
557    * Check the collection of families for validity.
558    * @param families the collection of families to check
559    * @throws NoSuchColumnFamilyException
560    */
561   void checkFamilies(Collection<byte[]> families) throws NoSuchColumnFamilyException;
562 
563   /**
564    * Check the collection of families for valid timestamps
565    * @param familyMap
566    * @param now current timestamp
567    * @throws FailedSanityCheckException
568    */
569   void checkTimestamps(Map<byte[], List<Cell>> familyMap, long now)
570       throws FailedSanityCheckException;
571 
572   /**
573    * Prepare a delete for a row mutation processor
574    * @param delete The passed delete is modified by this method. WARNING!
575    * @throws IOException
576    */
577   void prepareDelete(Delete delete) throws IOException;
578 
579   /**
580    * Set up correct timestamps in the KVs in Delete object.
581    * <p>Caller should have the row and region locks.
582    * @param mutation
583    * @param familyCellMap
584    * @param now
585    * @throws IOException
586    */
587   void prepareDeleteTimestamps(Mutation mutation, Map<byte[], List<Cell>> familyCellMap,
588       byte[] now) throws IOException;
589 
590   /**
591    * Replace any cell timestamps set to HConstants#LATEST_TIMESTAMP with the
592    * provided current timestamp.
593    * @param values
594    * @param now
595    */
596   void updateCellTimestamps(final Iterable<List<Cell>> values, final byte[] now)
597       throws IOException;
598 
599   ///////////////////////////////////////////////////////////////////////////
600   // Flushes, compactions, splits, etc.
601   // Wizards only, please
602 
603   interface FlushResult {
604     enum Result {
605       FLUSHED_NO_COMPACTION_NEEDED,
606       FLUSHED_COMPACTION_NEEDED,
607       // Special case where a flush didn't run because there's nothing in the memstores. Used when
608       // bulk loading to know when we can still load even if a flush didn't happen.
609       CANNOT_FLUSH_MEMSTORE_EMPTY,
610       CANNOT_FLUSH
611     }
612 
613     /** @return the detailed result code */
614     Result getResult();
615 
616     /** @return true if the memstores were flushed, else false */
617     boolean isFlushSucceeded();
618 
619     /** @return True if the flush requested a compaction, else false */
620     boolean isCompactionNeeded();
621   }
622 
623   /**
624    * Flush the cache.
625    *
626    * <p>When this method is called the cache will be flushed unless:
627    * <ol>
628    *   <li>the cache is empty</li>
629    *   <li>the region is closed.</li>
630    *   <li>a flush is already in progress</li>
631    *   <li>writes are disabled</li>
632    * </ol>
633    *
634    * <p>This method may block for some time, so it should not be called from a
635    * time-sensitive thread.
636    * @param force whether we want to force a flush of all stores
637    * @return FlushResult indicating whether the flush was successful or not and if
638    * the region needs compacting
639    *
640    * @throws IOException general io exceptions
641    * @throws DroppedSnapshotException Thrown when abort is required. The caller MUST catch this
642    * exception and MUST abort. Any further operation to the region may cause data loss
643    * because a snapshot was not properly persisted.
644    */
645   FlushResult flush(boolean force) throws IOException;
646 
647   /**
648    * Synchronously compact all stores in the region.
649    * <p>This operation could block for a long time, so don't call it from a
650    * time-sensitive thread.
651    * <p>Note that no locks are taken to prevent possible conflicts between
652    * compaction and splitting activities. The regionserver does not normally compact
653    * and split in parallel. However by calling this method you may introduce
654    * unexpected and unhandled concurrency. Don't do this unless you know what
655    * you are doing.
656    *
657    * @param majorCompaction True to force a major compaction regardless of thresholds
658    * @throws IOException
659    */
660   void compact(final boolean majorCompaction) throws IOException;
661 
662   /**
663    * Trigger major compaction on all stores in the region.
664    * <p>
665    * Compaction will be performed asynchronously to this call by the RegionServer's
666    * CompactSplitThread. See also {@link Store#triggerMajorCompaction()}
667    * @throws IOException
668    */
669   void triggerMajorCompaction() throws IOException;
670 
671   /**
672    * @return the compaction state of this region, i.e. whether it is in compaction now.
673    */
674   CompactionState getCompactionState();
675 
676   /** Wait for all current flushes and compactions of the region to complete */
677   void waitForFlushesAndCompactions();
678 
679 }
679 }