View Javadoc

1   /**
2    * Copyright 2010 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.client;
21  
22  import java.io.Closeable;
23  import java.io.IOException;
24  import java.util.List;
25  import java.util.Map;
26  
27  import org.apache.hadoop.conf.Configuration;
28  import org.apache.hadoop.hbase.HTableDescriptor;
29  import org.apache.hadoop.hbase.KeyValue;
30  import org.apache.hadoop.hbase.client.coprocessor.Batch;
31  import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
32  
33  /**
34   * Used to communicate with a single HBase table.
35   *
36   * @since 0.21.0
37   */
38  public interface HTableInterface extends Closeable {
39  
40    /**
41     * Gets the name of this table.
42     *
43     * @return the table name.
44     */
45    byte[] getTableName();
46  
47    /**
48     * Returns the {@link Configuration} object used by this instance.
49     * <p>
50     * The reference returned is not a copy, so any change made to it will
51     * affect this instance.
52     */
53    Configuration getConfiguration();
54  
55    /**
56     * Gets the {@link HTableDescriptor table descriptor} for this table.
57     * @throws IOException if a remote or network exception occurs.
58     */
59    HTableDescriptor getTableDescriptor() throws IOException;
60  
61    /**
62     * Test for the existence of columns in the table, as specified in the Get.
63     * <p>
64     *
65     * This will return true if the Get matches one or more keys, false if not.
66     * <p>
67     *
68     * This is a server-side call so it prevents any data from being transfered to
69     * the client.
70     *
71     * @param get the Get
72     * @return true if the specified Get matches one or more keys, false if not
73     * @throws IOException e
74     */
75    boolean exists(Get get) throws IOException;
76  
77    /**
78     * Method that does a batch call on Deletes, Gets, Puts, Increments, Appends and RowMutations.
79     * The execution ordering of the actions is not defined. Meaning if you do a Put and a
80     * Get in the same {@link #batch} call, you will not necessarily be
81     * guaranteed that the Get returns what the Put had put.
82     *
83     * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
84     * @param results Empty Object[], same size as actions. Provides access to partial
85     *                results, in case an exception is thrown. A null in the result array means that
86     *                the call for that action failed, even after retries
87     * @throws IOException
88     * @since 0.90.0
89     */
90    void batch(final List<?extends Row> actions, final Object[] results) throws IOException, InterruptedException;
91  
92    /**
93     * Same as {@link #batch(List, Object[])}, but returns an array of
94     * results instead of using a results parameter reference.
95     *
96     * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
97     * @return the results from the actions. A null in the return array means that
98     *         the call for that action failed, even after retries
99     * @throws IOException
100    * @since 0.90.0
101    */
102   Object[] batch(final List<? extends Row> actions) throws IOException, InterruptedException;
103 
104   /**
105    * Extracts certain cells from a given row.
106    * @param get The object that specifies what data to fetch and from which row.
107    * @return The data coming from the specified row, if it exists.  If the row
108    * specified doesn't exist, the {@link Result} instance returned won't
109    * contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
110    * @throws IOException if a remote or network exception occurs.
111    * @since 0.20.0
112    */
113   Result get(Get get) throws IOException;
114 
115   /**
116    * Extracts certain cells from the given rows, in batch.
117    *
118    * @param gets The objects that specify what data to fetch and from which rows.
119    *
120    * @return The data coming from the specified rows, if it exists.  If the row
121    *         specified doesn't exist, the {@link Result} instance returned won't
122    *         contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
123    *         If there are any failures even after retries, there will be a null in
124    *         the results array for those Gets, AND an exception will be thrown.
125    * @throws IOException if a remote or network exception occurs.
126    *
127    * @since 0.90.0
128    */
129   Result[] get(List<Get> gets) throws IOException;
130 
131   /**
132    * Return the row that matches <i>row</i> exactly,
133    * or the one that immediately precedes it.
134    *
135    * @param row A row key.
136    * @param family Column family to include in the {@link Result}.
137    * @throws IOException if a remote or network exception occurs.
138    * @since 0.20.0
139    * 
140    * @deprecated As of version 0.92 this method is deprecated without
141    * replacement.   
142    * getRowOrBefore is used internally to find entries in .META. and makes
143    * various assumptions about the table (which are true for .META. but not
144    * in general) to be efficient.
145    */
146   Result getRowOrBefore(byte[] row, byte[] family) throws IOException;
147 
148   /**
149    * Returns a scanner on the current table as specified by the {@link Scan}
150    * object.
151    * Note that the passed {@link Scan}'s start row and caching properties
152    * maybe changed.
153    *
154    * @param scan A configured {@link Scan} object.
155    * @return A scanner.
156    * @throws IOException if a remote or network exception occurs.
157    * @since 0.20.0
158    */
159   ResultScanner getScanner(Scan scan) throws IOException;
160 
161   /**
162    * Gets a scanner on the current table for the given family.
163    *
164    * @param family The column family to scan.
165    * @return A scanner.
166    * @throws IOException if a remote or network exception occurs.
167    * @since 0.20.0
168    */
169   ResultScanner getScanner(byte[] family) throws IOException;
170 
171   /**
172    * Gets a scanner on the current table for the given family and qualifier.
173    *
174    * @param family The column family to scan.
175    * @param qualifier The column qualifier to scan.
176    * @return A scanner.
177    * @throws IOException if a remote or network exception occurs.
178    * @since 0.20.0
179    */
180   ResultScanner getScanner(byte[] family, byte[] qualifier) throws IOException;
181 
182 
183   /**
184    * Puts some data in the table.
185    * <p>
186    * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
187    * until the internal buffer is full.
188    * @param put The data to put.
189    * @throws IOException if a remote or network exception occurs.
190    * @since 0.20.0
191    */
192   void put(Put put) throws IOException;
193 
194   /**
195    * Puts some data in the table, in batch.
196    * <p>
197    * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
198    * until the internal buffer is full.
199    * <p>
200    * This can be used for group commit, or for submitting user defined
201    * batches.  The writeBuffer will be periodically inspected while the List
202    * is processed, so depending on the List size the writeBuffer may flush
203    * not at all, or more than once.
204    * @param puts The list of mutations to apply. The batch put is done by
205    * aggregating the iteration of the Puts over the write buffer
206    * at the client-side for a single RPC call.
207    * @throws IOException if a remote or network exception occurs.
208    * @since 0.20.0
209    */
210   void put(List<Put> puts) throws IOException;
211 
212   /**
213    * Atomically checks if a row/family/qualifier value matches the expected
214    * value. If it does, it adds the put.  If the passed value is null, the check
215    * is for the lack of column (ie: non-existance)
216    *
217    * @param row to check
218    * @param family column family to check
219    * @param qualifier column qualifier to check
220    * @param value the expected value
221    * @param put data to put if check succeeds
222    * @throws IOException e
223    * @return true if the new put was executed, false otherwise
224    */
225   boolean checkAndPut(byte[] row, byte[] family, byte[] qualifier,
226       byte[] value, Put put) throws IOException;
227 
228   /**
229    * Deletes the specified cells/row.
230    *
231    * @param delete The object that specifies what to delete.
232    * @throws IOException if a remote or network exception occurs.
233    * @since 0.20.0
234    */
235   void delete(Delete delete) throws IOException;
236 
237   /**
238    * Deletes the specified cells/rows in bulk.
239    * @param deletes List of things to delete.  List gets modified by this
240    * method (in particular it gets re-ordered, so the order in which the elements
241    * are inserted in the list gives no guarantee as to the order in which the
242    * {@link Delete}s are executed).
243    * @throws IOException if a remote or network exception occurs. In that case
244    * the {@code deletes} argument will contain the {@link Delete} instances
245    * that have not be successfully applied.
246    * @since 0.20.1
247    */
248   void delete(List<Delete> deletes) throws IOException;
249 
250   /**
251    * Atomically checks if a row/family/qualifier value matches the expected
252    * value. If it does, it adds the delete.  If the passed value is null, the
253    * check is for the lack of column (ie: non-existance)
254    *
255    * @param row to check
256    * @param family column family to check
257    * @param qualifier column qualifier to check
258    * @param value the expected value
259    * @param delete data to delete if check succeeds
260    * @throws IOException e
261    * @return true if the new delete was executed, false otherwise
262    */
263   boolean checkAndDelete(byte[] row, byte[] family, byte[] qualifier,
264       byte[] value, Delete delete) throws IOException;
265 
266   /**
267    * Performs multiple mutations atomically on a single row. Currently
268    * {@link Put} and {@link Delete} are supported.
269    *
270    * @param arm object that specifies the set of mutations to perform
271    * atomically
272    * @throws IOException
273    */
274   public void mutateRow(final RowMutations rm) throws IOException;
275 
276   /**
277    * Appends values to one or more columns within a single row.
278    * <p>
279    * This operation does not appear atomic to readers.  Appends are done
280    * under a single row lock, so write operations to a row are synchronized, but
281    * readers do not take row locks so get and scan operations can see this
282    * operation partially completed.
283    *
284    * @param append object that specifies the columns and amounts to be used
285    *                  for the increment operations
286    * @throws IOException e
287    * @return values of columns after the append operation (maybe null)
288    */
289   public Result append(final Append append) throws IOException;
290 
291   /**
292    * Increments one or more columns within a single row.
293    * <p>
294    * This operation does not appear atomic to readers.  Increments are done
295    * under a single row lock, so write operations to a row are synchronized, but
296    * readers do not take row locks so get and scan operations can see this
297    * operation partially completed.
298    *
299    * @param increment object that specifies the columns and amounts to be used
300    *                  for the increment operations
301    * @throws IOException e
302    * @return values of columns after the increment
303    */
304   public Result increment(final Increment increment) throws IOException;
305 
306   /**
307    * Atomically increments a column value.
308    * <p>
309    * Equivalent to {@link #incrementColumnValue(byte[], byte[], byte[],
310    * long, boolean) incrementColumnValue}(row, family, qualifier, amount,
311    * <b>true</b>)}
312    * @param row The row that contains the cell to increment.
313    * @param family The column family of the cell to increment.
314    * @param qualifier The column qualifier of the cell to increment.
315    * @param amount The amount to increment the cell with (or decrement, if the
316    * amount is negative).
317    * @return The new value, post increment.
318    * @throws IOException if a remote or network exception occurs.
319    */
320   long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
321       long amount) throws IOException;
322 
323   /**
324    * Atomically increments a column value. If the column value already exists
325    * and is not a big-endian long, this could throw an exception. If the column
326    * value does not yet exist it is initialized to <code>amount</code> and
327    * written to the specified column.
328    *
329    * <p>Setting writeToWAL to false means that in a fail scenario, you will lose
330    * any increments that have not been flushed.
331    * @param row The row that contains the cell to increment.
332    * @param family The column family of the cell to increment.
333    * @param qualifier The column qualifier of the cell to increment.
334    * @param amount The amount to increment the cell with (or decrement, if the
335    * amount is negative).
336    * @param writeToWAL if {@code true}, the operation will be applied to the
337    * Write Ahead Log (WAL).  This makes the operation slower but safer, as if
338    * the call returns successfully, it is guaranteed that the increment will
339    * be safely persisted.  When set to {@code false}, the call may return
340    * successfully before the increment is safely persisted, so it's possible
341    * that the increment be lost in the event of a failure happening before the
342    * operation gets persisted.
343    * @return The new value, post increment.
344    * @throws IOException if a remote or network exception occurs.
345    */
346   long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
347       long amount, boolean writeToWAL) throws IOException;
348 
349   /**
350    * Tells whether or not 'auto-flush' is turned on.
351    *
352    * @return {@code true} if 'auto-flush' is enabled (default), meaning
353    * {@link Put} operations don't get buffered/delayed and are immediately
354    * executed.
355    */
356   boolean isAutoFlush();
357 
358   /**
359    * Executes all the buffered {@link Put} operations.
360    * <p>
361    * This method gets called once automatically for every {@link Put} or batch
362    * of {@link Put}s (when <code>put(List<Put>)</code> is used) when
363    * {@link #isAutoFlush} is {@code true}.
364    * @throws IOException if a remote or network exception occurs.
365    */
366   void flushCommits() throws IOException;
367 
368   /**
369    * Releases any resources help or pending changes in internal buffers.
370    *
371    * @throws IOException if a remote or network exception occurs.
372    */
373   void close() throws IOException;
374 
375   /**
376    * Obtains a lock on a row.
377    *
378    * @param row The row to lock.
379    * @return A {@link RowLock} containing the row and lock id.
380    * @throws IOException if a remote or network exception occurs.
381    * @see RowLock
382    * @see #unlockRow
383    * @deprecated {@link RowLock} and associated operations are deprecated
384    */
385   RowLock lockRow(byte[] row) throws IOException;
386 
387   /**
388    * Releases a row lock.
389    *
390    * @param rl The row lock to release.
391    * @throws IOException if a remote or network exception occurs.
392    * @see RowLock
393    * @see #unlockRow
394    * @deprecated {@link RowLock} and associated operations are deprecated
395    */
396   void unlockRow(RowLock rl) throws IOException;
397 
398   /**
399    * Creates and returns a proxy to the CoprocessorProtocol instance running in the
400    * region containing the specified row.  The row given does not actually have
401    * to exist.  Whichever region would contain the row based on start and end keys will
402    * be used.  Note that the {@code row} parameter is also not passed to the
403    * coprocessor handler registered for this protocol, unless the {@code row}
404    * is separately passed as an argument in a proxy method call.  The parameter
405    * here is just used to locate the region used to handle the call.
406    *
407    * @param protocol The class or interface defining the remote protocol
408    * @param row The row key used to identify the remote region location
409    * @return A CoprocessorProtocol instance
410    */
411   <T extends CoprocessorProtocol> T coprocessorProxy(Class<T> protocol, byte[] row);
412 
413   /**
414    * Invoke the passed
415    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call} against
416    * the {@link CoprocessorProtocol} instances running in the selected regions.
417    * All regions beginning with the region containing the <code>startKey</code>
418    * row, through to the region containing the <code>endKey</code> row (inclusive)
419    * will be used.  If <code>startKey</code> or <code>endKey</code> is
420    * <code>null</code>, the first and last regions in the table, respectively,
421    * will be used in the range selection.
422    *
423    * @param protocol the CoprocessorProtocol implementation to call
424    * @param startKey start region selection with region containing this row
425    * @param endKey select regions up to and including the region containing
426    * this row
427    * @param callable wraps the CoprocessorProtocol implementation method calls
428    * made per-region
429    * @param <T> CoprocessorProtocol subclass for the remote invocation
430    * @param <R> Return type for the
431    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call(Object)}
432    * method
433    * @return a <code>Map</code> of region names to
434    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call(Object)} return values
435    */
436   <T extends CoprocessorProtocol, R> Map<byte[],R> coprocessorExec(
437       Class<T> protocol, byte[] startKey, byte[] endKey, Batch.Call<T,R> callable)
438       throws IOException, Throwable;
439 
440   /**
441    * Invoke the passed
442    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call} against
443    * the {@link CoprocessorProtocol} instances running in the selected regions.
444    * All regions beginning with the region containing the <code>startKey</code>
445    * row, through to the region containing the <code>endKey</code> row
446    * (inclusive)
447    * will be used.  If <code>startKey</code> or <code>endKey</code> is
448    * <code>null</code>, the first and last regions in the table, respectively,
449    * will be used in the range selection.
450    *
451    * <p>
452    * For each result, the given
453    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)}
454    * method will be called.
455    *</p>
456    *
457    * @param protocol the CoprocessorProtocol implementation to call
458    * @param startKey start region selection with region containing this row
459    * @param endKey select regions up to and including the region containing
460    * this row
461    * @param callable wraps the CoprocessorProtocol implementation method calls
462    * made per-region
463    * @param callback an instance upon which
464    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)} with the
465    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call(Object)}
466    * return value for each region
467    * @param <T> CoprocessorProtocol subclass for the remote invocation
468    * @param <R> Return type for the
469    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call(Object)}
470    * method
471    */
472   <T extends CoprocessorProtocol, R> void coprocessorExec(
473       Class<T> protocol, byte[] startKey, byte[] endKey,
474       Batch.Call<T,R> callable, Batch.Callback<R> callback)
475       throws IOException, Throwable;
476 
477   /**
478    * See {@link #setAutoFlush(boolean, boolean)}
479    *
480    * @param autoFlush
481    *          Whether or not to enable 'auto-flush'.
482    */
483   public void setAutoFlush(boolean autoFlush);
484 
485   /**
486    * Turns 'auto-flush' on or off.
487    * <p>
488    * When enabled (default), {@link Put} operations don't get buffered/delayed
489    * and are immediately executed. Failed operations are not retried. This is
490    * slower but safer.
491    * <p>
492    * Turning off {@link #autoFlush} means that multiple {@link Put}s will be
493    * accepted before any RPC is actually sent to do the write operations. If the
494    * application dies before pending writes get flushed to HBase, data will be
495    * lost.
496    * <p>
497    * When you turn {@link #autoFlush} off, you should also consider the
498    * {@link #clearBufferOnFail} option. By default, asynchronous {@link Put}
499    * requests will be retried on failure until successful. However, this can
500    * pollute the writeBuffer and slow down batching performance. Additionally,
501    * you may want to issue a number of Put requests and call
502    * {@link #flushCommits()} as a barrier. In both use cases, consider setting
503    * clearBufferOnFail to true to erase the buffer after {@link #flushCommits()}
504    * has been called, regardless of success.
505    *
506    * @param autoFlush
507    *          Whether or not to enable 'auto-flush'.
508    * @param clearBufferOnFail
509    *          Whether to keep Put failures in the writeBuffer
510    * @see #flushCommits
511    */
512   public void setAutoFlush(boolean autoFlush, boolean clearBufferOnFail);
513 
514   /**
515    * Returns the maximum size in bytes of the write buffer for this HTable.
516    * <p>
517    * The default value comes from the configuration parameter
518    * {@code hbase.client.write.buffer}.
519    * @return The size of the write buffer in bytes.
520    */
521   public long getWriteBufferSize();
522 
523   /**
524    * Sets the size of the buffer in bytes.
525    * <p>
526    * If the new size is less than the current amount of data in the
527    * write buffer, the buffer gets flushed.
528    * @param writeBufferSize The new write buffer size, in bytes.
529    * @throws IOException if a remote or network exception occurs.
530    */
531   public void setWriteBufferSize(long writeBufferSize) throws IOException;
532 }