1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.client;
20  
21  import com.google.protobuf.Service;
22  import com.google.protobuf.ServiceException;
23  import org.apache.hadoop.classification.InterfaceAudience;
24  import org.apache.hadoop.classification.InterfaceStability;
25  import org.apache.hadoop.conf.Configuration;
26  import org.apache.hadoop.hbase.HTableDescriptor;
27  import org.apache.hadoop.hbase.KeyValue;
28  import org.apache.hadoop.hbase.client.coprocessor.Batch;
29  import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;
30  
31  import java.io.Closeable;
32  import java.io.IOException;
33  import java.util.List;
34  import java.util.Map;
35  
36  /**
37   * Used to communicate with a single HBase table.
38   *
39   * @since 0.21.0
40   */
41  @InterfaceAudience.Public
42  @InterfaceStability.Stable
43  public interface HTableInterface extends Closeable {
44  
45    /**
46     * Gets the name of this table.
47     *
48     * @return the table name.
49     */
50    byte[] getTableName();
51  
52    /**
53     * Returns the {@link Configuration} object used by this instance.
54     * <p>
55     * The reference returned is not a copy, so any change made to it will
56     * affect this instance.
57     */
58    Configuration getConfiguration();
59  
60    /**
61     * Gets the {@link HTableDescriptor table descriptor} for this table.
62     * @throws IOException if a remote or network exception occurs.
63     */
64    HTableDescriptor getTableDescriptor() throws IOException;
65  
66    /**
67     * Test for the existence of columns in the table, as specified by the Get.
68     * <p>
69     *
70     * This will return true if the Get matches one or more keys, false if not.
71     * <p>
72     *
73     * This is a server-side call so it prevents any data from being transfered to
74     * the client.
75     *
76     * @param get the Get
77     * @return true if the specified Get matches one or more keys, false if not
78     * @throws IOException e
79     */
80    boolean exists(Get get) throws IOException;
81  
82    /**
83     * Test for the existence of columns in the table, as specified by the Gets.
84     * <p>
85     *
86     * This will return an array of booleans. Each value will be true if the related Get matches
87     * one or more keys, false if not.
88     * <p>
89     *
90     * This is a server-side call so it prevents any data from being transfered to
91     * the client.
92     *
93     * @param gets the Gets
94     * @return Array of Boolean true if the specified Get matches one or more keys, false if not
95     * @throws IOException e
96     */
97    Boolean[] exists(List<Get> gets) throws IOException;
98  
99    /**
100    * Method that does a batch call on Deletes, Gets, Puts, Increments, Appends and RowMutations.
101    * The ordering of execution of the actions is not defined. Meaning if you do a Put and a
102    * Get in the same {@link #batch} call, you will not necessarily be
103    * guaranteed that the Get returns what the Put had put.
104    *
105    * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
106    * @param results Empty Object[], same size as actions. Provides access to partial
107    *                results, in case an exception is thrown. A null in the result array means that
108    *                the call for that action failed, even after retries
109    * @throws IOException
110    * @since 0.90.0
111    */
112   void batch(final List<?extends Row> actions, final Object[] results) throws IOException, InterruptedException;
113 
114   /**
115    * Same as {@link #batch(List, Object[])}, but returns an array of
116    * results instead of using a results parameter reference.
117    *
118    * @param actions list of Get, Put, Delete, Increment, Append, RowMutations objects
119    * @return the results from the actions. A null in the return array means that
120    *         the call for that action failed, even after retries
121    * @throws IOException
122    * @since 0.90.0
123    */
124   Object[] batch(final List<? extends Row> actions) throws IOException, InterruptedException;
125 
126   /**
127    * Same as {@link #batch(List, Object[])}, but with a callback.
128    * @since 0.96.0
129    */
130   public <R> void batchCallback(
131     final List<? extends Row> actions, final Object[] results, final Batch.Callback<R> callback)
132     throws IOException, InterruptedException;
133 
134 
135   /**
136    * Same as {@link #batch(List)}, but with a callback.
137    * @since 0.96.0
138    */
139   public <R> Object[] batchCallback(
140     List<? extends Row> actions, Batch.Callback<R> callback) throws IOException,
141     InterruptedException;
142 
143   /**
144    * Extracts certain cells from a given row.
145    * @param get The object that specifies what data to fetch and from which row.
146    * @return The data coming from the specified row, if it exists.  If the row
147    * specified doesn't exist, the {@link Result} instance returned won't
148    * contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
149    * @throws IOException if a remote or network exception occurs.
150    * @since 0.20.0
151    */
152   Result get(Get get) throws IOException;
153 
154   /**
155    * Extracts certain cells from the given rows, in batch.
156    *
157    * @param gets The objects that specify what data to fetch and from which rows.
158    *
159    * @return The data coming from the specified rows, if it exists.  If the row
160    *         specified doesn't exist, the {@link Result} instance returned won't
161    *         contain any {@link KeyValue}, as indicated by {@link Result#isEmpty()}.
162    *         If there are any failures even after retries, there will be a null in
163    *         the results array for those Gets, AND an exception will be thrown.
164    * @throws IOException if a remote or network exception occurs.
165    *
166    * @since 0.90.0
167    */
168   Result[] get(List<Get> gets) throws IOException;
169 
170   /**
171    * Return the row that matches <i>row</i> exactly,
172    * or the one that immediately precedes it.
173    *
174    * @param row A row key.
175    * @param family Column family to include in the {@link Result}.
176    * @throws IOException if a remote or network exception occurs.
177    * @since 0.20.0
178    * 
179    * @deprecated As of version 0.92 this method is deprecated without
180    * replacement.   
181    * getRowOrBefore is used internally to find entries in .META. and makes
182    * various assumptions about the table (which are true for .META. but not
183    * in general) to be efficient.
184    */
185   Result getRowOrBefore(byte[] row, byte[] family) throws IOException;
186 
187   /**
188    * Returns a scanner on the current table as specified by the {@link Scan}
189    * object.
190    * Note that the passed {@link Scan}'s start row and caching properties
191    * maybe changed.
192    *
193    * @param scan A configured {@link Scan} object.
194    * @return A scanner.
195    * @throws IOException if a remote or network exception occurs.
196    * @since 0.20.0
197    */
198   ResultScanner getScanner(Scan scan) throws IOException;
199 
200   /**
201    * Gets a scanner on the current table for the given family.
202    *
203    * @param family The column family to scan.
204    * @return A scanner.
205    * @throws IOException if a remote or network exception occurs.
206    * @since 0.20.0
207    */
208   ResultScanner getScanner(byte[] family) throws IOException;
209 
210   /**
211    * Gets a scanner on the current table for the given family and qualifier.
212    *
213    * @param family The column family to scan.
214    * @param qualifier The column qualifier to scan.
215    * @return A scanner.
216    * @throws IOException if a remote or network exception occurs.
217    * @since 0.20.0
218    */
219   ResultScanner getScanner(byte[] family, byte[] qualifier) throws IOException;
220 
221 
222   /**
223    * Puts some data in the table.
224    * <p>
225    * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
226    * until the internal buffer is full.
227    * @param put The data to put.
228    * @throws IOException if a remote or network exception occurs.
229    * @since 0.20.0
230    */
231   void put(Put put) throws IOException;
232 
233   /**
234    * Puts some data in the table, in batch.
235    * <p>
236    * If {@link #isAutoFlush isAutoFlush} is false, the update is buffered
237    * until the internal buffer is full.
238    * <p>
239    * This can be used for group commit, or for submitting user defined
240    * batches.  The writeBuffer will be periodically inspected while the List
241    * is processed, so depending on the List size the writeBuffer may flush
242    * not at all, or more than once.
243    * @param puts The list of mutations to apply. The batch put is done by
244    * aggregating the iteration of the Puts over the write buffer
245    * at the client-side for a single RPC call.
246    * @throws IOException if a remote or network exception occurs.
247    * @since 0.20.0
248    */
249   void put(List<Put> puts) throws IOException;
250 
251   /**
252    * Atomically checks if a row/family/qualifier value matches the expected
253    * value. If it does, it adds the put.  If the passed value is null, the check
254    * is for the lack of column (ie: non-existance)
255    *
256    * @param row to check
257    * @param family column family to check
258    * @param qualifier column qualifier to check
259    * @param value the expected value
260    * @param put data to put if check succeeds
261    * @throws IOException e
262    * @return true if the new put was executed, false otherwise
263    */
264   boolean checkAndPut(byte[] row, byte[] family, byte[] qualifier,
265       byte[] value, Put put) throws IOException;
266 
267   /**
268    * Deletes the specified cells/row.
269    *
270    * @param delete The object that specifies what to delete.
271    * @throws IOException if a remote or network exception occurs.
272    * @since 0.20.0
273    */
274   void delete(Delete delete) throws IOException;
275 
276   /**
277    * Deletes the specified cells/rows in bulk.
278    * @param deletes List of things to delete.  List gets modified by this
279    * method (in particular it gets re-ordered, so the order in which the elements
280    * are inserted in the list gives no guarantee as to the order in which the
281    * {@link Delete}s are executed).
282    * @throws IOException if a remote or network exception occurs. In that case
283    * the {@code deletes} argument will contain the {@link Delete} instances
284    * that have not be successfully applied.
285    * @since 0.20.1
286    */
287   void delete(List<Delete> deletes) throws IOException;
288 
289   /**
290    * Atomically checks if a row/family/qualifier value matches the expected
291    * value. If it does, it adds the delete.  If the passed value is null, the
292    * check is for the lack of column (ie: non-existance)
293    *
294    * @param row to check
295    * @param family column family to check
296    * @param qualifier column qualifier to check
297    * @param value the expected value
298    * @param delete data to delete if check succeeds
299    * @throws IOException e
300    * @return true if the new delete was executed, false otherwise
301    */
302   boolean checkAndDelete(byte[] row, byte[] family, byte[] qualifier,
303       byte[] value, Delete delete) throws IOException;
304 
305   /**
306    * Performs multiple mutations atomically on a single row. Currently
307    * {@link Put} and {@link Delete} are supported.
308    *
309    * @param rm object that specifies the set of mutations to perform atomically
310    * @throws IOException
311    */
312   public void mutateRow(final RowMutations rm) throws IOException;
313 
314   /**
315    * Appends values to one or more columns within a single row.
316    * <p>
317    * This operation does not appear atomic to readers.  Appends are done
318    * under a single row lock, so write operations to a row are synchronized, but
319    * readers do not take row locks so get and scan operations can see this
320    * operation partially completed.
321    *
322    * @param append object that specifies the columns and amounts to be used
323    *                  for the increment operations
324    * @throws IOException e
325    * @return values of columns after the append operation (maybe null)
326    */
327   public Result append(final Append append) throws IOException;
328 
329   /**
330    * Increments one or more columns within a single row.
331    * <p>
332    * This operation does not appear atomic to readers.  Increments are done
333    * under a single row lock, so write operations to a row are synchronized, but
334    * readers do not take row locks so get and scan operations can see this
335    * operation partially completed.
336    *
337    * @param increment object that specifies the columns and amounts to be used
338    *                  for the increment operations
339    * @throws IOException e
340    * @return values of columns after the increment
341    */
342   public Result increment(final Increment increment) throws IOException;
343 
344   /**
345    * See {@link #incrementColumnValue(byte[], byte[], byte[], long, Durability)}
346    * <p>
347    * The {@link Durability} is defaulted to {@link Durability#SYNC_WAL}.
348    * @param row The row that contains the cell to increment.
349    * @param family The column family of the cell to increment.
350    * @param qualifier The column qualifier of the cell to increment.
351    * @param amount The amount to increment the cell with (or decrement, if the
352    * amount is negative).
353    * @return The new value, post increment.
354    * @throws IOException if a remote or network exception occurs.
355    */
356   long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
357       long amount) throws IOException;
358 
359   /**
360    * Atomically increments a column value. If the column value already exists
361    * and is not a big-endian long, this could throw an exception. If the column
362    * value does not yet exist it is initialized to <code>amount</code> and
363    * written to the specified column.
364    *
365    * <p>Setting durability to {@link Durability#SKIP_WAL} means that in a fail
366    * scenario you will lose any increments that have not been flushed.
367    * @param row The row that contains the cell to increment.
368    * @param family The column family of the cell to increment.
369    * @param qualifier The column qualifier of the cell to increment.
370    * @param amount The amount to increment the cell with (or decrement, if the
371    * amount is negative).
372    * @param durability The persistence guarantee for this increment.
373    * @return The new value, post increment.
374    * @throws IOException if a remote or network exception occurs.
375    */
376   long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier,
377       long amount, Durability durability) throws IOException;
378 
379   /**
380    * Tells whether or not 'auto-flush' is turned on.
381    *
382    * @return {@code true} if 'auto-flush' is enabled (default), meaning
383    * {@link Put} operations don't get buffered/delayed and are immediately
384    * executed.
385    */
386   boolean isAutoFlush();
387 
388   /**
389    * Executes all the buffered {@link Put} operations.
390    * <p>
391    * This method gets called once automatically for every {@link Put} or batch
392    * of {@link Put}s (when <code>put(List<Put>)</code> is used) when
393    * {@link #isAutoFlush} is {@code true}.
394    * @throws IOException if a remote or network exception occurs.
395    */
396   void flushCommits() throws IOException;
397 
398   /**
399    * Releases any resources held or pending changes in internal buffers.
400    *
401    * @throws IOException if a remote or network exception occurs.
402    */
403   void close() throws IOException;
404 
405   /**
406    * Creates and returns a {@link com.google.protobuf.RpcChannel} instance connected to the
407    * table region containing the specified row.  The row given does not actually have
408    * to exist.  Whichever region would contain the row based on start and end keys will
409    * be used.  Note that the {@code row} parameter is also not passed to the
410    * coprocessor handler registered for this protocol, unless the {@code row}
411    * is separately passed as an argument in the service request.  The parameter
412    * here is only used to locate the region used to handle the call.
413    *
414    * <p>
415    * The obtained {@link com.google.protobuf.RpcChannel} instance can be used to access a published
416    * coprocessor {@link com.google.protobuf.Service} using standard protobuf service invocations:
417    * </p>
418    *
419    * <div style="background-color: #cccccc; padding: 2px">
420    * <blockquote><pre>
421    * CoprocessorRpcChannel channel = myTable.coprocessorService(rowkey);
422    * MyService.BlockingInterface service = MyService.newBlockingStub(channel);
423    * MyCallRequest request = MyCallRequest.newBuilder()
424    *     ...
425    *     .build();
426    * MyCallResponse response = service.myCall(null, request);
427    * </pre></blockquote></div>
428    *
429    * @param row The row key used to identify the remote region location
430    * @return A CoprocessorRpcChannel instance
431    */
432   CoprocessorRpcChannel coprocessorService(byte[] row);
433 
434   /**
435    * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table
436    * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive),
437    * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
438    * method with each {@link Service}
439    * instance.
440    *
441    * @param service the protocol buffer {@code Service} implementation to call
442    * @param startKey start region selection with region containing this row.  If {@code null}, the
443    *                 selection will start with the first table region.
444    * @param endKey select regions up to and including the region containing this row.
445    *               If {@code null}, selection will continue through the last table region.
446    * @param callable this instance's
447    *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
448    *                 method will be invoked once per table region, using the {@link Service}
449    *                 instance connected to that region.
450    * @param <T> the {@link Service} subclass to connect to
451    * @param <R> Return type for the {@code callable} parameter's
452    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
453    * @return a map of result values keyed by region name
454    */
455   <T extends Service, R> Map<byte[],R> coprocessorService(final Class<T> service,
456       byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable)
457       throws ServiceException, Throwable;
458 
459   /**
460    * Creates an instance of the given {@link com.google.protobuf.Service} subclass for each table
461    * region spanning the range from the {@code startKey} row to {@code endKey} row (inclusive),
462    * and invokes the passed {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call}
463    * method with each {@link Service} instance.
464    *
465    * <p>
466    * The given
467    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Callback#update(byte[], byte[], Object)}
468    * method will be called with the return value from each region's
469    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} invocation.
470    *</p>
471    *
472    * @param service the protocol buffer {@code Service} implementation to call
473    * @param startKey start region selection with region containing this row.  If {@code null}, the
474    *                 selection will start with the first table region.
475    * @param endKey select regions up to and including the region containing this row.
476    *               If {@code null}, selection will continue through the last table region.
477    * @param callable this instance's
478    *                 {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
479    *                 will be invoked once per table region, using the {@link Service} instance
480    *                 connected to that region.
481    * @param callback
482    * @param <T> the {@link Service} subclass to connect to
483    * @param <R> Return type for the {@code callable} parameter's
484    * {@link org.apache.hadoop.hbase.client.coprocessor.Batch.Call#call} method
485    */
486   <T extends Service, R> void coprocessorService(final Class<T> service,
487       byte[] startKey, byte[] endKey, final Batch.Call<T,R> callable,
488       final Batch.Callback<R> callback) throws ServiceException, Throwable;
489 
490   /**
491    * See {@link #setAutoFlush(boolean, boolean)}
492    *
493    * @param autoFlush
494    *        Whether or not to enable 'auto-flush'.
495    */
496   public void setAutoFlush(boolean autoFlush);
497 
498   /**
499    * Turns 'auto-flush' on or off.
500    * <p>
501    * When enabled (default), {@link Put} operations don't get buffered/delayed
502    * and are immediately executed. Failed operations are not retried. This is
503    * slower but safer.
504    * <p>
505    * Turning off {@code autoFlush} means that multiple {@link Put}s will be
506    * accepted before any RPC is actually sent to do the write operations. If the
507    * application dies before pending writes get flushed to HBase, data will be
508    * lost.
509    * <p>
510    * When you turn {@code #autoFlush} off, you should also consider the
511    * {@code clearBufferOnFail} option. By default, asynchronous {@link Put}
512    * requests will be retried on failure until successful. However, this can
513    * pollute the writeBuffer and slow down batching performance. Additionally,
514    * you may want to issue a number of Put requests and call
515    * {@link #flushCommits()} as a barrier. In both use cases, consider setting
516    * clearBufferOnFail to true to erase the buffer after {@link #flushCommits()}
517    * has been called, regardless of success.
518    *
519    * @param autoFlush
520    *        Whether or not to enable 'auto-flush'.
521    * @param clearBufferOnFail
522    *        Whether to keep Put failures in the writeBuffer
523    * @see #flushCommits
524    */
525   public void setAutoFlush(boolean autoFlush, boolean clearBufferOnFail);
526 
527   /**
528    * Returns the maximum size in bytes of the write buffer for this HTable.
529    * <p>
530    * The default value comes from the configuration parameter
531    * {@code hbase.client.write.buffer}.
532    * @return The size of the write buffer in bytes.
533    */
534   public long getWriteBufferSize();
535 
536   /**
537    * Sets the size of the buffer in bytes.
538    * <p>
539    * If the new size is less than the current amount of data in the
540    * write buffer, the buffer gets flushed.
541    * @param writeBufferSize The new write buffer size, in bytes.
542    * @throws IOException if a remote or network exception occurs.
543    */
544   public void setWriteBufferSize(long writeBufferSize) throws IOException;
545 }