View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.DataInput;
22  import java.io.DataOutput;
23  import java.io.IOException;
24  import java.util.Arrays;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.classification.InterfaceAudience;
29  import org.apache.hadoop.classification.InterfaceStability;
30  import org.apache.hadoop.hbase.TableName;
31  import org.apache.hadoop.hbase.HConstants;
32  import org.apache.hadoop.hbase.client.Scan;
33  import org.apache.hadoop.hbase.util.Bytes;
34  import org.apache.hadoop.io.Writable;
35  import org.apache.hadoop.io.WritableUtils;
36  import org.apache.hadoop.mapreduce.InputSplit;
37  
38  /**
39   * A table split corresponds to a key range (low, high) and an optional scanner.
40   * All references to row below refer to the key of the row.
41   */
42  @InterfaceAudience.Public
43  @InterfaceStability.Evolving
44  public class TableSplit extends InputSplit
45  implements Writable, Comparable<TableSplit> {
46    public static final Log LOG = LogFactory.getLog(TableSplit.class);
47    
48    // should be < 0 (@see #readFields(DataInput))
49    // version 1 supports Scan data member
50    enum Version {
51      UNVERSIONED(0),
52      // Initial number we put on TableSplit when we introduced versioning.
53      INITIAL(-1);
54  
55      final int code;
56      static final Version[] byCode;
57      static {
58        byCode = Version.values();
59        for (int i = 0; i < byCode.length; i++) {
60          if (byCode[i].code != -1 * i) {
61            throw new AssertionError("Values in this enum should be descending by one");
62          }
63        }
64      }
65  
66      Version(int code) {
67        this.code = code;
68      }
69  
70      boolean atLeast(Version other) {
71        return code <= other.code;
72      }
73  
74      static Version fromCode(int code) {
75        return byCode[code * -1];
76      }
77    }
78    
79    private static final Version VERSION = Version.INITIAL;
80    private TableName tableName;
81    private byte [] startRow;
82    private byte [] endRow;
83    private String regionLocation;
84    private String scan = ""; // stores the serialized form of the Scan
85    private long length; // Contains estimation of region size in bytes
86  
87    /** Default constructor. */
88    public TableSplit() {
89      this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
90        HConstants.EMPTY_BYTE_ARRAY, "");
91    }
92  
93    /**
94     * @deprecated Since 0.96.0; use {@link TableSplit#TableSplit(TableName, byte[], byte[], String)}
95     */
96    @Deprecated
97    public TableSplit(final byte [] tableName, Scan scan, byte [] startRow, byte [] endRow,
98        final String location) {
99      this(TableName.valueOf(tableName), scan, startRow, endRow, location);
100   }
101 
102   /**
103    * Creates a new instance while assigning all variables.
104    * Length of region is set to 0
105    *
106    * @param tableName  The name of the current table.
107    * @param scan The scan associated with this split.
108    * @param startRow  The start row of the split.
109    * @param endRow  The end row of the split.
110    * @param location  The location of the region.
111    */
112   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
113                     final String location) {
114     this(tableName, scan, startRow, endRow, location, 0L);
115   }
116 
117   /**
118    * Creates a new instance while assigning all variables.
119    *
120    * @param tableName  The name of the current table.
121    * @param scan The scan associated with this split.
122    * @param startRow  The start row of the split.
123    * @param endRow  The end row of the split.
124    * @param location  The location of the region.
125    */
126   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
127       final String location, long length) {
128     this.tableName = tableName;
129     try {
130       this.scan =
131         (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
132     } catch (IOException e) {
133       LOG.warn("Failed to convert Scan to String", e);
134     }
135     this.startRow = startRow;
136     this.endRow = endRow;
137     this.regionLocation = location;
138     this.length = length;
139   }
140 
141   /**
142    * @deprecated Since 0.96.0; use {@link TableSplit#TableSplit(TableName, byte[], byte[], String)}
143    */
144   @Deprecated
145   public TableSplit(final byte [] tableName, byte[] startRow, byte[] endRow,
146       final String location) {
147     this(TableName.valueOf(tableName), startRow, endRow, location);
148   }
149 
150   /**
151    * Creates a new instance without a scanner.
152    *
153    * @param tableName The name of the current table.
154    * @param startRow The start row of the split.
155    * @param endRow The end row of the split.
156    * @param location The location of the region.
157    */
158   public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
159       final String location) {
160     this(tableName, null, startRow, endRow, location);
161   }
162 
163   /**
164    * Creates a new instance without a scanner.
165    *
166    * @param tableName The name of the current table.
167    * @param startRow The start row of the split.
168    * @param endRow The end row of the split.
169    * @param location The location of the region.
170    * @param length Size of region in bytes
171    */
172   public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
173                     final String location, long length) {
174     this(tableName, null, startRow, endRow, location, length);
175   }
176 
177   /**
178    * Returns a Scan object from the stored string representation.
179    *
180    * @return Returns a Scan object based on the stored scanner.
181    * @throws IOException
182    */
183   public Scan getScan() throws IOException {
184     return TableMapReduceUtil.convertStringToScan(this.scan);
185   }
186 
187   /**
188    * Returns the table name converted to a byte array.
189    * @see #getTable()
190    * @return The table name.
191    */
192   public byte [] getTableName() {
193     return tableName.getName();
194   }
195 
196   /**
197    * Returns the table name.
198    *
199    * @return The table name.
200    */
201   public TableName getTable() {
202     // It is ugly that usually to get a TableName, the method is called getTableName.  We can't do
203     // that in here though because there was an existing getTableName in place already since
204     // deprecated.
205     return tableName;
206   }
207 
208   /**
209    * Returns the start row.
210    *
211    * @return The start row.
212    */
213   public byte [] getStartRow() {
214     return startRow;
215   }
216 
217   /**
218    * Returns the end row.
219    *
220    * @return The end row.
221    */
222   public byte [] getEndRow() {
223     return endRow;
224   }
225 
226   /**
227    * Returns the region location.
228    *
229    * @return The region's location.
230    */
231   public String getRegionLocation() {
232     return regionLocation;
233   }
234 
235   /**
236    * Returns the region's location as an array.
237    *
238    * @return The array containing the region location.
239    * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
240    */
241   @Override
242   public String[] getLocations() {
243     return new String[] {regionLocation};
244   }
245 
246   /**
247    * Returns the length of the split.
248    *
249    * @return The length of the split.
250    * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
251    */
252   @Override
253   public long getLength() {
254     return length;
255   }
256 
257   /**
258    * Reads the values of each field.
259    *
260    * @param in  The input to read from.
261    * @throws IOException When reading the input fails.
262    */
263   @Override
264   public void readFields(DataInput in) throws IOException {
265     Version version = Version.UNVERSIONED;
266     // TableSplit was not versioned in the beginning.
267     // In order to introduce it now, we make use of the fact
268     // that tableName was written with Bytes.writeByteArray,
269     // which encodes the array length as a vint which is >= 0.
270     // Hence if the vint is >= 0 we have an old version and the vint
271     // encodes the length of tableName.
272     // If < 0 we just read the version and the next vint is the length.
273     // @see Bytes#readByteArray(DataInput)
274     int len = WritableUtils.readVInt(in);
275     if (len < 0) {
276       // what we just read was the version
277       version = Version.fromCode(len);
278       len = WritableUtils.readVInt(in);
279     }
280     byte[] tableNameBytes = new byte[len];
281     in.readFully(tableNameBytes);
282     tableName = TableName.valueOf(tableNameBytes);
283     startRow = Bytes.readByteArray(in);
284     endRow = Bytes.readByteArray(in);
285     regionLocation = Bytes.toString(Bytes.readByteArray(in));
286     if (version.atLeast(Version.INITIAL)) {
287       scan = Bytes.toString(Bytes.readByteArray(in));
288     }
289     length = WritableUtils.readVLong(in);
290   }
291 
292   /**
293    * Writes the field values to the output.
294    *
295    * @param out  The output to write to.
296    * @throws IOException When writing the values to the output fails.
297    */
298   @Override
299   public void write(DataOutput out) throws IOException {
300     WritableUtils.writeVInt(out, VERSION.code);
301     Bytes.writeByteArray(out, tableName.getName());
302     Bytes.writeByteArray(out, startRow);
303     Bytes.writeByteArray(out, endRow);
304     Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
305     Bytes.writeByteArray(out, Bytes.toBytes(scan));
306     WritableUtils.writeVLong(out, length);
307   }
308 
309   /**
310    * Returns the details about this instance as a string.
311    *
312    * @return The values of this instance as a string.
313    * @see java.lang.Object#toString()
314    */
315   @Override
316   public String toString() {
317     return regionLocation + ":" +
318       Bytes.toStringBinary(startRow) + "," + Bytes.toStringBinary(endRow);
319   }
320 
321   /**
322    * Compares this split against the given one.
323    *
324    * @param split  The split to compare to.
325    * @return The result of the comparison.
326    * @see java.lang.Comparable#compareTo(java.lang.Object)
327    */
328   @Override
329   public int compareTo(TableSplit split) {
330     // If The table name of the two splits is the same then compare start row
331     // otherwise compare based on table names
332     int tableNameComparison =
333         getTable().compareTo(split.getTable());
334     return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
335         getStartRow(), split.getStartRow());
336   }
337 
338   @Override
339   public boolean equals(Object o) {
340     if (o == null || !(o instanceof TableSplit)) {
341       return false;
342     }
343     return tableName.equals(((TableSplit)o).tableName) &&
344       Bytes.equals(startRow, ((TableSplit)o).startRow) &&
345       Bytes.equals(endRow, ((TableSplit)o).endRow) &&
346       regionLocation.equals(((TableSplit)o).regionLocation);
347   }
348 
349     @Override
350     public int hashCode() {
351         int result = tableName != null ? tableName.hashCode() : 0;
352         result = 31 * result + (scan != null ? scan.hashCode() : 0);
353         result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
354         result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
355         result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
356         return result;
357     }
358 }