View Javadoc

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.DataInput;
22  import java.io.DataOutput;
23  import java.io.IOException;
24  import java.util.Arrays;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.hbase.classification.InterfaceStability;
30  import org.apache.hadoop.hbase.TableName;
31  import org.apache.hadoop.hbase.HConstants;
32  import org.apache.hadoop.hbase.client.Scan;
33  import org.apache.hadoop.hbase.util.Bytes;
34  import org.apache.hadoop.io.Writable;
35  import org.apache.hadoop.io.WritableUtils;
36  import org.apache.hadoop.mapreduce.InputSplit;
37  
38  /**
39   * A table split corresponds to a key range (low, high) and an optional scanner.
40   * All references to row below refer to the key of the row.
41   */
42  @InterfaceAudience.Public
43  @InterfaceStability.Evolving
44  public class TableSplit extends InputSplit
45  implements Writable, Comparable<TableSplit> {
46    /** @deprecated LOG variable would be made private. */
47    @Deprecated
48    public static final Log LOG = LogFactory.getLog(TableSplit.class);
49    
50    // should be < 0 (@see #readFields(DataInput))
51    // version 1 supports Scan data member
52    enum Version {
53      UNVERSIONED(0),
54      // Initial number we put on TableSplit when we introduced versioning.
55      INITIAL(-1);
56  
57      final int code;
58      static final Version[] byCode;
59      static {
60        byCode = Version.values();
61        for (int i = 0; i < byCode.length; i++) {
62          if (byCode[i].code != -1 * i) {
63            throw new AssertionError("Values in this enum should be descending by one");
64          }
65        }
66      }
67  
68      Version(int code) {
69        this.code = code;
70      }
71  
72      boolean atLeast(Version other) {
73        return code <= other.code;
74      }
75  
76      static Version fromCode(int code) {
77        return byCode[code * -1];
78      }
79    }
80    
81    private static final Version VERSION = Version.INITIAL;
82    private TableName tableName;
83    private byte [] startRow;
84    private byte [] endRow;
85    private String regionLocation;
86    private String scan = ""; // stores the serialized form of the Scan
87    private long length; // Contains estimation of region size in bytes
88  
89    /** Default constructor. */
90    public TableSplit() {
91      this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
92        HConstants.EMPTY_BYTE_ARRAY, "");
93    }
94  
95    /**
96     * @deprecated As of release 0.96
97     *             (<a href="https://issues.apache.org/jira/browse/HBASE-9508">HBASE-9508</a>).
98     *             This will be removed in HBase 2.0.0.
99     *             Use {@link TableSplit#TableSplit(TableName, byte[], byte[], String)}.
100    */
101   @Deprecated
102   public TableSplit(final byte [] tableName, Scan scan, byte [] startRow, byte [] endRow,
103       final String location) {
104     this(TableName.valueOf(tableName), scan, startRow, endRow, location);
105   }
106 
107   /**
108    * Creates a new instance while assigning all variables.
109    * Length of region is set to 0
110    *
111    * @param tableName  The name of the current table.
112    * @param scan The scan associated with this split.
113    * @param startRow  The start row of the split.
114    * @param endRow  The end row of the split.
115    * @param location  The location of the region.
116    */
117   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
118                     final String location) {
119     this(tableName, scan, startRow, endRow, location, 0L);
120   }
121 
122   /**
123    * Creates a new instance while assigning all variables.
124    *
125    * @param tableName  The name of the current table.
126    * @param scan The scan associated with this split.
127    * @param startRow  The start row of the split.
128    * @param endRow  The end row of the split.
129    * @param location  The location of the region.
130    */
131   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
132       final String location, long length) {
133     this.tableName = tableName;
134     try {
135       this.scan =
136         (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
137     } catch (IOException e) {
138       LOG.warn("Failed to convert Scan to String", e);
139     }
140     this.startRow = startRow;
141     this.endRow = endRow;
142     this.regionLocation = location;
143     this.length = length;
144   }
145 
146   /**
147    * @deprecated As of release 0.96
148    *             (<a href="https://issues.apache.org/jira/browse/HBASE-9508">HBASE-9508</a>).
149    *             This will be removed in HBase 2.0.0.
150    *             Use {@link TableSplit#TableSplit(TableName, byte[], byte[], String)}.
151    */
152   @Deprecated
153   public TableSplit(final byte [] tableName, byte[] startRow, byte[] endRow,
154       final String location) {
155     this(TableName.valueOf(tableName), startRow, endRow, location);
156   }
157 
158   /**
159    * Creates a new instance without a scanner.
160    *
161    * @param tableName The name of the current table.
162    * @param startRow The start row of the split.
163    * @param endRow The end row of the split.
164    * @param location The location of the region.
165    */
166   public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
167       final String location) {
168     this(tableName, null, startRow, endRow, location);
169   }
170 
171   /**
172    * Creates a new instance without a scanner.
173    *
174    * @param tableName The name of the current table.
175    * @param startRow The start row of the split.
176    * @param endRow The end row of the split.
177    * @param location The location of the region.
178    * @param length Size of region in bytes
179    */
180   public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
181                     final String location, long length) {
182     this(tableName, null, startRow, endRow, location, length);
183   }
184 
185   /**
186    * Returns a Scan object from the stored string representation.
187    *
188    * @return Returns a Scan object based on the stored scanner.
189    * @throws IOException
190    */
191   public Scan getScan() throws IOException {
192     return TableMapReduceUtil.convertStringToScan(this.scan);
193   }
194 
195   /**
196    * Returns the table name converted to a byte array.
197    * @see #getTable()
198    * @return The table name.
199    */
200   public byte [] getTableName() {
201     return tableName.getName();
202   }
203 
204   /**
205    * Returns the table name.
206    *
207    * @return The table name.
208    */
209   public TableName getTable() {
210     // It is ugly that usually to get a TableName, the method is called getTableName.  We can't do
211     // that in here though because there was an existing getTableName in place already since
212     // deprecated.
213     return tableName;
214   }
215 
216   /**
217    * Returns the start row.
218    *
219    * @return The start row.
220    */
221   public byte [] getStartRow() {
222     return startRow;
223   }
224 
225   /**
226    * Returns the end row.
227    *
228    * @return The end row.
229    */
230   public byte [] getEndRow() {
231     return endRow;
232   }
233 
234   /**
235    * Returns the region location.
236    *
237    * @return The region's location.
238    */
239   public String getRegionLocation() {
240     return regionLocation;
241   }
242 
243   /**
244    * Returns the region's location as an array.
245    *
246    * @return The array containing the region location.
247    * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
248    */
249   @Override
250   public String[] getLocations() {
251     return new String[] {regionLocation};
252   }
253 
254   /**
255    * Returns the length of the split.
256    *
257    * @return The length of the split.
258    * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
259    */
260   @Override
261   public long getLength() {
262     return length;
263   }
264 
265   /**
266    * Reads the values of each field.
267    *
268    * @param in  The input to read from.
269    * @throws IOException When reading the input fails.
270    */
271   @Override
272   public void readFields(DataInput in) throws IOException {
273     Version version = Version.UNVERSIONED;
274     // TableSplit was not versioned in the beginning.
275     // In order to introduce it now, we make use of the fact
276     // that tableName was written with Bytes.writeByteArray,
277     // which encodes the array length as a vint which is >= 0.
278     // Hence if the vint is >= 0 we have an old version and the vint
279     // encodes the length of tableName.
280     // If < 0 we just read the version and the next vint is the length.
281     // @see Bytes#readByteArray(DataInput)
282     int len = WritableUtils.readVInt(in);
283     if (len < 0) {
284       // what we just read was the version
285       version = Version.fromCode(len);
286       len = WritableUtils.readVInt(in);
287     }
288     byte[] tableNameBytes = new byte[len];
289     in.readFully(tableNameBytes);
290     tableName = TableName.valueOf(tableNameBytes);
291     startRow = Bytes.readByteArray(in);
292     endRow = Bytes.readByteArray(in);
293     regionLocation = Bytes.toString(Bytes.readByteArray(in));
294     if (version.atLeast(Version.INITIAL)) {
295       scan = Bytes.toString(Bytes.readByteArray(in));
296     }
297     length = WritableUtils.readVLong(in);
298   }
299 
300   /**
301    * Writes the field values to the output.
302    *
303    * @param out  The output to write to.
304    * @throws IOException When writing the values to the output fails.
305    */
306   @Override
307   public void write(DataOutput out) throws IOException {
308     WritableUtils.writeVInt(out, VERSION.code);
309     Bytes.writeByteArray(out, tableName.getName());
310     Bytes.writeByteArray(out, startRow);
311     Bytes.writeByteArray(out, endRow);
312     Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
313     Bytes.writeByteArray(out, Bytes.toBytes(scan));
314     WritableUtils.writeVLong(out, length);
315   }
316 
317   /**
318    * Returns the details about this instance as a string.
319    *
320    * @return The values of this instance as a string.
321    * @see java.lang.Object#toString()
322    */
323   @Override
324   public String toString() {
325     StringBuilder sb = new StringBuilder();
326     sb.append("HBase table split(");
327     sb.append("table name: ").append(tableName);
328     sb.append(", scan: ").append(scan);
329     sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
330     sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
331     sb.append(", region location: ").append(regionLocation);
332     sb.append(")");
333     return sb.toString();
334   }
335 
336   /**
337    * Compares this split against the given one.
338    *
339    * @param split  The split to compare to.
340    * @return The result of the comparison.
341    * @see java.lang.Comparable#compareTo(java.lang.Object)
342    */
343   @Override
344   public int compareTo(TableSplit split) {
345     // If The table name of the two splits is the same then compare start row
346     // otherwise compare based on table names
347     int tableNameComparison =
348         getTable().compareTo(split.getTable());
349     return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
350         getStartRow(), split.getStartRow());
351   }
352 
353   @Override
354   public boolean equals(Object o) {
355     if (o == null || !(o instanceof TableSplit)) {
356       return false;
357     }
358     return tableName.equals(((TableSplit)o).tableName) &&
359       Bytes.equals(startRow, ((TableSplit)o).startRow) &&
360       Bytes.equals(endRow, ((TableSplit)o).endRow) &&
361       regionLocation.equals(((TableSplit)o).regionLocation);
362   }
363 
364     @Override
365     public int hashCode() {
366         int result = tableName != null ? tableName.hashCode() : 0;
367         result = 31 * result + (scan != null ? scan.hashCode() : 0);
368         result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
369         result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
370         result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
371         return result;
372     }
373 }