View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.DataInput;
22  import java.io.DataOutput;
23  import java.io.IOException;
24  import java.util.Arrays;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.hbase.classification.InterfaceStability;
30  import org.apache.hadoop.hbase.TableName;
31  import org.apache.hadoop.hbase.HConstants;
32  import org.apache.hadoop.hbase.client.Scan;
33  import org.apache.hadoop.hbase.util.Bytes;
34  import org.apache.hadoop.io.Writable;
35  import org.apache.hadoop.io.WritableUtils;
36  import org.apache.hadoop.mapreduce.InputSplit;
37  
38  /**
39   * A table split corresponds to a key range (low, high) and an optional scanner.
40   * All references to row below refer to the key of the row.
41   */
42  @InterfaceAudience.Public
43  @InterfaceStability.Evolving
44  public class TableSplit extends InputSplit
45  implements Writable, Comparable<TableSplit> {
46    /** @deprecated LOG variable would be made private. */
47    @Deprecated
48    public static final Log LOG = LogFactory.getLog(TableSplit.class);
49  
50    // should be < 0 (@see #readFields(DataInput))
51    // version 1 supports Scan data member
52    enum Version {
53      UNVERSIONED(0),
54      // Initial number we put on TableSplit when we introduced versioning.
55      INITIAL(-1);
56  
57      final int code;
58      static final Version[] byCode;
59      static {
60        byCode = Version.values();
61        for (int i = 0; i < byCode.length; i++) {
62          if (byCode[i].code != -1 * i) {
63            throw new AssertionError("Values in this enum should be descending by one");
64          }
65        }
66      }
67  
68      Version(int code) {
69        this.code = code;
70      }
71  
72      boolean atLeast(Version other) {
73        return code <= other.code;
74      }
75  
76      static Version fromCode(int code) {
77        return byCode[code * -1];
78      }
79    }
80  
81    private static final Version VERSION = Version.INITIAL;
82    private TableName tableName;
83    private byte [] startRow;
84    private byte [] endRow;
85    private String regionLocation;
86    private String scan = ""; // stores the serialized form of the Scan
87    private long length; // Contains estimation of region size in bytes
88  
89    /** Default constructor. */
90    public TableSplit() {
91      this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
92        HConstants.EMPTY_BYTE_ARRAY, "");
93    }
94  
95    /**
96     * Creates a new instance while assigning all variables.
97     * Length of region is set to 0
98     *
99     * @param tableName  The name of the current table.
100    * @param scan The scan associated with this split.
101    * @param startRow  The start row of the split.
102    * @param endRow  The end row of the split.
103    * @param location  The location of the region.
104    */
105   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
106                     final String location) {
107     this(tableName, scan, startRow, endRow, location, 0L);
108   }
109 
110   /**
111    * Creates a new instance while assigning all variables.
112    *
113    * @param tableName  The name of the current table.
114    * @param scan The scan associated with this split.
115    * @param startRow  The start row of the split.
116    * @param endRow  The end row of the split.
117    * @param location  The location of the region.
118    */
119   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
120       final String location, long length) {
121     this.tableName = tableName;
122     try {
123       this.scan =
124         (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
125     } catch (IOException e) {
126       LOG.warn("Failed to convert Scan to String", e);
127     }
128     this.startRow = startRow;
129     this.endRow = endRow;
130     this.regionLocation = location;
131     this.length = length;
132   }
133 
134   /**
135    * Creates a new instance without a scanner.
136    *
137    * @param tableName The name of the current table.
138    * @param startRow The start row of the split.
139    * @param endRow The end row of the split.
140    * @param location The location of the region.
141    */
142   public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
143       final String location) {
144     this(tableName, null, startRow, endRow, location);
145   }
146 
147   /**
148    * Creates a new instance without a scanner.
149    *
150    * @param tableName The name of the current table.
151    * @param startRow The start row of the split.
152    * @param endRow The end row of the split.
153    * @param location The location of the region.
154    * @param length Size of region in bytes
155    */
156   public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
157                     final String location, long length) {
158     this(tableName, null, startRow, endRow, location, length);
159   }
160 
161   /**
162    * Returns a Scan object from the stored string representation.
163    *
164    * @return Returns a Scan object based on the stored scanner.
165    * @throws IOException
166    */
167   public Scan getScan() throws IOException {
168     return TableMapReduceUtil.convertStringToScan(this.scan);
169   }
170 
171   /**
172    * Returns the table name converted to a byte array.
173    * @see #getTable()
174    * @return The table name.
175    */
176   public byte [] getTableName() {
177     return tableName.getName();
178   }
179 
180   /**
181    * Returns the table name.
182    *
183    * @return The table name.
184    */
185   public TableName getTable() {
186     // It is ugly that usually to get a TableName, the method is called getTableName.  We can't do
187     // that in here though because there was an existing getTableName in place already since
188     // deprecated.
189     return tableName;
190   }
191 
192   /**
193    * Returns the start row.
194    *
195    * @return The start row.
196    */
197   public byte [] getStartRow() {
198     return startRow;
199   }
200 
201   /**
202    * Returns the end row.
203    *
204    * @return The end row.
205    */
206   public byte [] getEndRow() {
207     return endRow;
208   }
209 
210   /**
211    * Returns the region location.
212    *
213    * @return The region's location.
214    */
215   public String getRegionLocation() {
216     return regionLocation;
217   }
218 
219   /**
220    * Returns the region's location as an array.
221    *
222    * @return The array containing the region location.
223    * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
224    */
225   @Override
226   public String[] getLocations() {
227     return new String[] {regionLocation};
228   }
229 
230   /**
231    * Returns the length of the split.
232    *
233    * @return The length of the split.
234    * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
235    */
236   @Override
237   public long getLength() {
238     return length;
239   }
240 
241   /**
242    * Reads the values of each field.
243    *
244    * @param in  The input to read from.
245    * @throws IOException When reading the input fails.
246    */
247   @Override
248   public void readFields(DataInput in) throws IOException {
249     Version version = Version.UNVERSIONED;
250     // TableSplit was not versioned in the beginning.
251     // In order to introduce it now, we make use of the fact
252     // that tableName was written with Bytes.writeByteArray,
253     // which encodes the array length as a vint which is >= 0.
254     // Hence if the vint is >= 0 we have an old version and the vint
255     // encodes the length of tableName.
256     // If < 0 we just read the version and the next vint is the length.
257     // @see Bytes#readByteArray(DataInput)
258     int len = WritableUtils.readVInt(in);
259     if (len < 0) {
260       // what we just read was the version
261       version = Version.fromCode(len);
262       len = WritableUtils.readVInt(in);
263     }
264     byte[] tableNameBytes = new byte[len];
265     in.readFully(tableNameBytes);
266     tableName = TableName.valueOf(tableNameBytes);
267     startRow = Bytes.readByteArray(in);
268     endRow = Bytes.readByteArray(in);
269     regionLocation = Bytes.toString(Bytes.readByteArray(in));
270     if (version.atLeast(Version.INITIAL)) {
271       scan = Bytes.toString(Bytes.readByteArray(in));
272     }
273     length = WritableUtils.readVLong(in);
274   }
275 
276   /**
277    * Writes the field values to the output.
278    *
279    * @param out  The output to write to.
280    * @throws IOException When writing the values to the output fails.
281    */
282   @Override
283   public void write(DataOutput out) throws IOException {
284     WritableUtils.writeVInt(out, VERSION.code);
285     Bytes.writeByteArray(out, tableName.getName());
286     Bytes.writeByteArray(out, startRow);
287     Bytes.writeByteArray(out, endRow);
288     Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
289     Bytes.writeByteArray(out, Bytes.toBytes(scan));
290     WritableUtils.writeVLong(out, length);
291   }
292 
293   /**
294    * Returns the details about this instance as a string.
295    *
296    * @return The values of this instance as a string.
297    * @see java.lang.Object#toString()
298    */
299   @Override
300   public String toString() {
301     StringBuilder sb = new StringBuilder();
302     sb.append("HBase table split(");
303     sb.append("table name: ").append(tableName);
304     sb.append(", scan: ").append(scan);
305     sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
306     sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
307     sb.append(", region location: ").append(regionLocation);
308     sb.append(")");
309     return sb.toString();
310   }
311 
312   /**
313    * Compares this split against the given one.
314    *
315    * @param split  The split to compare to.
316    * @return The result of the comparison.
317    * @see java.lang.Comparable#compareTo(java.lang.Object)
318    */
319   @Override
320   public int compareTo(TableSplit split) {
321     // If The table name of the two splits is the same then compare start row
322     // otherwise compare based on table names
323     int tableNameComparison =
324         getTable().compareTo(split.getTable());
325     return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
326         getStartRow(), split.getStartRow());
327   }
328 
329   @Override
330   public boolean equals(Object o) {
331     if (o == null || !(o instanceof TableSplit)) {
332       return false;
333     }
334     return tableName.equals(((TableSplit)o).tableName) &&
335       Bytes.equals(startRow, ((TableSplit)o).startRow) &&
336       Bytes.equals(endRow, ((TableSplit)o).endRow) &&
337       regionLocation.equals(((TableSplit)o).regionLocation);
338   }
339 
340     @Override
341     public int hashCode() {
342         int result = tableName != null ? tableName.hashCode() : 0;
343         result = 31 * result + (scan != null ? scan.hashCode() : 0);
344         result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
345         result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
346         result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
347         return result;
348     }
349 }