View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.mapreduce;
20  
21  import java.io.DataInput;
22  import java.io.DataOutput;
23  import java.io.IOException;
24  import java.util.Arrays;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.hbase.classification.InterfaceStability;
30  import org.apache.hadoop.hbase.TableName;
31  import org.apache.hadoop.hbase.HConstants;
32  import org.apache.hadoop.hbase.client.Scan;
33  import org.apache.hadoop.hbase.util.Bytes;
34  import org.apache.hadoop.io.Writable;
35  import org.apache.hadoop.io.WritableUtils;
36  import org.apache.hadoop.mapreduce.InputSplit;
37  
38  /**
39   * A table split corresponds to a key range (low, high) and an optional scanner.
40   * All references to row below refer to the key of the row.
41   */
42  @InterfaceAudience.Public
43  @InterfaceStability.Evolving
44  public class TableSplit extends InputSplit
45  implements Writable, Comparable<TableSplit> {
46    /** @deprecated LOG variable would be made private. fix in hbase 3.0 */
47    @Deprecated
48    public static final Log LOG = LogFactory.getLog(TableSplit.class);
49  
50    // should be < 0 (@see #readFields(DataInput))
51    // version 1 supports Scan data member
52    enum Version {
53      UNVERSIONED(0),
54      // Initial number we put on TableSplit when we introduced versioning.
55      INITIAL(-1),
56      // Added an encoded region name field for easier identification of split -> region
57      WITH_ENCODED_REGION_NAME(-2);
58  
59      final int code;
60      static final Version[] byCode;
61      static {
62        byCode = Version.values();
63        for (int i = 0; i < byCode.length; i++) {
64          if (byCode[i].code != -1 * i) {
65            throw new AssertionError("Values in this enum should be descending by one");
66          }
67        }
68      }
69  
70      Version(int code) {
71        this.code = code;
72      }
73  
74      boolean atLeast(Version other) {
75        return code <= other.code;
76      }
77  
78      static Version fromCode(int code) {
79        return byCode[code * -1];
80      }
81    }
82  
83    private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
84    private TableName tableName;
85    private byte [] startRow;
86    private byte [] endRow;
87    private String regionLocation;
88    private String encodedRegionName = "";
89    private String scan = ""; // stores the serialized form of the Scan
90    private long length; // Contains estimation of region size in bytes
91  
92    /** Default constructor. */
93    public TableSplit() {
94      this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
95        HConstants.EMPTY_BYTE_ARRAY, "");
96    }
97  
98    /**
99     * Creates a new instance while assigning all variables.
100    * Length of region is set to 0
101    * Encoded name of the region is set to blank
102    *
103    * @param tableName  The name of the current table.
104    * @param scan The scan associated with this split.
105    * @param startRow  The start row of the split.
106    * @param endRow  The end row of the split.
107    * @param location  The location of the region.
108    */
109   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
110                     final String location) {
111     this(tableName, scan, startRow, endRow, location, 0L);
112   }
113 
114   /**
115    * Creates a new instance while assigning all variables.
116    * Encoded name of region is set to blank
117    *
118    * @param tableName  The name of the current table.
119    * @param scan The scan associated with this split.
120    * @param startRow  The start row of the split.
121    * @param endRow  The end row of the split.
122    * @param location  The location of the region.
123    */
124   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
125       final String location, long length) {
126     this(tableName, scan, startRow, endRow, location, "", length);
127   }
128 
129   /**
130    * Creates a new instance while assigning all variables.
131    *
132    * @param tableName  The name of the current table.
133    * @param scan The scan associated with this split.
134    * @param startRow  The start row of the split.
135    * @param endRow  The end row of the split.
136    * @param encodedRegionName The region ID.
137    * @param location  The location of the region.
138    */
139   public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
140       final String location, final String encodedRegionName, long length) {
141     this.tableName = tableName;
142     try {
143       this.scan =
144         (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
145     } catch (IOException e) {
146       LOG.warn("Failed to convert Scan to String", e);
147     }
148     this.startRow = startRow;
149     this.endRow = endRow;
150     this.regionLocation = location;
151     this.encodedRegionName = encodedRegionName;
152     this.length = length;
153   }
154 
155   /**
156    * Creates a new instance without a scanner.
157    * Length of region is set to 0
158    *
159    * @param tableName The name of the current table.
160    * @param startRow The start row of the split.
161    * @param endRow The end row of the split.
162    * @param location The location of the region.
163    */
164   public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
165       final String location) {
166     this(tableName, null, startRow, endRow, location);
167   }
168 
169   /**
170    * Creates a new instance without a scanner.
171    *
172    * @param tableName The name of the current table.
173    * @param startRow The start row of the split.
174    * @param endRow The end row of the split.
175    * @param location The location of the region.
176    * @param length Size of region in bytes
177    */
178   public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
179                     final String location, long length) {
180     this(tableName, null, startRow, endRow, location, length);
181   }
182 
183   /**
184    * Returns a Scan object from the stored string representation.
185    *
186    * @return Returns a Scan object based on the stored scanner.
187    * @throws IOException
188    */
189   public Scan getScan() throws IOException {
190     return TableMapReduceUtil.convertStringToScan(this.scan);
191   }
192 
193   /**
194    * Returns the table name converted to a byte array.
195    * @see #getTable()
196    * @return The table name.
197    */
198   public byte [] getTableName() {
199     return tableName.getName();
200   }
201 
202   /**
203    * Returns the table name.
204    *
205    * @return The table name.
206    */
207   public TableName getTable() {
208     // It is ugly that usually to get a TableName, the method is called getTableName.  We can't do
209     // that in here though because there was an existing getTableName in place already since
210     // deprecated.
211     return tableName;
212   }
213 
214   /**
215    * Returns the start row.
216    *
217    * @return The start row.
218    */
219   public byte [] getStartRow() {
220     return startRow;
221   }
222 
223   /**
224    * Returns the end row.
225    *
226    * @return The end row.
227    */
228   public byte [] getEndRow() {
229     return endRow;
230   }
231 
232   /**
233    * Returns the region location.
234    *
235    * @return The region's location.
236    */
237   public String getRegionLocation() {
238     return regionLocation;
239   }
240 
241   /**
242    * Returns the region's location as an array.
243    *
244    * @return The array containing the region location.
245    * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
246    */
247   @Override
248   public String[] getLocations() {
249     return new String[] {regionLocation};
250   }
251 
252   /**
253    * Returns the region's encoded name.
254    *
255    * @return The region's encoded name.
256    */
257   public String getEncodedRegionName() {
258     return encodedRegionName;
259   }
260 
261   /**
262    * Returns the length of the split.
263    *
264    * @return The length of the split.
265    * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
266    */
267   @Override
268   public long getLength() {
269     return length;
270   }
271 
272   /**
273    * Reads the values of each field.
274    *
275    * @param in  The input to read from.
276    * @throws IOException When reading the input fails.
277    */
278   @Override
279   public void readFields(DataInput in) throws IOException {
280     Version version = Version.UNVERSIONED;
281     // TableSplit was not versioned in the beginning.
282     // In order to introduce it now, we make use of the fact
283     // that tableName was written with Bytes.writeByteArray,
284     // which encodes the array length as a vint which is >= 0.
285     // Hence if the vint is >= 0 we have an old version and the vint
286     // encodes the length of tableName.
287     // If < 0 we just read the version and the next vint is the length.
288     // @see Bytes#readByteArray(DataInput)
289     int len = WritableUtils.readVInt(in);
290     if (len < 0) {
291       // what we just read was the version
292       version = Version.fromCode(len);
293       len = WritableUtils.readVInt(in);
294     }
295     byte[] tableNameBytes = new byte[len];
296     in.readFully(tableNameBytes);
297     tableName = TableName.valueOf(tableNameBytes);
298     startRow = Bytes.readByteArray(in);
299     endRow = Bytes.readByteArray(in);
300     regionLocation = Bytes.toString(Bytes.readByteArray(in));
301     if (version.atLeast(Version.INITIAL)) {
302       scan = Bytes.toString(Bytes.readByteArray(in));
303     }
304     length = WritableUtils.readVLong(in);
305     if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
306       encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
307     }
308   }
309 
310   /**
311    * Writes the field values to the output.
312    *
313    * @param out  The output to write to.
314    * @throws IOException When writing the values to the output fails.
315    */
316   @Override
317   public void write(DataOutput out) throws IOException {
318     WritableUtils.writeVInt(out, VERSION.code);
319     Bytes.writeByteArray(out, tableName.getName());
320     Bytes.writeByteArray(out, startRow);
321     Bytes.writeByteArray(out, endRow);
322     Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
323     Bytes.writeByteArray(out, Bytes.toBytes(scan));
324     WritableUtils.writeVLong(out, length);
325     Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
326   }
327 
328   /**
329    * Returns the details about this instance as a string.
330    *
331    * @return The values of this instance as a string.
332    * @see java.lang.Object#toString()
333    */
334   @Override
335   public String toString() {
336     StringBuilder sb = new StringBuilder();
337     sb.append("HBase table split(");
338     sb.append("table name: ").append(tableName);
339     // null scan input is represented by ""
340     String printScan = "";
341     if (!scan.equals("")) {
342       try {
343         // get the real scan here in toString, not the Base64 string
344         printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
345       }
346       catch (IOException e) {
347         printScan = "";
348       }
349     }
350     sb.append(", scan: ").append(printScan);
351     sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
352     sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
353     sb.append(", region location: ").append(regionLocation);
354     sb.append(", encoded region name: ").append(encodedRegionName);
355     sb.append(")");
356     return sb.toString();
357   }
358 
359   /**
360    * Compares this split against the given one.
361    *
362    * @param split  The split to compare to.
363    * @return The result of the comparison.
364    * @see java.lang.Comparable#compareTo(java.lang.Object)
365    */
366   @Override
367   public int compareTo(TableSplit split) {
368     // If The table name of the two splits is the same then compare start row
369     // otherwise compare based on table names
370     int tableNameComparison =
371         getTable().compareTo(split.getTable());
372     return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
373         getStartRow(), split.getStartRow());
374   }
375 
376   @Override
377   public boolean equals(Object o) {
378     if (o == null || !(o instanceof TableSplit)) {
379       return false;
380     }
381     return tableName.equals(((TableSplit)o).tableName) &&
382       Bytes.equals(startRow, ((TableSplit)o).startRow) &&
383       Bytes.equals(endRow, ((TableSplit)o).endRow) &&
384       regionLocation.equals(((TableSplit)o).regionLocation);
385   }
386 
387   @Override
388   public int hashCode() {
389     int result = tableName != null ? tableName.hashCode() : 0;
390     result = 31 * result + (scan != null ? scan.hashCode() : 0);
391     result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
392     result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
393     result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
394     result = 31 * result + (encodedRegionName != null ? encodedRegionName.hashCode() : 0);
395     return result;
396   }
397 }