View Javadoc

1   /**
2    * Copyright 2007 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.mapreduce;
21  
22  import java.io.DataInput;
23  import java.io.DataOutput;
24  import java.io.IOException;
25  import java.util.Arrays;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.HConstants;
30  import org.apache.hadoop.hbase.client.Scan;
31  import org.apache.hadoop.hbase.util.Bytes;
32  import org.apache.hadoop.io.Writable;
33  import org.apache.hadoop.io.WritableUtils;
34  import org.apache.hadoop.mapreduce.InputSplit;
35  
36  /**
37   * A table split corresponds to a key range (low, high) and an optional scanner.
38   * All references to row below refer to the key of the row.
39   */
40  public class TableSplit extends InputSplit
41  implements Writable, Comparable<TableSplit> { 
42    public static final Log LOG = LogFactory.getLog(TableSplit.class);
43  
44    // should be < 0 (@see #readFields(DataInput))
45    // version 1 supports Scan data member
46    enum Version {
47      UNVERSIONED(0),
48      // Initial number we put on TableSplit when we introduced versioning.
49      INITIAL(-1);
50  
51      final int code;
52      static final Version[] byCode;
53      static {
54        byCode = Version.values();
55        for (int i = 0; i < byCode.length; i++) {
56          if (byCode[i].code != -1 * i) {
57            throw new AssertionError("Values in this enum should be descending by one");
58          }
59        }
60      }
61  
62      Version(int code) {
63        this.code = code;
64      }
65  
66      boolean atLeast(Version other) {
67        return code <= other.code;
68      }
69  
70      static Version fromCode(int code) {
71        return byCode[code * -1];
72      }
73    }
74  
75    private static final Version VERSION = Version.INITIAL;
76  
77    private byte [] tableName;
78    private byte [] startRow;
79    private byte [] endRow;
80    private String regionLocation;
81    private String scan = ""; // stores the serialized form of the Scan
82  
83    /** Default constructor. */
84    public TableSplit() {
85      this(HConstants.EMPTY_BYTE_ARRAY, null, HConstants.EMPTY_BYTE_ARRAY,
86        HConstants.EMPTY_BYTE_ARRAY, "");
87    }
88  
89    /**
90     * Creates a new instance while assigning all variables.
91     *
92     * @param tableName  The name of the current table.
93     * @param scan The scan associated with this split.
94     * @param startRow  The start row of the split.
95     * @param endRow  The end row of the split.
96     * @param location  The location of the region.
97     */
98    public TableSplit(byte [] tableName, Scan scan, byte [] startRow, byte [] endRow,
99        final String location) {
100     this.tableName = tableName;
101     try {
102       this.scan =
103         (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
104     } catch (IOException e) {
105       LOG.warn("Failed to convert Scan to String", e);
106     }
107     this.startRow = startRow;
108     this.endRow = endRow;
109     this.regionLocation = location;
110   }
111   
112   /**
113    * Creates a new instance without a scanner.
114    *
115    * @param tableName The name of the current table.
116    * @param startRow The start row of the split.
117    * @param endRow The end row of the split.
118    * @param location The location of the region.
119    */
120   public TableSplit(byte[] tableName, byte[] startRow, byte[] endRow,
121       final String location) {
122     this(tableName, null, startRow, endRow, location);
123   }
124 
125   /**
126    * Returns a Scan object from the stored string representation.
127    *
128    * @return Returns a Scan object based on the stored scanner.
129    * @throws IOException
130    */
131   public Scan getScan() throws IOException {
132     return TableMapReduceUtil.convertStringToScan(this.scan);
133   }
134 
135   /**
136    * Returns the table name.
137    *
138    * @return The table name.
139    */
140   public byte [] getTableName() {
141     return tableName;
142   }
143 
144   /**
145    * Returns the start row.
146    *
147    * @return The start row.
148    */
149   public byte [] getStartRow() {
150     return startRow;
151   }
152 
153   /**
154    * Returns the end row.
155    *
156    * @return The end row.
157    */
158   public byte [] getEndRow() {
159     return endRow;
160   }
161 
162   /**
163    * Returns the region location.
164    *
165    * @return The region's location.
166    */
167   public String getRegionLocation() {
168     return regionLocation;
169   }
170 
171   /**
172    * Returns the region's location as an array.
173    *
174    * @return The array containing the region location.
175    * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
176    */
177   @Override
178   public String[] getLocations() {
179     return new String[] {regionLocation};
180   }
181 
182   /**
183    * Returns the length of the split.
184    *
185    * @return The length of the split.
186    * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
187    */
188   @Override
189   public long getLength() {
190     // Not clear how to obtain this... seems to be used only for sorting splits
191     return 0;
192   }
193 
194   /**
195    * Reads the values of each field.
196    *
197    * @param in  The input to read from.
198    * @throws IOException When reading the input fails.
199    */
200   @Override
201   public void readFields(DataInput in) throws IOException {
202     Version version = Version.UNVERSIONED;
203     // TableSplit was not versioned in the beginning.
204     // In order to introduce it now, we make use of the fact
205     // that tableName was written with Bytes.writeByteArray,
206     // which encodes the array length as a vint which is >= 0.
207     // Hence if the vint is >= 0 we have an old version and the vint
208     // encodes the length of tableName.
209     // If < 0 we just read the version and the next vint is the length.
210     // @see Bytes#readByteArray(DataInput)
211     int len = WritableUtils.readVInt(in);
212     if (len < 0) {
213       // what we just read was the version
214       version = Version.fromCode(len);
215       len = WritableUtils.readVInt(in);
216     }
217     tableName = new byte[len];
218     in.readFully(tableName);
219     startRow = Bytes.readByteArray(in);
220     endRow = Bytes.readByteArray(in);
221     regionLocation = Bytes.toString(Bytes.readByteArray(in));
222     if (version.atLeast(Version.INITIAL)) {
223       scan = Bytes.toString(Bytes.readByteArray(in));
224     }
225   }
226 
227   /**
228    * Writes the field values to the output.
229    *
230    * @param out  The output to write to.
231    * @throws IOException When writing the values to the output fails.
232    */
233   @Override
234   public void write(DataOutput out) throws IOException {
235     WritableUtils.writeVInt(out, VERSION.code);
236     Bytes.writeByteArray(out, tableName);
237     Bytes.writeByteArray(out, startRow);
238     Bytes.writeByteArray(out, endRow);
239     Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
240     Bytes.writeByteArray(out, Bytes.toBytes(scan));
241   }
242 
243   /**
244    * Returns the details about this instance as a string.
245    *
246    * @return The values of this instance as a string.
247    * @see java.lang.Object#toString()
248    */
249   @Override
250   public String toString() {
251     return regionLocation + ":" +
252       Bytes.toStringBinary(startRow) + "," + Bytes.toStringBinary(endRow);
253   }
254 
255   /**
256    * Compares this split against the given one.
257    *
258    * @param split  The split to compare to.
259    * @return The result of the comparison.
260    * @see java.lang.Comparable#compareTo(java.lang.Object)
261    */
262   @Override
263   public int compareTo(TableSplit split) {
264     // If The table name of the two splits is the same then compare start row
265     // otherwise compare based on table names
266     int tableNameComparison =
267         Bytes.compareTo(getTableName(), split.getTableName());
268     return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
269         getStartRow(), split.getStartRow());
270   }
271 
272   @Override
273   public boolean equals(Object o) {
274     if (o == null || !(o instanceof TableSplit)) {
275       return false;
276     }
277     return Bytes.equals(tableName, ((TableSplit)o).tableName) &&
278       Bytes.equals(startRow, ((TableSplit)o).startRow) &&
279       Bytes.equals(endRow, ((TableSplit)o).endRow) &&
280       regionLocation.equals(((TableSplit)o).regionLocation);
281   }
282 
283     @Override
284     public int hashCode() {
285         int result = tableName != null ? Arrays.hashCode(tableName) : 0;
286         result = 31 * result + (scan != null ? scan.hashCode() : 0);
287         result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
288         result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
289         result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
290         return result;
291     }
292 }