Source code

001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.mapreduce;
020
021import java.io.DataInput;
022import java.io.DataOutput;
023import java.io.IOException;
024import java.util.Arrays;
025import org.apache.hadoop.hbase.HConstants;
026import org.apache.hadoop.hbase.TableName;
027import org.apache.hadoop.hbase.client.Scan;
028import org.apache.hadoop.hbase.util.Bytes;
029import org.apache.hadoop.io.Writable;
030import org.apache.hadoop.io.WritableUtils;
031import org.apache.hadoop.mapreduce.InputSplit;
032import org.apache.yetus.audience.InterfaceAudience;
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035
036/**
037 * A table split corresponds to a key range (low, high) and an optional scanner.
038 * All references to row below refer to the key of the row.
039 */
040@InterfaceAudience.Public
041public class TableSplit extends InputSplit
042  implements Writable, Comparable<TableSplit> {
043  /** @deprecated LOG variable would be made private. fix in hbase 3.0 */
044  @Deprecated
045  public static final Logger LOG = LoggerFactory.getLogger(TableSplit.class);
046
047  // should be < 0 (@see #readFields(DataInput))
048  // version 1 supports Scan data member
049  enum Version {
050    UNVERSIONED(0),
051    // Initial number we put on TableSplit when we introduced versioning.
052    INITIAL(-1),
053    // Added an encoded region name field for easier identification of split -> region
054    WITH_ENCODED_REGION_NAME(-2);
055
056    final int code;
057    static final Version[] byCode;
058    static {
059      byCode = Version.values();
060      for (int i = 0; i < byCode.length; i++) {
061        if (byCode[i].code != -1 * i) {
062          throw new AssertionError("Values in this enum should be descending by one");
063        }
064      }
065    }
066
067    Version(int code) {
068      this.code = code;
069    }
070
071    boolean atLeast(Version other) {
072      return code <= other.code;
073    }
074
075    static Version fromCode(int code) {
076      return byCode[code * -1];
077    }
078  }
079
080  private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
081  private TableName tableName;
082  private byte [] startRow;
083  private byte [] endRow;
084  private String regionLocation;
085  private String encodedRegionName = "";
086
087  /**
088   * The scan object may be null but the serialized form of scan is never null
089   * or empty since we serialize the scan object with default values then.
090   * Having no scanner in TableSplit doesn't necessarily mean there is no scanner
091   * for mapreduce job, it just means that we do not need to set it for each split.
092   * For example, it is not required to have a scan object for
093   * {@link org.apache.hadoop.hbase.mapred.TableInputFormatBase} since we use the scan from the
094   * job conf and scanner is supposed to be same for all the splits of table.
095   */
096  private String scan = ""; // stores the serialized form of the Scan
097  private long length; // Contains estimation of region size in bytes
098
099  /** Default constructor. */
100  public TableSplit() {
101    this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
102      HConstants.EMPTY_BYTE_ARRAY, "");
103  }
104
105  /**
106   * Creates a new instance while assigning all variables.
107   * Length of region is set to 0
108   * Encoded name of the region is set to blank
109   *
110   * @param tableName  The name of the current table.
111   * @param scan The scan associated with this split.
112   * @param startRow  The start row of the split.
113   * @param endRow  The end row of the split.
114   * @param location  The location of the region.
115   */
116  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
117                    final String location) {
118    this(tableName, scan, startRow, endRow, location, 0L);
119  }
120
121  /**
122   * Creates a new instance while assigning all variables.
123   * Encoded name of region is set to blank
124   *
125   * @param tableName  The name of the current table.
126   * @param scan The scan associated with this split.
127   * @param startRow  The start row of the split.
128   * @param endRow  The end row of the split.
129   * @param location  The location of the region.
130   */
131  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
132      final String location, long length) {
133    this(tableName, scan, startRow, endRow, location, "", length);
134  }
135
136  /**
137   * Creates a new instance while assigning all variables.
138   *
139   * @param tableName  The name of the current table.
140   * @param scan The scan associated with this split.
141   * @param startRow  The start row of the split.
142   * @param endRow  The end row of the split.
143   * @param encodedRegionName The region ID.
144   * @param location  The location of the region.
145   */
146  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
147      final String location, final String encodedRegionName, long length) {
148    this.tableName = tableName;
149    try {
150      this.scan =
151        (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
152    } catch (IOException e) {
153      LOG.warn("Failed to convert Scan to String", e);
154    }
155    this.startRow = startRow;
156    this.endRow = endRow;
157    this.regionLocation = location;
158    this.encodedRegionName = encodedRegionName;
159    this.length = length;
160  }
161
162  /**
163   * Creates a new instance without a scanner.
164   * Length of region is set to 0
165   *
166   * @param tableName The name of the current table.
167   * @param startRow The start row of the split.
168   * @param endRow The end row of the split.
169   * @param location The location of the region.
170   */
171  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
172      final String location) {
173    this(tableName, null, startRow, endRow, location);
174  }
175
176  /**
177   * Creates a new instance without a scanner.
178   *
179   * @param tableName The name of the current table.
180   * @param startRow The start row of the split.
181   * @param endRow The end row of the split.
182   * @param location The location of the region.
183   * @param length Size of region in bytes
184   */
185  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
186                    final String location, long length) {
187    this(tableName, null, startRow, endRow, location, length);
188  }
189
190  /**
191   * Returns a Scan object from the stored string representation.
192   *
193   * @return Returns a Scan object based on the stored scanner.
194   * @throws IOException throws IOException if deserialization fails
195   */
196  public Scan getScan() throws IOException {
197    return TableMapReduceUtil.convertStringToScan(this.scan);
198  }
199
200  /**
201   * Returns a scan string
202   * @return scan as string. Should be noted that this is not same as getScan().toString()
203   *    because Scan object will have the default values when empty scan string is
204   *    deserialized. Thus, getScan().toString() can never be empty
205   */
206  @InterfaceAudience.Private
207  public String getScanAsString() {
208    return this.scan;
209  }
210
211  /**
212   * Returns the table name converted to a byte array.
213   * @see #getTable()
214   * @return The table name.
215   */
216  public byte [] getTableName() {
217    return tableName.getName();
218  }
219
220  /**
221   * Returns the table name.
222   *
223   * @return The table name.
224   */
225  public TableName getTable() {
226    // It is ugly that usually to get a TableName, the method is called getTableName.  We can't do
227    // that in here though because there was an existing getTableName in place already since
228    // deprecated.
229    return tableName;
230  }
231
232  /**
233   * Returns the start row.
234   *
235   * @return The start row.
236   */
237  public byte [] getStartRow() {
238    return startRow;
239  }
240
241  /**
242   * Returns the end row.
243   *
244   * @return The end row.
245   */
246  public byte [] getEndRow() {
247    return endRow;
248  }
249
250  /**
251   * Returns the region location.
252   *
253   * @return The region's location.
254   */
255  public String getRegionLocation() {
256    return regionLocation;
257  }
258
259  /**
260   * Returns the region's location as an array.
261   *
262   * @return The array containing the region location.
263   * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
264   */
265  @Override
266  public String[] getLocations() {
267    return new String[] {regionLocation};
268  }
269
270  /**
271   * Returns the region's encoded name.
272   *
273   * @return The region's encoded name.
274   */
275  public String getEncodedRegionName() {
276    return encodedRegionName;
277  }
278
279  /**
280   * Returns the length of the split.
281   *
282   * @return The length of the split.
283   * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
284   */
285  @Override
286  public long getLength() {
287    return length;
288  }
289
290  /**
291   * Reads the values of each field.
292   *
293   * @param in  The input to read from.
294   * @throws IOException When reading the input fails.
295   */
296  @Override
297  public void readFields(DataInput in) throws IOException {
298    Version version = Version.UNVERSIONED;
299    // TableSplit was not versioned in the beginning.
300    // In order to introduce it now, we make use of the fact
301    // that tableName was written with Bytes.writeByteArray,
302    // which encodes the array length as a vint which is >= 0.
303    // Hence if the vint is >= 0 we have an old version and the vint
304    // encodes the length of tableName.
305    // If < 0 we just read the version and the next vint is the length.
306    // @see Bytes#readByteArray(DataInput)
307    int len = WritableUtils.readVInt(in);
308    if (len < 0) {
309      // what we just read was the version
310      version = Version.fromCode(len);
311      len = WritableUtils.readVInt(in);
312    }
313    byte[] tableNameBytes = new byte[len];
314    in.readFully(tableNameBytes);
315    tableName = TableName.valueOf(tableNameBytes);
316    startRow = Bytes.readByteArray(in);
317    endRow = Bytes.readByteArray(in);
318    regionLocation = Bytes.toString(Bytes.readByteArray(in));
319    if (version.atLeast(Version.INITIAL)) {
320      scan = Bytes.toString(Bytes.readByteArray(in));
321    }
322    length = WritableUtils.readVLong(in);
323    if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
324      encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
325    }
326  }
327
328  /**
329   * Writes the field values to the output.
330   *
331   * @param out  The output to write to.
332   * @throws IOException When writing the values to the output fails.
333   */
334  @Override
335  public void write(DataOutput out) throws IOException {
336    WritableUtils.writeVInt(out, VERSION.code);
337    Bytes.writeByteArray(out, tableName.getName());
338    Bytes.writeByteArray(out, startRow);
339    Bytes.writeByteArray(out, endRow);
340    Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
341    Bytes.writeByteArray(out, Bytes.toBytes(scan));
342    WritableUtils.writeVLong(out, length);
343    Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
344  }
345
346  /**
347   * Returns the details about this instance as a string.
348   *
349   * @return The values of this instance as a string.
350   * @see java.lang.Object#toString()
351   */
352  @Override
353  public String toString() {
354    StringBuilder sb = new StringBuilder();
355    sb.append("HBase table split(");
356    sb.append("table name: ").append(tableName);
357    // null scan input is represented by ""
358    String printScan = "";
359    if (!scan.equals("")) {
360      try {
361        // get the real scan here in toString, not the Base64 string
362        printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
363      }
364      catch (IOException e) {
365        printScan = "";
366      }
367    }
368    sb.append(", scan: ").append(printScan);
369    sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
370    sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
371    sb.append(", region location: ").append(regionLocation);
372    sb.append(", encoded region name: ").append(encodedRegionName);
373    sb.append(")");
374    return sb.toString();
375  }
376
377  /**
378   * Compares this split against the given one.
379   *
380   * @param split  The split to compare to.
381   * @return The result of the comparison.
382   * @see java.lang.Comparable#compareTo(java.lang.Object)
383   */
384  @Override
385  public int compareTo(TableSplit split) {
386    // If The table name of the two splits is the same then compare start row
387    // otherwise compare based on table names
388    int tableNameComparison =
389        getTable().compareTo(split.getTable());
390    return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
391        getStartRow(), split.getStartRow());
392  }
393
394  @Override
395  public boolean equals(Object o) {
396    if (o == null || !(o instanceof TableSplit)) {
397      return false;
398    }
399    return tableName.equals(((TableSplit)o).tableName) &&
400      Bytes.equals(startRow, ((TableSplit)o).startRow) &&
401      Bytes.equals(endRow, ((TableSplit)o).endRow) &&
402      regionLocation.equals(((TableSplit)o).regionLocation);
403  }
404
405  @Override
406  public int hashCode() {
407    int result = tableName != null ? tableName.hashCode() : 0;
408    result = 31 * result + (scan != null ? scan.hashCode() : 0);
409    result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
410    result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
411    result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
412    result = 31 * result + (encodedRegionName != null ? encodedRegionName.hashCode() : 0);
413    return result;
414  }
415}