Source code

001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.mapreduce;
020
021import java.io.DataInput;
022import java.io.DataOutput;
023import java.io.IOException;
024import java.util.Arrays;
025
026import org.apache.yetus.audience.InterfaceAudience;
027import org.slf4j.Logger;
028import org.slf4j.LoggerFactory;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.HConstants;
031import org.apache.hadoop.hbase.client.Scan;
032import org.apache.hadoop.hbase.util.Bytes;
033import org.apache.hadoop.io.Writable;
034import org.apache.hadoop.io.WritableUtils;
035import org.apache.hadoop.mapreduce.InputSplit;
036
037/**
038 * A table split corresponds to a key range (low, high) and an optional scanner.
039 * All references to row below refer to the key of the row.
040 */
041@InterfaceAudience.Public
042public class TableSplit extends InputSplit
043implements Writable, Comparable<TableSplit> {
044  /** @deprecated LOG variable would be made private. fix in hbase 3.0 */
045  @Deprecated
046  public static final Logger LOG = LoggerFactory.getLogger(TableSplit.class);
047
048  // should be < 0 (@see #readFields(DataInput))
049  // version 1 supports Scan data member
050  enum Version {
051    UNVERSIONED(0),
052    // Initial number we put on TableSplit when we introduced versioning.
053    INITIAL(-1),
054    // Added an encoded region name field for easier identification of split -> region
055    WITH_ENCODED_REGION_NAME(-2);
056
057    final int code;
058    static final Version[] byCode;
059    static {
060      byCode = Version.values();
061      for (int i = 0; i < byCode.length; i++) {
062        if (byCode[i].code != -1 * i) {
063          throw new AssertionError("Values in this enum should be descending by one");
064        }
065      }
066    }
067
068    Version(int code) {
069      this.code = code;
070    }
071
072    boolean atLeast(Version other) {
073      return code <= other.code;
074    }
075
076    static Version fromCode(int code) {
077      return byCode[code * -1];
078    }
079  }
080
081  private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
082  private TableName tableName;
083  private byte [] startRow;
084  private byte [] endRow;
085  private String regionLocation;
086  private String encodedRegionName = "";
087  private String scan = ""; // stores the serialized form of the Scan
088  private long length; // Contains estimation of region size in bytes
089
090  /** Default constructor. */
091  public TableSplit() {
092    this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
093      HConstants.EMPTY_BYTE_ARRAY, "");
094  }
095
096  /**
097   * Creates a new instance while assigning all variables.
098   * Length of region is set to 0
099   * Encoded name of the region is set to blank
100   *
101   * @param tableName  The name of the current table.
102   * @param scan The scan associated with this split.
103   * @param startRow  The start row of the split.
104   * @param endRow  The end row of the split.
105   * @param location  The location of the region.
106   */
107  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
108                    final String location) {
109    this(tableName, scan, startRow, endRow, location, 0L);
110  }
111
112  /**
113   * Creates a new instance while assigning all variables.
114   * Encoded name of region is set to blank
115   *
116   * @param tableName  The name of the current table.
117   * @param scan The scan associated with this split.
118   * @param startRow  The start row of the split.
119   * @param endRow  The end row of the split.
120   * @param location  The location of the region.
121   */
122  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
123      final String location, long length) {
124    this(tableName, scan, startRow, endRow, location, "", length);
125  }
126
127  /**
128   * Creates a new instance while assigning all variables.
129   *
130   * @param tableName  The name of the current table.
131   * @param scan The scan associated with this split.
132   * @param startRow  The start row of the split.
133   * @param endRow  The end row of the split.
134   * @param encodedRegionName The region ID.
135   * @param location  The location of the region.
136   */
137  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
138      final String location, final String encodedRegionName, long length) {
139    this.tableName = tableName;
140    try {
141      this.scan =
142        (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
143    } catch (IOException e) {
144      LOG.warn("Failed to convert Scan to String", e);
145    }
146    this.startRow = startRow;
147    this.endRow = endRow;
148    this.regionLocation = location;
149    this.encodedRegionName = encodedRegionName;
150    this.length = length;
151  }
152
153  /**
154   * Creates a new instance without a scanner.
155   * Length of region is set to 0
156   *
157   * @param tableName The name of the current table.
158   * @param startRow The start row of the split.
159   * @param endRow The end row of the split.
160   * @param location The location of the region.
161   */
162  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
163      final String location) {
164    this(tableName, null, startRow, endRow, location);
165  }
166
167  /**
168   * Creates a new instance without a scanner.
169   *
170   * @param tableName The name of the current table.
171   * @param startRow The start row of the split.
172   * @param endRow The end row of the split.
173   * @param location The location of the region.
174   * @param length Size of region in bytes
175   */
176  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
177                    final String location, long length) {
178    this(tableName, null, startRow, endRow, location, length);
179  }
180
181  /**
182   * Returns a Scan object from the stored string representation.
183   *
184   * @return Returns a Scan object based on the stored scanner.
185   * @throws IOException
186   */
187  public Scan getScan() throws IOException {
188    return TableMapReduceUtil.convertStringToScan(this.scan);
189  }
190
191  /**
192   * Returns the table name converted to a byte array.
193   * @see #getTable()
194   * @return The table name.
195   */
196  public byte [] getTableName() {
197    return tableName.getName();
198  }
199
200  /**
201   * Returns the table name.
202   *
203   * @return The table name.
204   */
205  public TableName getTable() {
206    // It is ugly that usually to get a TableName, the method is called getTableName.  We can't do
207    // that in here though because there was an existing getTableName in place already since
208    // deprecated.
209    return tableName;
210  }
211
212  /**
213   * Returns the start row.
214   *
215   * @return The start row.
216   */
217  public byte [] getStartRow() {
218    return startRow;
219  }
220
221  /**
222   * Returns the end row.
223   *
224   * @return The end row.
225   */
226  public byte [] getEndRow() {
227    return endRow;
228  }
229
230  /**
231   * Returns the region location.
232   *
233   * @return The region's location.
234   */
235  public String getRegionLocation() {
236    return regionLocation;
237  }
238
239  /**
240   * Returns the region's location as an array.
241   *
242   * @return The array containing the region location.
243   * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
244   */
245  @Override
246  public String[] getLocations() {
247    return new String[] {regionLocation};
248  }
249
250  /**
251   * Returns the region's encoded name.
252   *
253   * @return The region's encoded name.
254   */
255  public String getEncodedRegionName() {
256    return encodedRegionName;
257  }
258
259  /**
260   * Returns the length of the split.
261   *
262   * @return The length of the split.
263   * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
264   */
265  @Override
266  public long getLength() {
267    return length;
268  }
269
270  /**
271   * Reads the values of each field.
272   *
273   * @param in  The input to read from.
274   * @throws IOException When reading the input fails.
275   */
276  @Override
277  public void readFields(DataInput in) throws IOException {
278    Version version = Version.UNVERSIONED;
279    // TableSplit was not versioned in the beginning.
280    // In order to introduce it now, we make use of the fact
281    // that tableName was written with Bytes.writeByteArray,
282    // which encodes the array length as a vint which is >= 0.
283    // Hence if the vint is >= 0 we have an old version and the vint
284    // encodes the length of tableName.
285    // If < 0 we just read the version and the next vint is the length.
286    // @see Bytes#readByteArray(DataInput)
287    int len = WritableUtils.readVInt(in);
288    if (len < 0) {
289      // what we just read was the version
290      version = Version.fromCode(len);
291      len = WritableUtils.readVInt(in);
292    }
293    byte[] tableNameBytes = new byte[len];
294    in.readFully(tableNameBytes);
295    tableName = TableName.valueOf(tableNameBytes);
296    startRow = Bytes.readByteArray(in);
297    endRow = Bytes.readByteArray(in);
298    regionLocation = Bytes.toString(Bytes.readByteArray(in));
299    if (version.atLeast(Version.INITIAL)) {
300      scan = Bytes.toString(Bytes.readByteArray(in));
301    }
302    length = WritableUtils.readVLong(in);
303    if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
304      encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
305    }
306  }
307
308  /**
309   * Writes the field values to the output.
310   *
311   * @param out  The output to write to.
312   * @throws IOException When writing the values to the output fails.
313   */
314  @Override
315  public void write(DataOutput out) throws IOException {
316    WritableUtils.writeVInt(out, VERSION.code);
317    Bytes.writeByteArray(out, tableName.getName());
318    Bytes.writeByteArray(out, startRow);
319    Bytes.writeByteArray(out, endRow);
320    Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
321    Bytes.writeByteArray(out, Bytes.toBytes(scan));
322    WritableUtils.writeVLong(out, length);
323    Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
324  }
325
326  /**
327   * Returns the details about this instance as a string.
328   *
329   * @return The values of this instance as a string.
330   * @see java.lang.Object#toString()
331   */
332  @Override
333  public String toString() {
334    StringBuilder sb = new StringBuilder();
335    sb.append("HBase table split(");
336    sb.append("table name: ").append(tableName);
337    // null scan input is represented by ""
338    String printScan = "";
339    if (!scan.equals("")) {
340      try {
341        // get the real scan here in toString, not the Base64 string
342        printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
343      }
344      catch (IOException e) {
345        printScan = "";
346      }
347    }
348    sb.append(", scan: ").append(printScan);
349    sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
350    sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
351    sb.append(", region location: ").append(regionLocation);
352    sb.append(", encoded region name: ").append(encodedRegionName);
353    sb.append(")");
354    return sb.toString();
355  }
356
357  /**
358   * Compares this split against the given one.
359   *
360   * @param split  The split to compare to.
361   * @return The result of the comparison.
362   * @see java.lang.Comparable#compareTo(java.lang.Object)
363   */
364  @Override
365  public int compareTo(TableSplit split) {
366    // If The table name of the two splits is the same then compare start row
367    // otherwise compare based on table names
368    int tableNameComparison =
369        getTable().compareTo(split.getTable());
370    return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
371        getStartRow(), split.getStartRow());
372  }
373
374  @Override
375  public boolean equals(Object o) {
376    if (o == null || !(o instanceof TableSplit)) {
377      return false;
378    }
379    return tableName.equals(((TableSplit)o).tableName) &&
380      Bytes.equals(startRow, ((TableSplit)o).startRow) &&
381      Bytes.equals(endRow, ((TableSplit)o).endRow) &&
382      regionLocation.equals(((TableSplit)o).regionLocation);
383  }
384
385  @Override
386  public int hashCode() {
387    int result = tableName != null ? tableName.hashCode() : 0;
388    result = 31 * result + (scan != null ? scan.hashCode() : 0);
389    result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
390    result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
391    result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
392    result = 31 * result + (encodedRegionName != null ? encodedRegionName.hashCode() : 0);
393    return result;
394  }
395}