/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A table split corresponds to a key range (low, high) and an optional scanner. All references to
 * row below refer to the key of the row.
 */
@InterfaceAudience.Public
public class TableSplit extends InputSplit implements Writable, Comparable<TableSplit> {
  /** @deprecated The LOG variable will be made private. Fix in HBase 3.0. */
  @Deprecated
  public static final Logger LOG = LoggerFactory.getLogger(TableSplit.class);

  // should be < 0 (@see #readFields(DataInput))
  // version 1 supports Scan data member
  enum Version {
    UNVERSIONED(0),
    // Initial number we put on TableSplit when we introduced versioning.
    INITIAL(-1),
    // Added an encoded region name field for easier identification of split -> region
    WITH_ENCODED_REGION_NAME(-2);

    final int code;
    static final Version[] byCode;
    static {
      byCode = Version.values();
      for (int i = 0; i < byCode.length; i++) {
        if (byCode[i].code != -1 * i) {
          throw new AssertionError("Values in this enum should be descending by one");
        }
      }
    }

    Version(int code) {
      this.code = code;
    }

    boolean atLeast(Version other) {
      return code <= other.code;
    }

    static Version fromCode(int code) {
      return byCode[code * -1];
    }
  }
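  /*
   * The version codes above are non-positive and descend by one, so fromCode() can index byCode
   * with (code * -1), and atLeast() compares with <= because a newer version has a more negative
   * code. A minimal sketch of the expected relationships, derived only from the enum as declared
   * above:
   *
   *   Version.fromCode(0)  == Version.UNVERSIONED
   *   Version.fromCode(-1) == Version.INITIAL
   *   Version.fromCode(-2) == Version.WITH_ENCODED_REGION_NAME
   *   Version.WITH_ENCODED_REGION_NAME.atLeast(Version.INITIAL)  -> true  (-2 <= -1)
   *   Version.UNVERSIONED.atLeast(Version.INITIAL)               -> false ( 0 >  -1)
   */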
  private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
  private TableName tableName;
  private byte[] startRow;
  private byte[] endRow;
  private String regionLocation;
  private String encodedRegionName = "";

  /**
   * The scan object may be null, but the serialized form of the scan is never null or empty,
   * since in that case we serialize a scan object with default values. Having no scanner in a
   * TableSplit does not necessarily mean there is no scanner for the mapreduce job; it just means
   * that we do not need to set it for each split. For example, it is not required to have a scan
   * object for {@link org.apache.hadoop.hbase.mapred.TableInputFormatBase} since we use the scan
   * from the job conf and the scan is supposed to be the same for all the splits of the table.
   */
  private String scan = ""; // stores the serialized form of the Scan
  private long length; // Contains estimation of region size in bytes

  /** Default constructor. */
  public TableSplit() {
    this((TableName) null, null, HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY, "");
  }

  /**
   * Creates a new instance while assigning all variables. Length of region is set to 0. Encoded
   * name of the region is set to blank.
   * @param tableName The name of the current table.
   * @param scan The scan associated with this split.
   * @param startRow The start row of the split.
   * @param endRow The end row of the split.
   * @param location The location of the region.
   */
  public TableSplit(TableName tableName, Scan scan, byte[] startRow, byte[] endRow,
    final String location) {
    this(tableName, scan, startRow, endRow, location, 0L);
  }

  /**
   * Creates a new instance while assigning all variables. Encoded name of the region is set to
   * blank.
   * @param tableName The name of the current table.
   * @param scan The scan associated with this split.
   * @param startRow The start row of the split.
   * @param endRow The end row of the split.
   * @param location The location of the region.
   * @param length Size of region in bytes.
   */
  public TableSplit(TableName tableName, Scan scan, byte[] startRow, byte[] endRow,
    final String location, long length) {
    this(tableName, scan, startRow, endRow, location, "", length);
  }

  /**
   * Creates a new instance while assigning all variables.
   * @param tableName The name of the current table.
   * @param scan The scan associated with this split.
   * @param startRow The start row of the split.
   * @param endRow The end row of the split.
   * @param location The location of the region.
   * @param encodedRegionName The region ID.
   * @param length Size of region in bytes.
   */
  public TableSplit(TableName tableName, Scan scan, byte[] startRow, byte[] endRow,
    final String location, final String encodedRegionName, long length) {
    this.tableName = tableName;
    try {
      this.scan = (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
    } catch (IOException e) {
      LOG.warn("Failed to convert Scan to String", e);
    }
    this.startRow = startRow;
    this.endRow = endRow;
    this.regionLocation = location;
    this.encodedRegionName = encodedRegionName;
    this.length = length;
  }

  /**
   * Creates a new instance without a scanner. Length of region is set to 0.
   * @param tableName The name of the current table.
   * @param startRow The start row of the split.
   * @param endRow The end row of the split.
   * @param location The location of the region.
   */
  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow, final String location) {
    this(tableName, null, startRow, endRow, location);
  }

  /**
   * Creates a new instance without a scanner.
   * @param tableName The name of the current table.
   * @param startRow The start row of the split.
   * @param endRow The end row of the split.
   * @param location The location of the region.
   * @param length Size of region in bytes.
   */
  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow, final String location,
    long length) {
    this(tableName, null, startRow, endRow, location, length);
  }

  /**
   * Returns a Scan object from the stored string representation.
   * @return Returns a Scan object based on the stored scanner.
   * @throws IOException if deserialization of the stored scan fails
   */
  public Scan getScan() throws IOException {
    return TableMapReduceUtil.convertStringToScan(this.scan);
  }
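  /*
   * A minimal usage sketch; the table name, rows, host name, and length below are made-up
   * example values, and error handling is omitted:
   *
   *   Scan scan = new Scan().withStartRow(Bytes.toBytes("row-a")).withStopRow(Bytes.toBytes("row-z"));
   *   TableSplit split = new TableSplit(TableName.valueOf("exampleTable"), scan,
   *     Bytes.toBytes("row-a"), Bytes.toBytes("row-z"), "host1.example.com", 1024L);
   *   Scan restored = split.getScan(); // rebuilt from the serialized string form
   */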
  /**
   * Returns the scan as a string.
   * @return scan as string. Note that this is not the same as getScan().toString(), because the
   *         Scan object will have the default values when an empty scan string is deserialized.
   *         Thus, getScan().toString() can never be empty.
   */
  @InterfaceAudience.Private
  public String getScanAsString() {
    return this.scan;
  }

  /**
   * Returns the table name converted to a byte array.
   * @see #getTable()
   * @return The table name.
   */
  public byte[] getTableName() {
    return tableName.getName();
  }

  /**
   * Returns the table name.
   * @return The table name.
   */
  public TableName getTable() {
    // Usually a method returning a TableName would be called getTableName, but we can't do that
    // here because that name is already taken by the pre-existing byte[]-returning method above.
    return tableName;
  }

  /**
   * Returns the start row.
   * @return The start row.
   */
  public byte[] getStartRow() {
    return startRow;
  }

  /**
   * Returns the end row.
   * @return The end row.
   */
  public byte[] getEndRow() {
    return endRow;
  }

  /**
   * Returns the region location.
   * @return The region's location.
   */
  public String getRegionLocation() {
    return regionLocation;
  }

  /**
   * Returns the region's location as an array.
   * @return The array containing the region location.
   * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
   */
  @Override
  public String[] getLocations() {
    return new String[] { regionLocation };
  }

  /**
   * Returns the region's encoded name.
   * @return The region's encoded name.
   */
  public String getEncodedRegionName() {
    return encodedRegionName;
  }

  /**
   * Returns the length of the split.
   * @return The length of the split.
   * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
   */
  @Override
  public long getLength() {
    return length;
  }

  /**
   * Reads the values of each field.
   * @param in The input to read from.
   * @throws IOException When reading the input fails.
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    Version version = Version.UNVERSIONED;
    // TableSplit was not versioned in the beginning.
    // In order to introduce it now, we make use of the fact
    // that tableName was written with Bytes.writeByteArray,
    // which encodes the array length as a vint which is >= 0.
    // Hence if the vint is >= 0 we have an old version and the vint
    // encodes the length of tableName.
    // If < 0 we just read the version and the next vint is the length.
    // @see Bytes#readByteArray(DataInput)
    int len = WritableUtils.readVInt(in);
    if (len < 0) {
      // what we just read was the version
      version = Version.fromCode(len);
      len = WritableUtils.readVInt(in);
    }
    byte[] tableNameBytes = new byte[len];
    in.readFully(tableNameBytes);
    tableName = TableName.valueOf(tableNameBytes);
    startRow = Bytes.readByteArray(in);
    endRow = Bytes.readByteArray(in);
    regionLocation = Bytes.toString(Bytes.readByteArray(in));
    if (version.atLeast(Version.INITIAL)) {
      scan = Bytes.toString(Bytes.readByteArray(in));
    }
    length = WritableUtils.readVLong(in);
    if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
      encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
    }
  }
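  /*
   * Wire format produced by write(DataOutput) below and consumed by readFields(DataInput) above,
   * for the current VERSION (WITH_ENCODED_REGION_NAME): a negative vint version code, then the
   * table name, start row, end row, region location, and serialized scan string as
   * vint-length-prefixed byte arrays, a vlong region size estimate, and finally the encoded
   * region name as another length-prefixed byte array. Pre-versioned splits start directly with
   * the table name, which is why a leading vint >= 0 is treated as the table name length.
   *
   * A minimal round-trip sketch, assuming an existing TableSplit named split; DataOutputBuffer
   * and DataInputBuffer (from org.apache.hadoop.io) are used purely for illustration and error
   * handling is omitted:
   *
   *   DataOutputBuffer out = new DataOutputBuffer();
   *   split.write(out);
   *   DataInputBuffer in = new DataInputBuffer();
   *   in.reset(out.getData(), out.getLength());
   *   TableSplit copy = new TableSplit();
   *   copy.readFields(in);
   */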
  /**
   * Writes the field values to the output.
   * @param out The output to write to.
   * @throws IOException When writing the values to the output fails.
   */
  @Override
  public void write(DataOutput out) throws IOException {
    WritableUtils.writeVInt(out, VERSION.code);
    Bytes.writeByteArray(out, tableName.getName());
    Bytes.writeByteArray(out, startRow);
    Bytes.writeByteArray(out, endRow);
    Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
    Bytes.writeByteArray(out, Bytes.toBytes(scan));
    WritableUtils.writeVLong(out, length);
    Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
  }

  /**
   * Returns the details about this instance as a string.
   * @return The values of this instance as a string.
   * @see java.lang.Object#toString()
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("Split(");
    sb.append("tablename=").append(tableName);
    // null scan input is represented by ""
    String printScan = "";
    if (!scan.equals("")) {
      try {
        // get the real scan here in toString, not the Base64 string
        printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
      } catch (IOException e) {
        printScan = "";
      }
      sb.append(", scan=").append(printScan);
    }
    sb.append(", startrow=").append(Bytes.toStringBinary(startRow));
    sb.append(", endrow=").append(Bytes.toStringBinary(endRow));
    sb.append(", regionLocation=").append(regionLocation);
    sb.append(", regionname=").append(encodedRegionName);
    sb.append(")");
    return sb.toString();
  }

  /**
   * Compares this split against the given one.
   * @param split The split to compare to.
   * @return The result of the comparison.
   * @see java.lang.Comparable#compareTo(java.lang.Object)
   */
  @Override
  public int compareTo(TableSplit split) {
    // If the table names of the two splits are the same, compare the start rows;
    // otherwise compare based on the table names.
    int tableNameComparison = getTable().compareTo(split.getTable());
    return tableNameComparison != 0
      ? tableNameComparison
      : Bytes.compareTo(getStartRow(), split.getStartRow());
  }
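  /*
   * Ordering and equality semantics: compareTo() above orders splits by table name and then by
   * start row, which is what library sorts will use. A minimal sketch, assuming a
   * List<TableSplit> named splits obtained elsewhere:
   *
   *   java.util.Collections.sort(splits);  // by table name, then start row
   *   splits.get(0).getStartRow();         // lowest start row within the first table
   *
   * Note that equals() below compares table name, start/end row, and region location, but not
   * the scan, length, or encoded region name, so two splits that compare as 0 via compareTo()
   * are not necessarily equals().
   */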
  @Override
  public boolean equals(Object o) {
    if (o == null || !(o instanceof TableSplit)) {
      return false;
    }
    return tableName.equals(((TableSplit) o).tableName)
      && Bytes.equals(startRow, ((TableSplit) o).startRow)
      && Bytes.equals(endRow, ((TableSplit) o).endRow)
      && regionLocation.equals(((TableSplit) o).regionLocation);
  }

  @Override
  public int hashCode() {
    int result = tableName != null ? tableName.hashCode() : 0;
    result = 31 * result + (scan != null ? scan.hashCode() : 0);
    result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
    result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
    result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
    result = 31 * result + (encodedRegionName != null ? encodedRegionName.hashCode() : 0);
    return result;
  }
}