/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
import java.util.Objects;

import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.InputSplit;

/**
 * A table split corresponds to a key range (low, high) and an optional scanner.
 * All references to row below refer to the key of the row.
 * <p>
 * Note on identity: {@link #equals(Object)} and {@link #hashCode()} consider the table name,
 * start row, end row and region location only. {@link #compareTo(TableSplit)} orders by table
 * name and then start row, so it is not consistent with {@code equals}.
 */
@InterfaceAudience.Public
public class TableSplit extends InputSplit
  implements Writable, Comparable<TableSplit> {
  /** @deprecated LOG variable would be made private. fix in hbase 3.0 */
  @Deprecated
  public static final Logger LOG = LoggerFactory.getLogger(TableSplit.class);

  // should be < 0 (@see #readFields(DataInput))
  // version 1 supports Scan data member
  enum Version {
    UNVERSIONED(0),
    // Initial number we put on TableSplit when we introduced versioning.
    INITIAL(-1),
    // Added an encoded region name field for easier identification of split -> region
    WITH_ENCODED_REGION_NAME(-2);

    final int code;
    static final Version[] byCode;
    static {
      // fromCode() uses the negated code as an index into this array, so the declaration
      // order must be 0, -1, -2, ... Fail fast at class-load time if that is ever violated.
      byCode = Version.values();
      for (int i = 0; i < byCode.length; i++) {
        if (byCode[i].code != -1 * i) {
          throw new AssertionError("Values in this enum should be descending by one");
        }
      }
    }

    Version(int code) {
      this.code = code;
    }

    /**
     * Codes descend as versions get newer, so "at least" means "code is less than or equal".
     *
     * @param other The version to compare against.
     * @return {@code true} if this version is {@code other} or a later one.
     */
    boolean atLeast(Version other) {
      return code <= other.code;
    }

    /**
     * Maps a serialized version code back to its enum constant.
     *
     * @param code The version code as read from the stream (0, -1, -2, ...).
     * @return The matching version.
     * @throws IllegalArgumentException If no version is declared for {@code code}.
     */
    static Version fromCode(int code) {
      // Negate in long arithmetic so that Integer.MIN_VALUE cannot overflow back to a
      // negative index.
      long index = -(long) code;
      if (index < 0 || index >= byCode.length) {
        throw new IllegalArgumentException("Unknown TableSplit version code: " + code);
      }
      return byCode[(int) index];
    }
  }

  private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
  private TableName tableName;
  private byte [] startRow;
  private byte [] endRow;
  private String regionLocation;
  private String encodedRegionName = "";
  private String scan = ""; // stores the serialized form of the Scan
  private long length; // Contains estimation of region size in bytes

  /** Default constructor. */
  public TableSplit() {
    this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
      HConstants.EMPTY_BYTE_ARRAY, "");
  }

  /**
   * Creates a new instance while assigning all variables.
   * Length of region is set to 0
   * Encoded name of the region is set to blank
   *
   * @param tableName  The name of the current table.
   * @param scan The scan associated with this split.
   * @param startRow  The start row of the split.
   * @param endRow  The end row of the split.
   * @param location  The location of the region.
   */
  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
      final String location) {
    this(tableName, scan, startRow, endRow, location, 0L);
  }

  /**
   * Creates a new instance while assigning all variables.
   * Encoded name of region is set to blank
   *
   * @param tableName  The name of the current table.
   * @param scan The scan associated with this split.
   * @param startRow  The start row of the split.
   * @param endRow  The end row of the split.
   * @param location  The location of the region.
   * @param length  Size of region in bytes
   */
  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
      final String location, long length) {
    this(tableName, scan, startRow, endRow, location, "", length);
  }

  /**
   * Creates a new instance while assigning all variables.
   * <p>
   * If the scan cannot be converted to its string form, a warning is logged and the split
   * keeps an empty scan string instead of failing.
   *
   * @param tableName  The name of the current table.
   * @param scan The scan associated with this split.
   * @param startRow  The start row of the split.
   * @param endRow  The end row of the split.
   * @param location  The location of the region.
   * @param encodedRegionName The region ID.
   * @param length  Size of region in bytes
   */
  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
      final String location, final String encodedRegionName, long length) {
    this.tableName = tableName;
    try {
      this.scan =
        (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
    } catch (IOException e) {
      // Best effort: the field initializer already left this.scan as "".
      LOG.warn("Failed to convert Scan to String", e);
    }
    this.startRow = startRow;
    this.endRow = endRow;
    this.regionLocation = location;
    this.encodedRegionName = encodedRegionName;
    this.length = length;
  }

  /**
   * Creates a new instance without a scanner.
   * Length of region is set to 0
   *
   * @param tableName The name of the current table.
   * @param startRow The start row of the split.
   * @param endRow The end row of the split.
   * @param location The location of the region.
   */
  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
      final String location) {
    this(tableName, null, startRow, endRow, location);
  }

  /**
   * Creates a new instance without a scanner.
   *
   * @param tableName The name of the current table.
   * @param startRow The start row of the split.
   * @param endRow The end row of the split.
   * @param location The location of the region.
   * @param length Size of region in bytes
   */
  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
      final String location, long length) {
    this(tableName, null, startRow, endRow, location, length);
  }

  /**
   * Returns a Scan object from the stored string representation.
   *
   * @return Returns a Scan object based on the stored scanner.
   * @throws IOException When the stored string cannot be converted back into a Scan.
   */
  public Scan getScan() throws IOException {
    return TableMapReduceUtil.convertStringToScan(this.scan);
  }

  /**
   * Returns the table name converted to a byte array.
   * @see #getTable()
   * @return The table name.
   */
  public byte [] getTableName() {
    return tableName.getName();
  }

  /**
   * Returns the table name.
   *
   * @return The table name.
   */
  public TableName getTable() {
    // It is ugly that usually to get a TableName, the method is called getTableName.  We can't do
    // that in here though because there was an existing getTableName in place already since
    // deprecated.
    return tableName;
  }

  /**
   * Returns the start row.
   *
   * @return The start row.
   */
  public byte [] getStartRow() {
    return startRow;
  }

  /**
   * Returns the end row.
   *
   * @return The end row.
   */
  public byte [] getEndRow() {
    return endRow;
  }

  /**
   * Returns the region location.
   *
   * @return The region's location.
   */
  public String getRegionLocation() {
    return regionLocation;
  }

  /**
   * Returns the region's location as an array.
   *
   * @return The array containing the region location.
   * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
   */
  @Override
  public String[] getLocations() {
    return new String[] {regionLocation};
  }

  /**
   * Returns the region's encoded name.
   *
   * @return The region's encoded name.
   */
  public String getEncodedRegionName() {
    return encodedRegionName;
  }

  /**
   * Returns the length of the split.
   *
   * @return The length of the split.
   * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
   */
  @Override
  public long getLength() {
    return length;
  }

  /**
   * Reads the values of each field.
   *
   * @param in The input to read from.
   * @throws IOException When reading the input fails, or when the stream carries a version
   *           code this class does not know.
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    Version version = Version.UNVERSIONED;
    // TableSplit was not versioned in the beginning.
    // In order to introduce it now, we make use of the fact
    // that tableName was written with Bytes.writeByteArray,
    // which encodes the array length as a vint which is >= 0.
    // Hence if the vint is >= 0 we have an old version and the vint
    // encodes the length of tableName.
    // If < 0 we just read the version and the next vint is the length.
    // @see Bytes#readByteArray(DataInput)
    int len = WritableUtils.readVInt(in);
    if (len < 0) {
      // what we just read was the version
      try {
        version = Version.fromCode(len);
      } catch (IllegalArgumentException e) {
        // Surface an unknown/corrupt version as a read failure rather than a raw
        // runtime exception.
        throw new IOException("Cannot read TableSplit: unsupported version code " + len, e);
      }
      len = WritableUtils.readVInt(in);
    }
    byte[] tableNameBytes = new byte[len];
    in.readFully(tableNameBytes);
    tableName = TableName.valueOf(tableNameBytes);
    startRow = Bytes.readByteArray(in);
    endRow = Bytes.readByteArray(in);
    regionLocation = Bytes.toString(Bytes.readByteArray(in));
    if (version.atLeast(Version.INITIAL)) {
      scan = Bytes.toString(Bytes.readByteArray(in));
    }
    length = WritableUtils.readVLong(in);
    if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
      encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
    }
  }

  /**
   * Writes the field values to the output.
   * The field order here must stay in step with {@link #readFields(DataInput)}.
   *
   * @param out  The output to write to.
   * @throws IOException When writing the values to the output fails.
   */
  @Override
  public void write(DataOutput out) throws IOException {
    WritableUtils.writeVInt(out, VERSION.code);
    Bytes.writeByteArray(out, tableName.getName());
    Bytes.writeByteArray(out, startRow);
    Bytes.writeByteArray(out, endRow);
    Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
    Bytes.writeByteArray(out, Bytes.toBytes(scan));
    WritableUtils.writeVLong(out, length);
    Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
  }

  /**
   * Returns the details about this instance as a string.
   *
   * @return The values of this instance as a string.
   * @see java.lang.Object#toString()
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("HBase table split(");
    sb.append("table name: ").append(tableName);
    // null scan input is represented by ""
    String printScan = "";
    if (!scan.equals("")) {
      try {
        // get the real scan here in toString, not the Base64 string
        printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
      } catch (IOException ignored) {
        // toString() must not throw; print an empty scan if it cannot be decoded.
        printScan = "";
      }
    }
    sb.append(", scan: ").append(printScan);
    sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
    sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
    sb.append(", region location: ").append(regionLocation);
    sb.append(", encoded region name: ").append(encodedRegionName);
    sb.append(")");
    return sb.toString();
  }

  /**
   * Compares this split against the given one.
   *
   * @param split The split to compare to.
   * @return The result of the comparison.
   * @see java.lang.Comparable#compareTo(java.lang.Object)
   */
  @Override
  public int compareTo(TableSplit split) {
    // If The table name of the two splits is the same then compare start row
    // otherwise compare based on table names
    int tableNameComparison =
      getTable().compareTo(split.getTable());
    return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
      getStartRow(), split.getStartRow());
  }

  /**
   * Two splits are equal when they have the same table name, start row, end row and region
   * location. The scan, the encoded region name and the length are not part of the identity.
   *
   * @param o The object to compare to.
   * @return {@code true} if {@code o} is a TableSplit equal to this one.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof TableSplit)) {
      return false;
    }
    TableSplit other = (TableSplit) o;
    // Objects.equals keeps this safe for splits built by the no-arg constructor, whose
    // tableName is null.
    return Objects.equals(tableName, other.tableName) &&
      Bytes.equals(startRow, other.startRow) &&
      Bytes.equals(endRow, other.endRow) &&
      Objects.equals(regionLocation, other.regionLocation);
  }

  /**
   * Hashes exactly the fields that {@link #equals(Object)} compares, so that equal splits
   * always produce equal hash codes.
   *
   * @return The hash code of this split.
   */
  @Override
  public int hashCode() {
    int result = tableName != null ? tableName.hashCode() : 0;
    // Arrays.hashCode returns 0 for a null array.
    result = 31 * result + Arrays.hashCode(startRow);
    result = 31 * result + Arrays.hashCode(endRow);
    result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
    return result;
  }
}