001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import edu.umd.cs.findbugs.annotations.NonNull; 021import edu.umd.cs.findbugs.annotations.Nullable; 022import java.io.ByteArrayOutputStream; 023import java.io.Closeable; 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.Collection; 028import java.util.Collections; 029import java.util.Iterator; 030import java.util.LinkedHashMap; 031import java.util.List; 032import java.util.Map; 033import java.util.NavigableMap; 034import java.util.SortedMap; 035import java.util.TreeMap; 036import java.util.concurrent.ThreadLocalRandom; 037import java.util.regex.Matcher; 038import java.util.regex.Pattern; 039import java.util.stream.Collectors; 040import org.apache.hadoop.conf.Configuration; 041import org.apache.hadoop.hbase.client.Connection; 042import org.apache.hadoop.hbase.client.Consistency; 043import org.apache.hadoop.hbase.client.Delete; 044import org.apache.hadoop.hbase.client.Get; 045import org.apache.hadoop.hbase.client.Mutation; 046import org.apache.hadoop.hbase.client.Put; 047import org.apache.hadoop.hbase.client.RegionInfo; 048import org.apache.hadoop.hbase.client.RegionInfoBuilder; 049import org.apache.hadoop.hbase.client.RegionReplicaUtil; 050import org.apache.hadoop.hbase.client.Result; 051import org.apache.hadoop.hbase.client.ResultScanner; 052import org.apache.hadoop.hbase.client.Scan; 053import org.apache.hadoop.hbase.client.Table; 054import org.apache.hadoop.hbase.client.TableState; 055import org.apache.hadoop.hbase.client.coprocessor.Batch; 056import org.apache.hadoop.hbase.exceptions.DeserializationException; 057import org.apache.hadoop.hbase.filter.Filter; 058import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; 059import org.apache.hadoop.hbase.filter.RowFilter; 060import org.apache.hadoop.hbase.filter.SubstringComparator; 061import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils; 062import org.apache.hadoop.hbase.ipc.ServerRpcController; 063import org.apache.hadoop.hbase.master.RegionState; 064import org.apache.hadoop.hbase.master.RegionState.State; 065import org.apache.hadoop.hbase.protobuf.ProtobufUtil; 066import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; 067import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MultiRowMutationService; 068import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest; 069import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse; 070import org.apache.hadoop.hbase.util.Bytes; 071import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 072import org.apache.hadoop.hbase.util.ExceptionUtil; 073import org.apache.hadoop.hbase.util.Pair; 074import org.apache.hadoop.hbase.util.PairOfSameType; 075import org.apache.yetus.audience.InterfaceAudience; 076import org.slf4j.Logger; 077import org.slf4j.LoggerFactory; 078 079import org.apache.hbase.thirdparty.com.google.common.base.Throwables; 080 081/** 082 * <p> 083 * Read/write operations on <code>hbase:meta</code> region as well as assignment information stored 084 * to <code>hbase:meta</code>. 085 * </p> 086 * <p> 087 * Some of the methods of this class take ZooKeeperWatcher as a param. The only reason for this is 088 * when this class is used on client-side (e.g. HBaseAdmin), we want to use short-lived connection 089 * (opened before each operation, closed right after), while when used on HM or HRS (like in 090 * AssignmentManager) we want permanent connection. 091 * </p> 092 * <p> 093 * HBASE-10070 adds a replicaId to HRI, meaning more than one HRI can be defined for the same table 094 * range (table, startKey, endKey). For every range, there will be at least one HRI defined which is 095 * called default replica. 096 * </p> 097 * <p> 098 * <h2>Meta layout</h2> 099 * 100 * <pre> 101 * For each table there is single row named for the table with a 'table' column family. 102 * The column family currently has one column in it, the 'state' column: 103 * 104 * table:state => contains table state 105 * 106 * Then for each table range ('Region'), there is a single row, formatted as: 107 * <tableName>,<startKey>,<regionId>,<encodedRegionName>. 108 * This row is the serialized regionName of the default region replica. 109 * Columns are: 110 * info:regioninfo => contains serialized HRI for the default region replica 111 * info:server => contains hostname:port (in string form) for the server hosting 112 * the default regionInfo replica 113 * info:server_<replicaId> => contains hostname:port (in string form) for the server hosting 114 * the regionInfo replica with replicaId 115 * info:serverstartcode => contains server start code (in binary long form) for the server 116 * hosting the default regionInfo replica 117 * info:serverstartcode_<replicaId> => contains server start code (in binary long form) for 118 * the server hosting the regionInfo replica with 119 * replicaId 120 * info:seqnumDuringOpen => contains seqNum (in binary long form) for the region at the time 121 * the server opened the region with default replicaId 122 * info:seqnumDuringOpen_<replicaId> => contains seqNum (in binary long form) for the region 123 * at the time the server opened the region with 124 * replicaId 125 * info:splitA => contains a serialized HRI for the first daughter region if the 126 * region is split 127 * info:splitB => contains a serialized HRI for the second daughter region if the 128 * region is split 129 * info:merge* => contains a serialized HRI for a merge parent region. There will be two 130 * or more of these columns in a row. A row that has these columns is 131 * undergoing a merge and is the result of the merge. Columns listed 132 * in marge* columns are the parents of this merged region. Example 133 * columns: info:merge0001, info:merge0002. You make also see 'mergeA', 134 * and 'mergeB'. This is old form replaced by the new format that allows 135 * for more than two parents to be merged at a time. 136 * TODO: Add rep_barrier for serial replication explaination. See SerialReplicationChecker. 137 * </pre> 138 * </p> 139 * <p> 140 * The actual layout of meta should be encapsulated inside MetaTableAccessor methods, and should not 141 * leak out of it (through Result objects, etc) 142 * </p> 143 */ 144@InterfaceAudience.Private 145public class MetaTableAccessor { 146 147 private static final Logger LOG = LoggerFactory.getLogger(MetaTableAccessor.class); 148 private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META"); 149 150 public static final byte[] REPLICATION_PARENT_QUALIFIER = Bytes.toBytes("parent"); 151 152 private static final byte ESCAPE_BYTE = (byte) 0xFF; 153 154 private static final byte SEPARATED_BYTE = 0x00; 155 156 @InterfaceAudience.Private 157 @SuppressWarnings("ImmutableEnumChecker") 158 public enum QueryType { 159 ALL(HConstants.TABLE_FAMILY, HConstants.CATALOG_FAMILY), 160 REGION(HConstants.CATALOG_FAMILY), 161 TABLE(HConstants.TABLE_FAMILY), 162 REPLICATION(HConstants.REPLICATION_BARRIER_FAMILY); 163 164 private final byte[][] families; 165 166 QueryType(byte[]... families) { 167 this.families = families; 168 } 169 170 byte[][] getFamilies() { 171 return this.families; 172 } 173 } 174 175 /** The delimiter for meta columns for replicaIds > 0 */ 176 static final char META_REPLICA_ID_DELIMITER = '_'; 177 178 /** A regex for parsing server columns from meta. See above javadoc for meta layout */ 179 private static final Pattern SERVER_COLUMN_PATTERN = 180 Pattern.compile("^server(_[0-9a-fA-F]{4})?$"); 181 182 //////////////////////// 183 // Reading operations // 184 //////////////////////// 185 186 /** 187 * Performs a full scan of <code>hbase:meta</code> for regions. 188 * @param connection connection we're using 189 * @param visitor Visitor invoked against each row in regions family. 190 */ 191 public static void fullScanRegions(Connection connection, final Visitor visitor) 192 throws IOException { 193 scanMeta(connection, null, null, QueryType.REGION, visitor); 194 } 195 196 /** 197 * Performs a full scan of <code>hbase:meta</code> for regions. 198 * @param connection connection we're using 199 */ 200 public static List<Result> fullScanRegions(Connection connection) throws IOException { 201 return fullScan(connection, QueryType.REGION); 202 } 203 204 /** 205 * Performs a full scan of <code>hbase:meta</code> for tables. 206 * @param connection connection we're using 207 * @param visitor Visitor invoked against each row in tables family. 208 */ 209 public static void fullScanTables(Connection connection, final Visitor visitor) 210 throws IOException { 211 scanMeta(connection, null, null, QueryType.TABLE, visitor); 212 } 213 214 /** 215 * Performs a full scan of <code>hbase:meta</code>. 216 * @param connection connection we're using 217 * @param type scanned part of meta 218 * @return List of {@link Result} 219 */ 220 private static List<Result> fullScan(Connection connection, QueryType type) throws IOException { 221 CollectAllVisitor v = new CollectAllVisitor(); 222 scanMeta(connection, null, null, type, v); 223 return v.getResults(); 224 } 225 226 /** 227 * Callers should call close on the returned {@link Table} instance. 228 * @param connection connection we're using to access Meta 229 * @return An {@link Table} for <code>hbase:meta</code> 230 */ 231 public static Table getMetaHTable(final Connection connection) throws IOException { 232 // We used to pass whole CatalogTracker in here, now we just pass in Connection 233 if (connection == null) { 234 throw new NullPointerException("No connection"); 235 } else if (connection.isClosed()) { 236 throw new IOException("connection is closed"); 237 } 238 return connection.getTable(TableName.META_TABLE_NAME); 239 } 240 241 /** 242 * @param t Table to use (will be closed when done). 243 * @param g Get to run 244 */ 245 private static Result get(final Table t, final Get g) throws IOException { 246 if (t == null) return null; 247 try { 248 return t.get(g); 249 } finally { 250 t.close(); 251 } 252 } 253 254 /** 255 * Gets the region info and assignment for the specified region. 256 * @param connection connection we're using 257 * @param regionName Region to lookup. 258 * @return Location and RegionInfo for <code>regionName</code> 259 * @deprecated use {@link #getRegionLocation(Connection, byte[])} instead 260 */ 261 @Deprecated 262 public static Pair<RegionInfo, ServerName> getRegion(Connection connection, byte[] regionName) 263 throws IOException { 264 HRegionLocation location = getRegionLocation(connection, regionName); 265 return location == null ? null : new Pair<>(location.getRegionInfo(), location.getServerName()); 266 } 267 268 /** 269 * Returns the HRegionLocation from meta for the given region 270 * @param connection connection we're using 271 * @param regionName region we're looking for 272 * @return HRegionLocation for the given region 273 */ 274 public static HRegionLocation getRegionLocation(Connection connection, byte[] regionName) 275 throws IOException { 276 byte[] row = regionName; 277 RegionInfo parsedInfo = null; 278 try { 279 parsedInfo = parseRegionInfoFromRegionName(regionName); 280 row = getMetaKeyForRegion(parsedInfo); 281 } catch (Exception parseEx) { 282 // If it is not a valid regionName(i.e, tableName), it needs to return null here 283 // as querying meta table wont help. 284 return null; 285 } 286 Get get = new Get(row); 287 get.addFamily(HConstants.CATALOG_FAMILY); 288 Result r = get(getMetaHTable(connection), get); 289 RegionLocations locations = getRegionLocations(r); 290 return locations == null 291 ? null 292 : locations.getRegionLocation(parsedInfo == null ? 0 : parsedInfo.getReplicaId()); 293 } 294 295 /** 296 * Returns the HRegionLocation from meta for the given region 297 * @param connection connection we're using 298 * @param regionInfo region information 299 * @return HRegionLocation for the given region 300 */ 301 public static HRegionLocation getRegionLocation(Connection connection, RegionInfo regionInfo) 302 throws IOException { 303 return getRegionLocation(getCatalogFamilyRow(connection, regionInfo), regionInfo, 304 regionInfo.getReplicaId()); 305 } 306 307 /** Returns Return the {@link HConstants#CATALOG_FAMILY} row from hbase:meta. */ 308 public static Result getCatalogFamilyRow(Connection connection, RegionInfo ri) 309 throws IOException { 310 Get get = new Get(getMetaKeyForRegion(ri)); 311 get.addFamily(HConstants.CATALOG_FAMILY); 312 return get(getMetaHTable(connection), get); 313 } 314 315 /** Returns the row key to use for this regionInfo */ 316 public static byte[] getMetaKeyForRegion(RegionInfo regionInfo) { 317 return RegionReplicaUtil.getRegionInfoForDefaultReplica(regionInfo).getRegionName(); 318 } 319 320 /** 321 * Returns an HRI parsed from this regionName. Not all the fields of the HRI is stored in the 322 * name, so the returned object should only be used for the fields in the regionName. 323 */ 324 // This should be moved to RegionInfo? TODO. 325 public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException { 326 byte[][] fields = RegionInfo.parseRegionName(regionName); 327 long regionId = Long.parseLong(Bytes.toString(fields[2])); 328 int replicaId = fields.length > 3 ? Integer.parseInt(Bytes.toString(fields[3]), 16) : 0; 329 return RegionInfoBuilder.newBuilder(TableName.valueOf(fields[0])).setStartKey(fields[1]) 330 .setRegionId(regionId).setReplicaId(replicaId).build(); 331 } 332 333 /** 334 * Gets the result in hbase:meta for the specified region. 335 * @param connection connection we're using 336 * @param regionName region we're looking for 337 * @return result of the specified region 338 */ 339 public static Result getRegionResult(Connection connection, byte[] regionName) 340 throws IOException { 341 Get get = new Get(regionName); 342 get.addFamily(HConstants.CATALOG_FAMILY); 343 return get(getMetaHTable(connection), get); 344 } 345 346 /** 347 * Scans META table for a row whose key contains the specified <B>regionEncodedName</B>, returning 348 * a single related <code>Result</code> instance if any row is found, null otherwise. 349 * @param connection the connection to query META table. 350 * @param regionEncodedName the region encoded name to look for at META. 351 * @return <code>Result</code> instance with the row related info in META, null otherwise. 352 * @throws IOException if any errors occur while querying META. 353 */ 354 public static Result scanByRegionEncodedName(Connection connection, String regionEncodedName) 355 throws IOException { 356 RowFilter rowFilter = 357 new RowFilter(CompareOperator.EQUAL, new SubstringComparator(regionEncodedName)); 358 Scan scan = getMetaScan(connection.getConfiguration(), 1); 359 scan.setFilter(rowFilter); 360 try (Table table = getMetaHTable(connection); 361 ResultScanner resultScanner = table.getScanner(scan)) { 362 return resultScanner.next(); 363 } 364 } 365 366 /** 367 * Returns Return all regioninfos listed in the 'info:merge*' columns of the 368 * <code>regionName</code> row. 369 */ 370 @Nullable 371 public static List<RegionInfo> getMergeRegions(Connection connection, byte[] regionName) 372 throws IOException { 373 return getMergeRegions(getRegionResult(connection, regionName).rawCells()); 374 } 375 376 /** 377 * Check whether the given {@code regionName} has any 'info:merge*' columns. 378 */ 379 public static boolean hasMergeRegions(Connection conn, byte[] regionName) throws IOException { 380 return hasMergeRegions(getRegionResult(conn, regionName).rawCells()); 381 } 382 383 /** 384 * Returns Deserialized values of <qualifier,regioninfo> pairs taken from column values that 385 * match the regex 'info:merge.*' in array of <code>cells</code>. 386 */ 387 @Nullable 388 public static Map<String, RegionInfo> getMergeRegionsWithName(Cell[] cells) { 389 if (cells == null) { 390 return null; 391 } 392 Map<String, RegionInfo> regionsToMerge = null; 393 for (Cell cell : cells) { 394 if (!isMergeQualifierPrefix(cell)) { 395 continue; 396 } 397 // Ok. This cell is that of a info:merge* column. 398 RegionInfo ri = RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(), 399 cell.getValueLength()); 400 if (ri != null) { 401 if (regionsToMerge == null) { 402 regionsToMerge = new LinkedHashMap<>(); 403 } 404 regionsToMerge.put(Bytes.toString(CellUtil.cloneQualifier(cell)), ri); 405 } 406 } 407 return regionsToMerge; 408 } 409 410 /** 411 * Returns Deserialized regioninfo values taken from column values that match the regex 412 * 'info:merge.*' in array of <code>cells</code>. 413 */ 414 @Nullable 415 public static List<RegionInfo> getMergeRegions(Cell[] cells) { 416 Map<String, RegionInfo> mergeRegionsWithName = getMergeRegionsWithName(cells); 417 return (mergeRegionsWithName == null) ? null : new ArrayList<>(mergeRegionsWithName.values()); 418 } 419 420 /** 421 * Returns True if any merge regions present in <code>cells</code>; i.e. the column in 422 * <code>cell</code> matches the regex 'info:merge.*'. 423 */ 424 public static boolean hasMergeRegions(Cell[] cells) { 425 for (Cell cell : cells) { 426 if (!isMergeQualifierPrefix(cell)) { 427 continue; 428 } 429 return true; 430 } 431 return false; 432 } 433 434 /** Returns True if the column in <code>cell</code> matches the regex 'info:merge.*'. */ 435 private static boolean isMergeQualifierPrefix(Cell cell) { 436 // Check to see if has family and that qualifier starts with the merge qualifier 'merge' 437 return CellUtil.matchingFamily(cell, HConstants.CATALOG_FAMILY) 438 && PrivateCellUtil.qualifierStartsWith(cell, HConstants.MERGE_QUALIFIER_PREFIX); 439 } 440 441 /** 442 * Lists all of the regions currently in META. 443 * @param connection to connect with 444 * @param excludeOfflinedSplitParents False if we are to include offlined/splitparents regions, 445 * true and we'll leave out offlined regions from returned list 446 * @return List of all user-space regions. 447 */ 448 public static List<RegionInfo> getAllRegions(Connection connection, 449 boolean excludeOfflinedSplitParents) throws IOException { 450 List<Pair<RegionInfo, ServerName>> result; 451 452 result = getTableRegionsAndLocations(connection, null, excludeOfflinedSplitParents); 453 454 return getListOfRegionInfos(result); 455 456 } 457 458 /** 459 * Gets all of the regions of the specified table. Do not use this method to get meta table 460 * regions, use methods in MetaTableLocator instead. 461 * @param connection connection we're using 462 * @param tableName table we're looking for 463 * @return Ordered list of {@link RegionInfo}. 464 */ 465 public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName) 466 throws IOException { 467 return getTableRegions(connection, tableName, false); 468 } 469 470 /** 471 * Gets all of the regions of the specified table. Do not use this method to get meta table 472 * regions, use methods in MetaTableLocator instead. 473 * @param connection connection we're using 474 * @param tableName table we're looking for 475 * @param excludeOfflinedSplitParents If true, do not include offlined split parents in the 476 * return. 477 * @return Ordered list of {@link RegionInfo}. 478 */ 479 public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName, 480 final boolean excludeOfflinedSplitParents) throws IOException { 481 List<Pair<RegionInfo, ServerName>> result = 482 getTableRegionsAndLocations(connection, tableName, excludeOfflinedSplitParents); 483 return getListOfRegionInfos(result); 484 } 485 486 @SuppressWarnings("MixedMutabilityReturnType") 487 private static List<RegionInfo> 488 getListOfRegionInfos(final List<Pair<RegionInfo, ServerName>> pairs) { 489 if (pairs == null || pairs.isEmpty()) { 490 return Collections.emptyList(); 491 } 492 List<RegionInfo> result = new ArrayList<>(pairs.size()); 493 for (Pair<RegionInfo, ServerName> pair : pairs) { 494 result.add(pair.getFirst()); 495 } 496 return result; 497 } 498 499 /** 500 * Returns start row for scanning META according to query type 501 */ 502 public static byte[] getTableStartRowForMeta(TableName tableName, QueryType type) { 503 if (tableName == null) { 504 return null; 505 } 506 switch (type) { 507 case REGION: 508 byte[] startRow = new byte[tableName.getName().length + 2]; 509 System.arraycopy(tableName.getName(), 0, startRow, 0, tableName.getName().length); 510 startRow[startRow.length - 2] = HConstants.DELIMITER; 511 startRow[startRow.length - 1] = HConstants.DELIMITER; 512 return startRow; 513 case ALL: 514 case TABLE: 515 default: 516 return tableName.getName(); 517 } 518 } 519 520 /** 521 * Returns stop row for scanning META according to query type 522 */ 523 public static byte[] getTableStopRowForMeta(TableName tableName, QueryType type) { 524 if (tableName == null) { 525 return null; 526 } 527 final byte[] stopRow; 528 switch (type) { 529 case REGION: 530 stopRow = new byte[tableName.getName().length + 3]; 531 System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length); 532 stopRow[stopRow.length - 3] = ' '; 533 stopRow[stopRow.length - 2] = HConstants.DELIMITER; 534 stopRow[stopRow.length - 1] = HConstants.DELIMITER; 535 break; 536 case ALL: 537 case TABLE: 538 default: 539 stopRow = new byte[tableName.getName().length + 1]; 540 System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length); 541 stopRow[stopRow.length - 1] = ' '; 542 break; 543 } 544 return stopRow; 545 } 546 547 /** 548 * This method creates a Scan object that will only scan catalog rows that belong to the specified 549 * table. It doesn't specify any columns. This is a better alternative to just using a start row 550 * and scan until it hits a new table since that requires parsing the HRI to get the table name. 551 * @param tableName bytes of table's name 552 * @return configured Scan object 553 */ 554 public static Scan getScanForTableName(Configuration conf, TableName tableName) { 555 // Start key is just the table name with delimiters 556 byte[] startKey = getTableStartRowForMeta(tableName, QueryType.REGION); 557 // Stop key appends the smallest possible char to the table name 558 byte[] stopKey = getTableStopRowForMeta(tableName, QueryType.REGION); 559 560 Scan scan = getMetaScan(conf, -1); 561 scan.setStartRow(startKey); 562 scan.setStopRow(stopKey); 563 return scan; 564 } 565 566 private static Scan getMetaScan(Configuration conf, int rowUpperLimit) { 567 Scan scan = new Scan(); 568 int scannerCaching = conf.getInt(HConstants.HBASE_META_SCANNER_CACHING, 569 HConstants.DEFAULT_HBASE_META_SCANNER_CACHING); 570 if (conf.getBoolean(HConstants.USE_META_REPLICAS, HConstants.DEFAULT_USE_META_REPLICAS)) { 571 scan.setConsistency(Consistency.TIMELINE); 572 } 573 if (rowUpperLimit > 0) { 574 scan.setLimit(rowUpperLimit); 575 scan.setReadType(Scan.ReadType.PREAD); 576 } 577 scan.setCaching(scannerCaching); 578 return scan; 579 } 580 581 /** 582 * Do not use this method to get meta table regions, use methods in MetaTableLocator instead. 583 * @param connection connection we're using 584 * @param tableName table we're looking for 585 * @return Return list of regioninfos and server. 586 */ 587 public static List<Pair<RegionInfo, ServerName>> 588 getTableRegionsAndLocations(Connection connection, TableName tableName) throws IOException { 589 return getTableRegionsAndLocations(connection, tableName, true); 590 } 591 592 /** 593 * Do not use this method to get meta table regions, use methods in MetaTableLocator instead. 594 * @param connection connection we're using 595 * @param tableName table to work with, can be null for getting all regions 596 * @param excludeOfflinedSplitParents don't return split parents 597 * @return Return list of regioninfos and server addresses. 598 */ 599 // What happens here when 1M regions in hbase:meta? This won't scale? 600 public static List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations( 601 Connection connection, @Nullable final TableName tableName, 602 final boolean excludeOfflinedSplitParents) throws IOException { 603 if (tableName != null && tableName.equals(TableName.META_TABLE_NAME)) { 604 throw new IOException( 605 "This method can't be used to locate meta regions;" + " use MetaTableLocator instead"); 606 } 607 // Make a version of CollectingVisitor that collects RegionInfo and ServerAddress 608 CollectingVisitor<Pair<RegionInfo, ServerName>> visitor = 609 new CollectingVisitor<Pair<RegionInfo, ServerName>>() { 610 private RegionLocations current = null; 611 612 @Override 613 public boolean visit(Result r) throws IOException { 614 current = getRegionLocations(r); 615 if (current == null || current.getRegionLocation().getRegion() == null) { 616 LOG.warn("No serialized RegionInfo in " + r); 617 return true; 618 } 619 RegionInfo hri = current.getRegionLocation().getRegion(); 620 if (excludeOfflinedSplitParents && hri.isSplitParent()) return true; 621 // Else call super and add this Result to the collection. 622 return super.visit(r); 623 } 624 625 @Override 626 void add(Result r) { 627 if (current == null) { 628 return; 629 } 630 for (HRegionLocation loc : current.getRegionLocations()) { 631 if (loc != null) { 632 this.results.add(new Pair<>(loc.getRegion(), loc.getServerName())); 633 } 634 } 635 } 636 }; 637 scanMeta(connection, getTableStartRowForMeta(tableName, QueryType.REGION), 638 getTableStopRowForMeta(tableName, QueryType.REGION), QueryType.REGION, visitor); 639 return visitor.getResults(); 640 } 641 642 /** 643 * Get the user regions a given server is hosting. 644 * @param connection connection we're using 645 * @param serverName server whose regions we're interested in 646 * @return List of user regions installed on this server (does not include catalog regions). 647 */ 648 public static NavigableMap<RegionInfo, Result> getServerUserRegions(Connection connection, 649 final ServerName serverName) throws IOException { 650 final NavigableMap<RegionInfo, Result> hris = new TreeMap<>(); 651 // Fill the above hris map with entries from hbase:meta that have the passed 652 // servername. 653 CollectingVisitor<Result> v = new CollectingVisitor<Result>() { 654 @Override 655 void add(Result r) { 656 if (r == null || r.isEmpty()) return; 657 RegionLocations locations = getRegionLocations(r); 658 if (locations == null) return; 659 for (HRegionLocation loc : locations.getRegionLocations()) { 660 if (loc != null) { 661 if (loc.getServerName() != null && loc.getServerName().equals(serverName)) { 662 hris.put(loc.getRegion(), r); 663 } 664 } 665 } 666 } 667 }; 668 scanMeta(connection, null, null, QueryType.REGION, v); 669 return hris; 670 } 671 672 public static void fullScanMetaAndPrint(Connection connection) throws IOException { 673 Visitor v = r -> { 674 if (r == null || r.isEmpty()) { 675 return true; 676 } 677 LOG.info("fullScanMetaAndPrint.Current Meta Row: " + r); 678 TableState state = getTableState(r); 679 if (state != null) { 680 LOG.info("fullScanMetaAndPrint.Table State={}" + state); 681 } else { 682 RegionLocations locations = getRegionLocations(r); 683 if (locations == null) { 684 return true; 685 } 686 for (HRegionLocation loc : locations.getRegionLocations()) { 687 if (loc != null) { 688 LOG.info("fullScanMetaAndPrint.HRI Print={}", loc.getRegion()); 689 } 690 } 691 } 692 return true; 693 }; 694 scanMeta(connection, null, null, QueryType.ALL, v); 695 } 696 697 public static void scanMetaForTableRegions(Connection connection, Visitor visitor, 698 TableName tableName, CatalogReplicaMode metaReplicaMode) throws IOException { 699 scanMeta(connection, tableName, QueryType.REGION, Integer.MAX_VALUE, visitor, metaReplicaMode); 700 } 701 702 public static void scanMetaForTableRegions(Connection connection, Visitor visitor, 703 TableName tableName) throws IOException { 704 scanMetaForTableRegions(connection, visitor, tableName, CatalogReplicaMode.NONE); 705 } 706 707 private static void scanMeta(Connection connection, TableName table, QueryType type, int maxRows, 708 final Visitor visitor, CatalogReplicaMode metaReplicaMode) throws IOException { 709 scanMeta(connection, getTableStartRowForMeta(table, type), getTableStopRowForMeta(table, type), 710 type, null, maxRows, visitor, metaReplicaMode); 711 712 } 713 714 private static void scanMeta(Connection connection, @Nullable final byte[] startRow, 715 @Nullable final byte[] stopRow, QueryType type, final Visitor visitor) throws IOException { 716 scanMeta(connection, startRow, stopRow, type, Integer.MAX_VALUE, visitor); 717 } 718 719 /** 720 * Performs a scan of META table for given table starting from given row. 721 * @param connection connection we're using 722 * @param visitor visitor to call 723 * @param tableName table withing we scan 724 * @param row start scan from this row 725 * @param rowLimit max number of rows to return 726 */ 727 public static void scanMeta(Connection connection, final Visitor visitor, 728 final TableName tableName, final byte[] row, final int rowLimit) throws IOException { 729 byte[] startRow = null; 730 byte[] stopRow = null; 731 if (tableName != null) { 732 startRow = getTableStartRowForMeta(tableName, QueryType.REGION); 733 if (row != null) { 734 RegionInfo closestRi = getClosestRegionInfo(connection, tableName, row); 735 startRow = 736 RegionInfo.createRegionName(tableName, closestRi.getStartKey(), HConstants.ZEROES, false); 737 } 738 stopRow = getTableStopRowForMeta(tableName, QueryType.REGION); 739 } 740 scanMeta(connection, startRow, stopRow, QueryType.REGION, rowLimit, visitor); 741 } 742 743 /** 744 * Performs a scan of META table. 745 * @param connection connection we're using 746 * @param startRow Where to start the scan. Pass null if want to begin scan at first row. 747 * @param stopRow Where to stop the scan. Pass null if want to scan all rows from the start one 748 * @param type scanned part of meta 749 * @param maxRows maximum rows to return 750 * @param visitor Visitor invoked against each row. 751 */ 752 static void scanMeta(Connection connection, @Nullable final byte[] startRow, 753 @Nullable final byte[] stopRow, QueryType type, int maxRows, final Visitor visitor) 754 throws IOException { 755 scanMeta(connection, startRow, stopRow, type, null, maxRows, visitor, CatalogReplicaMode.NONE); 756 } 757 758 private static void scanMeta(Connection connection, @Nullable final byte[] startRow, 759 @Nullable final byte[] stopRow, QueryType type, @Nullable Filter filter, int maxRows, 760 final Visitor visitor, CatalogReplicaMode metaReplicaMode) throws IOException { 761 int rowUpperLimit = maxRows > 0 ? maxRows : Integer.MAX_VALUE; 762 Scan scan = getMetaScan(connection.getConfiguration(), rowUpperLimit); 763 764 for (byte[] family : type.getFamilies()) { 765 scan.addFamily(family); 766 } 767 if (startRow != null) { 768 scan.withStartRow(startRow); 769 } 770 if (stopRow != null) { 771 scan.withStopRow(stopRow); 772 } 773 if (filter != null) { 774 scan.setFilter(filter); 775 } 776 777 if (LOG.isTraceEnabled()) { 778 LOG.trace("Scanning META" + " starting at row=" + Bytes.toStringBinary(startRow) 779 + " stopping at row=" + Bytes.toStringBinary(stopRow) + " for max=" + rowUpperLimit 780 + " with caching=" + scan.getCaching()); 781 } 782 783 int currentRow = 0; 784 try (Table metaTable = getMetaHTable(connection)) { 785 switch (metaReplicaMode) { 786 case LOAD_BALANCE: 787 int numOfReplicas = metaTable.getDescriptor().getRegionReplication(); 788 if (numOfReplicas > 1) { 789 int replicaId = ThreadLocalRandom.current().nextInt(numOfReplicas); 790 791 // When the replicaId is 0, do not set to Consistency.TIMELINE 792 if (replicaId > 0) { 793 scan.setReplicaId(replicaId); 794 scan.setConsistency(Consistency.TIMELINE); 795 } 796 } 797 break; 798 case HEDGED_READ: 799 scan.setConsistency(Consistency.TIMELINE); 800 break; 801 default: 802 // Do nothing 803 } 804 try (ResultScanner scanner = metaTable.getScanner(scan)) { 805 Result data; 806 while ((data = scanner.next()) != null) { 807 if (data.isEmpty()) continue; 808 // Break if visit returns false. 809 if (!visitor.visit(data)) break; 810 if (++currentRow >= rowUpperLimit) break; 811 } 812 } 813 } 814 if (visitor instanceof Closeable) { 815 try { 816 ((Closeable) visitor).close(); 817 } catch (Throwable t) { 818 ExceptionUtil.rethrowIfInterrupt(t); 819 LOG.debug("Got exception in closing the meta scanner visitor", t); 820 } 821 } 822 } 823 824 /** Returns Get closest metatable region row to passed <code>row</code> */ 825 @NonNull 826 private static RegionInfo getClosestRegionInfo(Connection connection, 827 @NonNull final TableName tableName, @NonNull final byte[] row) throws IOException { 828 byte[] searchRow = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false); 829 Scan scan = getMetaScan(connection.getConfiguration(), 1); 830 scan.setReversed(true); 831 scan.withStartRow(searchRow); 832 try (ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan)) { 833 Result result = resultScanner.next(); 834 if (result == null) { 835 throw new TableNotFoundException("Cannot find row in META " + " for table: " + tableName 836 + ", row=" + Bytes.toStringBinary(row)); 837 } 838 RegionInfo regionInfo = getRegionInfo(result); 839 if (regionInfo == null) { 840 throw new IOException("RegionInfo was null or empty in Meta for " + tableName + ", row=" 841 + Bytes.toStringBinary(row)); 842 } 843 return regionInfo; 844 } 845 } 846 847 /** 848 * Returns the column family used for meta columns. 849 * @return HConstants.CATALOG_FAMILY. 850 */ 851 public static byte[] getCatalogFamily() { 852 return HConstants.CATALOG_FAMILY; 853 } 854 855 /** 856 * Returns the column family used for table columns. 857 * @return HConstants.TABLE_FAMILY. 858 */ 859 private static byte[] getTableFamily() { 860 return HConstants.TABLE_FAMILY; 861 } 862 863 /** 864 * Returns the column qualifier for serialized region info 865 * @return HConstants.REGIONINFO_QUALIFIER 866 */ 867 public static byte[] getRegionInfoColumn() { 868 return HConstants.REGIONINFO_QUALIFIER; 869 } 870 871 /** 872 * Returns the column qualifier for serialized table state 873 * @return HConstants.TABLE_STATE_QUALIFIER 874 */ 875 private static byte[] getTableStateColumn() { 876 return HConstants.TABLE_STATE_QUALIFIER; 877 } 878 879 /** 880 * Returns the column qualifier for serialized region state 881 * @return HConstants.STATE_QUALIFIER 882 */ 883 private static byte[] getRegionStateColumn() { 884 return HConstants.STATE_QUALIFIER; 885 } 886 887 /** 888 * Returns the column qualifier for serialized region state 889 * @param replicaId the replicaId of the region 890 * @return a byte[] for state qualifier 891 */ 892 public static byte[] getRegionStateColumn(int replicaId) { 893 return replicaId == 0 894 ? HConstants.STATE_QUALIFIER 895 : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 896 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 897 } 898 899 /** 900 * Returns the column qualifier for serialized region state 901 * @param replicaId the replicaId of the region 902 * @return a byte[] for sn column qualifier 903 */ 904 public static byte[] getServerNameColumn(int replicaId) { 905 return replicaId == 0 906 ? HConstants.SERVERNAME_QUALIFIER 907 : Bytes.toBytes(HConstants.SERVERNAME_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 908 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 909 } 910 911 /** 912 * Returns the column qualifier for server column for replicaId 913 * @param replicaId the replicaId of the region 914 * @return a byte[] for server column qualifier 915 */ 916 public static byte[] getServerColumn(int replicaId) { 917 return replicaId == 0 918 ? HConstants.SERVER_QUALIFIER 919 : Bytes.toBytes(HConstants.SERVER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 920 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 921 } 922 923 /** 924 * Returns the column qualifier for server start code column for replicaId 925 * @param replicaId the replicaId of the region 926 * @return a byte[] for server start code column qualifier 927 */ 928 public static byte[] getStartCodeColumn(int replicaId) { 929 return replicaId == 0 930 ? HConstants.STARTCODE_QUALIFIER 931 : Bytes.toBytes(HConstants.STARTCODE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 932 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 933 } 934 935 /** 936 * Returns the column qualifier for seqNum column for replicaId 937 * @param replicaId the replicaId of the region 938 * @return a byte[] for seqNum column qualifier 939 */ 940 public static byte[] getSeqNumColumn(int replicaId) { 941 return replicaId == 0 942 ? HConstants.SEQNUM_QUALIFIER 943 : Bytes.toBytes(HConstants.SEQNUM_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 944 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 945 } 946 947 /** 948 * Parses the replicaId from the server column qualifier. See top of the class javadoc for the 949 * actual meta layout 950 * @param serverColumn the column qualifier 951 * @return an int for the replicaId 952 */ 953 static int parseReplicaIdFromServerColumn(byte[] serverColumn) { 954 String serverStr = Bytes.toString(serverColumn); 955 956 Matcher matcher = SERVER_COLUMN_PATTERN.matcher(serverStr); 957 if (matcher.matches() && matcher.groupCount() > 0) { 958 String group = matcher.group(1); 959 if (group != null && group.length() > 0) { 960 return Integer.parseInt(group.substring(1), 16); 961 } else { 962 return 0; 963 } 964 } 965 return -1; 966 } 967 968 /** 969 * Returns a {@link ServerName} from catalog table {@link Result}. 970 * @param r Result to pull from 971 * @return A ServerName instance or null if necessary fields not found or empty. 972 */ 973 @Nullable 974 @InterfaceAudience.Private // for use by HMaster#getTableRegionRow which is used for testing only 975 public static ServerName getServerName(final Result r, final int replicaId) { 976 byte[] serverColumn = getServerColumn(replicaId); 977 Cell cell = r.getColumnLatestCell(getCatalogFamily(), serverColumn); 978 if (cell == null || cell.getValueLength() == 0) return null; 979 String hostAndPort = 980 Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 981 byte[] startcodeColumn = getStartCodeColumn(replicaId); 982 cell = r.getColumnLatestCell(getCatalogFamily(), startcodeColumn); 983 if (cell == null || cell.getValueLength() == 0) return null; 984 try { 985 return ServerName.valueOf(hostAndPort, 986 Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength())); 987 } catch (IllegalArgumentException e) { 988 LOG.error("Ignoring invalid region for server " + hostAndPort + "; cell=" + cell, e); 989 return null; 990 } 991 } 992 993 /** 994 * Returns the {@link ServerName} from catalog table {@link Result} where the region is 995 * transitioning on. It should be the same as {@link MetaTableAccessor#getServerName(Result,int)} 996 * if the server is at OPEN state. 997 * @param r Result to pull the transitioning server name from 998 * @return A ServerName instance or {@link MetaTableAccessor#getServerName(Result,int)} if 999 * necessary fields not found or empty. 1000 */ 1001 @Nullable 1002 public static ServerName getTargetServerName(final Result r, final int replicaId) { 1003 final Cell cell = 1004 r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getServerNameColumn(replicaId)); 1005 if (cell == null || cell.getValueLength() == 0) { 1006 RegionLocations locations = MetaTableAccessor.getRegionLocations(r); 1007 if (locations != null) { 1008 HRegionLocation location = locations.getRegionLocation(replicaId); 1009 if (location != null) { 1010 return location.getServerName(); 1011 } 1012 } 1013 return null; 1014 } 1015 return ServerName.parseServerName( 1016 Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength())); 1017 } 1018 1019 /** 1020 * The latest seqnum that the server writing to meta observed when opening the region. E.g. the 1021 * seqNum when the result of {@link #getServerName(Result, int)} was written. 1022 * @param r Result to pull the seqNum from 1023 * @return SeqNum, or HConstants.NO_SEQNUM if there's no value written. 1024 */ 1025 private static long getSeqNumDuringOpen(final Result r, final int replicaId) { 1026 Cell cell = r.getColumnLatestCell(getCatalogFamily(), getSeqNumColumn(replicaId)); 1027 if (cell == null || cell.getValueLength() == 0) return HConstants.NO_SEQNUM; 1028 return Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 1029 } 1030 1031 /** 1032 * Returns the daughter regions by reading the corresponding columns of the catalog table Result. 1033 * @param data a Result object from the catalog table scan 1034 * @return pair of RegionInfo or PairOfSameType(null, null) if region is not a split parent 1035 */ 1036 public static PairOfSameType<RegionInfo> getDaughterRegions(Result data) { 1037 RegionInfo splitA = getRegionInfo(data, HConstants.SPLITA_QUALIFIER); 1038 RegionInfo splitB = getRegionInfo(data, HConstants.SPLITB_QUALIFIER); 1039 return new PairOfSameType<>(splitA, splitB); 1040 } 1041 1042 /** 1043 * Returns an HRegionLocationList extracted from the result. 1044 * @return an HRegionLocationList containing all locations for the region range or null if we 1045 * can't deserialize the result. 1046 */ 1047 @Nullable 1048 public static RegionLocations getRegionLocations(final Result r) { 1049 if (r == null) return null; 1050 RegionInfo regionInfo = getRegionInfo(r, getRegionInfoColumn()); 1051 if (regionInfo == null) return null; 1052 1053 List<HRegionLocation> locations = new ArrayList<>(1); 1054 NavigableMap<byte[], NavigableMap<byte[], byte[]>> familyMap = r.getNoVersionMap(); 1055 1056 locations.add(getRegionLocation(r, regionInfo, 0)); 1057 1058 NavigableMap<byte[], byte[]> infoMap = familyMap.get(getCatalogFamily()); 1059 if (infoMap == null) return new RegionLocations(locations); 1060 1061 // iterate until all serverName columns are seen 1062 int replicaId = 0; 1063 byte[] serverColumn = getServerColumn(replicaId); 1064 SortedMap<byte[], byte[]> serverMap; 1065 serverMap = infoMap.tailMap(serverColumn, false); 1066 1067 if (serverMap.isEmpty()) return new RegionLocations(locations); 1068 1069 for (Map.Entry<byte[], byte[]> entry : serverMap.entrySet()) { 1070 replicaId = parseReplicaIdFromServerColumn(entry.getKey()); 1071 if (replicaId < 0) { 1072 break; 1073 } 1074 HRegionLocation location = getRegionLocation(r, regionInfo, replicaId); 1075 // In case the region replica is newly created, it's location might be null. We usually do not 1076 // have HRL's in RegionLocations object with null ServerName. They are handled as null HRLs. 1077 if (location.getServerName() == null) { 1078 locations.add(null); 1079 } else { 1080 locations.add(location); 1081 } 1082 } 1083 1084 return new RegionLocations(locations); 1085 } 1086 1087 /** 1088 * Returns the HRegionLocation parsed from the given meta row Result for the given regionInfo and 1089 * replicaId. The regionInfo can be the default region info for the replica. 1090 * @param r the meta row result 1091 * @param regionInfo RegionInfo for default replica 1092 * @param replicaId the replicaId for the HRegionLocation 1093 * @return HRegionLocation parsed from the given meta row Result for the given replicaId 1094 */ 1095 private static HRegionLocation getRegionLocation(final Result r, final RegionInfo regionInfo, 1096 final int replicaId) { 1097 ServerName serverName = getServerName(r, replicaId); 1098 long seqNum = getSeqNumDuringOpen(r, replicaId); 1099 RegionInfo replicaInfo = RegionReplicaUtil.getRegionInfoForReplica(regionInfo, replicaId); 1100 return new HRegionLocation(replicaInfo, serverName, seqNum); 1101 } 1102 1103 /** 1104 * Returns RegionInfo object from the column 1105 * HConstants.CATALOG_FAMILY:HConstants.REGIONINFO_QUALIFIER of the catalog table Result. 1106 * @param data a Result object from the catalog table scan 1107 * @return RegionInfo or null 1108 */ 1109 public static RegionInfo getRegionInfo(Result data) { 1110 return getRegionInfo(data, HConstants.REGIONINFO_QUALIFIER); 1111 } 1112 1113 /** 1114 * Returns the RegionInfo object from the column {@link HConstants#CATALOG_FAMILY} and 1115 * <code>qualifier</code> of the catalog table result. 1116 * @param r a Result object from the catalog table scan 1117 * @param qualifier Column family qualifier 1118 * @return An RegionInfo instance or null. 1119 */ 1120 @Nullable 1121 public static RegionInfo getRegionInfo(final Result r, byte[] qualifier) { 1122 Cell cell = r.getColumnLatestCell(getCatalogFamily(), qualifier); 1123 if (cell == null) return null; 1124 return RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(), 1125 cell.getValueLength()); 1126 } 1127 1128 /** 1129 * Fetch table state for given table from META table 1130 * @param conn connection to use 1131 * @param tableName table to fetch state for 1132 */ 1133 @Nullable 1134 public static TableState getTableState(Connection conn, TableName tableName) throws IOException { 1135 if (tableName.equals(TableName.META_TABLE_NAME)) { 1136 return new TableState(tableName, TableState.State.ENABLED); 1137 } 1138 Table metaHTable = getMetaHTable(conn); 1139 Get get = new Get(tableName.getName()).addColumn(getTableFamily(), getTableStateColumn()); 1140 Result result = metaHTable.get(get); 1141 return getTableState(result); 1142 } 1143 1144 /** 1145 * Fetch table states from META table 1146 * @param conn connection to use 1147 * @return map {tableName -> state} 1148 */ 1149 public static Map<TableName, TableState> getTableStates(Connection conn) throws IOException { 1150 final Map<TableName, TableState> states = new LinkedHashMap<>(); 1151 Visitor collector = r -> { 1152 TableState state = getTableState(r); 1153 if (state != null) { 1154 states.put(state.getTableName(), state); 1155 } 1156 return true; 1157 }; 1158 fullScanTables(conn, collector); 1159 return states; 1160 } 1161 1162 /** 1163 * Updates state in META Do not use. For internal use only. 1164 * @param conn connection to use 1165 * @param tableName table to look for 1166 */ 1167 public static void updateTableState(Connection conn, TableName tableName, TableState.State actual) 1168 throws IOException { 1169 updateTableState(conn, new TableState(tableName, actual)); 1170 } 1171 1172 /** 1173 * Decode table state from META Result. Should contain cell from HConstants.TABLE_FAMILY 1174 * @return null if not found 1175 */ 1176 @Nullable 1177 public static TableState getTableState(Result r) throws IOException { 1178 Cell cell = r.getColumnLatestCell(getTableFamily(), getTableStateColumn()); 1179 if (cell == null) { 1180 return null; 1181 } 1182 try { 1183 return TableState.parseFrom(TableName.valueOf(r.getRow()), 1184 Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), 1185 cell.getValueOffset() + cell.getValueLength())); 1186 } catch (DeserializationException e) { 1187 throw new IOException(e); 1188 } 1189 } 1190 1191 /** 1192 * Implementations 'visit' a catalog table row. 1193 */ 1194 public interface Visitor { 1195 /** 1196 * Visit the catalog table row. 1197 * @param r A row from catalog table 1198 * @return True if we are to proceed scanning the table, else false if we are to stop now. 1199 */ 1200 boolean visit(final Result r) throws IOException; 1201 } 1202 1203 /** 1204 * Implementations 'visit' a catalog table row but with close() at the end. 1205 */ 1206 public interface CloseableVisitor extends Visitor, Closeable { 1207 } 1208 1209 /** 1210 * A {@link Visitor} that collects content out of passed {@link Result}. 1211 */ 1212 static abstract class CollectingVisitor<T> implements Visitor { 1213 final List<T> results = new ArrayList<>(); 1214 1215 @Override 1216 public boolean visit(Result r) throws IOException { 1217 if (r != null && !r.isEmpty()) { 1218 add(r); 1219 } 1220 return true; 1221 } 1222 1223 abstract void add(Result r); 1224 1225 /** Returns Collected results; wait till visits complete to collect all possible results */ 1226 List<T> getResults() { 1227 return this.results; 1228 } 1229 } 1230 1231 /** 1232 * Collects all returned. 1233 */ 1234 static class CollectAllVisitor extends CollectingVisitor<Result> { 1235 @Override 1236 void add(Result r) { 1237 this.results.add(r); 1238 } 1239 } 1240 1241 /** 1242 * A Visitor that skips offline regions and split parents 1243 */ 1244 public static abstract class DefaultVisitorBase implements Visitor { 1245 1246 DefaultVisitorBase() { 1247 super(); 1248 } 1249 1250 public abstract boolean visitInternal(Result rowResult) throws IOException; 1251 1252 @Override 1253 public boolean visit(Result rowResult) throws IOException { 1254 RegionInfo info = getRegionInfo(rowResult); 1255 if (info == null) { 1256 return true; 1257 } 1258 1259 // skip over offline and split regions 1260 if (!(info.isOffline() || info.isSplit())) { 1261 return visitInternal(rowResult); 1262 } 1263 return true; 1264 } 1265 } 1266 1267 /** 1268 * A Visitor for a table. Provides a consistent view of the table's hbase:meta entries during 1269 * concurrent splits (see HBASE-5986 for details). This class does not guarantee ordered traversal 1270 * of meta entries, and can block until the hbase:meta entries for daughters are available during 1271 * splits. 1272 */ 1273 public static abstract class TableVisitorBase extends DefaultVisitorBase { 1274 private TableName tableName; 1275 1276 public TableVisitorBase(TableName tableName) { 1277 super(); 1278 this.tableName = tableName; 1279 } 1280 1281 @Override 1282 public final boolean visit(Result rowResult) throws IOException { 1283 RegionInfo info = getRegionInfo(rowResult); 1284 if (info == null) { 1285 return true; 1286 } 1287 if (!info.getTable().equals(tableName)) { 1288 return false; 1289 } 1290 return super.visit(rowResult); 1291 } 1292 } 1293 1294 //////////////////////// 1295 // Editing operations // 1296 //////////////////////// 1297 /** 1298 * Generates and returns a Put containing the region into for the catalog table 1299 */ 1300 public static Put makePutFromRegionInfo(RegionInfo regionInfo, long ts) throws IOException { 1301 return addRegionInfo(new Put(regionInfo.getRegionName(), ts), regionInfo); 1302 } 1303 1304 /** 1305 * Generates and returns a Delete containing the region info for the catalog table 1306 */ 1307 public static Delete makeDeleteFromRegionInfo(RegionInfo regionInfo, long ts) { 1308 if (regionInfo == null) { 1309 throw new IllegalArgumentException("Can't make a delete for null region"); 1310 } 1311 Delete delete = new Delete(regionInfo.getRegionName()); 1312 delete.addFamily(getCatalogFamily(), ts); 1313 return delete; 1314 } 1315 1316 /** 1317 * Adds split daughters to the Put 1318 */ 1319 private static Put addDaughtersToPut(Put put, RegionInfo splitA, RegionInfo splitB) 1320 throws IOException { 1321 if (splitA != null) { 1322 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 1323 .setFamily(HConstants.CATALOG_FAMILY).setQualifier(HConstants.SPLITA_QUALIFIER) 1324 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put) 1325 .setValue(RegionInfo.toByteArray(splitA)).build()); 1326 } 1327 if (splitB != null) { 1328 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 1329 .setFamily(HConstants.CATALOG_FAMILY).setQualifier(HConstants.SPLITB_QUALIFIER) 1330 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put) 1331 .setValue(RegionInfo.toByteArray(splitB)).build()); 1332 } 1333 return put; 1334 } 1335 1336 /** 1337 * Put the passed <code>p</code> to the <code>hbase:meta</code> table. 1338 * @param connection connection we're using 1339 * @param p Put to add to hbase:meta 1340 */ 1341 private static void putToMetaTable(Connection connection, Put p) throws IOException { 1342 try (Table table = getMetaHTable(connection)) { 1343 put(table, p); 1344 } 1345 } 1346 1347 /** 1348 * @param t Table to use 1349 * @param p put to make 1350 */ 1351 private static void put(Table t, Put p) throws IOException { 1352 debugLogMutation(p); 1353 t.put(p); 1354 } 1355 1356 /** 1357 * Put the passed <code>ps</code> to the <code>hbase:meta</code> table. 1358 * @param connection connection we're using 1359 * @param ps Put to add to hbase:meta 1360 */ 1361 public static void putsToMetaTable(final Connection connection, final List<Put> ps) 1362 throws IOException { 1363 if (ps.isEmpty()) { 1364 return; 1365 } 1366 try (Table t = getMetaHTable(connection)) { 1367 debugLogMutations(ps); 1368 // the implementation for putting a single Put is much simpler so here we do a check first. 1369 if (ps.size() == 1) { 1370 t.put(ps.get(0)); 1371 } else { 1372 t.put(ps); 1373 } 1374 } 1375 } 1376 1377 /** 1378 * Delete the passed <code>d</code> from the <code>hbase:meta</code> table. 1379 * @param connection connection we're using 1380 * @param d Delete to add to hbase:meta 1381 */ 1382 private static void deleteFromMetaTable(final Connection connection, final Delete d) 1383 throws IOException { 1384 List<Delete> dels = new ArrayList<>(1); 1385 dels.add(d); 1386 deleteFromMetaTable(connection, dels); 1387 } 1388 1389 /** 1390 * Delete the passed <code>deletes</code> from the <code>hbase:meta</code> table. 1391 * @param connection connection we're using 1392 * @param deletes Deletes to add to hbase:meta This list should support #remove. 1393 */ 1394 private static void deleteFromMetaTable(final Connection connection, final List<Delete> deletes) 1395 throws IOException { 1396 try (Table t = getMetaHTable(connection)) { 1397 debugLogMutations(deletes); 1398 t.delete(deletes); 1399 } 1400 } 1401 1402 private static Put addRegionStateToPut(Put put, RegionState.State state) throws IOException { 1403 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 1404 .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getRegionStateColumn()) 1405 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(state.name())) 1406 .build()); 1407 return put; 1408 } 1409 1410 /** 1411 * Update state column in hbase:meta. 1412 */ 1413 public static void updateRegionState(Connection connection, RegionInfo ri, 1414 RegionState.State state) throws IOException { 1415 Put put = new Put(RegionReplicaUtil.getRegionInfoForDefaultReplica(ri).getRegionName()); 1416 MetaTableAccessor.putsToMetaTable(connection, 1417 Collections.singletonList(addRegionStateToPut(put, state))); 1418 } 1419 1420 /** 1421 * Adds daughter region infos to hbase:meta row for the specified region. Note that this does not 1422 * add its daughter's as different rows, but adds information about the daughters in the same row 1423 * as the parent. Use 1424 * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} if 1425 * you want to do that. 1426 * @param connection connection we're using 1427 * @param regionInfo RegionInfo of parent region 1428 * @param splitA first split daughter of the parent regionInfo 1429 * @param splitB second split daughter of the parent regionInfo 1430 * @throws IOException if problem connecting or updating meta 1431 */ 1432 public static void addSplitsToParent(Connection connection, RegionInfo regionInfo, 1433 RegionInfo splitA, RegionInfo splitB) throws IOException { 1434 try (Table meta = getMetaHTable(connection)) { 1435 Put put = makePutFromRegionInfo(regionInfo, EnvironmentEdgeManager.currentTime()); 1436 addDaughtersToPut(put, splitA, splitB); 1437 meta.put(put); 1438 debugLogMutation(put); 1439 LOG.debug("Added region {}", regionInfo.getRegionNameAsString()); 1440 } 1441 } 1442 1443 /** 1444 * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this 1445 * does not add its daughter's as different rows, but adds information about the daughters in the 1446 * same row as the parent. Use 1447 * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} if 1448 * you want to do that. 1449 * @param connection connection we're using 1450 * @param regionInfo region information 1451 * @throws IOException if problem connecting or updating meta 1452 */ 1453 public static void addRegionToMeta(Connection connection, RegionInfo regionInfo) 1454 throws IOException { 1455 addRegionsToMeta(connection, Collections.singletonList(regionInfo), 1); 1456 } 1457 1458 /** 1459 * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions is 1460 * CLOSED. 1461 * @param connection connection we're using 1462 * @param regionInfos region information list 1463 * @throws IOException if problem connecting or updating meta 1464 */ 1465 public static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos, 1466 int regionReplication) throws IOException { 1467 addRegionsToMeta(connection, regionInfos, regionReplication, 1468 EnvironmentEdgeManager.currentTime()); 1469 } 1470 1471 /** 1472 * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions is 1473 * CLOSED. 1474 * @param connection connection we're using 1475 * @param regionInfos region information list 1476 * @param ts desired timestamp 1477 * @throws IOException if problem connecting or updating meta 1478 */ 1479 private static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos, 1480 int regionReplication, long ts) throws IOException { 1481 List<Put> puts = new ArrayList<>(); 1482 for (RegionInfo regionInfo : regionInfos) { 1483 if (RegionReplicaUtil.isDefaultReplica(regionInfo)) { 1484 Put put = makePutFromRegionInfo(regionInfo, ts); 1485 // New regions are added with initial state of CLOSED. 1486 addRegionStateToPut(put, RegionState.State.CLOSED); 1487 // Add empty locations for region replicas so that number of replicas can be cached 1488 // whenever the primary region is looked up from meta 1489 for (int i = 1; i < regionReplication; i++) { 1490 addEmptyLocation(put, i); 1491 } 1492 puts.add(put); 1493 } 1494 } 1495 putsToMetaTable(connection, puts); 1496 LOG.info("Added {} regions to meta.", puts.size()); 1497 } 1498 1499 static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException { 1500 int limit = 10000; // Arbitrary limit. No room in our formatted 'task0000' below for more. 1501 int max = mergeRegions.size(); 1502 if (max > limit) { 1503 // Should never happen!!!!! But just in case. 1504 throw new RuntimeException( 1505 "Can't merge " + max + " regions in one go; " + limit + " is upper-limit."); 1506 } 1507 int counter = 0; 1508 for (RegionInfo ri : mergeRegions) { 1509 String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++); 1510 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 1511 .setFamily(HConstants.CATALOG_FAMILY).setQualifier(Bytes.toBytes(qualifier)) 1512 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put) 1513 .setValue(RegionInfo.toByteArray(ri)).build()); 1514 } 1515 return put; 1516 } 1517 1518 /** 1519 * Merge regions into one in an atomic operation. Deletes the merging regions in hbase:meta and 1520 * adds the merged region. 1521 * @param connection connection we're using 1522 * @param mergedRegion the merged region 1523 * @param parentSeqNum Parent regions to merge and their next open sequence id used by serial 1524 * replication. Set to -1 if not needed by this table. 1525 * @param sn the location of the region 1526 */ 1527 public static void mergeRegions(Connection connection, RegionInfo mergedRegion, 1528 Map<RegionInfo, Long> parentSeqNum, ServerName sn, int regionReplication) throws IOException { 1529 try (Table meta = getMetaHTable(connection)) { 1530 long time = HConstants.LATEST_TIMESTAMP; 1531 List<Mutation> mutations = new ArrayList<>(); 1532 List<RegionInfo> replicationParents = new ArrayList<>(); 1533 for (Map.Entry<RegionInfo, Long> e : parentSeqNum.entrySet()) { 1534 RegionInfo ri = e.getKey(); 1535 long seqNum = e.getValue(); 1536 // Deletes for merging regions 1537 mutations.add(makeDeleteFromRegionInfo(ri, time)); 1538 if (seqNum > 0) { 1539 mutations.add(makePutForReplicationBarrier(ri, seqNum, time)); 1540 replicationParents.add(ri); 1541 } 1542 } 1543 // Put for parent 1544 Put putOfMerged = makePutFromRegionInfo(mergedRegion, time); 1545 putOfMerged = addMergeRegions(putOfMerged, parentSeqNum.keySet()); 1546 // Set initial state to CLOSED. 1547 // NOTE: If initial state is not set to CLOSED then merged region gets added with the 1548 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 1549 // master tries to assign this offline region. This is followed by re-assignments of the 1550 // merged region from resumed {@link MergeTableRegionsProcedure} 1551 addRegionStateToPut(putOfMerged, RegionState.State.CLOSED); 1552 mutations.add(putOfMerged); 1553 // The merged is a new region, openSeqNum = 1 is fine. ServerName may be null 1554 // if crash after merge happened but before we got to here.. means in-memory 1555 // locations of offlined merged, now-closed, regions is lost. Should be ok. We 1556 // assign the merged region later. 1557 if (sn != null) { 1558 addLocation(putOfMerged, sn, 1, mergedRegion.getReplicaId()); 1559 } 1560 1561 // Add empty locations for region replicas of the merged region so that number of replicas 1562 // can be cached whenever the primary region is looked up from meta 1563 for (int i = 1; i < regionReplication; i++) { 1564 addEmptyLocation(putOfMerged, i); 1565 } 1566 // add parent reference for serial replication 1567 if (!replicationParents.isEmpty()) { 1568 addReplicationParent(putOfMerged, replicationParents); 1569 } 1570 byte[] tableRow = Bytes.toBytes(mergedRegion.getRegionNameAsString() + HConstants.DELIMITER); 1571 multiMutate(meta, tableRow, mutations); 1572 } 1573 } 1574 1575 /** 1576 * Splits the region into two in an atomic operation. Offlines the parent region with the 1577 * information that it is split into two, and also adds the daughter regions. Does not add the 1578 * location information to the daughter regions since they are not open yet. 1579 * @param connection connection we're using 1580 * @param parent the parent region which is split 1581 * @param parentOpenSeqNum the next open sequence id for parent region, used by serial 1582 * replication. -1 if not necessary. 1583 * @param splitA Split daughter region A 1584 * @param splitB Split daughter region B 1585 * @param sn the location of the region 1586 */ 1587 public static void splitRegion(Connection connection, RegionInfo parent, long parentOpenSeqNum, 1588 RegionInfo splitA, RegionInfo splitB, ServerName sn, int regionReplication) throws IOException { 1589 try (Table meta = getMetaHTable(connection)) { 1590 long time = EnvironmentEdgeManager.currentTime(); 1591 // Put for parent 1592 Put putParent = makePutFromRegionInfo( 1593 RegionInfoBuilder.newBuilder(parent).setOffline(true).setSplit(true).build(), time); 1594 addDaughtersToPut(putParent, splitA, splitB); 1595 1596 // Puts for daughters 1597 Put putA = makePutFromRegionInfo(splitA, time); 1598 Put putB = makePutFromRegionInfo(splitB, time); 1599 if (parentOpenSeqNum > 0) { 1600 addReplicationBarrier(putParent, parentOpenSeqNum); 1601 addReplicationParent(putA, Collections.singletonList(parent)); 1602 addReplicationParent(putB, Collections.singletonList(parent)); 1603 } 1604 // Set initial state to CLOSED 1605 // NOTE: If initial state is not set to CLOSED then daughter regions get added with the 1606 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 1607 // master tries to assign these offline regions. This is followed by re-assignments of the 1608 // daughter regions from resumed {@link SplitTableRegionProcedure} 1609 addRegionStateToPut(putA, RegionState.State.CLOSED); 1610 addRegionStateToPut(putB, RegionState.State.CLOSED); 1611 1612 addSequenceNum(putA, 1, splitA.getReplicaId()); // new regions, openSeqNum = 1 is fine. 1613 addSequenceNum(putB, 1, splitB.getReplicaId()); 1614 1615 // Add empty locations for region replicas of daughters so that number of replicas can be 1616 // cached whenever the primary region is looked up from meta 1617 for (int i = 1; i < regionReplication; i++) { 1618 addEmptyLocation(putA, i); 1619 addEmptyLocation(putB, i); 1620 } 1621 1622 byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER); 1623 multiMutate(meta, tableRow, putParent, putA, putB); 1624 } 1625 } 1626 1627 /** 1628 * Update state of the table in meta. 1629 * @param connection what we use for update 1630 * @param state new state 1631 */ 1632 private static void updateTableState(Connection connection, TableState state) throws IOException { 1633 Put put = makePutFromTableState(state, EnvironmentEdgeManager.currentTime()); 1634 putToMetaTable(connection, put); 1635 LOG.info("Updated {} in hbase:meta", state); 1636 } 1637 1638 /** 1639 * Construct PUT for given state 1640 * @param state new state 1641 */ 1642 public static Put makePutFromTableState(TableState state, long ts) { 1643 Put put = new Put(state.getTableName().getName(), ts); 1644 put.addColumn(getTableFamily(), getTableStateColumn(), state.convert().toByteArray()); 1645 return put; 1646 } 1647 1648 /** 1649 * Remove state for table from meta 1650 * @param connection to use for deletion 1651 * @param table to delete state for 1652 */ 1653 public static void deleteTableState(Connection connection, TableName table) throws IOException { 1654 long time = EnvironmentEdgeManager.currentTime(); 1655 Delete delete = new Delete(table.getName()); 1656 delete.addColumns(getTableFamily(), getTableStateColumn(), time); 1657 deleteFromMetaTable(connection, delete); 1658 LOG.info("Deleted table " + table + " state from META"); 1659 } 1660 1661 private static void multiMutate(Table table, byte[] row, Mutation... mutations) 1662 throws IOException { 1663 multiMutate(table, row, Arrays.asList(mutations)); 1664 } 1665 1666 /** 1667 * Performs an atomic multi-mutate operation against the given table. Used by the likes of merge 1668 * and split as these want to make atomic mutations across multiple rows. 1669 * @throws IOException even if we encounter a RuntimeException, we'll still wrap it in an IOE. 1670 */ 1671 static void multiMutate(final Table table, byte[] row, final List<Mutation> mutations) 1672 throws IOException { 1673 debugLogMutations(mutations); 1674 Batch.Call<MultiRowMutationService, MutateRowsResponse> callable = instance -> { 1675 MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder(); 1676 for (Mutation mutation : mutations) { 1677 if (mutation instanceof Put) { 1678 builder.addMutationRequest( 1679 ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.PUT, mutation)); 1680 } else if (mutation instanceof Delete) { 1681 builder.addMutationRequest( 1682 ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.DELETE, mutation)); 1683 } else { 1684 throw new DoNotRetryIOException( 1685 "multi in MetaEditor doesn't support " + mutation.getClass().getName()); 1686 } 1687 } 1688 ServerRpcController controller = new ServerRpcController(); 1689 CoprocessorRpcUtils.BlockingRpcCallback<MutateRowsResponse> rpcCallback = 1690 new CoprocessorRpcUtils.BlockingRpcCallback<>(); 1691 instance.mutateRows(controller, builder.build(), rpcCallback); 1692 MutateRowsResponse resp = rpcCallback.get(); 1693 if (controller.failedOnException()) { 1694 throw controller.getFailedOn(); 1695 } 1696 return resp; 1697 }; 1698 try { 1699 table.coprocessorService(MultiRowMutationService.class, row, row, callable); 1700 } catch (Throwable e) { 1701 // Throw if an IOE else wrap in an IOE EVEN IF IT IS a RuntimeException (e.g. 1702 // a RejectedExecutionException because the hosting exception is shutting down. 1703 // This is old behavior worth reexamining. Procedures doing merge or split 1704 // currently don't handle RuntimeExceptions coming up out of meta table edits. 1705 // Would have to work on this at least. See HBASE-23904. 1706 Throwables.throwIfInstanceOf(e, IOException.class); 1707 throw new IOException(e); 1708 } 1709 } 1710 1711 /** 1712 * Updates the location of the specified region in hbase:meta to be the specified server hostname 1713 * and startcode. 1714 * <p> 1715 * Uses passed catalog tracker to get a connection to the server hosting hbase:meta and makes 1716 * edits to that region. 1717 * @param connection connection we're using 1718 * @param regionInfo region to update location of 1719 * @param openSeqNum the latest sequence number obtained when the region was open 1720 * @param sn Server name 1721 * @param masterSystemTime wall clock time from master if passed in the open region RPC 1722 */ 1723 public static void updateRegionLocation(Connection connection, RegionInfo regionInfo, 1724 ServerName sn, long openSeqNum, long masterSystemTime) throws IOException { 1725 updateLocation(connection, regionInfo, sn, openSeqNum, masterSystemTime); 1726 } 1727 1728 /** 1729 * Updates the location of the specified region to be the specified server. 1730 * <p> 1731 * Connects to the specified server which should be hosting the specified catalog region name to 1732 * perform the edit. 1733 * @param connection connection we're using 1734 * @param regionInfo region to update location of 1735 * @param sn Server name 1736 * @param openSeqNum the latest sequence number obtained when the region was open 1737 * @param masterSystemTime wall clock time from master if passed in the open region RPC 1738 * @throws IOException In particular could throw {@link java.net.ConnectException} if the server 1739 * is down on other end. 1740 */ 1741 private static void updateLocation(Connection connection, RegionInfo regionInfo, ServerName sn, 1742 long openSeqNum, long masterSystemTime) throws IOException { 1743 // region replicas are kept in the primary region's row 1744 Put put = new Put(getMetaKeyForRegion(regionInfo), masterSystemTime); 1745 addRegionInfo(put, regionInfo); 1746 addLocation(put, sn, openSeqNum, regionInfo.getReplicaId()); 1747 putToMetaTable(connection, put); 1748 LOG.info("Updated row {} with server=", regionInfo.getRegionNameAsString(), sn); 1749 } 1750 1751 /** 1752 * Deletes the specified region from META. 1753 * @param connection connection we're using 1754 * @param regionInfo region to be deleted from META 1755 */ 1756 public static void deleteRegionInfo(Connection connection, RegionInfo regionInfo) 1757 throws IOException { 1758 Delete delete = new Delete(regionInfo.getRegionName()); 1759 delete.addFamily(getCatalogFamily(), HConstants.LATEST_TIMESTAMP); 1760 deleteFromMetaTable(connection, delete); 1761 LOG.info("Deleted " + regionInfo.getRegionNameAsString()); 1762 } 1763 1764 /** 1765 * Deletes the specified regions from META. 1766 * @param connection connection we're using 1767 * @param regionsInfo list of regions to be deleted from META 1768 */ 1769 public static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo) 1770 throws IOException { 1771 deleteRegionInfos(connection, regionsInfo, EnvironmentEdgeManager.currentTime()); 1772 } 1773 1774 /** 1775 * Deletes the specified regions from META. 1776 * @param connection connection we're using 1777 * @param regionsInfo list of regions to be deleted from META 1778 */ 1779 private static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo, 1780 long ts) throws IOException { 1781 List<Delete> deletes = new ArrayList<>(regionsInfo.size()); 1782 for (RegionInfo hri : regionsInfo) { 1783 Delete e = new Delete(hri.getRegionName()); 1784 e.addFamily(getCatalogFamily(), ts); 1785 deletes.add(e); 1786 } 1787 deleteFromMetaTable(connection, deletes); 1788 LOG.info("Deleted {} regions from META", regionsInfo.size()); 1789 LOG.debug("Deleted regions: {}", regionsInfo); 1790 } 1791 1792 /** 1793 * Overwrites the specified regions from hbase:meta. Deletes old rows for the given regions and 1794 * adds new ones. Regions added back have state CLOSED. 1795 * @param connection connection we're using 1796 * @param regionInfos list of regions to be added to META 1797 */ 1798 public static void overwriteRegions(Connection connection, List<RegionInfo> regionInfos, 1799 int regionReplication) throws IOException { 1800 // use master time for delete marker and the Put 1801 long now = EnvironmentEdgeManager.currentTime(); 1802 deleteRegionInfos(connection, regionInfos, now); 1803 // Why sleep? This is the easiest way to ensure that the previous deletes does not 1804 // eclipse the following puts, that might happen in the same ts from the server. 1805 // See HBASE-9906, and HBASE-9879. Once either HBASE-9879, HBASE-8770 is fixed, 1806 // or HBASE-9905 is fixed and meta uses seqIds, we do not need the sleep. 1807 // 1808 // HBASE-13875 uses master timestamp for the mutations. The 20ms sleep is not needed 1809 addRegionsToMeta(connection, regionInfos, regionReplication, now + 1); 1810 LOG.info("Overwritten " + regionInfos.size() + " regions to Meta"); 1811 LOG.debug("Overwritten regions: {} ", regionInfos); 1812 } 1813 1814 /** 1815 * Deletes merge qualifiers for the specified merge region. 1816 * @param connection connection we're using 1817 * @param mergeRegion the merged region 1818 */ 1819 public static void deleteMergeQualifiers(Connection connection, final RegionInfo mergeRegion) 1820 throws IOException { 1821 Delete delete = new Delete(mergeRegion.getRegionName()); 1822 // NOTE: We are doing a new hbase:meta read here. 1823 Cell[] cells = getRegionResult(connection, mergeRegion.getRegionName()).rawCells(); 1824 if (cells == null || cells.length == 0) { 1825 return; 1826 } 1827 List<byte[]> qualifiers = new ArrayList<>(); 1828 for (Cell cell : cells) { 1829 if (!isMergeQualifierPrefix(cell)) { 1830 continue; 1831 } 1832 byte[] qualifier = CellUtil.cloneQualifier(cell); 1833 qualifiers.add(qualifier); 1834 delete.addColumns(getCatalogFamily(), qualifier, HConstants.LATEST_TIMESTAMP); 1835 } 1836 1837 // There will be race condition that a GCMultipleMergedRegionsProcedure is scheduled while 1838 // the previous GCMultipleMergedRegionsProcedure is still going on, in this case, the second 1839 // GCMultipleMergedRegionsProcedure could delete the merged region by accident! 1840 if (qualifiers.isEmpty()) { 1841 LOG.info("No merged qualifiers for region " + mergeRegion.getRegionNameAsString() 1842 + " in meta table, they are cleaned up already, Skip."); 1843 return; 1844 } 1845 1846 deleteFromMetaTable(connection, delete); 1847 LOG.info( 1848 "Deleted merge references in " + mergeRegion.getRegionNameAsString() + ", deleted qualifiers " 1849 + qualifiers.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", "))); 1850 } 1851 1852 public static Put addRegionInfo(final Put p, final RegionInfo hri) throws IOException { 1853 p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow()) 1854 .setFamily(getCatalogFamily()).setQualifier(HConstants.REGIONINFO_QUALIFIER) 1855 .setTimestamp(p.getTimestamp()).setType(Cell.Type.Put) 1856 // Serialize the Default Replica HRI otherwise scan of hbase:meta 1857 // shows an info:regioninfo value with encoded name and region 1858 // name that differs from that of the hbase;meta row. 1859 .setValue(RegionInfo.toByteArray(RegionReplicaUtil.getRegionInfoForDefaultReplica(hri))) 1860 .build()); 1861 return p; 1862 } 1863 1864 public static Put addLocation(Put p, ServerName sn, long openSeqNum, int replicaId) 1865 throws IOException { 1866 CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); 1867 return p 1868 .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily()) 1869 .setQualifier(getServerColumn(replicaId)).setTimestamp(p.getTimestamp()) 1870 .setType(Cell.Type.Put).setValue(Bytes.toBytes(sn.getAddress().toString())).build()) 1871 .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily()) 1872 .setQualifier(getStartCodeColumn(replicaId)).setTimestamp(p.getTimestamp()) 1873 .setType(Cell.Type.Put).setValue(Bytes.toBytes(sn.getStartcode())).build()) 1874 .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily()) 1875 .setQualifier(getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp()) 1876 .setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum)).build()); 1877 } 1878 1879 private static void writeRegionName(ByteArrayOutputStream out, byte[] regionName) { 1880 for (byte b : regionName) { 1881 if (b == ESCAPE_BYTE) { 1882 out.write(ESCAPE_BYTE); 1883 } 1884 out.write(b); 1885 } 1886 } 1887 1888 public static byte[] getParentsBytes(List<RegionInfo> parents) { 1889 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 1890 Iterator<RegionInfo> iter = parents.iterator(); 1891 writeRegionName(bos, iter.next().getRegionName()); 1892 while (iter.hasNext()) { 1893 bos.write(ESCAPE_BYTE); 1894 bos.write(SEPARATED_BYTE); 1895 writeRegionName(bos, iter.next().getRegionName()); 1896 } 1897 return bos.toByteArray(); 1898 } 1899 1900 private static List<byte[]> parseParentsBytes(byte[] bytes) { 1901 List<byte[]> parents = new ArrayList<>(); 1902 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 1903 for (int i = 0; i < bytes.length; i++) { 1904 if (bytes[i] == ESCAPE_BYTE) { 1905 i++; 1906 if (bytes[i] == SEPARATED_BYTE) { 1907 parents.add(bos.toByteArray()); 1908 bos.reset(); 1909 continue; 1910 } 1911 // fall through to append the byte 1912 } 1913 bos.write(bytes[i]); 1914 } 1915 if (bos.size() > 0) { 1916 parents.add(bos.toByteArray()); 1917 } 1918 return parents; 1919 } 1920 1921 private static void addReplicationParent(Put put, List<RegionInfo> parents) throws IOException { 1922 byte[] value = getParentsBytes(parents); 1923 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 1924 .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(REPLICATION_PARENT_QUALIFIER) 1925 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(value).build()); 1926 } 1927 1928 public static Put makePutForReplicationBarrier(RegionInfo regionInfo, long openSeqNum, long ts) 1929 throws IOException { 1930 Put put = new Put(regionInfo.getRegionName(), ts); 1931 addReplicationBarrier(put, openSeqNum); 1932 return put; 1933 } 1934 1935 /** 1936 * See class comment on SerialReplicationChecker 1937 */ 1938 public static void addReplicationBarrier(Put put, long openSeqNum) throws IOException { 1939 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 1940 .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(HConstants.SEQNUM_QUALIFIER) 1941 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum)) 1942 .build()); 1943 } 1944 1945 public static Put addEmptyLocation(Put p, int replicaId) throws IOException { 1946 CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); 1947 return p 1948 .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily()) 1949 .setQualifier(getServerColumn(replicaId)).setTimestamp(p.getTimestamp()) 1950 .setType(Cell.Type.Put).build()) 1951 .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily()) 1952 .setQualifier(getStartCodeColumn(replicaId)).setTimestamp(p.getTimestamp()) 1953 .setType(Cell.Type.Put).build()) 1954 .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily()) 1955 .setQualifier(getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp()) 1956 .setType(Cell.Type.Put).build()); 1957 } 1958 1959 public static final class ReplicationBarrierResult { 1960 private final long[] barriers; 1961 private final RegionState.State state; 1962 private final List<byte[]> parentRegionNames; 1963 1964 ReplicationBarrierResult(long[] barriers, State state, List<byte[]> parentRegionNames) { 1965 this.barriers = barriers; 1966 this.state = state; 1967 this.parentRegionNames = parentRegionNames; 1968 } 1969 1970 public long[] getBarriers() { 1971 return barriers; 1972 } 1973 1974 public RegionState.State getState() { 1975 return state; 1976 } 1977 1978 public List<byte[]> getParentRegionNames() { 1979 return parentRegionNames; 1980 } 1981 1982 @Override 1983 public String toString() { 1984 return "ReplicationBarrierResult [barriers=" + Arrays.toString(barriers) + ", state=" + state 1985 + ", parentRegionNames=" 1986 + parentRegionNames.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")) 1987 + "]"; 1988 } 1989 } 1990 1991 private static long getReplicationBarrier(Cell c) { 1992 return Bytes.toLong(c.getValueArray(), c.getValueOffset(), c.getValueLength()); 1993 } 1994 1995 public static long[] getReplicationBarriers(Result result) { 1996 return result.getColumnCells(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER) 1997 .stream().mapToLong(MetaTableAccessor::getReplicationBarrier).sorted().distinct().toArray(); 1998 } 1999 2000 private static ReplicationBarrierResult getReplicationBarrierResult(Result result) { 2001 long[] barriers = getReplicationBarriers(result); 2002 byte[] stateBytes = result.getValue(getCatalogFamily(), getRegionStateColumn()); 2003 RegionState.State state = 2004 stateBytes != null ? RegionState.State.valueOf(Bytes.toString(stateBytes)) : null; 2005 byte[] parentRegionsBytes = 2006 result.getValue(HConstants.REPLICATION_BARRIER_FAMILY, REPLICATION_PARENT_QUALIFIER); 2007 List<byte[]> parentRegionNames = 2008 parentRegionsBytes != null ? parseParentsBytes(parentRegionsBytes) : Collections.emptyList(); 2009 return new ReplicationBarrierResult(barriers, state, parentRegionNames); 2010 } 2011 2012 public static ReplicationBarrierResult getReplicationBarrierResult(Connection conn, 2013 TableName tableName, byte[] row, byte[] encodedRegionName) throws IOException { 2014 byte[] metaStartKey = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false); 2015 byte[] metaStopKey = 2016 RegionInfo.createRegionName(tableName, HConstants.EMPTY_START_ROW, "", false); 2017 Scan scan = new Scan().withStartRow(metaStartKey).withStopRow(metaStopKey) 2018 .addColumn(getCatalogFamily(), getRegionStateColumn()) 2019 .addFamily(HConstants.REPLICATION_BARRIER_FAMILY).readAllVersions().setReversed(true) 2020 .setCaching(10); 2021 try (Table table = getMetaHTable(conn); ResultScanner scanner = table.getScanner(scan)) { 2022 for (Result result;;) { 2023 result = scanner.next(); 2024 if (result == null) { 2025 return new ReplicationBarrierResult(new long[0], null, Collections.emptyList()); 2026 } 2027 byte[] regionName = result.getRow(); 2028 // TODO: we may look up a region which has already been split or merged so we need to check 2029 // whether the encoded name matches. Need to find a way to quit earlier when there is no 2030 // record for the given region, for now it will scan to the end of the table. 2031 if ( 2032 !Bytes.equals(encodedRegionName, Bytes.toBytes(RegionInfo.encodeRegionName(regionName))) 2033 ) { 2034 continue; 2035 } 2036 return getReplicationBarrierResult(result); 2037 } 2038 } 2039 } 2040 2041 public static long[] getReplicationBarrier(Connection conn, byte[] regionName) 2042 throws IOException { 2043 try (Table table = getMetaHTable(conn)) { 2044 Result result = table.get(new Get(regionName) 2045 .addColumn(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER) 2046 .readAllVersions()); 2047 return getReplicationBarriers(result); 2048 } 2049 } 2050 2051 public static List<Pair<String, Long>> getTableEncodedRegionNameAndLastBarrier(Connection conn, 2052 TableName tableName) throws IOException { 2053 List<Pair<String, Long>> list = new ArrayList<>(); 2054 scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION), 2055 getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, r -> { 2056 byte[] value = 2057 r.getValue(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER); 2058 if (value == null) { 2059 return true; 2060 } 2061 long lastBarrier = Bytes.toLong(value); 2062 String encodedRegionName = RegionInfo.encodeRegionName(r.getRow()); 2063 list.add(Pair.newPair(encodedRegionName, lastBarrier)); 2064 return true; 2065 }); 2066 return list; 2067 } 2068 2069 public static List<String> getTableEncodedRegionNamesForSerialReplication(Connection conn, 2070 TableName tableName) throws IOException { 2071 List<String> list = new ArrayList<>(); 2072 scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION), 2073 getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, 2074 new FirstKeyOnlyFilter(), Integer.MAX_VALUE, r -> { 2075 list.add(RegionInfo.encodeRegionName(r.getRow())); 2076 return true; 2077 }, CatalogReplicaMode.NONE); 2078 return list; 2079 } 2080 2081 private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException { 2082 if (!METALOG.isDebugEnabled()) { 2083 return; 2084 } 2085 // Logging each mutation in separate line makes it easier to see diff between them visually 2086 // because of common starting indentation. 2087 for (Mutation mutation : mutations) { 2088 debugLogMutation(mutation); 2089 } 2090 } 2091 2092 private static void debugLogMutation(Mutation p) throws IOException { 2093 METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON()); 2094 } 2095 2096 private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException { 2097 return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow()) 2098 .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getSeqNumColumn(replicaId)) 2099 .setTimestamp(p.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum)) 2100 .build()); 2101 } 2102}