001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import edu.umd.cs.findbugs.annotations.NonNull; 021import edu.umd.cs.findbugs.annotations.Nullable; 022import java.io.ByteArrayOutputStream; 023import java.io.Closeable; 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.Collection; 028import java.util.Collections; 029import java.util.Iterator; 030import java.util.LinkedHashMap; 031import java.util.List; 032import java.util.Map; 033import java.util.NavigableMap; 034import java.util.SortedMap; 035import java.util.TreeMap; 036import java.util.regex.Matcher; 037import java.util.regex.Pattern; 038import java.util.stream.Collectors; 039import org.apache.hadoop.conf.Configuration; 040import org.apache.hadoop.hbase.Cell.Type; 041import org.apache.hadoop.hbase.client.Connection; 042import org.apache.hadoop.hbase.client.Consistency; 043import org.apache.hadoop.hbase.client.Delete; 044import org.apache.hadoop.hbase.client.Get; 045import org.apache.hadoop.hbase.client.Mutation; 046import org.apache.hadoop.hbase.client.Put; 047import org.apache.hadoop.hbase.client.RegionInfo; 048import 
org.apache.hadoop.hbase.client.RegionInfoBuilder; 049import org.apache.hadoop.hbase.client.RegionReplicaUtil; 050import org.apache.hadoop.hbase.client.Result; 051import org.apache.hadoop.hbase.client.ResultScanner; 052import org.apache.hadoop.hbase.client.Scan; 053import org.apache.hadoop.hbase.client.Table; 054import org.apache.hadoop.hbase.client.TableState; 055import org.apache.hadoop.hbase.client.coprocessor.Batch; 056import org.apache.hadoop.hbase.exceptions.DeserializationException; 057import org.apache.hadoop.hbase.filter.Filter; 058import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; 059import org.apache.hadoop.hbase.filter.RowFilter; 060import org.apache.hadoop.hbase.filter.SubstringComparator; 061import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils; 062import org.apache.hadoop.hbase.ipc.ServerRpcController; 063import org.apache.hadoop.hbase.master.RegionState; 064import org.apache.hadoop.hbase.master.RegionState.State; 065import org.apache.hadoop.hbase.protobuf.ProtobufUtil; 066import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; 067import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MultiRowMutationService; 068import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest; 069import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse; 070import org.apache.hadoop.hbase.util.Bytes; 071import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 072import org.apache.hadoop.hbase.util.ExceptionUtil; 073import org.apache.hadoop.hbase.util.Pair; 074import org.apache.hadoop.hbase.util.PairOfSameType; 075import org.apache.hbase.thirdparty.com.google.common.base.Throwables; 076import org.apache.yetus.audience.InterfaceAudience; 077import org.slf4j.Logger; 078import org.slf4j.LoggerFactory; 079 080/** 081 * <p> 082 * Read/write operations on <code>hbase:meta</code> region as well as assignment information stored 083 * to <code>hbase:meta</code>. 
084 * </p> 085 * <p> 086 * Some of the methods of this class take ZooKeeperWatcher as a param. The only reason for this is 087 * when this class is used on client-side (e.g. HBaseAdmin), we want to use short-lived connection 088 * (opened before each operation, closed right after), while when used on HM or HRS (like in 089 * AssignmentManager) we want permanent connection. 090 * </p> 091 * <p> 092 * HBASE-10070 adds a replicaId to HRI, meaning more than one HRI can be defined for the same table 093 * range (table, startKey, endKey). For every range, there will be at least one HRI defined which is 094 * called default replica. 095 * </p> 096 * <p> 097 * <h2>Meta layout</h2> 098 * 099 * <pre> 100 * For each table there is single row named for the table with a 'table' column family. 101 * The column family currently has one column in it, the 'state' column: 102 * 103 * table:state => contains table state 104 * 105 * Then for each table range ('Region'), there is a single row, formatted as: 106 * <tableName>,<startKey>,<regionId>,<encodedRegionName>. 107 * This row is the serialized regionName of the default region replica. 
 * Columns are:
 * info:regioninfo         => contains serialized HRI for the default region replica
 * info:server             => contains hostname:port (in string form) for the server hosting
 *                            the default regionInfo replica
 * info:server_<replicaId> => contains hostname:port (in string form) for the server hosting
 *                            the regionInfo replica with replicaId
 * info:serverstartcode    => contains server start code (in binary long form) for the server
 *                            hosting the default regionInfo replica
 * info:serverstartcode_<replicaId> => contains server start code (in binary long form) for
 *                                     the server hosting the regionInfo replica with
 *                                     replicaId
 * info:seqnumDuringOpen   => contains seqNum (in binary long form) for the region at the time
 *                            the server opened the region with default replicaId
 * info:seqnumDuringOpen_<replicaId> => contains seqNum (in binary long form) for the region
 *                                      at the time the server opened the region with
 *                                      replicaId
 * info:splitA             => contains a serialized HRI for the first daughter region if the
 *                            region is split
 * info:splitB             => contains a serialized HRI for the second daughter region if the
 *                            region is split
 * info:merge*             => contains a serialized HRI for a merge parent region. There will be two
 *                            or more of these columns in a row. A row that has these columns is
 *                            undergoing a merge and is the result of the merge. Columns listed
 *                            in merge* columns are the parents of this merged region. Example
 *                            columns: info:merge0001, info:merge0002. You may also see 'mergeA',
 *                            and 'mergeB'. This is old form replaced by the new format that allows
 *                            for more than two parents to be merged at a time.
 * TODO: Add rep_barrier for serial replication explanation. See SerialReplicationChecker.
136 * </pre> 137 * </p> 138 * <p> 139 * The actual layout of meta should be encapsulated inside MetaTableAccessor methods, and should not 140 * leak out of it (through Result objects, etc) 141 * </p> 142 */ 143@InterfaceAudience.Private 144public class MetaTableAccessor { 145 146 private static final Logger LOG = LoggerFactory.getLogger(MetaTableAccessor.class); 147 private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META"); 148 149 public static final byte[] REPLICATION_PARENT_QUALIFIER = Bytes.toBytes("parent"); 150 151 private static final byte ESCAPE_BYTE = (byte) 0xFF; 152 153 private static final byte SEPARATED_BYTE = 0x00; 154 155 @InterfaceAudience.Private 156 public enum QueryType { 157 ALL(HConstants.TABLE_FAMILY, HConstants.CATALOG_FAMILY), 158 REGION(HConstants.CATALOG_FAMILY), 159 TABLE(HConstants.TABLE_FAMILY), 160 REPLICATION(HConstants.REPLICATION_BARRIER_FAMILY); 161 162 private final byte[][] families; 163 164 QueryType(byte[]... families) { 165 this.families = families; 166 } 167 168 byte[][] getFamilies() { 169 return this.families; 170 } 171 } 172 173 /** The delimiter for meta columns for replicaIds > 0 */ 174 static final char META_REPLICA_ID_DELIMITER = '_'; 175 176 /** A regex for parsing server columns from meta. See above javadoc for meta layout */ 177 private static final Pattern SERVER_COLUMN_PATTERN 178 = Pattern.compile("^server(_[0-9a-fA-F]{4})?$"); 179 180 //////////////////////// 181 // Reading operations // 182 //////////////////////// 183 184 /** 185 * Performs a full scan of <code>hbase:meta</code> for regions. 186 * @param connection connection we're using 187 * @param visitor Visitor invoked against each row in regions family. 188 */ 189 public static void fullScanRegions(Connection connection, final Visitor visitor) 190 throws IOException { 191 scanMeta(connection, null, null, QueryType.REGION, visitor); 192 } 193 194 /** 195 * Performs a full scan of <code>hbase:meta</code> for regions. 
196 * @param connection connection we're using 197 */ 198 public static List<Result> fullScanRegions(Connection connection) throws IOException { 199 return fullScan(connection, QueryType.REGION); 200 } 201 202 /** 203 * Performs a full scan of <code>hbase:meta</code> for tables. 204 * @param connection connection we're using 205 * @param visitor Visitor invoked against each row in tables family. 206 */ 207 public static void fullScanTables(Connection connection, final Visitor visitor) 208 throws IOException { 209 scanMeta(connection, null, null, QueryType.TABLE, visitor); 210 } 211 212 /** 213 * Performs a full scan of <code>hbase:meta</code>. 214 * @param connection connection we're using 215 * @param type scanned part of meta 216 * @return List of {@link Result} 217 */ 218 private static List<Result> fullScan(Connection connection, QueryType type) throws IOException { 219 CollectAllVisitor v = new CollectAllVisitor(); 220 scanMeta(connection, null, null, type, v); 221 return v.getResults(); 222 } 223 224 /** 225 * Callers should call close on the returned {@link Table} instance. 226 * @param connection connection we're using to access Meta 227 * @return An {@link Table} for <code>hbase:meta</code> 228 */ 229 public static Table getMetaHTable(final Connection connection) throws IOException { 230 // We used to pass whole CatalogTracker in here, now we just pass in Connection 231 if (connection == null) { 232 throw new NullPointerException("No connection"); 233 } else if (connection.isClosed()) { 234 throw new IOException("connection is closed"); 235 } 236 return connection.getTable(TableName.META_TABLE_NAME); 237 } 238 239 /** 240 * @param t Table to use (will be closed when done). 
241 * @param g Get to run 242 */ 243 private static Result get(final Table t, final Get g) throws IOException { 244 if (t == null) return null; 245 try { 246 return t.get(g); 247 } finally { 248 t.close(); 249 } 250 } 251 252 /** 253 * Gets the region info and assignment for the specified region. 254 * @param connection connection we're using 255 * @param regionName Region to lookup. 256 * @return Location and RegionInfo for <code>regionName</code> 257 * @deprecated use {@link #getRegionLocation(Connection, byte[])} instead 258 */ 259 @Deprecated 260 public static Pair<RegionInfo, ServerName> getRegion(Connection connection, byte [] regionName) 261 throws IOException { 262 HRegionLocation location = getRegionLocation(connection, regionName); 263 return location == null 264 ? null 265 : new Pair<>(location.getRegionInfo(), location.getServerName()); 266 } 267 268 /** 269 * Returns the HRegionLocation from meta for the given region 270 * @param connection connection we're using 271 * @param regionName region we're looking for 272 * @return HRegionLocation for the given region 273 */ 274 public static HRegionLocation getRegionLocation(Connection connection, byte[] regionName) 275 throws IOException { 276 byte[] row = regionName; 277 RegionInfo parsedInfo = null; 278 try { 279 parsedInfo = parseRegionInfoFromRegionName(regionName); 280 row = getMetaKeyForRegion(parsedInfo); 281 } catch (Exception parseEx) { 282 // Ignore. This is used with tableName passed as regionName. 283 } 284 Get get = new Get(row); 285 get.addFamily(HConstants.CATALOG_FAMILY); 286 Result r = get(getMetaHTable(connection), get); 287 RegionLocations locations = getRegionLocations(r); 288 return locations == null ? null 289 : locations.getRegionLocation(parsedInfo == null ? 
0 : parsedInfo.getReplicaId()); 290 } 291 292 /** 293 * Returns the HRegionLocation from meta for the given region 294 * @param connection connection we're using 295 * @param regionInfo region information 296 * @return HRegionLocation for the given region 297 */ 298 public static HRegionLocation getRegionLocation(Connection connection, RegionInfo regionInfo) 299 throws IOException { 300 return getRegionLocation(getCatalogFamilyRow(connection, regionInfo), 301 regionInfo, regionInfo.getReplicaId()); 302 } 303 304 /** 305 * @return Return the {@link HConstants#CATALOG_FAMILY} row from hbase:meta. 306 */ 307 public static Result getCatalogFamilyRow(Connection connection, RegionInfo ri) 308 throws IOException { 309 Get get = new Get(getMetaKeyForRegion(ri)); 310 get.addFamily(HConstants.CATALOG_FAMILY); 311 return get(getMetaHTable(connection), get); 312 } 313 314 /** Returns the row key to use for this regionInfo */ 315 public static byte[] getMetaKeyForRegion(RegionInfo regionInfo) { 316 return RegionReplicaUtil.getRegionInfoForDefaultReplica(regionInfo).getRegionName(); 317 } 318 319 /** Returns an HRI parsed from this regionName. Not all the fields of the HRI 320 * is stored in the name, so the returned object should only be used for the fields 321 * in the regionName. 322 */ 323 // This should be moved to RegionInfo? TODO. 324 public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException { 325 byte[][] fields = RegionInfo.parseRegionName(regionName); 326 long regionId = Long.parseLong(Bytes.toString(fields[2])); 327 int replicaId = fields.length > 3 ? Integer.parseInt(Bytes.toString(fields[3]), 16) : 0; 328 return RegionInfoBuilder.newBuilder(TableName.valueOf(fields[0])) 329 .setStartKey(fields[1]).setRegionId(regionId).setReplicaId(replicaId).build(); 330 } 331 332 /** 333 * Gets the result in hbase:meta for the specified region. 
334 * @param connection connection we're using 335 * @param regionName region we're looking for 336 * @return result of the specified region 337 */ 338 public static Result getRegionResult(Connection connection, 339 byte[] regionName) throws IOException { 340 Get get = new Get(regionName); 341 get.addFamily(HConstants.CATALOG_FAMILY); 342 return get(getMetaHTable(connection), get); 343 } 344 345 /** 346 * Scans META table for a row whose key contains the specified <B>regionEncodedName</B>, 347 * returning a single related <code>Result</code> instance if any row is found, null otherwise. 348 * 349 * @param connection the connection to query META table. 350 * @param regionEncodedName the region encoded name to look for at META. 351 * @return <code>Result</code> instance with the row related info in META, null otherwise. 352 * @throws IOException if any errors occur while querying META. 353 */ 354 public static Result scanByRegionEncodedName(Connection connection, 355 String regionEncodedName) throws IOException { 356 RowFilter rowFilter = new RowFilter(CompareOperator.EQUAL, 357 new SubstringComparator(regionEncodedName)); 358 Scan scan = getMetaScan(connection.getConfiguration(), 1); 359 scan.setFilter(rowFilter); 360 try (Table table = getMetaHTable(connection); 361 ResultScanner resultScanner = table.getScanner(scan)) { 362 return resultScanner.next(); 363 } 364 } 365 366 /** 367 * @return Return all regioninfos listed in the 'info:merge*' columns of 368 * the <code>regionName</code> row. 369 */ 370 @Nullable 371 public static List<RegionInfo> getMergeRegions(Connection connection, byte[] regionName) 372 throws IOException { 373 return getMergeRegions(getRegionResult(connection, regionName).rawCells()); 374 } 375 376 /** 377 * Check whether the given {@code regionName} has any 'info:merge*' columns. 
378 */ 379 public static boolean hasMergeRegions(Connection conn, byte[] regionName) throws IOException { 380 return hasMergeRegions(getRegionResult(conn, regionName).rawCells()); 381 } 382 383 /** 384 * @return Deserialized values of <qualifier,regioninfo> pairs taken from column values that 385 * match the regex 'info:merge.*' in array of <code>cells</code>. 386 */ 387 @Nullable 388 public static Map<String, RegionInfo> getMergeRegionsWithName(Cell [] cells) { 389 if (cells == null) { 390 return null; 391 } 392 Map<String, RegionInfo> regionsToMerge = null; 393 for (Cell cell: cells) { 394 if (!isMergeQualifierPrefix(cell)) { 395 continue; 396 } 397 // Ok. This cell is that of a info:merge* column. 398 RegionInfo ri = RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(), 399 cell.getValueLength()); 400 if (ri != null) { 401 if (regionsToMerge == null) { 402 regionsToMerge = new LinkedHashMap<>(); 403 } 404 regionsToMerge.put(Bytes.toString(CellUtil.cloneQualifier(cell)), ri); 405 } 406 } 407 return regionsToMerge; 408 } 409 410 /** 411 * @return Deserialized regioninfo values taken from column values that match 412 * the regex 'info:merge.*' in array of <code>cells</code>. 413 */ 414 @Nullable 415 public static List<RegionInfo> getMergeRegions(Cell [] cells) { 416 Map<String, RegionInfo> mergeRegionsWithName = getMergeRegionsWithName(cells); 417 return (mergeRegionsWithName == null) ? null : new ArrayList<>(mergeRegionsWithName.values()); 418 } 419 420 /** 421 * @return True if any merge regions present in <code>cells</code>; i.e. 422 * the column in <code>cell</code> matches the regex 'info:merge.*'. 423 */ 424 public static boolean hasMergeRegions(Cell [] cells) { 425 for (Cell cell: cells) { 426 if (!isMergeQualifierPrefix(cell)) { 427 continue; 428 } 429 return true; 430 } 431 return false; 432 } 433 434 /** 435 * @return True if the column in <code>cell</code> matches the regex 'info:merge.*'. 
436 */ 437 private static boolean isMergeQualifierPrefix(Cell cell) { 438 // Check to see if has family and that qualifier starts with the merge qualifier 'merge' 439 return CellUtil.matchingFamily(cell, HConstants.CATALOG_FAMILY) && 440 PrivateCellUtil.qualifierStartsWith(cell, HConstants.MERGE_QUALIFIER_PREFIX); 441 } 442 443 /** 444 * Lists all of the regions currently in META. 445 * 446 * @param connection to connect with 447 * @param excludeOfflinedSplitParents False if we are to include offlined/splitparents regions, 448 * true and we'll leave out offlined regions from returned list 449 * @return List of all user-space regions. 450 */ 451 public static List<RegionInfo> getAllRegions(Connection connection, 452 boolean excludeOfflinedSplitParents) 453 throws IOException { 454 List<Pair<RegionInfo, ServerName>> result; 455 456 result = getTableRegionsAndLocations(connection, null, 457 excludeOfflinedSplitParents); 458 459 return getListOfRegionInfos(result); 460 461 } 462 463 /** 464 * Gets all of the regions of the specified table. Do not use this method 465 * to get meta table regions, use methods in MetaTableLocator instead. 466 * @param connection connection we're using 467 * @param tableName table we're looking for 468 * @return Ordered list of {@link RegionInfo}. 469 */ 470 public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName) 471 throws IOException { 472 return getTableRegions(connection, tableName, false); 473 } 474 475 /** 476 * Gets all of the regions of the specified table. Do not use this method 477 * to get meta table regions, use methods in MetaTableLocator instead. 478 * @param connection connection we're using 479 * @param tableName table we're looking for 480 * @param excludeOfflinedSplitParents If true, do not include offlined split 481 * parents in the return. 482 * @return Ordered list of {@link RegionInfo}. 
483 */ 484 public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName, 485 final boolean excludeOfflinedSplitParents) throws IOException { 486 List<Pair<RegionInfo, ServerName>> result = 487 getTableRegionsAndLocations(connection, tableName, excludeOfflinedSplitParents); 488 return getListOfRegionInfos(result); 489 } 490 491 private static List<RegionInfo> getListOfRegionInfos( 492 final List<Pair<RegionInfo, ServerName>> pairs) { 493 if (pairs == null || pairs.isEmpty()) { 494 return Collections.emptyList(); 495 } 496 List<RegionInfo> result = new ArrayList<>(pairs.size()); 497 for (Pair<RegionInfo, ServerName> pair : pairs) { 498 result.add(pair.getFirst()); 499 } 500 return result; 501 } 502 503 /** 504 * @param tableName table we're working with 505 * @return start row for scanning META according to query type 506 */ 507 public static byte[] getTableStartRowForMeta(TableName tableName, QueryType type) { 508 if (tableName == null) { 509 return null; 510 } 511 switch (type) { 512 case REGION: 513 byte[] startRow = new byte[tableName.getName().length + 2]; 514 System.arraycopy(tableName.getName(), 0, startRow, 0, tableName.getName().length); 515 startRow[startRow.length - 2] = HConstants.DELIMITER; 516 startRow[startRow.length - 1] = HConstants.DELIMITER; 517 return startRow; 518 case ALL: 519 case TABLE: 520 default: 521 return tableName.getName(); 522 } 523 } 524 525 /** 526 * @param tableName table we're working with 527 * @return stop row for scanning META according to query type 528 */ 529 public static byte[] getTableStopRowForMeta(TableName tableName, QueryType type) { 530 if (tableName == null) { 531 return null; 532 } 533 final byte[] stopRow; 534 switch (type) { 535 case REGION: 536 stopRow = new byte[tableName.getName().length + 3]; 537 System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length); 538 stopRow[stopRow.length - 3] = ' '; 539 stopRow[stopRow.length - 2] = HConstants.DELIMITER; 540 
stopRow[stopRow.length - 1] = HConstants.DELIMITER; 541 break; 542 case ALL: 543 case TABLE: 544 default: 545 stopRow = new byte[tableName.getName().length + 1]; 546 System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length); 547 stopRow[stopRow.length - 1] = ' '; 548 break; 549 } 550 return stopRow; 551 } 552 553 /** 554 * This method creates a Scan object that will only scan catalog rows that 555 * belong to the specified table. It doesn't specify any columns. 556 * This is a better alternative to just using a start row and scan until 557 * it hits a new table since that requires parsing the HRI to get the table 558 * name. 559 * @param tableName bytes of table's name 560 * @return configured Scan object 561 */ 562 public static Scan getScanForTableName(Configuration conf, TableName tableName) { 563 // Start key is just the table name with delimiters 564 byte[] startKey = getTableStartRowForMeta(tableName, QueryType.REGION); 565 // Stop key appends the smallest possible char to the table name 566 byte[] stopKey = getTableStopRowForMeta(tableName, QueryType.REGION); 567 568 Scan scan = getMetaScan(conf, -1); 569 scan.setStartRow(startKey); 570 scan.setStopRow(stopKey); 571 return scan; 572 } 573 574 private static Scan getMetaScan(Configuration conf, int rowUpperLimit) { 575 Scan scan = new Scan(); 576 int scannerCaching = conf.getInt(HConstants.HBASE_META_SCANNER_CACHING, 577 HConstants.DEFAULT_HBASE_META_SCANNER_CACHING); 578 if (conf.getBoolean(HConstants.USE_META_REPLICAS, HConstants.DEFAULT_USE_META_REPLICAS)) { 579 scan.setConsistency(Consistency.TIMELINE); 580 } 581 if (rowUpperLimit > 0) { 582 scan.setLimit(rowUpperLimit); 583 scan.setReadType(Scan.ReadType.PREAD); 584 } 585 scan.setCaching(scannerCaching); 586 return scan; 587 } 588 589 /** 590 * Do not use this method to get meta table regions, use methods in MetaTableLocator instead. 
591 * @param connection connection we're using 592 * @param tableName table we're looking for 593 * @return Return list of regioninfos and server. 594 */ 595 public static List<Pair<RegionInfo, ServerName>> 596 getTableRegionsAndLocations(Connection connection, TableName tableName) 597 throws IOException { 598 return getTableRegionsAndLocations(connection, tableName, true); 599 } 600 601 /** 602 * Do not use this method to get meta table regions, use methods in MetaTableLocator instead. 603 * @param connection connection we're using 604 * @param tableName table to work with, can be null for getting all regions 605 * @param excludeOfflinedSplitParents don't return split parents 606 * @return Return list of regioninfos and server addresses. 607 */ 608 // What happens here when 1M regions in hbase:meta? This won't scale? 609 public static List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations( 610 Connection connection, @Nullable final TableName tableName, 611 final boolean excludeOfflinedSplitParents) throws IOException { 612 if (tableName != null && tableName.equals(TableName.META_TABLE_NAME)) { 613 throw new IOException("This method can't be used to locate meta regions;" 614 + " use MetaTableLocator instead"); 615 } 616 // Make a version of CollectingVisitor that collects RegionInfo and ServerAddress 617 CollectingVisitor<Pair<RegionInfo, ServerName>> visitor = 618 new CollectingVisitor<Pair<RegionInfo, ServerName>>() { 619 private RegionLocations current = null; 620 621 @Override 622 public boolean visit(Result r) throws IOException { 623 current = getRegionLocations(r); 624 if (current == null || current.getRegionLocation().getRegion() == null) { 625 LOG.warn("No serialized RegionInfo in " + r); 626 return true; 627 } 628 RegionInfo hri = current.getRegionLocation().getRegion(); 629 if (excludeOfflinedSplitParents && hri.isSplitParent()) return true; 630 // Else call super and add this Result to the collection. 
631 return super.visit(r); 632 } 633 634 @Override 635 void add(Result r) { 636 if (current == null) { 637 return; 638 } 639 for (HRegionLocation loc : current.getRegionLocations()) { 640 if (loc != null) { 641 this.results.add(new Pair<>(loc.getRegion(), loc.getServerName())); 642 } 643 } 644 } 645 }; 646 scanMeta(connection, 647 getTableStartRowForMeta(tableName, QueryType.REGION), 648 getTableStopRowForMeta(tableName, QueryType.REGION), 649 QueryType.REGION, visitor); 650 return visitor.getResults(); 651 } 652 653 /** 654 * @param connection connection we're using 655 * @param serverName server whose regions we're interested in 656 * @return List of user regions installed on this server (does not include 657 * catalog regions). 658 * @throws IOException 659 */ 660 public static NavigableMap<RegionInfo, Result> 661 getServerUserRegions(Connection connection, final ServerName serverName) 662 throws IOException { 663 final NavigableMap<RegionInfo, Result> hris = new TreeMap<>(); 664 // Fill the above hris map with entries from hbase:meta that have the passed 665 // servername. 
666 CollectingVisitor<Result> v = new CollectingVisitor<Result>() { 667 @Override 668 void add(Result r) { 669 if (r == null || r.isEmpty()) return; 670 RegionLocations locations = getRegionLocations(r); 671 if (locations == null) return; 672 for (HRegionLocation loc : locations.getRegionLocations()) { 673 if (loc != null) { 674 if (loc.getServerName() != null && loc.getServerName().equals(serverName)) { 675 hris.put(loc.getRegion(), r); 676 } 677 } 678 } 679 } 680 }; 681 scanMeta(connection, null, null, QueryType.REGION, v); 682 return hris; 683 } 684 685 public static void fullScanMetaAndPrint(Connection connection) 686 throws IOException { 687 Visitor v = r -> { 688 if (r == null || r.isEmpty()) { 689 return true; 690 } 691 LOG.info("fullScanMetaAndPrint.Current Meta Row: " + r); 692 TableState state = getTableState(r); 693 if (state != null) { 694 LOG.info("fullScanMetaAndPrint.Table State={}" + state); 695 } else { 696 RegionLocations locations = getRegionLocations(r); 697 if (locations == null) { 698 return true; 699 } 700 for (HRegionLocation loc : locations.getRegionLocations()) { 701 if (loc != null) { 702 LOG.info("fullScanMetaAndPrint.HRI Print={}", loc.getRegion()); 703 } 704 } 705 } 706 return true; 707 }; 708 scanMeta(connection, null, null, QueryType.ALL, v); 709 } 710 711 public static void scanMetaForTableRegions(Connection connection, Visitor visitor, 712 TableName tableName) throws IOException { 713 scanMeta(connection, tableName, QueryType.REGION, Integer.MAX_VALUE, visitor); 714 } 715 716 private static void scanMeta(Connection connection, TableName table, QueryType type, int maxRows, 717 final Visitor visitor) throws IOException { 718 scanMeta(connection, getTableStartRowForMeta(table, type), getTableStopRowForMeta(table, type), 719 type, maxRows, visitor); 720 } 721 722 private static void scanMeta(Connection connection, @Nullable final byte[] startRow, 723 @Nullable final byte[] stopRow, QueryType type, final Visitor visitor) throws 
IOException { 724 scanMeta(connection, startRow, stopRow, type, Integer.MAX_VALUE, visitor); 725 } 726 727 /** 728 * Performs a scan of META table for given table starting from given row. 729 * @param connection connection we're using 730 * @param visitor visitor to call 731 * @param tableName table withing we scan 732 * @param row start scan from this row 733 * @param rowLimit max number of rows to return 734 */ 735 public static void scanMeta(Connection connection, final Visitor visitor, 736 final TableName tableName, final byte[] row, final int rowLimit) throws IOException { 737 byte[] startRow = null; 738 byte[] stopRow = null; 739 if (tableName != null) { 740 startRow = getTableStartRowForMeta(tableName, QueryType.REGION); 741 if (row != null) { 742 RegionInfo closestRi = getClosestRegionInfo(connection, tableName, row); 743 startRow = 744 RegionInfo.createRegionName(tableName, closestRi.getStartKey(), HConstants.ZEROES, false); 745 } 746 stopRow = getTableStopRowForMeta(tableName, QueryType.REGION); 747 } 748 scanMeta(connection, startRow, stopRow, QueryType.REGION, rowLimit, visitor); 749 } 750 751 /** 752 * Performs a scan of META table. 753 * @param connection connection we're using 754 * @param startRow Where to start the scan. Pass null if want to begin scan 755 * at first row. 756 * @param stopRow Where to stop the scan. Pass null if want to scan all rows 757 * from the start one 758 * @param type scanned part of meta 759 * @param maxRows maximum rows to return 760 * @param visitor Visitor invoked against each row. 
761 */ 762 static void scanMeta(Connection connection, @Nullable final byte[] startRow, 763 @Nullable final byte[] stopRow, QueryType type, int maxRows, final Visitor visitor) 764 throws IOException { 765 scanMeta(connection, startRow, stopRow, type, null, maxRows, visitor); 766 } 767 768 private static void scanMeta(Connection connection, @Nullable final byte[] startRow, 769 @Nullable final byte[] stopRow, QueryType type, @Nullable Filter filter, int maxRows, 770 final Visitor visitor) throws IOException { 771 int rowUpperLimit = maxRows > 0 ? maxRows : Integer.MAX_VALUE; 772 Scan scan = getMetaScan(connection.getConfiguration(), rowUpperLimit); 773 774 for (byte[] family : type.getFamilies()) { 775 scan.addFamily(family); 776 } 777 if (startRow != null) { 778 scan.withStartRow(startRow); 779 } 780 if (stopRow != null) { 781 scan.withStopRow(stopRow); 782 } 783 if (filter != null) { 784 scan.setFilter(filter); 785 } 786 787 if (LOG.isTraceEnabled()) { 788 LOG.trace("Scanning META" + " starting at row=" + Bytes.toStringBinary(startRow) + 789 " stopping at row=" + Bytes.toStringBinary(stopRow) + " for max=" + rowUpperLimit + 790 " with caching=" + scan.getCaching()); 791 } 792 793 int currentRow = 0; 794 try (Table metaTable = getMetaHTable(connection)) { 795 try (ResultScanner scanner = metaTable.getScanner(scan)) { 796 Result data; 797 while ((data = scanner.next()) != null) { 798 if (data.isEmpty()) continue; 799 // Break if visit returns false. 
800 if (!visitor.visit(data)) break; 801 if (++currentRow >= rowUpperLimit) break; 802 } 803 } 804 } 805 if (visitor instanceof Closeable) { 806 try { 807 ((Closeable) visitor).close(); 808 } catch (Throwable t) { 809 ExceptionUtil.rethrowIfInterrupt(t); 810 LOG.debug("Got exception in closing the meta scanner visitor", t); 811 } 812 } 813 } 814 815 /** 816 * @return Get closest metatable region row to passed <code>row</code> 817 */ 818 @NonNull 819 private static RegionInfo getClosestRegionInfo(Connection connection, 820 @NonNull final TableName tableName, @NonNull final byte[] row) throws IOException { 821 byte[] searchRow = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false); 822 Scan scan = getMetaScan(connection.getConfiguration(), 1); 823 scan.setReversed(true); 824 scan.withStartRow(searchRow); 825 try (ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan)) { 826 Result result = resultScanner.next(); 827 if (result == null) { 828 throw new TableNotFoundException("Cannot find row in META " + 829 " for table: " + tableName + ", row=" + Bytes.toStringBinary(row)); 830 } 831 RegionInfo regionInfo = getRegionInfo(result); 832 if (regionInfo == null) { 833 throw new IOException("RegionInfo was null or empty in Meta for " + 834 tableName + ", row=" + Bytes.toStringBinary(row)); 835 } 836 return regionInfo; 837 } 838 } 839 840 /** 841 * Returns the column family used for meta columns. 842 * @return HConstants.CATALOG_FAMILY. 843 */ 844 public static byte[] getCatalogFamily() { 845 return HConstants.CATALOG_FAMILY; 846 } 847 848 /** 849 * Returns the column family used for table columns. 850 * @return HConstants.TABLE_FAMILY. 
851 */ 852 private static byte[] getTableFamily() { 853 return HConstants.TABLE_FAMILY; 854 } 855 856 /** 857 * Returns the column qualifier for serialized region info 858 * @return HConstants.REGIONINFO_QUALIFIER 859 */ 860 public static byte[] getRegionInfoColumn() { 861 return HConstants.REGIONINFO_QUALIFIER; 862 } 863 864 /** 865 * Returns the column qualifier for serialized table state 866 * @return HConstants.TABLE_STATE_QUALIFIER 867 */ 868 private static byte[] getTableStateColumn() { 869 return HConstants.TABLE_STATE_QUALIFIER; 870 } 871 872 /** 873 * Returns the column qualifier for serialized region state 874 * @return HConstants.STATE_QUALIFIER 875 */ 876 private static byte[] getRegionStateColumn() { 877 return HConstants.STATE_QUALIFIER; 878 } 879 880 /** 881 * Returns the column qualifier for serialized region state 882 * @param replicaId the replicaId of the region 883 * @return a byte[] for state qualifier 884 */ 885 public static byte[] getRegionStateColumn(int replicaId) { 886 return replicaId == 0 ? HConstants.STATE_QUALIFIER 887 : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 888 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 889 } 890 891 /** 892 * Returns the column qualifier for serialized region state 893 * @param replicaId the replicaId of the region 894 * @return a byte[] for sn column qualifier 895 */ 896 public static byte[] getServerNameColumn(int replicaId) { 897 return replicaId == 0 ? HConstants.SERVERNAME_QUALIFIER 898 : Bytes.toBytes(HConstants.SERVERNAME_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 899 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 900 } 901 902 /** 903 * Returns the column qualifier for server column for replicaId 904 * @param replicaId the replicaId of the region 905 * @return a byte[] for server column qualifier 906 */ 907 public static byte[] getServerColumn(int replicaId) { 908 return replicaId == 0 909 ? 
HConstants.SERVER_QUALIFIER 910 : Bytes.toBytes(HConstants.SERVER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 911 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 912 } 913 914 /** 915 * Returns the column qualifier for server start code column for replicaId 916 * @param replicaId the replicaId of the region 917 * @return a byte[] for server start code column qualifier 918 */ 919 public static byte[] getStartCodeColumn(int replicaId) { 920 return replicaId == 0 921 ? HConstants.STARTCODE_QUALIFIER 922 : Bytes.toBytes(HConstants.STARTCODE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 923 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 924 } 925 926 /** 927 * Returns the column qualifier for seqNum column for replicaId 928 * @param replicaId the replicaId of the region 929 * @return a byte[] for seqNum column qualifier 930 */ 931 public static byte[] getSeqNumColumn(int replicaId) { 932 return replicaId == 0 933 ? HConstants.SEQNUM_QUALIFIER 934 : Bytes.toBytes(HConstants.SEQNUM_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 935 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 936 } 937 938 /** 939 * Parses the replicaId from the server column qualifier. See top of the class javadoc 940 * for the actual meta layout 941 * @param serverColumn the column qualifier 942 * @return an int for the replicaId 943 */ 944 static int parseReplicaIdFromServerColumn(byte[] serverColumn) { 945 String serverStr = Bytes.toString(serverColumn); 946 947 Matcher matcher = SERVER_COLUMN_PATTERN.matcher(serverStr); 948 if (matcher.matches() && matcher.groupCount() > 0) { 949 String group = matcher.group(1); 950 if (group != null && group.length() > 0) { 951 return Integer.parseInt(group.substring(1), 16); 952 } else { 953 return 0; 954 } 955 } 956 return -1; 957 } 958 959 /** 960 * Returns a {@link ServerName} from catalog table {@link Result}. 961 * @param r Result to pull from 962 * @return A ServerName instance or null if necessary fields not found or empty. 
963 */ 964 @Nullable 965 @InterfaceAudience.Private // for use by HMaster#getTableRegionRow which is used for testing only 966 public static ServerName getServerName(final Result r, final int replicaId) { 967 byte[] serverColumn = getServerColumn(replicaId); 968 Cell cell = r.getColumnLatestCell(getCatalogFamily(), serverColumn); 969 if (cell == null || cell.getValueLength() == 0) return null; 970 String hostAndPort = Bytes.toString( 971 cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 972 byte[] startcodeColumn = getStartCodeColumn(replicaId); 973 cell = r.getColumnLatestCell(getCatalogFamily(), startcodeColumn); 974 if (cell == null || cell.getValueLength() == 0) return null; 975 try { 976 return ServerName.valueOf(hostAndPort, 977 Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength())); 978 } catch (IllegalArgumentException e) { 979 LOG.error("Ignoring invalid region for server " + hostAndPort + "; cell=" + cell, e); 980 return null; 981 } 982 } 983 984 /** 985 * Returns the {@link ServerName} from catalog table {@link Result} where the region is 986 * transitioning on. It should be the same as {@link MetaTableAccessor#getServerName(Result,int)} 987 * if the server is at OPEN state. 988 * 989 * @param r Result to pull the transitioning server name from 990 * @return A ServerName instance or {@link MetaTableAccessor#getServerName(Result,int)} 991 * if necessary fields not found or empty. 
992 */ 993 @Nullable 994 public static ServerName getTargetServerName(final Result r, final int replicaId) { 995 final Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, 996 getServerNameColumn(replicaId)); 997 if (cell == null || cell.getValueLength() == 0) { 998 RegionLocations locations = MetaTableAccessor.getRegionLocations(r); 999 if (locations != null) { 1000 HRegionLocation location = locations.getRegionLocation(replicaId); 1001 if (location != null) { 1002 return location.getServerName(); 1003 } 1004 } 1005 return null; 1006 } 1007 return ServerName.parseServerName(Bytes.toString(cell.getValueArray(), cell.getValueOffset(), 1008 cell.getValueLength())); 1009 } 1010 1011 /** 1012 * The latest seqnum that the server writing to meta observed when opening the region. 1013 * E.g. the seqNum when the result of {@link #getServerName(Result, int)} was written. 1014 * @param r Result to pull the seqNum from 1015 * @return SeqNum, or HConstants.NO_SEQNUM if there's no value written. 1016 */ 1017 private static long getSeqNumDuringOpen(final Result r, final int replicaId) { 1018 Cell cell = r.getColumnLatestCell(getCatalogFamily(), getSeqNumColumn(replicaId)); 1019 if (cell == null || cell.getValueLength() == 0) return HConstants.NO_SEQNUM; 1020 return Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 1021 } 1022 1023 /** 1024 * Returns the daughter regions by reading the corresponding columns of the catalog table 1025 * Result. 
1026 * @param data a Result object from the catalog table scan 1027 * @return pair of RegionInfo or PairOfSameType(null, null) if region is not a split parent 1028 */ 1029 public static PairOfSameType<RegionInfo> getDaughterRegions(Result data) { 1030 RegionInfo splitA = getRegionInfo(data, HConstants.SPLITA_QUALIFIER); 1031 RegionInfo splitB = getRegionInfo(data, HConstants.SPLITB_QUALIFIER); 1032 return new PairOfSameType<>(splitA, splitB); 1033 } 1034 1035 /** 1036 * Returns an HRegionLocationList extracted from the result. 1037 * @return an HRegionLocationList containing all locations for the region range or null if 1038 * we can't deserialize the result. 1039 */ 1040 @Nullable 1041 public static RegionLocations getRegionLocations(final Result r) { 1042 if (r == null) return null; 1043 RegionInfo regionInfo = getRegionInfo(r, getRegionInfoColumn()); 1044 if (regionInfo == null) return null; 1045 1046 List<HRegionLocation> locations = new ArrayList<>(1); 1047 NavigableMap<byte[],NavigableMap<byte[],byte[]>> familyMap = r.getNoVersionMap(); 1048 1049 locations.add(getRegionLocation(r, regionInfo, 0)); 1050 1051 NavigableMap<byte[], byte[]> infoMap = familyMap.get(getCatalogFamily()); 1052 if (infoMap == null) return new RegionLocations(locations); 1053 1054 // iterate until all serverName columns are seen 1055 int replicaId = 0; 1056 byte[] serverColumn = getServerColumn(replicaId); 1057 SortedMap<byte[], byte[]> serverMap; 1058 serverMap = infoMap.tailMap(serverColumn, false); 1059 1060 if (serverMap.isEmpty()) return new RegionLocations(locations); 1061 1062 for (Map.Entry<byte[], byte[]> entry : serverMap.entrySet()) { 1063 replicaId = parseReplicaIdFromServerColumn(entry.getKey()); 1064 if (replicaId < 0) { 1065 break; 1066 } 1067 HRegionLocation location = getRegionLocation(r, regionInfo, replicaId); 1068 // In case the region replica is newly created, it's location might be null. 
We usually do not 1069 // have HRL's in RegionLocations object with null ServerName. They are handled as null HRLs. 1070 if (location.getServerName() == null) { 1071 locations.add(null); 1072 } else { 1073 locations.add(location); 1074 } 1075 } 1076 1077 return new RegionLocations(locations); 1078 } 1079 1080 /** 1081 * Returns the HRegionLocation parsed from the given meta row Result 1082 * for the given regionInfo and replicaId. The regionInfo can be the default region info 1083 * for the replica. 1084 * @param r the meta row result 1085 * @param regionInfo RegionInfo for default replica 1086 * @param replicaId the replicaId for the HRegionLocation 1087 * @return HRegionLocation parsed from the given meta row Result for the given replicaId 1088 */ 1089 private static HRegionLocation getRegionLocation(final Result r, final RegionInfo regionInfo, 1090 final int replicaId) { 1091 ServerName serverName = getServerName(r, replicaId); 1092 long seqNum = getSeqNumDuringOpen(r, replicaId); 1093 RegionInfo replicaInfo = RegionReplicaUtil.getRegionInfoForReplica(regionInfo, replicaId); 1094 return new HRegionLocation(replicaInfo, serverName, seqNum); 1095 } 1096 1097 /** 1098 * Returns RegionInfo object from the column 1099 * HConstants.CATALOG_FAMILY:HConstants.REGIONINFO_QUALIFIER of the catalog 1100 * table Result. 1101 * @param data a Result object from the catalog table scan 1102 * @return RegionInfo or null 1103 */ 1104 public static RegionInfo getRegionInfo(Result data) { 1105 return getRegionInfo(data, HConstants.REGIONINFO_QUALIFIER); 1106 } 1107 1108 /** 1109 * Returns the RegionInfo object from the column {@link HConstants#CATALOG_FAMILY} and 1110 * <code>qualifier</code> of the catalog table result. 1111 * @param r a Result object from the catalog table scan 1112 * @param qualifier Column family qualifier 1113 * @return An RegionInfo instance or null. 
1114 */ 1115 @Nullable 1116 public static RegionInfo getRegionInfo(final Result r, byte [] qualifier) { 1117 Cell cell = r.getColumnLatestCell(getCatalogFamily(), qualifier); 1118 if (cell == null) return null; 1119 return RegionInfo.parseFromOrNull(cell.getValueArray(), 1120 cell.getValueOffset(), cell.getValueLength()); 1121 } 1122 1123 /** 1124 * Fetch table state for given table from META table 1125 * @param conn connection to use 1126 * @param tableName table to fetch state for 1127 */ 1128 @Nullable 1129 public static TableState getTableState(Connection conn, TableName tableName) 1130 throws IOException { 1131 if (tableName.equals(TableName.META_TABLE_NAME)) { 1132 return new TableState(tableName, TableState.State.ENABLED); 1133 } 1134 Table metaHTable = getMetaHTable(conn); 1135 Get get = new Get(tableName.getName()).addColumn(getTableFamily(), getTableStateColumn()); 1136 Result result = metaHTable.get(get); 1137 return getTableState(result); 1138 } 1139 1140 /** 1141 * Fetch table states from META table 1142 * @param conn connection to use 1143 * @return map {tableName -> state} 1144 */ 1145 public static Map<TableName, TableState> getTableStates(Connection conn) 1146 throws IOException { 1147 final Map<TableName, TableState> states = new LinkedHashMap<>(); 1148 Visitor collector = r -> { 1149 TableState state = getTableState(r); 1150 if (state != null) { 1151 states.put(state.getTableName(), state); 1152 } 1153 return true; 1154 }; 1155 fullScanTables(conn, collector); 1156 return states; 1157 } 1158 1159 /** 1160 * Updates state in META 1161 * Do not use. For internal use only. 1162 * @param conn connection to use 1163 * @param tableName table to look for 1164 */ 1165 public static void updateTableState(Connection conn, TableName tableName, 1166 TableState.State actual) throws IOException { 1167 updateTableState(conn, new TableState(tableName, actual)); 1168 } 1169 1170 /** 1171 * Decode table state from META Result. 
1172 * Should contain cell from HConstants.TABLE_FAMILY 1173 * @return null if not found 1174 */ 1175 @Nullable 1176 public static TableState getTableState(Result r) throws IOException { 1177 Cell cell = r.getColumnLatestCell(getTableFamily(), getTableStateColumn()); 1178 if (cell == null) { 1179 return null; 1180 } 1181 try { 1182 return TableState.parseFrom(TableName.valueOf(r.getRow()), 1183 Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), 1184 cell.getValueOffset() + cell.getValueLength())); 1185 } catch (DeserializationException e) { 1186 throw new IOException(e); 1187 } 1188 } 1189 1190 /** 1191 * Implementations 'visit' a catalog table row. 1192 */ 1193 public interface Visitor { 1194 /** 1195 * Visit the catalog table row. 1196 * @param r A row from catalog table 1197 * @return True if we are to proceed scanning the table, else false if 1198 * we are to stop now. 1199 */ 1200 boolean visit(final Result r) throws IOException; 1201 } 1202 1203 /** 1204 * Implementations 'visit' a catalog table row but with close() at the end. 1205 */ 1206 public interface CloseableVisitor extends Visitor, Closeable { 1207 } 1208 1209 /** 1210 * A {@link Visitor} that collects content out of passed {@link Result}. 1211 */ 1212 static abstract class CollectingVisitor<T> implements Visitor { 1213 final List<T> results = new ArrayList<>(); 1214 @Override 1215 public boolean visit(Result r) throws IOException { 1216 if (r != null && !r.isEmpty()) { 1217 add(r); 1218 } 1219 return true; 1220 } 1221 1222 abstract void add(Result r); 1223 1224 /** 1225 * @return Collected results; wait till visits complete to collect all 1226 * possible results 1227 */ 1228 List<T> getResults() { 1229 return this.results; 1230 } 1231 } 1232 1233 /** 1234 * Collects all returned. 
1235 */ 1236 static class CollectAllVisitor extends CollectingVisitor<Result> { 1237 @Override 1238 void add(Result r) { 1239 this.results.add(r); 1240 } 1241 } 1242 1243 /** 1244 * A Visitor that skips offline regions and split parents 1245 */ 1246 public static abstract class DefaultVisitorBase implements Visitor { 1247 1248 DefaultVisitorBase() { 1249 super(); 1250 } 1251 1252 public abstract boolean visitInternal(Result rowResult) throws IOException; 1253 1254 @Override 1255 public boolean visit(Result rowResult) throws IOException { 1256 RegionInfo info = getRegionInfo(rowResult); 1257 if (info == null) { 1258 return true; 1259 } 1260 1261 //skip over offline and split regions 1262 if (!(info.isOffline() || info.isSplit())) { 1263 return visitInternal(rowResult); 1264 } 1265 return true; 1266 } 1267 } 1268 1269 /** 1270 * A Visitor for a table. Provides a consistent view of the table's 1271 * hbase:meta entries during concurrent splits (see HBASE-5986 for details). This class 1272 * does not guarantee ordered traversal of meta entries, and can block until the 1273 * hbase:meta entries for daughters are available during splits. 
1274 */ 1275 public static abstract class TableVisitorBase extends DefaultVisitorBase { 1276 private TableName tableName; 1277 1278 public TableVisitorBase(TableName tableName) { 1279 super(); 1280 this.tableName = tableName; 1281 } 1282 1283 @Override 1284 public final boolean visit(Result rowResult) throws IOException { 1285 RegionInfo info = getRegionInfo(rowResult); 1286 if (info == null) { 1287 return true; 1288 } 1289 if (!(info.getTable().equals(tableName))) { 1290 return false; 1291 } 1292 return super.visit(rowResult); 1293 } 1294 } 1295 1296 //////////////////////// 1297 // Editing operations // 1298 //////////////////////// 1299 /** 1300 * Generates and returns a Put containing the region into for the catalog table 1301 */ 1302 public static Put makePutFromRegionInfo(RegionInfo regionInfo, long ts) throws IOException { 1303 return addRegionInfo(new Put(regionInfo.getRegionName(), ts), regionInfo); 1304 } 1305 1306 /** 1307 * Generates and returns a Delete containing the region info for the catalog table 1308 */ 1309 public static Delete makeDeleteFromRegionInfo(RegionInfo regionInfo, long ts) { 1310 if (regionInfo == null) { 1311 throw new IllegalArgumentException("Can't make a delete for null region"); 1312 } 1313 Delete delete = new Delete(regionInfo.getRegionName()); 1314 delete.addFamily(getCatalogFamily(), ts); 1315 return delete; 1316 } 1317 1318 /** 1319 * Adds split daughters to the Put 1320 */ 1321 private static Put addDaughtersToPut(Put put, RegionInfo splitA, RegionInfo splitB) 1322 throws IOException { 1323 if (splitA != null) { 1324 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1325 .setRow(put.getRow()) 1326 .setFamily(HConstants.CATALOG_FAMILY) 1327 .setQualifier(HConstants.SPLITA_QUALIFIER) 1328 .setTimestamp(put.getTimestamp()) 1329 .setType(Type.Put) 1330 .setValue(RegionInfo.toByteArray(splitA)) 1331 .build()); 1332 } 1333 if (splitB != null) { 1334 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1335 
.setRow(put.getRow()) 1336 .setFamily(HConstants.CATALOG_FAMILY) 1337 .setQualifier(HConstants.SPLITB_QUALIFIER) 1338 .setTimestamp(put.getTimestamp()) 1339 .setType(Type.Put) 1340 .setValue(RegionInfo.toByteArray(splitB)) 1341 .build()); 1342 } 1343 return put; 1344 } 1345 1346 /** 1347 * Put the passed <code>p</code> to the <code>hbase:meta</code> table. 1348 * @param connection connection we're using 1349 * @param p Put to add to hbase:meta 1350 */ 1351 private static void putToMetaTable(Connection connection, Put p) throws IOException { 1352 try (Table table = getMetaHTable(connection)) { 1353 put(table, p); 1354 } 1355 } 1356 1357 /** 1358 * @param t Table to use 1359 * @param p put to make 1360 */ 1361 private static void put(Table t, Put p) throws IOException { 1362 debugLogMutation(p); 1363 t.put(p); 1364 } 1365 1366 /** 1367 * Put the passed <code>ps</code> to the <code>hbase:meta</code> table. 1368 * @param connection connection we're using 1369 * @param ps Put to add to hbase:meta 1370 */ 1371 public static void putsToMetaTable(final Connection connection, final List<Put> ps) 1372 throws IOException { 1373 if (ps.isEmpty()) { 1374 return; 1375 } 1376 try (Table t = getMetaHTable(connection)) { 1377 debugLogMutations(ps); 1378 // the implementation for putting a single Put is much simpler so here we do a check first. 1379 if (ps.size() == 1) { 1380 t.put(ps.get(0)); 1381 } else { 1382 t.put(ps); 1383 } 1384 } 1385 } 1386 1387 /** 1388 * Delete the passed <code>d</code> from the <code>hbase:meta</code> table. 1389 * @param connection connection we're using 1390 * @param d Delete to add to hbase:meta 1391 */ 1392 private static void deleteFromMetaTable(final Connection connection, final Delete d) 1393 throws IOException { 1394 List<Delete> dels = new ArrayList<>(1); 1395 dels.add(d); 1396 deleteFromMetaTable(connection, dels); 1397 } 1398 1399 /** 1400 * Delete the passed <code>deletes</code> from the <code>hbase:meta</code> table. 
1401 * @param connection connection we're using 1402 * @param deletes Deletes to add to hbase:meta This list should support #remove. 1403 */ 1404 private static void deleteFromMetaTable(final Connection connection, final List<Delete> deletes) 1405 throws IOException { 1406 try (Table t = getMetaHTable(connection)) { 1407 debugLogMutations(deletes); 1408 t.delete(deletes); 1409 } 1410 } 1411 1412 private static Put addRegionStateToPut(Put put, RegionState.State state) throws IOException { 1413 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1414 .setRow(put.getRow()) 1415 .setFamily(HConstants.CATALOG_FAMILY) 1416 .setQualifier(getRegionStateColumn()) 1417 .setTimestamp(put.getTimestamp()) 1418 .setType(Cell.Type.Put) 1419 .setValue(Bytes.toBytes(state.name())) 1420 .build()); 1421 return put; 1422 } 1423 1424 /** 1425 * Update state column in hbase:meta. 1426 */ 1427 public static void updateRegionState(Connection connection, RegionInfo ri, 1428 RegionState.State state) throws IOException { 1429 Put put = new Put(RegionReplicaUtil.getRegionInfoForDefaultReplica(ri).getRegionName()); 1430 MetaTableAccessor.putsToMetaTable(connection, 1431 Collections.singletonList(addRegionStateToPut(put, state))); 1432 } 1433 1434 /** 1435 * Adds daughter region infos to hbase:meta row for the specified region. Note that this does not 1436 * add its daughter's as different rows, but adds information about the daughters in the same row 1437 * as the parent. Use 1438 * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} 1439 * if you want to do that. 
1440 * @param connection connection we're using 1441 * @param regionInfo RegionInfo of parent region 1442 * @param splitA first split daughter of the parent regionInfo 1443 * @param splitB second split daughter of the parent regionInfo 1444 * @throws IOException if problem connecting or updating meta 1445 */ 1446 public static void addSplitsToParent(Connection connection, RegionInfo regionInfo, 1447 RegionInfo splitA, RegionInfo splitB) throws IOException { 1448 try (Table meta = getMetaHTable(connection)) { 1449 Put put = makePutFromRegionInfo(regionInfo, EnvironmentEdgeManager.currentTime()); 1450 addDaughtersToPut(put, splitA, splitB); 1451 meta.put(put); 1452 debugLogMutation(put); 1453 LOG.debug("Added region {}", regionInfo.getRegionNameAsString()); 1454 } 1455 } 1456 1457 /** 1458 * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this 1459 * does not add its daughter's as different rows, but adds information about the daughters 1460 * in the same row as the parent. Use 1461 * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} 1462 * if you want to do that. 1463 * @param connection connection we're using 1464 * @param regionInfo region information 1465 * @throws IOException if problem connecting or updating meta 1466 */ 1467 public static void addRegionToMeta(Connection connection, RegionInfo regionInfo) 1468 throws IOException { 1469 addRegionsToMeta(connection, Collections.singletonList(regionInfo), 1); 1470 } 1471 1472 /** 1473 * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions 1474 * is CLOSED. 
1475 * @param connection connection we're using 1476 * @param regionInfos region information list 1477 * @throws IOException if problem connecting or updating meta 1478 */ 1479 public static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos, 1480 int regionReplication) throws IOException { 1481 addRegionsToMeta(connection, regionInfos, regionReplication, 1482 EnvironmentEdgeManager.currentTime()); 1483 } 1484 1485 /** 1486 * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions 1487 * is CLOSED. 1488 * @param connection connection we're using 1489 * @param regionInfos region information list 1490 * @param ts desired timestamp 1491 * @throws IOException if problem connecting or updating meta 1492 */ 1493 private static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos, 1494 int regionReplication, long ts) throws IOException { 1495 List<Put> puts = new ArrayList<>(); 1496 for (RegionInfo regionInfo : regionInfos) { 1497 if (RegionReplicaUtil.isDefaultReplica(regionInfo)) { 1498 Put put = makePutFromRegionInfo(regionInfo, ts); 1499 // New regions are added with initial state of CLOSED. 1500 addRegionStateToPut(put, RegionState.State.CLOSED); 1501 // Add empty locations for region replicas so that number of replicas can be cached 1502 // whenever the primary region is looked up from meta 1503 for (int i = 1; i < regionReplication; i++) { 1504 addEmptyLocation(put, i); 1505 } 1506 puts.add(put); 1507 } 1508 } 1509 putsToMetaTable(connection, puts); 1510 LOG.info("Added {} regions to meta.", puts.size()); 1511 } 1512 1513 static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException { 1514 int limit = 10000; // Arbitrary limit. No room in our formatted 'task0000' below for more. 1515 int max = mergeRegions.size(); 1516 if (max > limit) { 1517 // Should never happen!!!!! But just in case. 
1518 throw new RuntimeException("Can't merge " + max + " regions in one go; " + limit + 1519 " is upper-limit."); 1520 } 1521 int counter = 0; 1522 for (RegionInfo ri: mergeRegions) { 1523 String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++); 1524 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY). 1525 setRow(put.getRow()). 1526 setFamily(HConstants.CATALOG_FAMILY). 1527 setQualifier(Bytes.toBytes(qualifier)). 1528 setTimestamp(put.getTimestamp()). 1529 setType(Type.Put). 1530 setValue(RegionInfo.toByteArray(ri)). 1531 build()); 1532 } 1533 return put; 1534 } 1535 1536 /** 1537 * Merge regions into one in an atomic operation. Deletes the merging regions in 1538 * hbase:meta and adds the merged region. 1539 * @param connection connection we're using 1540 * @param mergedRegion the merged region 1541 * @param parentSeqNum Parent regions to merge and their next open sequence id used 1542 * by serial replication. Set to -1 if not needed by this table. 
1543 * @param sn the location of the region 1544 */ 1545 public static void mergeRegions(Connection connection, RegionInfo mergedRegion, 1546 Map<RegionInfo, Long> parentSeqNum, ServerName sn, int regionReplication) 1547 throws IOException { 1548 try (Table meta = getMetaHTable(connection)) { 1549 long time = HConstants.LATEST_TIMESTAMP; 1550 List<Mutation> mutations = new ArrayList<>(); 1551 List<RegionInfo> replicationParents = new ArrayList<>(); 1552 for (Map.Entry<RegionInfo, Long> e: parentSeqNum.entrySet()) { 1553 RegionInfo ri = e.getKey(); 1554 long seqNum = e.getValue(); 1555 // Deletes for merging regions 1556 mutations.add(makeDeleteFromRegionInfo(ri, time)); 1557 if (seqNum > 0) { 1558 mutations.add(makePutForReplicationBarrier(ri, seqNum, time)); 1559 replicationParents.add(ri); 1560 } 1561 } 1562 // Put for parent 1563 Put putOfMerged = makePutFromRegionInfo(mergedRegion, time); 1564 putOfMerged = addMergeRegions(putOfMerged, parentSeqNum.keySet()); 1565 // Set initial state to CLOSED. 1566 // NOTE: If initial state is not set to CLOSED then merged region gets added with the 1567 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 1568 // master tries to assign this offline region. This is followed by re-assignments of the 1569 // merged region from resumed {@link MergeTableRegionsProcedure} 1570 addRegionStateToPut(putOfMerged, RegionState.State.CLOSED); 1571 mutations.add(putOfMerged); 1572 // The merged is a new region, openSeqNum = 1 is fine. ServerName may be null 1573 // if crash after merge happened but before we got to here.. means in-memory 1574 // locations of offlined merged, now-closed, regions is lost. Should be ok. We 1575 // assign the merged region later. 
1576 if (sn != null) { 1577 addLocation(putOfMerged, sn, 1, mergedRegion.getReplicaId()); 1578 } 1579 1580 // Add empty locations for region replicas of the merged region so that number of replicas 1581 // can be cached whenever the primary region is looked up from meta 1582 for (int i = 1; i < regionReplication; i++) { 1583 addEmptyLocation(putOfMerged, i); 1584 } 1585 // add parent reference for serial replication 1586 if (!replicationParents.isEmpty()) { 1587 addReplicationParent(putOfMerged, replicationParents); 1588 } 1589 byte[] tableRow = Bytes.toBytes(mergedRegion.getRegionNameAsString() + HConstants.DELIMITER); 1590 multiMutate(meta, tableRow, mutations); 1591 } 1592 } 1593 1594 /** 1595 * Splits the region into two in an atomic operation. Offlines the parent region with the 1596 * information that it is split into two, and also adds the daughter regions. Does not add the 1597 * location information to the daughter regions since they are not open yet. 1598 * @param connection connection we're using 1599 * @param parent the parent region which is split 1600 * @param parentOpenSeqNum the next open sequence id for parent region, used by serial 1601 * replication. -1 if not necessary. 
1602 * @param splitA Split daughter region A 1603 * @param splitB Split daughter region B 1604 * @param sn the location of the region 1605 */ 1606 public static void splitRegion(Connection connection, RegionInfo parent, long parentOpenSeqNum, 1607 RegionInfo splitA, RegionInfo splitB, ServerName sn, int regionReplication) 1608 throws IOException { 1609 try (Table meta = getMetaHTable(connection)) { 1610 long time = EnvironmentEdgeManager.currentTime(); 1611 // Put for parent 1612 Put putParent = makePutFromRegionInfo(RegionInfoBuilder.newBuilder(parent) 1613 .setOffline(true) 1614 .setSplit(true).build(), time); 1615 addDaughtersToPut(putParent, splitA, splitB); 1616 1617 // Puts for daughters 1618 Put putA = makePutFromRegionInfo(splitA, time); 1619 Put putB = makePutFromRegionInfo(splitB, time); 1620 if (parentOpenSeqNum > 0) { 1621 addReplicationBarrier(putParent, parentOpenSeqNum); 1622 addReplicationParent(putA, Collections.singletonList(parent)); 1623 addReplicationParent(putB, Collections.singletonList(parent)); 1624 } 1625 // Set initial state to CLOSED 1626 // NOTE: If initial state is not set to CLOSED then daughter regions get added with the 1627 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 1628 // master tries to assign these offline regions. This is followed by re-assignments of the 1629 // daughter regions from resumed {@link SplitTableRegionProcedure} 1630 addRegionStateToPut(putA, RegionState.State.CLOSED); 1631 addRegionStateToPut(putB, RegionState.State.CLOSED); 1632 1633 addSequenceNum(putA, 1, splitA.getReplicaId()); // new regions, openSeqNum = 1 is fine. 
1634 addSequenceNum(putB, 1, splitB.getReplicaId()); 1635 1636 // Add empty locations for region replicas of daughters so that number of replicas can be 1637 // cached whenever the primary region is looked up from meta 1638 for (int i = 1; i < regionReplication; i++) { 1639 addEmptyLocation(putA, i); 1640 addEmptyLocation(putB, i); 1641 } 1642 1643 byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER); 1644 multiMutate(meta, tableRow, putParent, putA, putB); 1645 } 1646 } 1647 1648 /** 1649 * Update state of the table in meta. 1650 * @param connection what we use for update 1651 * @param state new state 1652 */ 1653 private static void updateTableState(Connection connection, TableState state) throws IOException { 1654 Put put = makePutFromTableState(state, EnvironmentEdgeManager.currentTime()); 1655 putToMetaTable(connection, put); 1656 LOG.info("Updated {} in hbase:meta", state); 1657 } 1658 1659 /** 1660 * Construct PUT for given state 1661 * @param state new state 1662 */ 1663 public static Put makePutFromTableState(TableState state, long ts) { 1664 Put put = new Put(state.getTableName().getName(), ts); 1665 put.addColumn(getTableFamily(), getTableStateColumn(), state.convert().toByteArray()); 1666 return put; 1667 } 1668 1669 /** 1670 * Remove state for table from meta 1671 * @param connection to use for deletion 1672 * @param table to delete state for 1673 */ 1674 public static void deleteTableState(Connection connection, TableName table) 1675 throws IOException { 1676 long time = EnvironmentEdgeManager.currentTime(); 1677 Delete delete = new Delete(table.getName()); 1678 delete.addColumns(getTableFamily(), getTableStateColumn(), time); 1679 deleteFromMetaTable(connection, delete); 1680 LOG.info("Deleted table " + table + " state from META"); 1681 } 1682 1683 private static void multiMutate(Table table, byte[] row, 1684 Mutation... 
mutations) throws IOException { 1685 multiMutate(table, row, Arrays.asList(mutations)); 1686 } 1687 1688 /** 1689 * Performs an atomic multi-mutate operation against the given table. Used by the likes of 1690 * merge and split as these want to make atomic mutations across multiple rows. 1691 * @throws IOException even if we encounter a RuntimeException, we'll still wrap it in an IOE. 1692 */ 1693 static void multiMutate(final Table table, byte[] row, final List<Mutation> mutations) 1694 throws IOException { 1695 debugLogMutations(mutations); 1696 Batch.Call<MultiRowMutationService, MutateRowsResponse> callable = instance -> { 1697 MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder(); 1698 for (Mutation mutation : mutations) { 1699 if (mutation instanceof Put) { 1700 builder.addMutationRequest( 1701 ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.PUT, mutation)); 1702 } else if (mutation instanceof Delete) { 1703 builder.addMutationRequest( 1704 ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.DELETE, mutation)); 1705 } else { 1706 throw new DoNotRetryIOException( 1707 "multi in MetaEditor doesn't support " + mutation.getClass().getName()); 1708 } 1709 } 1710 ServerRpcController controller = new ServerRpcController(); 1711 CoprocessorRpcUtils.BlockingRpcCallback<MutateRowsResponse> rpcCallback = 1712 new CoprocessorRpcUtils.BlockingRpcCallback<>(); 1713 instance.mutateRows(controller, builder.build(), rpcCallback); 1714 MutateRowsResponse resp = rpcCallback.get(); 1715 if (controller.failedOnException()) { 1716 throw controller.getFailedOn(); 1717 } 1718 return resp; 1719 }; 1720 try { 1721 table.coprocessorService(MultiRowMutationService.class, row, row, callable); 1722 } catch (Throwable e) { 1723 // Throw if an IOE else wrap in an IOE EVEN IF IT IS a RuntimeException (e.g. 1724 // a RejectedExecutionException because the hosting exception is shutting down. 1725 // This is old behavior worth reexamining. 
Procedures doing merge or split 1726 // currently don't handle RuntimeExceptions coming up out of meta table edits. 1727 // Would have to work on this at least. See HBASE-23904. 1728 Throwables.throwIfInstanceOf(e, IOException.class); 1729 throw new IOException(e); 1730 } 1731 } 1732 1733 /** 1734 * Updates the location of the specified region in hbase:meta to be the specified server hostname 1735 * and startcode. 1736 * <p> 1737 * Uses passed catalog tracker to get a connection to the server hosting hbase:meta and makes 1738 * edits to that region. 1739 * @param connection connection we're using 1740 * @param regionInfo region to update location of 1741 * @param openSeqNum the latest sequence number obtained when the region was open 1742 * @param sn Server name 1743 * @param masterSystemTime wall clock time from master if passed in the open region RPC 1744 */ 1745 public static void updateRegionLocation(Connection connection, RegionInfo regionInfo, 1746 ServerName sn, long openSeqNum, long masterSystemTime) throws IOException { 1747 updateLocation(connection, regionInfo, sn, openSeqNum, masterSystemTime); 1748 } 1749 1750 /** 1751 * Updates the location of the specified region to be the specified server. 1752 * <p> 1753 * Connects to the specified server which should be hosting the specified catalog region name to 1754 * perform the edit. 1755 * @param connection connection we're using 1756 * @param regionInfo region to update location of 1757 * @param sn Server name 1758 * @param openSeqNum the latest sequence number obtained when the region was open 1759 * @param masterSystemTime wall clock time from master if passed in the open region RPC 1760 * @throws IOException In particular could throw {@link java.net.ConnectException} if the server 1761 * is down on other end. 
1762 */ 1763 private static void updateLocation(Connection connection, RegionInfo regionInfo, ServerName sn, 1764 long openSeqNum, long masterSystemTime) throws IOException { 1765 // region replicas are kept in the primary region's row 1766 Put put = new Put(getMetaKeyForRegion(regionInfo), masterSystemTime); 1767 addRegionInfo(put, regionInfo); 1768 addLocation(put, sn, openSeqNum, regionInfo.getReplicaId()); 1769 putToMetaTable(connection, put); 1770 LOG.info("Updated row {} with server=", regionInfo.getRegionNameAsString(), sn); 1771 } 1772 1773 /** 1774 * Deletes the specified region from META. 1775 * @param connection connection we're using 1776 * @param regionInfo region to be deleted from META 1777 */ 1778 public static void deleteRegionInfo(Connection connection, RegionInfo regionInfo) 1779 throws IOException { 1780 Delete delete = new Delete(regionInfo.getRegionName()); 1781 delete.addFamily(getCatalogFamily(), HConstants.LATEST_TIMESTAMP); 1782 deleteFromMetaTable(connection, delete); 1783 LOG.info("Deleted " + regionInfo.getRegionNameAsString()); 1784 } 1785 1786 /** 1787 * Deletes the specified regions from META. 1788 * @param connection connection we're using 1789 * @param regionsInfo list of regions to be deleted from META 1790 */ 1791 public static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo) 1792 throws IOException { 1793 deleteRegionInfos(connection, regionsInfo, EnvironmentEdgeManager.currentTime()); 1794 } 1795 1796 /** 1797 * Deletes the specified regions from META. 
1798 * @param connection connection we're using 1799 * @param regionsInfo list of regions to be deleted from META 1800 */ 1801 private static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo, 1802 long ts) 1803 throws IOException { 1804 List<Delete> deletes = new ArrayList<>(regionsInfo.size()); 1805 for (RegionInfo hri : regionsInfo) { 1806 Delete e = new Delete(hri.getRegionName()); 1807 e.addFamily(getCatalogFamily(), ts); 1808 deletes.add(e); 1809 } 1810 deleteFromMetaTable(connection, deletes); 1811 LOG.info("Deleted {} regions from META", regionsInfo.size()); 1812 LOG.debug("Deleted regions: {}", regionsInfo); 1813 } 1814 1815 /** 1816 * Overwrites the specified regions from hbase:meta. Deletes old rows for the given regions and 1817 * adds new ones. Regions added back have state CLOSED. 1818 * @param connection connection we're using 1819 * @param regionInfos list of regions to be added to META 1820 */ 1821 public static void overwriteRegions(Connection connection, List<RegionInfo> regionInfos, 1822 int regionReplication) throws IOException { 1823 // use master time for delete marker and the Put 1824 long now = EnvironmentEdgeManager.currentTime(); 1825 deleteRegionInfos(connection, regionInfos, now); 1826 // Why sleep? This is the easiest way to ensure that the previous deletes does not 1827 // eclipse the following puts, that might happen in the same ts from the server. 1828 // See HBASE-9906, and HBASE-9879. Once either HBASE-9879, HBASE-8770 is fixed, 1829 // or HBASE-9905 is fixed and meta uses seqIds, we do not need the sleep. 1830 // 1831 // HBASE-13875 uses master timestamp for the mutations. The 20ms sleep is not needed 1832 addRegionsToMeta(connection, regionInfos, regionReplication, now + 1); 1833 LOG.info("Overwritten " + regionInfos.size() + " regions to Meta"); 1834 LOG.debug("Overwritten regions: {} ", regionInfos); 1835 } 1836 1837 /** 1838 * Deletes merge qualifiers for the specified merge region. 
1839 * @param connection connection we're using 1840 * @param mergeRegion the merged region 1841 */ 1842 public static void deleteMergeQualifiers(Connection connection, final RegionInfo mergeRegion) 1843 throws IOException { 1844 Delete delete = new Delete(mergeRegion.getRegionName()); 1845 // NOTE: We are doing a new hbase:meta read here. 1846 Cell[] cells = getRegionResult(connection, mergeRegion.getRegionName()).rawCells(); 1847 if (cells == null || cells.length == 0) { 1848 return; 1849 } 1850 List<byte[]> qualifiers = new ArrayList<>(); 1851 for (Cell cell : cells) { 1852 if (!isMergeQualifierPrefix(cell)) { 1853 continue; 1854 } 1855 byte[] qualifier = CellUtil.cloneQualifier(cell); 1856 qualifiers.add(qualifier); 1857 delete.addColumns(getCatalogFamily(), qualifier, HConstants.LATEST_TIMESTAMP); 1858 } 1859 1860 // There will be race condition that a GCMultipleMergedRegionsProcedure is scheduled while 1861 // the previous GCMultipleMergedRegionsProcedure is still going on, in this case, the second 1862 // GCMultipleMergedRegionsProcedure could delete the merged region by accident! 1863 if (qualifiers.isEmpty()) { 1864 LOG.info("No merged qualifiers for region " + mergeRegion.getRegionNameAsString() + 1865 " in meta table, they are cleaned up already, Skip."); 1866 return; 1867 } 1868 1869 deleteFromMetaTable(connection, delete); 1870 LOG.info("Deleted merge references in " + mergeRegion.getRegionNameAsString() + 1871 ", deleted qualifiers " + qualifiers.stream().map(Bytes::toStringBinary). 
1872 collect(Collectors.joining(", "))); 1873 } 1874 1875 public static Put addRegionInfo(final Put p, final RegionInfo hri) 1876 throws IOException { 1877 p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1878 .setRow(p.getRow()) 1879 .setFamily(getCatalogFamily()) 1880 .setQualifier(HConstants.REGIONINFO_QUALIFIER) 1881 .setTimestamp(p.getTimestamp()) 1882 .setType(Type.Put) 1883 // Serialize the Default Replica HRI otherwise scan of hbase:meta 1884 // shows an info:regioninfo value with encoded name and region 1885 // name that differs from that of the hbase;meta row. 1886 .setValue(RegionInfo.toByteArray(RegionReplicaUtil.getRegionInfoForDefaultReplica(hri))) 1887 .build()); 1888 return p; 1889 } 1890 1891 public static Put addLocation(Put p, ServerName sn, long openSeqNum, int replicaId) 1892 throws IOException { 1893 CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); 1894 return p.add(builder.clear() 1895 .setRow(p.getRow()) 1896 .setFamily(getCatalogFamily()) 1897 .setQualifier(getServerColumn(replicaId)) 1898 .setTimestamp(p.getTimestamp()) 1899 .setType(Cell.Type.Put) 1900 .setValue(Bytes.toBytes(sn.getAddress().toString())) 1901 .build()) 1902 .add(builder.clear() 1903 .setRow(p.getRow()) 1904 .setFamily(getCatalogFamily()) 1905 .setQualifier(getStartCodeColumn(replicaId)) 1906 .setTimestamp(p.getTimestamp()) 1907 .setType(Cell.Type.Put) 1908 .setValue(Bytes.toBytes(sn.getStartcode())) 1909 .build()) 1910 .add(builder.clear() 1911 .setRow(p.getRow()) 1912 .setFamily(getCatalogFamily()) 1913 .setQualifier(getSeqNumColumn(replicaId)) 1914 .setTimestamp(p.getTimestamp()) 1915 .setType(Type.Put) 1916 .setValue(Bytes.toBytes(openSeqNum)) 1917 .build()); 1918 } 1919 1920 private static void writeRegionName(ByteArrayOutputStream out, byte[] regionName) { 1921 for (byte b : regionName) { 1922 if (b == ESCAPE_BYTE) { 1923 out.write(ESCAPE_BYTE); 1924 } 1925 out.write(b); 1926 } 1927 } 1928 1929 public static byte[] 
getParentsBytes(List<RegionInfo> parents) { 1930 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 1931 Iterator<RegionInfo> iter = parents.iterator(); 1932 writeRegionName(bos, iter.next().getRegionName()); 1933 while (iter.hasNext()) { 1934 bos.write(ESCAPE_BYTE); 1935 bos.write(SEPARATED_BYTE); 1936 writeRegionName(bos, iter.next().getRegionName()); 1937 } 1938 return bos.toByteArray(); 1939 } 1940 1941 private static List<byte[]> parseParentsBytes(byte[] bytes) { 1942 List<byte[]> parents = new ArrayList<>(); 1943 ByteArrayOutputStream bos = new ByteArrayOutputStream(); 1944 for (int i = 0; i < bytes.length; i++) { 1945 if (bytes[i] == ESCAPE_BYTE) { 1946 i++; 1947 if (bytes[i] == SEPARATED_BYTE) { 1948 parents.add(bos.toByteArray()); 1949 bos.reset(); 1950 continue; 1951 } 1952 // fall through to append the byte 1953 } 1954 bos.write(bytes[i]); 1955 } 1956 if (bos.size() > 0) { 1957 parents.add(bos.toByteArray()); 1958 } 1959 return parents; 1960 } 1961 1962 private static void addReplicationParent(Put put, List<RegionInfo> parents) throws IOException { 1963 byte[] value = getParentsBytes(parents); 1964 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 1965 .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(REPLICATION_PARENT_QUALIFIER) 1966 .setTimestamp(put.getTimestamp()).setType(Type.Put).setValue(value).build()); 1967 } 1968 1969 public static Put makePutForReplicationBarrier(RegionInfo regionInfo, long openSeqNum, long ts) 1970 throws IOException { 1971 Put put = new Put(regionInfo.getRegionName(), ts); 1972 addReplicationBarrier(put, openSeqNum); 1973 return put; 1974 } 1975 1976 /** 1977 * See class comment on SerialReplicationChecker 1978 */ 1979 public static void addReplicationBarrier(Put put, long openSeqNum) throws IOException { 1980 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1981 .setRow(put.getRow()) 1982 .setFamily(HConstants.REPLICATION_BARRIER_FAMILY) 1983 
.setQualifier(HConstants.SEQNUM_QUALIFIER) 1984 .setTimestamp(put.getTimestamp()) 1985 .setType(Type.Put) 1986 .setValue(Bytes.toBytes(openSeqNum)) 1987 .build()); 1988 } 1989 1990 public static Put addEmptyLocation(Put p, int replicaId) throws IOException { 1991 CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); 1992 return p.add(builder.clear() 1993 .setRow(p.getRow()) 1994 .setFamily(getCatalogFamily()) 1995 .setQualifier(getServerColumn(replicaId)) 1996 .setTimestamp(p.getTimestamp()) 1997 .setType(Type.Put) 1998 .build()) 1999 .add(builder.clear() 2000 .setRow(p.getRow()) 2001 .setFamily(getCatalogFamily()) 2002 .setQualifier(getStartCodeColumn(replicaId)) 2003 .setTimestamp(p.getTimestamp()) 2004 .setType(Cell.Type.Put) 2005 .build()) 2006 .add(builder.clear() 2007 .setRow(p.getRow()) 2008 .setFamily(getCatalogFamily()) 2009 .setQualifier(getSeqNumColumn(replicaId)) 2010 .setTimestamp(p.getTimestamp()) 2011 .setType(Cell.Type.Put) 2012 .build()); 2013 } 2014 2015 public static final class ReplicationBarrierResult { 2016 private final long[] barriers; 2017 private final RegionState.State state; 2018 private final List<byte[]> parentRegionNames; 2019 2020 ReplicationBarrierResult(long[] barriers, State state, List<byte[]> parentRegionNames) { 2021 this.barriers = barriers; 2022 this.state = state; 2023 this.parentRegionNames = parentRegionNames; 2024 } 2025 2026 public long[] getBarriers() { 2027 return barriers; 2028 } 2029 2030 public RegionState.State getState() { 2031 return state; 2032 } 2033 2034 public List<byte[]> getParentRegionNames() { 2035 return parentRegionNames; 2036 } 2037 2038 @Override 2039 public String toString() { 2040 return "ReplicationBarrierResult [barriers=" + Arrays.toString(barriers) + ", state=" + 2041 state + ", parentRegionNames=" + 2042 parentRegionNames.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")) + 2043 "]"; 2044 } 2045 } 2046 2047 private static long 
getReplicationBarrier(Cell c) { 2048 return Bytes.toLong(c.getValueArray(), c.getValueOffset(), c.getValueLength()); 2049 } 2050 2051 public static long[] getReplicationBarriers(Result result) { 2052 return result.getColumnCells(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER) 2053 .stream().mapToLong(MetaTableAccessor::getReplicationBarrier).sorted().distinct().toArray(); 2054 } 2055 2056 private static ReplicationBarrierResult getReplicationBarrierResult(Result result) { 2057 long[] barriers = getReplicationBarriers(result); 2058 byte[] stateBytes = result.getValue(getCatalogFamily(), getRegionStateColumn()); 2059 RegionState.State state = 2060 stateBytes != null ? RegionState.State.valueOf(Bytes.toString(stateBytes)) : null; 2061 byte[] parentRegionsBytes = 2062 result.getValue(HConstants.REPLICATION_BARRIER_FAMILY, REPLICATION_PARENT_QUALIFIER); 2063 List<byte[]> parentRegionNames = 2064 parentRegionsBytes != null ? parseParentsBytes(parentRegionsBytes) : Collections.emptyList(); 2065 return new ReplicationBarrierResult(barriers, state, parentRegionNames); 2066 } 2067 2068 public static ReplicationBarrierResult getReplicationBarrierResult(Connection conn, 2069 TableName tableName, byte[] row, byte[] encodedRegionName) throws IOException { 2070 byte[] metaStartKey = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false); 2071 byte[] metaStopKey = 2072 RegionInfo.createRegionName(tableName, HConstants.EMPTY_START_ROW, "", false); 2073 Scan scan = new Scan().withStartRow(metaStartKey).withStopRow(metaStopKey) 2074 .addColumn(getCatalogFamily(), getRegionStateColumn()) 2075 .addFamily(HConstants.REPLICATION_BARRIER_FAMILY).readAllVersions().setReversed(true) 2076 .setCaching(10); 2077 try (Table table = getMetaHTable(conn); ResultScanner scanner = table.getScanner(scan)) { 2078 for (Result result;;) { 2079 result = scanner.next(); 2080 if (result == null) { 2081 return new ReplicationBarrierResult(new long[0], null, 
Collections.emptyList()); 2082 } 2083 byte[] regionName = result.getRow(); 2084 // TODO: we may look up a region which has already been split or merged so we need to check 2085 // whether the encoded name matches. Need to find a way to quit earlier when there is no 2086 // record for the given region, for now it will scan to the end of the table. 2087 if (!Bytes.equals(encodedRegionName, 2088 Bytes.toBytes(RegionInfo.encodeRegionName(regionName)))) { 2089 continue; 2090 } 2091 return getReplicationBarrierResult(result); 2092 } 2093 } 2094 } 2095 2096 public static long[] getReplicationBarrier(Connection conn, byte[] regionName) 2097 throws IOException { 2098 try (Table table = getMetaHTable(conn)) { 2099 Result result = table.get(new Get(regionName) 2100 .addColumn(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER) 2101 .readAllVersions()); 2102 return getReplicationBarriers(result); 2103 } 2104 } 2105 2106 public static List<Pair<String, Long>> getTableEncodedRegionNameAndLastBarrier(Connection conn, 2107 TableName tableName) throws IOException { 2108 List<Pair<String, Long>> list = new ArrayList<>(); 2109 scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION), 2110 getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, r -> { 2111 byte[] value = 2112 r.getValue(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER); 2113 if (value == null) { 2114 return true; 2115 } 2116 long lastBarrier = Bytes.toLong(value); 2117 String encodedRegionName = RegionInfo.encodeRegionName(r.getRow()); 2118 list.add(Pair.newPair(encodedRegionName, lastBarrier)); 2119 return true; 2120 }); 2121 return list; 2122 } 2123 2124 public static List<String> getTableEncodedRegionNamesForSerialReplication(Connection conn, 2125 TableName tableName) throws IOException { 2126 List<String> list = new ArrayList<>(); 2127 scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION), 2128 
getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, 2129 new FirstKeyOnlyFilter(), Integer.MAX_VALUE, r -> { 2130 list.add(RegionInfo.encodeRegionName(r.getRow())); 2131 return true; 2132 }); 2133 return list; 2134 } 2135 2136 private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException { 2137 if (!METALOG.isDebugEnabled()) { 2138 return; 2139 } 2140 // Logging each mutation in separate line makes it easier to see diff between them visually 2141 // because of common starting indentation. 2142 for (Mutation mutation : mutations) { 2143 debugLogMutation(mutation); 2144 } 2145 } 2146 2147 private static void debugLogMutation(Mutation p) throws IOException { 2148 METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON()); 2149 } 2150 2151 private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException { 2152 return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 2153 .setRow(p.getRow()) 2154 .setFamily(HConstants.CATALOG_FAMILY) 2155 .setQualifier(getSeqNumColumn(replicaId)) 2156 .setTimestamp(p.getTimestamp()) 2157 .setType(Type.Put) 2158 .setValue(Bytes.toBytes(openSeqNum)) 2159 .build()); 2160 } 2161}