001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import edu.umd.cs.findbugs.annotations.NonNull; 021import edu.umd.cs.findbugs.annotations.Nullable; 022import java.io.ByteArrayOutputStream; 023import java.io.Closeable; 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.Collection; 028import java.util.Collections; 029import java.util.Iterator; 030import java.util.LinkedHashMap; 031import java.util.List; 032import java.util.Map; 033import java.util.NavigableMap; 034import java.util.Set; 035import java.util.TreeMap; 036import java.util.SortedMap; 037import java.util.regex.Matcher; 038import java.util.regex.Pattern; 039import java.util.stream.Collectors; 040import org.apache.hadoop.conf.Configuration; 041import org.apache.hadoop.hbase.Cell.Type; 042import org.apache.hadoop.hbase.client.Connection; 043import org.apache.hadoop.hbase.client.ConnectionFactory; 044import org.apache.hadoop.hbase.client.Consistency; 045import org.apache.hadoop.hbase.client.Delete; 046import org.apache.hadoop.hbase.client.Get; 047import org.apache.hadoop.hbase.client.Mutation; 048import org.apache.hadoop.hbase.client.Put; 049import org.apache.hadoop.hbase.client.RegionInfo; 050import org.apache.hadoop.hbase.client.RegionInfoBuilder; 051import org.apache.hadoop.hbase.client.RegionLocator; 052import org.apache.hadoop.hbase.client.RegionReplicaUtil; 053import org.apache.hadoop.hbase.client.Result; 054import org.apache.hadoop.hbase.client.ResultScanner; 055import org.apache.hadoop.hbase.client.Scan; 056import org.apache.hadoop.hbase.client.Table; 057import org.apache.hadoop.hbase.client.TableState; 058import org.apache.hadoop.hbase.client.coprocessor.Batch; 059import org.apache.hadoop.hbase.exceptions.DeserializationException; 060import org.apache.hadoop.hbase.filter.Filter; 061import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; 062import org.apache.hadoop.hbase.filter.RowFilter; 063import org.apache.hadoop.hbase.filter.SubstringComparator; 064import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils; 065import org.apache.hadoop.hbase.ipc.ServerRpcController; 066import org.apache.hadoop.hbase.master.RegionState; 067import org.apache.hadoop.hbase.master.RegionState.State; 068import org.apache.hadoop.hbase.protobuf.ProtobufUtil; 069import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; 070import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MultiRowMutationService; 071import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest; 072import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse; 073import org.apache.hadoop.hbase.util.Bytes; 074import 
org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ExceptionUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.base.Throwables;

/**
 * <p>
 * Read/write operations on <code>hbase:meta</code> region as well as assignment information stored
 * to <code>hbase:meta</code>.
 * </p>
 * <p>
 * Some of the methods of this class take ZooKeeperWatcher as a param. The only reason for this is
 * that when this class is used on the client side (e.g. HBaseAdmin), we want to use a short-lived
 * connection (opened before each operation, closed right after), while when used on HM or HRS
 * (like in AssignmentManager) we want a permanent connection.
 * </p>
 * <p>
 * HBASE-10070 adds a replicaId to HRI, meaning more than one HRI can be defined for the same table
 * range (table, startKey, endKey). For every range, there will be at least one HRI defined, which
 * is called the default replica.
 * </p>
 * <p>
 * <h2>Meta layout</h2>
 *
 * <pre>
 * For each table there is a single row named for the table with a 'table' column family.
 * The column family currently has one column in it, the 'state' column:
 *
 * table:state => contains table state
 *
 * Then for each table range ('Region'), there is a single row, formatted as:
 * <tableName>,<startKey>,<regionId>,<encodedRegionName>.
 * This row is the serialized regionName of the default region replica.
 * Columns are:
 * info:regioninfo => contains serialized HRI for the default region replica
 * info:server => contains hostname:port (in string form) for the server hosting
 *                the default regionInfo replica
 * info:server_<replicaId> => contains hostname:port (in string form) for the server hosting
 *                            the regionInfo replica with replicaId
 * info:serverstartcode => contains server start code (in binary long form) for the server
 *                         hosting the default regionInfo replica
 * info:serverstartcode_<replicaId> => contains server start code (in binary long form) for
 *                                     the server hosting the regionInfo replica with
 *                                     replicaId
 * info:seqnumDuringOpen => contains seqNum (in binary long form) for the region at the time
 *                          the server opened the region with default replicaId
 * info:seqnumDuringOpen_<replicaId> => contains seqNum (in binary long form) for the region
 *                                      at the time the server opened the region with
 *                                      replicaId
 * info:splitA => contains a serialized HRI for the first daughter region if the
 *                region is split
 * info:splitB => contains a serialized HRI for the second daughter region if the
 *                region is split
 * info:merge* => contains a serialized HRI for a merge parent region. There will be two
 *                or more of these columns in a row. A row that has these columns is
 *                undergoing a merge and is the result of the merge. Columns listed
 *                in the merge* columns are the parents of this merged region. Example
 *                columns: info:merge0001, info:merge0002. You may also see 'mergeA'
 *                and 'mergeB'. These are the old form, replaced by the new format that
 *                allows for more than two parents to be merged at a time.
140 * TODO: Add rep_barrier for serial replication explaination. See SerialReplicationChecker. 141 * </pre> 142 * </p> 143 * <p> 144 * The actual layout of meta should be encapsulated inside MetaTableAccessor methods, and should not 145 * leak out of it (through Result objects, etc) 146 * </p> 147 */ 148@InterfaceAudience.Private 149public class MetaTableAccessor { 150 151 private static final Logger LOG = LoggerFactory.getLogger(MetaTableAccessor.class); 152 private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META"); 153 154 @VisibleForTesting 155 public static final byte[] REPLICATION_PARENT_QUALIFIER = Bytes.toBytes("parent"); 156 157 private static final byte ESCAPE_BYTE = (byte) 0xFF; 158 159 private static final byte SEPARATED_BYTE = 0x00; 160 161 @InterfaceAudience.Private 162 public enum QueryType { 163 ALL(HConstants.TABLE_FAMILY, HConstants.CATALOG_FAMILY), 164 REGION(HConstants.CATALOG_FAMILY), 165 TABLE(HConstants.TABLE_FAMILY), 166 REPLICATION(HConstants.REPLICATION_BARRIER_FAMILY); 167 168 private final byte[][] families; 169 170 QueryType(byte[]... families) { 171 this.families = families; 172 } 173 174 byte[][] getFamilies() { 175 return this.families; 176 } 177 } 178 179 /** The delimiter for meta columns for replicaIds > 0 */ 180 static final char META_REPLICA_ID_DELIMITER = '_'; 181 182 /** A regex for parsing server columns from meta. See above javadoc for meta layout */ 183 private static final Pattern SERVER_COLUMN_PATTERN 184 = Pattern.compile("^server(_[0-9a-fA-F]{4})?$"); 185 186 //////////////////////// 187 // Reading operations // 188 //////////////////////// 189 190 /** 191 * Performs a full scan of <code>hbase:meta</code> for regions. 192 * @param connection connection we're using 193 * @param visitor Visitor invoked against each row in regions family. 194 */ 195 public static void fullScanRegions(Connection connection, final Visitor visitor) 196 throws IOException { 197 scanMeta(connection, null, null, QueryType.REGION, visitor); 198 } 199 200 /** 201 * Performs a full scan of <code>hbase:meta</code> for regions. 202 * @param connection connection we're using 203 */ 204 public static List<Result> fullScanRegions(Connection connection) throws IOException { 205 return fullScan(connection, QueryType.REGION); 206 } 207 208 /** 209 * Performs a full scan of <code>hbase:meta</code> for tables. 210 * @param connection connection we're using 211 * @param visitor Visitor invoked against each row in tables family. 212 */ 213 public static void fullScanTables(Connection connection, final Visitor visitor) 214 throws IOException { 215 scanMeta(connection, null, null, QueryType.TABLE, visitor); 216 } 217 218 /** 219 * Performs a full scan of <code>hbase:meta</code>. 220 * @param connection connection we're using 221 * @param type scanned part of meta 222 * @return List of {@link Result} 223 */ 224 private static List<Result> fullScan(Connection connection, QueryType type) throws IOException { 225 CollectAllVisitor v = new CollectAllVisitor(); 226 scanMeta(connection, null, null, type, v); 227 return v.getResults(); 228 } 229 230 /** 231 * Callers should call close on the returned {@link Table} instance. 
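   * <p>
   * Illustrative usage sketch (not from the original docs; assumes an already-open
   * {@code Connection} named {@code conn}):
   * <pre>
   * try (Table meta = MetaTableAccessor.getMetaHTable(conn)) {
   *   // read or mutate hbase:meta rows here; the table is closed on exit
   * }
   * </pre>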
232 * @param connection connection we're using to access Meta 233 * @return An {@link Table} for <code>hbase:meta</code> 234 */ 235 public static Table getMetaHTable(final Connection connection) throws IOException { 236 // We used to pass whole CatalogTracker in here, now we just pass in Connection 237 if (connection == null) { 238 throw new NullPointerException("No connection"); 239 } else if (connection.isClosed()) { 240 throw new IOException("connection is closed"); 241 } 242 return connection.getTable(TableName.META_TABLE_NAME); 243 } 244 245 /** 246 * @param t Table to use (will be closed when done). 247 * @param g Get to run 248 */ 249 private static Result get(final Table t, final Get g) throws IOException { 250 if (t == null) return null; 251 try { 252 return t.get(g); 253 } finally { 254 t.close(); 255 } 256 } 257 258 /** 259 * Gets the region info and assignment for the specified region. 260 * @param connection connection we're using 261 * @param regionName Region to lookup. 262 * @return Location and RegionInfo for <code>regionName</code> 263 * @deprecated use {@link #getRegionLocation(Connection, byte[])} instead 264 */ 265 @Deprecated 266 public static Pair<RegionInfo, ServerName> getRegion(Connection connection, byte [] regionName) 267 throws IOException { 268 HRegionLocation location = getRegionLocation(connection, regionName); 269 return location == null 270 ? null 271 : new Pair<>(location.getRegionInfo(), location.getServerName()); 272 } 273 274 /** 275 * Returns the HRegionLocation from meta for the given region 276 * @param connection connection we're using 277 * @param regionName region we're looking for 278 * @return HRegionLocation for the given region 279 */ 280 public static HRegionLocation getRegionLocation(Connection connection, byte[] regionName) 281 throws IOException { 282 byte[] row = regionName; 283 RegionInfo parsedInfo = null; 284 try { 285 parsedInfo = parseRegionInfoFromRegionName(regionName); 286 row = getMetaKeyForRegion(parsedInfo); 287 } catch (Exception parseEx) { 288 // Ignore. This is used with tableName passed as regionName. 289 } 290 Get get = new Get(row); 291 get.addFamily(HConstants.CATALOG_FAMILY); 292 Result r = get(getMetaHTable(connection), get); 293 RegionLocations locations = getRegionLocations(r); 294 return locations == null ? null 295 : locations.getRegionLocation(parsedInfo == null ? 0 : parsedInfo.getReplicaId()); 296 } 297 298 /** 299 * Returns the HRegionLocation from meta for the given region 300 * @param connection connection we're using 301 * @param regionInfo region information 302 * @return HRegionLocation for the given region 303 */ 304 public static HRegionLocation getRegionLocation(Connection connection, RegionInfo regionInfo) 305 throws IOException { 306 return getRegionLocation(getCatalogFamilyRow(connection, regionInfo), 307 regionInfo, regionInfo.getReplicaId()); 308 } 309 310 /** 311 * @return Return the {@link HConstants#CATALOG_FAMILY} row from hbase:meta. 312 */ 313 public static Result getCatalogFamilyRow(Connection connection, RegionInfo ri) 314 throws IOException { 315 Get get = new Get(getMetaKeyForRegion(ri)); 316 get.addFamily(HConstants.CATALOG_FAMILY); 317 return get(getMetaHTable(connection), get); 318 } 319 320 /** Returns the row key to use for this regionInfo */ 321 public static byte[] getMetaKeyForRegion(RegionInfo regionInfo) { 322 return RegionReplicaUtil.getRegionInfoForDefaultReplica(regionInfo).getRegionName(); 323 } 324 325 /** Returns an HRI parsed from this regionName. 
Not all the fields of the HRI 326 * is stored in the name, so the returned object should only be used for the fields 327 * in the regionName. 328 */ 329 // This should be moved to RegionInfo? TODO. 330 public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException { 331 byte[][] fields = RegionInfo.parseRegionName(regionName); 332 long regionId = Long.parseLong(Bytes.toString(fields[2])); 333 int replicaId = fields.length > 3 ? Integer.parseInt(Bytes.toString(fields[3]), 16) : 0; 334 return RegionInfoBuilder.newBuilder(TableName.valueOf(fields[0])) 335 .setStartKey(fields[1]) 336 .setEndKey(fields[2]) 337 .setSplit(false) 338 .setRegionId(regionId) 339 .setReplicaId(replicaId) 340 .build(); 341 } 342 343 /** 344 * Gets the result in hbase:meta for the specified region. 345 * @param connection connection we're using 346 * @param regionName region we're looking for 347 * @return result of the specified region 348 */ 349 public static Result getRegionResult(Connection connection, 350 byte[] regionName) throws IOException { 351 Get get = new Get(regionName); 352 get.addFamily(HConstants.CATALOG_FAMILY); 353 return get(getMetaHTable(connection), get); 354 } 355 356 /** 357 * Scans META table for a row whose key contains the specified <B>regionEncodedName</B>, 358 * returning a single related <code>Result</code> instance if any row is found, null otherwise. 359 * 360 * @param connection the connection to query META table. 361 * @param regionEncodedName the region encoded name to look for at META. 362 * @return <code>Result</code> instance with the row related info in META, null otherwise. 363 * @throws IOException if any errors occur while querying META. 364 */ 365 public static Result scanByRegionEncodedName(Connection connection, 366 String regionEncodedName) throws IOException { 367 RowFilter rowFilter = new RowFilter(CompareOperator.EQUAL, 368 new SubstringComparator(regionEncodedName)); 369 Scan scan = getMetaScan(connection, 1); 370 scan.setFilter(rowFilter); 371 ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan); 372 return resultScanner.next(); 373 } 374 375 /** 376 * @return Return all regioninfos listed in the 'info:merge*' columns of 377 * the <code>regionName</code> row. 378 */ 379 @Nullable 380 public static List<RegionInfo> getMergeRegions(Connection connection, byte[] regionName) 381 throws IOException { 382 return getMergeRegions(getRegionResult(connection, regionName).rawCells()); 383 } 384 385 /** 386 * @return Deserialized values of <qualifier,regioninfo> pairs taken from column values that match 387 * the regex 'info:merge.*' in array of <code>cells</code>. 388 */ 389 @Nullable 390 public static Map<String, RegionInfo> getMergeRegionsWithName(Cell [] cells) { 391 if (cells == null) { 392 return null; 393 } 394 Map<String, RegionInfo> regionsToMerge = null; 395 for (Cell cell: cells) { 396 if (!isMergeQualifierPrefix(cell)) { 397 continue; 398 } 399 // Ok. This cell is that of a info:merge* column. 400 RegionInfo ri = RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(), 401 cell.getValueLength()); 402 if (ri != null) { 403 if (regionsToMerge == null) { 404 regionsToMerge = new LinkedHashMap<>(); 405 } 406 regionsToMerge.put(Bytes.toString(CellUtil.cloneQualifier(cell)), ri); 407 } 408 } 409 return regionsToMerge; 410 } 411 412 /** 413 * @return Deserialized regioninfo values taken from column values that match 414 * the regex 'info:merge.*' in array of <code>cells</code>. 
415 */ 416 @Nullable 417 public static List<RegionInfo> getMergeRegions(Cell [] cells) { 418 Map<String, RegionInfo> mergeRegionsWithName = getMergeRegionsWithName(cells); 419 return (mergeRegionsWithName == null) ? null : new ArrayList<>(mergeRegionsWithName.values()); 420 } 421 422 /** 423 * @return True if any merge regions present in <code>cells</code>; i.e. 424 * the column in <code>cell</code> matches the regex 'info:merge.*'. 425 */ 426 public static boolean hasMergeRegions(Cell [] cells) { 427 for (Cell cell: cells) { 428 if (!isMergeQualifierPrefix(cell)) { 429 continue; 430 } 431 return true; 432 } 433 return false; 434 } 435 436 /** 437 * @return True if the column in <code>cell</code> matches the regex 'info:merge.*'. 438 */ 439 private static boolean isMergeQualifierPrefix(Cell cell) { 440 // Check to see if has family and that qualifier starts with the merge qualifier 'merge' 441 return CellUtil.matchingFamily(cell, HConstants.CATALOG_FAMILY) && 442 PrivateCellUtil.qualifierStartsWith(cell, HConstants.MERGE_QUALIFIER_PREFIX); 443 } 444 445 /** 446 * Checks if the specified table exists. Looks at the hbase:meta table hosted on 447 * the specified server. 448 * @param connection connection we're using 449 * @param tableName table to check 450 * @return true if the table exists in meta, false if not 451 */ 452 public static boolean tableExists(Connection connection, 453 final TableName tableName) 454 throws IOException { 455 // Catalog tables always exist. 456 return tableName.equals(TableName.META_TABLE_NAME) || 457 getTableState(connection, tableName) != null; 458 } 459 460 /** 461 * Lists all of the regions currently in META. 462 * 463 * @param connection to connect with 464 * @param excludeOfflinedSplitParents False if we are to include offlined/splitparents regions, 465 * true and we'll leave out offlined regions from returned list 466 * @return List of all user-space regions. 467 */ 468 @VisibleForTesting 469 public static List<RegionInfo> getAllRegions(Connection connection, 470 boolean excludeOfflinedSplitParents) 471 throws IOException { 472 List<Pair<RegionInfo, ServerName>> result; 473 474 result = getTableRegionsAndLocations(connection, null, 475 excludeOfflinedSplitParents); 476 477 return getListOfRegionInfos(result); 478 479 } 480 481 /** 482 * Gets all of the regions of the specified table. Do not use this method 483 * to get meta table regions, use methods in MetaTableLocator instead. 484 * @param connection connection we're using 485 * @param tableName table we're looking for 486 * @return Ordered list of {@link RegionInfo}. 487 */ 488 public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName) 489 throws IOException { 490 return getTableRegions(connection, tableName, false); 491 } 492 493 /** 494 * Gets all of the regions of the specified table. Do not use this method 495 * to get meta table regions, use methods in MetaTableLocator instead. 496 * @param connection connection we're using 497 * @param tableName table we're looking for 498 * @param excludeOfflinedSplitParents If true, do not include offlined split 499 * parents in the return. 500 * @return Ordered list of {@link RegionInfo}. 
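   * <p>
   * Illustrative call (sketch; {@code conn} and {@code tableName} are assumed to exist):
   * {@code List<RegionInfo> regions = getTableRegions(conn, tableName, true);} returns the
   * table's regions while skipping offlined split parents.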
501 */ 502 public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName, 503 final boolean excludeOfflinedSplitParents) throws IOException { 504 List<Pair<RegionInfo, ServerName>> result = 505 getTableRegionsAndLocations(connection, tableName, excludeOfflinedSplitParents); 506 return getListOfRegionInfos(result); 507 } 508 509 private static List<RegionInfo> getListOfRegionInfos( 510 final List<Pair<RegionInfo, ServerName>> pairs) { 511 if (pairs == null || pairs.isEmpty()) { 512 return Collections.emptyList(); 513 } 514 List<RegionInfo> result = new ArrayList<>(pairs.size()); 515 for (Pair<RegionInfo, ServerName> pair : pairs) { 516 result.add(pair.getFirst()); 517 } 518 return result; 519 } 520 521 /** 522 * @param tableName table we're working with 523 * @return start row for scanning META according to query type 524 */ 525 public static byte[] getTableStartRowForMeta(TableName tableName, QueryType type) { 526 if (tableName == null) { 527 return null; 528 } 529 switch (type) { 530 case REGION: 531 byte[] startRow = new byte[tableName.getName().length + 2]; 532 System.arraycopy(tableName.getName(), 0, startRow, 0, tableName.getName().length); 533 startRow[startRow.length - 2] = HConstants.DELIMITER; 534 startRow[startRow.length - 1] = HConstants.DELIMITER; 535 return startRow; 536 case ALL: 537 case TABLE: 538 default: 539 return tableName.getName(); 540 } 541 } 542 543 /** 544 * @param tableName table we're working with 545 * @return stop row for scanning META according to query type 546 */ 547 public static byte[] getTableStopRowForMeta(TableName tableName, QueryType type) { 548 if (tableName == null) { 549 return null; 550 } 551 final byte[] stopRow; 552 switch (type) { 553 case REGION: 554 stopRow = new byte[tableName.getName().length + 3]; 555 System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length); 556 stopRow[stopRow.length - 3] = ' '; 557 stopRow[stopRow.length - 2] = HConstants.DELIMITER; 558 stopRow[stopRow.length - 1] = HConstants.DELIMITER; 559 break; 560 case ALL: 561 case TABLE: 562 default: 563 stopRow = new byte[tableName.getName().length + 1]; 564 System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length); 565 stopRow[stopRow.length - 1] = ' '; 566 break; 567 } 568 return stopRow; 569 } 570 571 /** 572 * This method creates a Scan object that will only scan catalog rows that 573 * belong to the specified table. It doesn't specify any columns. 574 * This is a better alternative to just using a start row and scan until 575 * it hits a new table since that requires parsing the HRI to get the table 576 * name. 
577 * @param tableName bytes of table's name 578 * @return configured Scan object 579 */ 580 @Deprecated 581 public static Scan getScanForTableName(Connection connection, TableName tableName) { 582 // Start key is just the table name with delimiters 583 byte[] startKey = getTableStartRowForMeta(tableName, QueryType.REGION); 584 // Stop key appends the smallest possible char to the table name 585 byte[] stopKey = getTableStopRowForMeta(tableName, QueryType.REGION); 586 587 Scan scan = getMetaScan(connection, -1); 588 scan.setStartRow(startKey); 589 scan.setStopRow(stopKey); 590 return scan; 591 } 592 593 private static Scan getMetaScan(Connection connection, int rowUpperLimit) { 594 Scan scan = new Scan(); 595 int scannerCaching = connection.getConfiguration() 596 .getInt(HConstants.HBASE_META_SCANNER_CACHING, 597 HConstants.DEFAULT_HBASE_META_SCANNER_CACHING); 598 if (connection.getConfiguration().getBoolean(HConstants.USE_META_REPLICAS, 599 HConstants.DEFAULT_USE_META_REPLICAS)) { 600 scan.setConsistency(Consistency.TIMELINE); 601 } 602 if (rowUpperLimit > 0) { 603 scan.setLimit(rowUpperLimit); 604 scan.setReadType(Scan.ReadType.PREAD); 605 } 606 scan.setCaching(scannerCaching); 607 return scan; 608 } 609 /** 610 * Do not use this method to get meta table regions, use methods in MetaTableLocator instead. 611 * @param connection connection we're using 612 * @param tableName table we're looking for 613 * @return Return list of regioninfos and server. 614 */ 615 public static List<Pair<RegionInfo, ServerName>> 616 getTableRegionsAndLocations(Connection connection, TableName tableName) 617 throws IOException { 618 return getTableRegionsAndLocations(connection, tableName, true); 619 } 620 621 /** 622 * Do not use this method to get meta table regions, use methods in MetaTableLocator instead. 623 * @param connection connection we're using 624 * @param tableName table to work with, can be null for getting all regions 625 * @param excludeOfflinedSplitParents don't return split parents 626 * @return Return list of regioninfos and server addresses. 627 */ 628 // What happens here when 1M regions in hbase:meta? This won't scale? 629 public static List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations( 630 Connection connection, @Nullable final TableName tableName, 631 final boolean excludeOfflinedSplitParents) throws IOException { 632 if (tableName != null && tableName.equals(TableName.META_TABLE_NAME)) { 633 throw new IOException("This method can't be used to locate meta regions;" 634 + " use MetaTableLocator instead"); 635 } 636 // Make a version of CollectingVisitor that collects RegionInfo and ServerAddress 637 CollectingVisitor<Pair<RegionInfo, ServerName>> visitor = 638 new CollectingVisitor<Pair<RegionInfo, ServerName>>() { 639 private RegionLocations current = null; 640 641 @Override 642 public boolean visit(Result r) throws IOException { 643 current = getRegionLocations(r); 644 if (current == null || current.getRegionLocation().getRegion() == null) { 645 LOG.warn("No serialized RegionInfo in " + r); 646 return true; 647 } 648 RegionInfo hri = current.getRegionLocation().getRegion(); 649 if (excludeOfflinedSplitParents && hri.isSplitParent()) return true; 650 // Else call super and add this Result to the collection. 
651 return super.visit(r); 652 } 653 654 @Override 655 void add(Result r) { 656 if (current == null) { 657 return; 658 } 659 for (HRegionLocation loc : current.getRegionLocations()) { 660 if (loc != null) { 661 this.results.add(new Pair<>(loc.getRegion(), loc.getServerName())); 662 } 663 } 664 } 665 }; 666 scanMeta(connection, 667 getTableStartRowForMeta(tableName, QueryType.REGION), 668 getTableStopRowForMeta(tableName, QueryType.REGION), 669 QueryType.REGION, visitor); 670 return visitor.getResults(); 671 } 672 673 /** 674 * @param connection connection we're using 675 * @param serverName server whose regions we're interested in 676 * @return List of user regions installed on this server (does not include 677 * catalog regions). 678 * @throws IOException 679 */ 680 public static NavigableMap<RegionInfo, Result> 681 getServerUserRegions(Connection connection, final ServerName serverName) 682 throws IOException { 683 final NavigableMap<RegionInfo, Result> hris = new TreeMap<>(); 684 // Fill the above hris map with entries from hbase:meta that have the passed 685 // servername. 686 CollectingVisitor<Result> v = new CollectingVisitor<Result>() { 687 @Override 688 void add(Result r) { 689 if (r == null || r.isEmpty()) return; 690 RegionLocations locations = getRegionLocations(r); 691 if (locations == null) return; 692 for (HRegionLocation loc : locations.getRegionLocations()) { 693 if (loc != null) { 694 if (loc.getServerName() != null && loc.getServerName().equals(serverName)) { 695 hris.put(loc.getRegion(), r); 696 } 697 } 698 } 699 } 700 }; 701 scanMeta(connection, null, null, QueryType.REGION, v); 702 return hris; 703 } 704 705 public static void fullScanMetaAndPrint(Connection connection) 706 throws IOException { 707 Visitor v = r -> { 708 if (r == null || r.isEmpty()) { 709 return true; 710 } 711 LOG.info("fullScanMetaAndPrint.Current Meta Row: " + r); 712 TableState state = getTableState(r); 713 if (state != null) { 714 LOG.info("fullScanMetaAndPrint.Table State={}" + state); 715 } else { 716 RegionLocations locations = getRegionLocations(r); 717 if (locations == null) { 718 return true; 719 } 720 for (HRegionLocation loc : locations.getRegionLocations()) { 721 if (loc != null) { 722 LOG.info("fullScanMetaAndPrint.HRI Print={}", loc.getRegion()); 723 } 724 } 725 } 726 return true; 727 }; 728 scanMeta(connection, null, null, QueryType.ALL, v); 729 } 730 731 public static void scanMetaForTableRegions(Connection connection, Visitor visitor, 732 TableName tableName) throws IOException { 733 scanMeta(connection, tableName, QueryType.REGION, Integer.MAX_VALUE, visitor); 734 } 735 736 private static void scanMeta(Connection connection, TableName table, QueryType type, int maxRows, 737 final Visitor visitor) throws IOException { 738 scanMeta(connection, getTableStartRowForMeta(table, type), getTableStopRowForMeta(table, type), 739 type, maxRows, visitor); 740 } 741 742 private static void scanMeta(Connection connection, @Nullable final byte[] startRow, 743 @Nullable final byte[] stopRow, QueryType type, final Visitor visitor) throws IOException { 744 scanMeta(connection, startRow, stopRow, type, Integer.MAX_VALUE, visitor); 745 } 746 747 /** 748 * Performs a scan of META table for given table starting from given row. 
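   * <p>
   * Illustrative sketch (names are placeholders, not part of this API): collect up to 100 region
   * rows of a table starting from a given row:
   * <pre>
   * List&lt;Result&gt; rows = new ArrayList&lt;&gt;();
   * scanMeta(conn, r -&gt; rows.add(r), tableName, someRow, 100);
   * </pre>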
   * @param connection connection we're using
   * @param visitor visitor to call
   * @param tableName table within which we scan
   * @param row start scan from this row
   * @param rowLimit max number of rows to return
   */
  public static void scanMeta(Connection connection, final Visitor visitor,
      final TableName tableName, final byte[] row, final int rowLimit) throws IOException {
    byte[] startRow = null;
    byte[] stopRow = null;
    if (tableName != null) {
      startRow = getTableStartRowForMeta(tableName, QueryType.REGION);
      if (row != null) {
        RegionInfo closestRi = getClosestRegionInfo(connection, tableName, row);
        startRow =
          RegionInfo.createRegionName(tableName, closestRi.getStartKey(), HConstants.ZEROES, false);
      }
      stopRow = getTableStopRowForMeta(tableName, QueryType.REGION);
    }
    scanMeta(connection, startRow, stopRow, QueryType.REGION, rowLimit, visitor);
  }

  /**
   * Performs a scan of META table.
   * @param connection connection we're using
   * @param startRow Where to start the scan. Pass null if you want to begin the scan
   *                 at the first row.
   * @param stopRow Where to stop the scan. Pass null if you want to scan all rows
   *                from the start row
   * @param type scanned part of meta
   * @param maxRows maximum rows to return
   * @param visitor Visitor invoked against each row.
   */
  static void scanMeta(Connection connection, @Nullable final byte[] startRow,
      @Nullable final byte[] stopRow, QueryType type, int maxRows, final Visitor visitor)
      throws IOException {
    scanMeta(connection, startRow, stopRow, type, null, maxRows, visitor);
  }

  private static void scanMeta(Connection connection, @Nullable final byte[] startRow,
      @Nullable final byte[] stopRow, QueryType type, @Nullable Filter filter, int maxRows,
      final Visitor visitor) throws IOException {
    int rowUpperLimit = maxRows > 0 ? maxRows : Integer.MAX_VALUE;
    Scan scan = getMetaScan(connection, rowUpperLimit);

    for (byte[] family : type.getFamilies()) {
      scan.addFamily(family);
    }
    if (startRow != null) {
      scan.withStartRow(startRow);
    }
    if (stopRow != null) {
      scan.withStopRow(stopRow);
    }
    if (filter != null) {
      scan.setFilter(filter);
    }

    if (LOG.isTraceEnabled()) {
      LOG.trace("Scanning META" + " starting at row=" + Bytes.toStringBinary(startRow) +
        " stopping at row=" + Bytes.toStringBinary(stopRow) + " for max=" + rowUpperLimit +
        " with caching=" + scan.getCaching());
    }

    int currentRow = 0;
    try (Table metaTable = getMetaHTable(connection)) {
      try (ResultScanner scanner = metaTable.getScanner(scan)) {
        Result data;
        while ((data = scanner.next()) != null) {
          if (data.isEmpty()) continue;
          // Break if visit returns false.
820 if (!visitor.visit(data)) break; 821 if (++currentRow >= rowUpperLimit) break; 822 } 823 } 824 } 825 if (visitor instanceof Closeable) { 826 try { 827 ((Closeable) visitor).close(); 828 } catch (Throwable t) { 829 ExceptionUtil.rethrowIfInterrupt(t); 830 LOG.debug("Got exception in closing the meta scanner visitor", t); 831 } 832 } 833 } 834 835 /** 836 * @return Get closest metatable region row to passed <code>row</code> 837 */ 838 @NonNull 839 private static RegionInfo getClosestRegionInfo(Connection connection, 840 @NonNull final TableName tableName, @NonNull final byte[] row) throws IOException { 841 byte[] searchRow = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false); 842 Scan scan = getMetaScan(connection, 1); 843 scan.setReversed(true); 844 scan.withStartRow(searchRow); 845 try (ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan)) { 846 Result result = resultScanner.next(); 847 if (result == null) { 848 throw new TableNotFoundException("Cannot find row in META " + 849 " for table: " + tableName + ", row=" + Bytes.toStringBinary(row)); 850 } 851 RegionInfo regionInfo = getRegionInfo(result); 852 if (regionInfo == null) { 853 throw new IOException("RegionInfo was null or empty in Meta for " + 854 tableName + ", row=" + Bytes.toStringBinary(row)); 855 } 856 return regionInfo; 857 } 858 } 859 860 /** 861 * Returns the column family used for meta columns. 862 * @return HConstants.CATALOG_FAMILY. 863 */ 864 public static byte[] getCatalogFamily() { 865 return HConstants.CATALOG_FAMILY; 866 } 867 868 /** 869 * Returns the column family used for table columns. 870 * @return HConstants.TABLE_FAMILY. 871 */ 872 private static byte[] getTableFamily() { 873 return HConstants.TABLE_FAMILY; 874 } 875 876 /** 877 * Returns the column qualifier for serialized region info 878 * @return HConstants.REGIONINFO_QUALIFIER 879 */ 880 public static byte[] getRegionInfoColumn() { 881 return HConstants.REGIONINFO_QUALIFIER; 882 } 883 884 /** 885 * Returns the column qualifier for serialized table state 886 * @return HConstants.TABLE_STATE_QUALIFIER 887 */ 888 private static byte[] getTableStateColumn() { 889 return HConstants.TABLE_STATE_QUALIFIER; 890 } 891 892 /** 893 * Returns the column qualifier for serialized region state 894 * @return HConstants.STATE_QUALIFIER 895 */ 896 private static byte[] getRegionStateColumn() { 897 return HConstants.STATE_QUALIFIER; 898 } 899 900 /** 901 * Returns the column qualifier for serialized region state 902 * @param replicaId the replicaId of the region 903 * @return a byte[] for state qualifier 904 */ 905 @VisibleForTesting 906 static byte[] getRegionStateColumn(int replicaId) { 907 return replicaId == 0 ? HConstants.STATE_QUALIFIER 908 : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 909 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 910 } 911 912 /** 913 * Returns the column qualifier for serialized region state 914 * @param replicaId the replicaId of the region 915 * @return a byte[] for sn column qualifier 916 */ 917 public static byte[] getServerNameColumn(int replicaId) { 918 return replicaId == 0 ? 
HConstants.SERVERNAME_QUALIFIER 919 : Bytes.toBytes(HConstants.SERVERNAME_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 920 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 921 } 922 923 /** 924 * Returns the column qualifier for server column for replicaId 925 * @param replicaId the replicaId of the region 926 * @return a byte[] for server column qualifier 927 */ 928 @VisibleForTesting 929 public static byte[] getServerColumn(int replicaId) { 930 return replicaId == 0 931 ? HConstants.SERVER_QUALIFIER 932 : Bytes.toBytes(HConstants.SERVER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 933 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 934 } 935 936 /** 937 * Returns the column qualifier for server start code column for replicaId 938 * @param replicaId the replicaId of the region 939 * @return a byte[] for server start code column qualifier 940 */ 941 @VisibleForTesting 942 public static byte[] getStartCodeColumn(int replicaId) { 943 return replicaId == 0 944 ? HConstants.STARTCODE_QUALIFIER 945 : Bytes.toBytes(HConstants.STARTCODE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 946 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 947 } 948 949 /** 950 * Returns the column qualifier for seqNum column for replicaId 951 * @param replicaId the replicaId of the region 952 * @return a byte[] for seqNum column qualifier 953 */ 954 @VisibleForTesting 955 public static byte[] getSeqNumColumn(int replicaId) { 956 return replicaId == 0 957 ? HConstants.SEQNUM_QUALIFIER 958 : Bytes.toBytes(HConstants.SEQNUM_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 959 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 960 } 961 962 /** 963 * Parses the replicaId from the server column qualifier. See top of the class javadoc 964 * for the actual meta layout 965 * @param serverColumn the column qualifier 966 * @return an int for the replicaId 967 */ 968 @VisibleForTesting 969 static int parseReplicaIdFromServerColumn(byte[] serverColumn) { 970 String serverStr = Bytes.toString(serverColumn); 971 972 Matcher matcher = SERVER_COLUMN_PATTERN.matcher(serverStr); 973 if (matcher.matches() && matcher.groupCount() > 0) { 974 String group = matcher.group(1); 975 if (group != null && group.length() > 0) { 976 return Integer.parseInt(group.substring(1), 16); 977 } else { 978 return 0; 979 } 980 } 981 return -1; 982 } 983 984 /** 985 * Returns a {@link ServerName} from catalog table {@link Result}. 986 * @param r Result to pull from 987 * @return A ServerName instance or null if necessary fields not found or empty. 
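   * <p>
   * Illustrative call (sketch; {@code result} is a row previously read from hbase:meta):
   * {@code ServerName sn = getServerName(result, 0);} returns the location of the default replica.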
988 */ 989 @Nullable 990 @InterfaceAudience.Private // for use by HMaster#getTableRegionRow which is used for testing only 991 public static ServerName getServerName(final Result r, final int replicaId) { 992 byte[] serverColumn = getServerColumn(replicaId); 993 Cell cell = r.getColumnLatestCell(getCatalogFamily(), serverColumn); 994 if (cell == null || cell.getValueLength() == 0) return null; 995 String hostAndPort = Bytes.toString( 996 cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 997 byte[] startcodeColumn = getStartCodeColumn(replicaId); 998 cell = r.getColumnLatestCell(getCatalogFamily(), startcodeColumn); 999 if (cell == null || cell.getValueLength() == 0) return null; 1000 try { 1001 return ServerName.valueOf(hostAndPort, 1002 Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength())); 1003 } catch (IllegalArgumentException e) { 1004 LOG.error("Ignoring invalid region for server " + hostAndPort + "; cell=" + cell, e); 1005 return null; 1006 } 1007 } 1008 1009 /** 1010 * Returns the {@link ServerName} from catalog table {@link Result} where the region is 1011 * transitioning on. It should be the same as {@link MetaTableAccessor#getServerName(Result,int)} 1012 * if the server is at OPEN state. 1013 * 1014 * @param r Result to pull the transitioning server name from 1015 * @return A ServerName instance or {@link MetaTableAccessor#getServerName(Result,int)} 1016 * if necessary fields not found or empty. 1017 */ 1018 @Nullable 1019 public static ServerName getTargetServerName(final Result r, final int replicaId) { 1020 final Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, 1021 getServerNameColumn(replicaId)); 1022 if (cell == null || cell.getValueLength() == 0) { 1023 RegionLocations locations = MetaTableAccessor.getRegionLocations(r); 1024 if (locations != null) { 1025 HRegionLocation location = locations.getRegionLocation(replicaId); 1026 if (location != null) { 1027 return location.getServerName(); 1028 } 1029 } 1030 return null; 1031 } 1032 return ServerName.parseServerName(Bytes.toString(cell.getValueArray(), cell.getValueOffset(), 1033 cell.getValueLength())); 1034 } 1035 1036 /** 1037 * The latest seqnum that the server writing to meta observed when opening the region. 1038 * E.g. the seqNum when the result of {@link #getServerName(Result, int)} was written. 1039 * @param r Result to pull the seqNum from 1040 * @return SeqNum, or HConstants.NO_SEQNUM if there's no value written. 1041 */ 1042 private static long getSeqNumDuringOpen(final Result r, final int replicaId) { 1043 Cell cell = r.getColumnLatestCell(getCatalogFamily(), getSeqNumColumn(replicaId)); 1044 if (cell == null || cell.getValueLength() == 0) return HConstants.NO_SEQNUM; 1045 return Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 1046 } 1047 1048 /** 1049 * Returns the daughter regions by reading the corresponding columns of the catalog table 1050 * Result. 1051 * @param data a Result object from the catalog table scan 1052 * @return pair of RegionInfo or PairOfSameType(null, null) if region is not a split parent 1053 */ 1054 public static PairOfSameType<RegionInfo> getDaughterRegions(Result data) { 1055 RegionInfo splitA = getRegionInfo(data, HConstants.SPLITA_QUALIFIER); 1056 RegionInfo splitB = getRegionInfo(data, HConstants.SPLITB_QUALIFIER); 1057 return new PairOfSameType<>(splitA, splitB); 1058 } 1059 1060 /** 1061 * Returns an HRegionLocationList extracted from the result. 
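   * <p>
   * Illustrative call (sketch; {@code result} is a catalog row previously read from hbase:meta):
   * {@code RegionLocations locs = getRegionLocations(result);}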
1062 * @return an HRegionLocationList containing all locations for the region range or null if 1063 * we can't deserialize the result. 1064 */ 1065 @Nullable 1066 public static RegionLocations getRegionLocations(final Result r) { 1067 if (r == null) return null; 1068 RegionInfo regionInfo = getRegionInfo(r, getRegionInfoColumn()); 1069 if (regionInfo == null) return null; 1070 1071 List<HRegionLocation> locations = new ArrayList<>(1); 1072 NavigableMap<byte[],NavigableMap<byte[],byte[]>> familyMap = r.getNoVersionMap(); 1073 1074 locations.add(getRegionLocation(r, regionInfo, 0)); 1075 1076 NavigableMap<byte[], byte[]> infoMap = familyMap.get(getCatalogFamily()); 1077 if (infoMap == null) return new RegionLocations(locations); 1078 1079 // iterate until all serverName columns are seen 1080 int replicaId = 0; 1081 byte[] serverColumn = getServerColumn(replicaId); 1082 SortedMap<byte[], byte[]> serverMap; 1083 serverMap = infoMap.tailMap(serverColumn, false); 1084 1085 if (serverMap.isEmpty()) return new RegionLocations(locations); 1086 1087 for (Map.Entry<byte[], byte[]> entry : serverMap.entrySet()) { 1088 replicaId = parseReplicaIdFromServerColumn(entry.getKey()); 1089 if (replicaId < 0) { 1090 break; 1091 } 1092 HRegionLocation location = getRegionLocation(r, regionInfo, replicaId); 1093 // In case the region replica is newly created, it's location might be null. We usually do not 1094 // have HRL's in RegionLocations object with null ServerName. They are handled as null HRLs. 1095 if (location.getServerName() == null) { 1096 locations.add(null); 1097 } else { 1098 locations.add(location); 1099 } 1100 } 1101 1102 return new RegionLocations(locations); 1103 } 1104 1105 /** 1106 * Returns the HRegionLocation parsed from the given meta row Result 1107 * for the given regionInfo and replicaId. The regionInfo can be the default region info 1108 * for the replica. 1109 * @param r the meta row result 1110 * @param regionInfo RegionInfo for default replica 1111 * @param replicaId the replicaId for the HRegionLocation 1112 * @return HRegionLocation parsed from the given meta row Result for the given replicaId 1113 */ 1114 private static HRegionLocation getRegionLocation(final Result r, final RegionInfo regionInfo, 1115 final int replicaId) { 1116 ServerName serverName = getServerName(r, replicaId); 1117 long seqNum = getSeqNumDuringOpen(r, replicaId); 1118 RegionInfo replicaInfo = RegionReplicaUtil.getRegionInfoForReplica(regionInfo, replicaId); 1119 return new HRegionLocation(replicaInfo, serverName, seqNum); 1120 } 1121 1122 /** 1123 * Returns RegionInfo object from the column 1124 * HConstants.CATALOG_FAMILY:HConstants.REGIONINFO_QUALIFIER of the catalog 1125 * table Result. 1126 * @param data a Result object from the catalog table scan 1127 * @return RegionInfo or null 1128 */ 1129 public static RegionInfo getRegionInfo(Result data) { 1130 return getRegionInfo(data, HConstants.REGIONINFO_QUALIFIER); 1131 } 1132 1133 /** 1134 * Returns the RegionInfo object from the column {@link HConstants#CATALOG_FAMILY} and 1135 * <code>qualifier</code> of the catalog table result. 1136 * @param r a Result object from the catalog table scan 1137 * @param qualifier Column family qualifier 1138 * @return An RegionInfo instance or null. 
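   * <p>
   * Illustrative call (sketch): {@code RegionInfo a = getRegionInfo(result, HConstants.SPLITA_QUALIFIER);}
   * pulls the first daughter, if any, recorded on a split parent's row.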
1139 */ 1140 @Nullable 1141 public static RegionInfo getRegionInfo(final Result r, byte [] qualifier) { 1142 Cell cell = r.getColumnLatestCell(getCatalogFamily(), qualifier); 1143 if (cell == null) return null; 1144 return RegionInfo.parseFromOrNull(cell.getValueArray(), 1145 cell.getValueOffset(), cell.getValueLength()); 1146 } 1147 1148 /** 1149 * Fetch table state for given table from META table 1150 * @param conn connection to use 1151 * @param tableName table to fetch state for 1152 */ 1153 @Nullable 1154 public static TableState getTableState(Connection conn, TableName tableName) 1155 throws IOException { 1156 if (tableName.equals(TableName.META_TABLE_NAME)) { 1157 return new TableState(tableName, TableState.State.ENABLED); 1158 } 1159 Table metaHTable = getMetaHTable(conn); 1160 Get get = new Get(tableName.getName()).addColumn(getTableFamily(), getTableStateColumn()); 1161 Result result = metaHTable.get(get); 1162 return getTableState(result); 1163 } 1164 1165 /** 1166 * Fetch table states from META table 1167 * @param conn connection to use 1168 * @return map {tableName -> state} 1169 */ 1170 public static Map<TableName, TableState> getTableStates(Connection conn) 1171 throws IOException { 1172 final Map<TableName, TableState> states = new LinkedHashMap<>(); 1173 Visitor collector = r -> { 1174 TableState state = getTableState(r); 1175 if (state != null) { 1176 states.put(state.getTableName(), state); 1177 } 1178 return true; 1179 }; 1180 fullScanTables(conn, collector); 1181 return states; 1182 } 1183 1184 /** 1185 * Updates state in META 1186 * Do not use. For internal use only. 1187 * @param conn connection to use 1188 * @param tableName table to look for 1189 */ 1190 public static void updateTableState(Connection conn, TableName tableName, 1191 TableState.State actual) throws IOException { 1192 updateTableState(conn, new TableState(tableName, actual)); 1193 } 1194 1195 /** 1196 * Decode table state from META Result. 1197 * Should contain cell from HConstants.TABLE_FAMILY 1198 * @return null if not found 1199 */ 1200 @Nullable 1201 public static TableState getTableState(Result r) throws IOException { 1202 Cell cell = r.getColumnLatestCell(getTableFamily(), getTableStateColumn()); 1203 if (cell == null) { 1204 return null; 1205 } 1206 try { 1207 return TableState.parseFrom(TableName.valueOf(r.getRow()), 1208 Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), 1209 cell.getValueOffset() + cell.getValueLength())); 1210 } catch (DeserializationException e) { 1211 throw new IOException(e); 1212 } 1213 } 1214 1215 /** 1216 * Implementations 'visit' a catalog table row. 1217 */ 1218 public interface Visitor { 1219 /** 1220 * Visit the catalog table row. 1221 * @param r A row from catalog table 1222 * @return True if we are to proceed scanning the table, else false if 1223 * we are to stop now. 1224 */ 1225 boolean visit(final Result r) throws IOException; 1226 } 1227 1228 /** 1229 * Implementations 'visit' a catalog table row but with close() at the end. 1230 */ 1231 public interface CloseableVisitor extends Visitor, Closeable { 1232 } 1233 1234 /** 1235 * A {@link Visitor} that collects content out of passed {@link Result}. 
   */
  static abstract class CollectingVisitor<T> implements Visitor {
    final List<T> results = new ArrayList<>();
    @Override
    public boolean visit(Result r) throws IOException {
      if (r != null && !r.isEmpty()) {
        add(r);
      }
      return true;
    }

    abstract void add(Result r);

    /**
     * @return Collected results; wait till visits complete to collect all
     *         possible results
     */
    List<T> getResults() {
      return this.results;
    }
  }

  /**
   * Collects all returned.
   */
  static class CollectAllVisitor extends CollectingVisitor<Result> {
    @Override
    void add(Result r) {
      this.results.add(r);
    }
  }

  /**
   * A Visitor that skips offline regions and split parents
   */
  public static abstract class DefaultVisitorBase implements Visitor {

    DefaultVisitorBase() {
      super();
    }

    public abstract boolean visitInternal(Result rowResult) throws IOException;

    @Override
    public boolean visit(Result rowResult) throws IOException {
      RegionInfo info = getRegionInfo(rowResult);
      if (info == null) {
        return true;
      }

      // skip over offline and split regions
      if (!(info.isOffline() || info.isSplit())) {
        return visitInternal(rowResult);
      }
      return true;
    }
  }

  /**
   * A Visitor for a table. Provides a consistent view of the table's
   * hbase:meta entries during concurrent splits (see HBASE-5986 for details). This class
   * does not guarantee ordered traversal of meta entries, and can block until the
   * hbase:meta entries for daughters are available during splits.
   */
  public static abstract class TableVisitorBase extends DefaultVisitorBase {
    private TableName tableName;

    public TableVisitorBase(TableName tableName) {
      super();
      this.tableName = tableName;
    }

    @Override
    public final boolean visit(Result rowResult) throws IOException {
      RegionInfo info = getRegionInfo(rowResult);
      if (info == null) {
        return true;
      }
      if (!(info.getTable().equals(tableName))) {
        return false;
      }
      return super.visit(rowResult);
    }
  }

  /**
   * Count regions in <code>hbase:meta</code> for passed table.
   * @param c Configuration object
   * @param tableName table name to count regions for
   * @return Count of regions in table <code>tableName</code>
   */
  public static int getRegionCount(final Configuration c, final TableName tableName)
      throws IOException {
    try (Connection connection = ConnectionFactory.createConnection(c)) {
      return getRegionCount(connection, tableName);
    }
  }

  /**
   * Count regions in <code>hbase:meta</code> for passed table.
   * @param connection Connection object
   * @param tableName table name to count regions for
   * @return Count of regions in table <code>tableName</code>
   */
  public static int getRegionCount(final Connection connection, final TableName tableName)
      throws IOException {
    try (RegionLocator locator = connection.getRegionLocator(tableName)) {
      List<HRegionLocation> locations = locator.getAllRegionLocations();
      return locations == null ?
        0 : locations.size();
    }
  }

  ////////////////////////
  // Editing operations //
  ////////////////////////
  /**
   * Generates and returns a Put containing the region info for the catalog table
   */
  public static Put makePutFromRegionInfo(RegionInfo regionInfo, long ts) throws IOException {
    return addRegionInfo(new Put(regionInfo.getRegionName(), ts), regionInfo);
  }

  /**
   * Generates and returns a Delete containing the region info for the catalog table
   */
  private static Delete makeDeleteFromRegionInfo(RegionInfo regionInfo, long ts) {
    if (regionInfo == null) {
      throw new IllegalArgumentException("Can't make a delete for null region");
    }
    Delete delete = new Delete(regionInfo.getRegionName());
    delete.addFamily(getCatalogFamily(), ts);
    return delete;
  }

  /**
   * Adds split daughters to the Put
   */
  private static Put addDaughtersToPut(Put put, RegionInfo splitA, RegionInfo splitB)
      throws IOException {
    if (splitA != null) {
      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
        .setRow(put.getRow())
        .setFamily(HConstants.CATALOG_FAMILY)
        .setQualifier(HConstants.SPLITA_QUALIFIER)
        .setTimestamp(put.getTimestamp())
        .setType(Type.Put)
        .setValue(RegionInfo.toByteArray(splitA))
        .build());
    }
    if (splitB != null) {
      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
        .setRow(put.getRow())
        .setFamily(HConstants.CATALOG_FAMILY)
        .setQualifier(HConstants.SPLITB_QUALIFIER)
        .setTimestamp(put.getTimestamp())
        .setType(Type.Put)
        .setValue(RegionInfo.toByteArray(splitB))
        .build());
    }
    return put;
  }

  /**
   * Put the passed <code>p</code> to the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param p Put to add to hbase:meta
   */
  private static void putToMetaTable(Connection connection, Put p) throws IOException {
    try (Table table = getMetaHTable(connection)) {
      put(table, p);
    }
  }

  /**
   * @param t Table to use
   * @param p put to make
   */
  private static void put(Table t, Put p) throws IOException {
    debugLogMutation(p);
    t.put(p);
  }

  /**
   * Put the passed <code>ps</code> to the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param ps Puts to add to hbase:meta
   */
  public static void putsToMetaTable(final Connection connection, final List<Put> ps)
      throws IOException {
    if (ps.isEmpty()) {
      return;
    }
    try (Table t = getMetaHTable(connection)) {
      debugLogMutations(ps);
      // the implementation for putting a single Put is much simpler so here we do a check first.
      if (ps.size() == 1) {
        t.put(ps.get(0));
      } else {
        t.put(ps);
      }
    }
  }

  /**
   * Delete the passed <code>d</code> from the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param d Delete to add to hbase:meta
   */
  private static void deleteFromMetaTable(final Connection connection, final Delete d)
      throws IOException {
    List<Delete> dels = new ArrayList<>(1);
    dels.add(d);
    deleteFromMetaTable(connection, dels);
  }

  /**
   * Delete the passed <code>deletes</code> from the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param deletes Deletes to add to hbase:meta. This list should support #remove.
   */
  private static void deleteFromMetaTable(final Connection connection, final List<Delete> deletes)
      throws IOException {
    try (Table t = getMetaHTable(connection)) {
      debugLogMutations(deletes);
      t.delete(deletes);
    }
  }

  /**
   * Deletes some replica columns corresponding to replicas for the passed rows
   * @param metaRows rows in hbase:meta
   * @param replicaIndexToDeleteFrom the replica ID we would start deleting from
   * @param numReplicasToRemove how many replicas to remove
   * @param connection connection we're using to access meta table
   */
  public static void removeRegionReplicasFromMeta(Set<byte[]> metaRows,
      int replicaIndexToDeleteFrom, int numReplicasToRemove, Connection connection)
      throws IOException {
    int absoluteIndex = replicaIndexToDeleteFrom + numReplicasToRemove;
    for (byte[] row : metaRows) {
      long now = EnvironmentEdgeManager.currentTime();
      Delete deleteReplicaLocations = new Delete(row);
      for (int i = replicaIndexToDeleteFrom; i < absoluteIndex; i++) {
        deleteReplicaLocations.addColumns(getCatalogFamily(),
          getServerColumn(i), now);
        deleteReplicaLocations.addColumns(getCatalogFamily(),
          getSeqNumColumn(i), now);
        deleteReplicaLocations.addColumns(getCatalogFamily(),
          getStartCodeColumn(i), now);
        deleteReplicaLocations.addColumns(getCatalogFamily(), getServerNameColumn(i), now);
        deleteReplicaLocations.addColumns(getCatalogFamily(), getRegionStateColumn(i), now);
      }

      deleteFromMetaTable(connection, deleteReplicaLocations);
    }
  }

  private static Put addRegionStateToPut(Put put, RegionState.State state) throws IOException {
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(put.getRow())
      .setFamily(HConstants.CATALOG_FAMILY)
      .setQualifier(getRegionStateColumn())
      .setTimestamp(put.getTimestamp())
      .setType(Cell.Type.Put)
      .setValue(Bytes.toBytes(state.name()))
      .build());
    return put;
  }

  /**
   * Update state column in hbase:meta.
   */
  public static void updateRegionState(Connection connection, RegionInfo ri,
      RegionState.State state) throws IOException {
    Put put = new Put(RegionReplicaUtil.getRegionInfoForDefaultReplica(ri).getRegionName());
    MetaTableAccessor.putsToMetaTable(connection,
      Collections.singletonList(addRegionStateToPut(put, state)));
  }

  /**
   * Adds daughter region infos to hbase:meta row for the specified region. Note that this does not
   * add its daughters as different rows, but adds information about the daughters in the same row
   * as the parent. Use
   * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)}
   * if you want to do that.
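   * <p>
   * Illustrative call (sketch; {@code conn}, {@code parent}, {@code daughterA} and
   * {@code daughterB} are assumed to exist):
   * {@code addSplitsToParent(conn, parent, daughterA, daughterB);}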

  /**
   * Adds daughter region infos to the hbase:meta row for the specified region. Note that this
   * does not add the daughters as separate rows, but adds information about the daughters in the
   * same row as the parent. Use
   * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)}
   * if you want to do that.
   * @param connection connection we're using
   * @param regionInfo RegionInfo of the parent region
   * @param splitA first split daughter of the parent regionInfo
   * @param splitB second split daughter of the parent regionInfo
   * @throws IOException if problem connecting or updating meta
   */
  public static void addSplitsToParent(Connection connection, RegionInfo regionInfo,
      RegionInfo splitA, RegionInfo splitB) throws IOException {
    try (Table meta = getMetaHTable(connection)) {
      Put put = makePutFromRegionInfo(regionInfo, EnvironmentEdgeManager.currentTime());
      addDaughtersToPut(put, splitA, splitB);
      meta.put(put);
      debugLogMutation(put);
      LOG.debug("Added region {}", regionInfo.getRegionNameAsString());
    }
  }

  /**
   * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this
   * does not add the daughters as separate rows, but adds information about the daughters in the
   * same row as the parent. Use
   * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)}
   * if you want to do that.
   * @param connection connection we're using
   * @param regionInfo region information
   * @throws IOException if problem connecting or updating meta
   */
  @VisibleForTesting
  public static void addRegionToMeta(Connection connection, RegionInfo regionInfo)
      throws IOException {
    addRegionsToMeta(connection, Collections.singletonList(regionInfo), 1);
  }

  /**
   * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions
   * is CLOSED.
   * @param connection connection we're using
   * @param regionInfos region information list
   * @throws IOException if problem connecting or updating meta
   */
  public static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos,
      int regionReplication) throws IOException {
    addRegionsToMeta(connection, regionInfos, regionReplication,
      EnvironmentEdgeManager.currentTime());
  }

  /**
   * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions
   * is CLOSED.
   * @param connection connection we're using
   * @param regionInfos region information list
   * @param ts desired timestamp
   * @throws IOException if problem connecting or updating meta
   */
  private static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos,
      int regionReplication, long ts) throws IOException {
    List<Put> puts = new ArrayList<>();
    for (RegionInfo regionInfo : regionInfos) {
      if (RegionReplicaUtil.isDefaultReplica(regionInfo)) {
        Put put = makePutFromRegionInfo(regionInfo, ts);
        // New regions are added with an initial state of CLOSED.
        addRegionStateToPut(put, RegionState.State.CLOSED);
        // Add empty locations for region replicas so that the number of replicas can be cached
        // whenever the primary region is looked up from meta.
        for (int i = 1; i < regionReplication; i++) {
          addEmptyLocation(put, i);
        }
        puts.add(put);
      }
    }
    putsToMetaTable(connection, puts);
    LOG.info("Added {} regions to meta.", puts.size());
  }
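
  // Usage sketch (editor's illustration): registering the regions of a new table with two
  // replicas per region. Each default replica gets an info:regioninfo cell, state CLOSED, and an
  // empty location for replica 1. The table name and split point below are made up for the
  // example; 'conn' is assumed to exist in the caller.
  //
  //   TableName tn = TableName.valueOf("example_table");
  //   RegionInfo a = RegionInfoBuilder.newBuilder(tn)
  //     .setStartKey(HConstants.EMPTY_START_ROW).setEndKey(Bytes.toBytes("m")).build();
  //   RegionInfo b = RegionInfoBuilder.newBuilder(tn)
  //     .setStartKey(Bytes.toBytes("m")).setEndKey(HConstants.EMPTY_END_ROW).build();
  //   MetaTableAccessor.addRegionsToMeta(conn, Arrays.asList(a, b), 2);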

  static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException {
    // Arbitrary limit: the formatted qualifier below only has room for a four-digit counter.
    int limit = 10000;
    int max = mergeRegions.size();
    if (max > limit) {
      // Should never happen!!!!! But just in case.
      throw new RuntimeException("Can't merge " + max + " regions in one go; " + limit +
        " is upper-limit.");
    }
    int counter = 0;
    for (RegionInfo ri : mergeRegions) {
      String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++);
      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
        .setRow(put.getRow())
        .setFamily(HConstants.CATALOG_FAMILY)
        .setQualifier(Bytes.toBytes(qualifier))
        .setTimestamp(put.getTimestamp())
        .setType(Type.Put)
        .setValue(RegionInfo.toByteArray(ri))
        .build());
    }
    return put;
  }
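
  // Layout sketch (editor's illustration): for two merging parents, addMergeRegions adds cells
  // under the catalog family whose qualifiers are HConstants.MERGE_QUALIFIER_PREFIX_STR plus a
  // four-digit counter. Assuming the prefix is "merge", the parent row ends up with:
  //
  //   info:merge0000 = <serialized RegionInfo of the first parent>
  //   info:merge0001 = <serialized RegionInfo of the second parent>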

  /**
   * Merge regions into one in an atomic operation. Deletes the merging regions in hbase:meta and
   * adds the merged region.
   * @param connection connection we're using
   * @param mergedRegion the merged region
   * @param parentSeqNum parent regions to merge and their next open sequence id, used by serial
   *          replication. Set to -1 if not needed by this table.
   * @param sn the location of the region
   * @param regionReplication the number of replicas configured for the table's regions
   */
  public static void mergeRegions(Connection connection, RegionInfo mergedRegion,
      Map<RegionInfo, Long> parentSeqNum, ServerName sn, int regionReplication)
      throws IOException {
    try (Table meta = getMetaHTable(connection)) {
      long time = HConstants.LATEST_TIMESTAMP;
      List<Mutation> mutations = new ArrayList<>();
      List<RegionInfo> replicationParents = new ArrayList<>();
      for (Map.Entry<RegionInfo, Long> e : parentSeqNum.entrySet()) {
        RegionInfo ri = e.getKey();
        long seqNum = e.getValue();
        // Deletes for the merging regions
        mutations.add(makeDeleteFromRegionInfo(ri, time));
        if (seqNum > 0) {
          mutations.add(makePutForReplicationBarrier(ri, seqNum, time));
          replicationParents.add(ri);
        }
      }
      // Put for the merged region
      Put putOfMerged = makePutFromRegionInfo(mergedRegion, time);
      putOfMerged = addMergeRegions(putOfMerged, parentSeqNum.keySet());
      // Set initial state to CLOSED.
      // NOTE: If the initial state is not set to CLOSED, the merged region gets added with the
      // default OFFLINE state. If the Master gets restarted after this step, its startup sequence
      // tries to assign this offline region. This is followed by re-assignment of the merged
      // region from the resumed {@link MergeTableRegionsProcedure}.
      addRegionStateToPut(putOfMerged, RegionState.State.CLOSED);
      mutations.add(putOfMerged);
      // The merged region is a new region, so openSeqNum = 1 is fine. ServerName may be null
      // if a crash happened after the merge but before we got here; that means the in-memory
      // locations of the offlined, now-closed merging regions are lost. Should be ok; we
      // assign the merged region later.
      if (sn != null) {
        addLocation(putOfMerged, sn, 1, mergedRegion.getReplicaId());
      }

      // Add empty locations for region replicas of the merged region so that the number of
      // replicas can be cached whenever the primary region is looked up from meta.
      for (int i = 1; i < regionReplication; i++) {
        addEmptyLocation(putOfMerged, i);
      }
      // Add the parent reference for serial replication.
      if (!replicationParents.isEmpty()) {
        addReplicationParent(putOfMerged, replicationParents);
      }
      byte[] tableRow = Bytes.toBytes(mergedRegion.getRegionNameAsString() + HConstants.DELIMITER);
      multiMutate(meta, tableRow, mutations);
    }
  }

  /**
   * Splits the region into two in an atomic operation. Offlines the parent region with the
   * information that it is split into two, and also adds the daughter regions. Does not add the
   * location information to the daughter regions since they are not open yet.
   * @param connection connection we're using
   * @param parent the parent region which is split
   * @param parentOpenSeqNum the next open sequence id for the parent region, used by serial
   *          replication. -1 if not necessary.
   * @param splitA split daughter region A
   * @param splitB split daughter region B
   * @param sn the location of the region
   * @param regionReplication the number of replicas configured for the table's regions
   */
  public static void splitRegion(Connection connection, RegionInfo parent, long parentOpenSeqNum,
      RegionInfo splitA, RegionInfo splitB, ServerName sn, int regionReplication)
      throws IOException {
    try (Table meta = getMetaHTable(connection)) {
      long time = EnvironmentEdgeManager.currentTime();
      // Put for the parent
      Put putParent = makePutFromRegionInfo(RegionInfoBuilder.newBuilder(parent)
        .setOffline(true)
        .setSplit(true).build(), time);
      addDaughtersToPut(putParent, splitA, splitB);

      // Puts for the daughters
      Put putA = makePutFromRegionInfo(splitA, time);
      Put putB = makePutFromRegionInfo(splitB, time);
      if (parentOpenSeqNum > 0) {
        addReplicationBarrier(putParent, parentOpenSeqNum);
        addReplicationParent(putA, Collections.singletonList(parent));
        addReplicationParent(putB, Collections.singletonList(parent));
      }
      // Set initial state to CLOSED.
      // NOTE: If the initial state is not set to CLOSED, the daughter regions get added with the
      // default OFFLINE state. If the Master gets restarted after this step, its startup sequence
      // tries to assign these offline regions. This is followed by re-assignment of the daughter
      // regions from the resumed {@link SplitTableRegionProcedure}.
      addRegionStateToPut(putA, RegionState.State.CLOSED);
      addRegionStateToPut(putB, RegionState.State.CLOSED);

      addSequenceNum(putA, 1, splitA.getReplicaId()); // New regions, openSeqNum = 1 is fine.
      addSequenceNum(putB, 1, splitB.getReplicaId());

      // Add empty locations for region replicas of the daughters so that the number of replicas
      // can be cached whenever the primary region is looked up from meta.
      for (int i = 1; i < regionReplication; i++) {
        addEmptyLocation(putA, i);
        addEmptyLocation(putB, i);
      }

      byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER);
      multiMutate(meta, tableRow, putParent, putA, putB);
    }
  }
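
  // Usage sketch (editor's illustration): recording a split in one atomic meta update. The parent
  // row is rewritten as offline/split with info:splitA and info:splitB cells, and the daughter
  // rows are added in state CLOSED with openSeqNum 1. 'conn', 'parent', 'daughterA', 'daughterB'
  // and 'sn' are assumed to exist in the caller; -1 disables the serial-replication barrier.
  //
  //   MetaTableAccessor.splitRegion(conn, parent, -1, daughterA, daughterB, sn, 1);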

  /**
   * Update the state of the table in meta.
   * @param connection what we use for the update
   * @param state new state
   */
  private static void updateTableState(Connection connection, TableState state)
      throws IOException {
    Put put = makePutFromTableState(state, EnvironmentEdgeManager.currentTime());
    putToMetaTable(connection, put);
    LOG.info("Updated {} in hbase:meta", state);
  }

  /**
   * Construct a Put for the given table state.
   * @param state new state
   */
  public static Put makePutFromTableState(TableState state, long ts) {
    Put put = new Put(state.getTableName().getName(), ts);
    put.addColumn(getTableFamily(), getTableStateColumn(), state.convert().toByteArray());
    return put;
  }

  /**
   * Remove the state for a table from meta.
   * @param connection to use for the deletion
   * @param table to delete state for
   */
  public static void deleteTableState(Connection connection, TableName table)
      throws IOException {
    long time = EnvironmentEdgeManager.currentTime();
    Delete delete = new Delete(table.getName());
    delete.addColumns(getTableFamily(), getTableStateColumn(), time);
    deleteFromMetaTable(connection, delete);
    LOG.info("Deleted table " + table + " state from META");
  }
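
  // Usage sketch (editor's illustration): dropping the table state cell for a table, e.g. as part
  // of table deletion. 'conn' is assumed to exist in the caller; the table name is made up.
  //
  //   MetaTableAccessor.deleteTableState(conn, TableName.valueOf("example_table"));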

  private static void multiMutate(Table table, byte[] row, Mutation... mutations)
      throws IOException {
    multiMutate(table, row, Arrays.asList(mutations));
  }

  /**
   * Performs an atomic multi-mutate operation against the given table. Used by the likes of
   * merge and split as these want to make atomic mutations across multiple rows.
   * @throws IOException even if we encounter a RuntimeException, we'll still wrap it in an IOE.
   */
  @VisibleForTesting
  static void multiMutate(final Table table, byte[] row, final List<Mutation> mutations)
      throws IOException {
    debugLogMutations(mutations);
    Batch.Call<MultiRowMutationService, MutateRowsResponse> callable = instance -> {
      MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder();
      for (Mutation mutation : mutations) {
        if (mutation instanceof Put) {
          builder.addMutationRequest(
            ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.PUT, mutation));
        } else if (mutation instanceof Delete) {
          builder.addMutationRequest(
            ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.DELETE, mutation));
        } else {
          throw new DoNotRetryIOException(
            "multi in MetaEditor doesn't support " + mutation.getClass().getName());
        }
      }
      ServerRpcController controller = new ServerRpcController();
      CoprocessorRpcUtils.BlockingRpcCallback<MutateRowsResponse> rpcCallback =
        new CoprocessorRpcUtils.BlockingRpcCallback<>();
      instance.mutateRows(controller, builder.build(), rpcCallback);
      MutateRowsResponse resp = rpcCallback.get();
      if (controller.failedOnException()) {
        throw controller.getFailedOn();
      }
      return resp;
    };
    try {
      table.coprocessorService(MultiRowMutationService.class, row, row, callable);
    } catch (Throwable e) {
      // Throw if an IOE, else wrap in an IOE EVEN IF IT IS a RuntimeException (e.g. a
      // RejectedExecutionException because the hosting executor is shutting down). This is old
      // behavior worth reexamining. Procedures doing merge or split currently don't handle
      // RuntimeExceptions coming up out of meta table edits. Would have to work on this at
      // least. See HBASE-23904.
      Throwables.throwIfInstanceOf(e, IOException.class);
      throw new IOException(e);
    }
  }

  /**
   * Updates the location of the specified region in hbase:meta to be the specified server
   * hostname and startcode.
   * <p>
   * Uses the passed catalog tracker to get a connection to the server hosting hbase:meta and
   * makes edits to that region.
   * @param connection connection we're using
   * @param regionInfo region to update the location of
   * @param sn Server name
   * @param openSeqNum the latest sequence number obtained when the region was open
   * @param masterSystemTime wall clock time from master if passed in the open region RPC
   */
  @VisibleForTesting
  public static void updateRegionLocation(Connection connection, RegionInfo regionInfo,
      ServerName sn, long openSeqNum, long masterSystemTime) throws IOException {
    updateLocation(connection, regionInfo, sn, openSeqNum, masterSystemTime);
  }

  /**
   * Updates the location of the specified region to be the specified server.
   * <p>
   * Connects to the specified server which should be hosting the specified catalog region name
   * to perform the edit.
   * @param connection connection we're using
   * @param regionInfo region to update the location of
   * @param sn Server name
   * @param openSeqNum the latest sequence number obtained when the region was open
   * @param masterSystemTime wall clock time from master if passed in the open region RPC
   * @throws IOException In particular could throw {@link java.net.ConnectException} if the server
   *           is down on the other end.
   */
  private static void updateLocation(Connection connection, RegionInfo regionInfo, ServerName sn,
      long openSeqNum, long masterSystemTime) throws IOException {
    // Region replicas are kept in the primary region's row.
    Put put = new Put(getMetaKeyForRegion(regionInfo), masterSystemTime);
    addRegionInfo(put, regionInfo);
    addLocation(put, sn, openSeqNum, regionInfo.getReplicaId());
    putToMetaTable(connection, put);
    LOG.info("Updated row {} with server={}", regionInfo.getRegionNameAsString(), sn);
  }
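
  // Usage sketch (editor's illustration): recording that a region opened on a server. This writes
  // the server, startcode and open-sequence-number columns for the region's replica. 'conn',
  // 'regionInfo', 'serverName', 'openSeqNum' and 'masterSystemTime' are assumed to exist in the
  // caller.
  //
  //   MetaTableAccessor.updateRegionLocation(conn, regionInfo, serverName, openSeqNum,
  //     masterSystemTime);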

  /**
   * Deletes the specified region from META.
   * @param connection connection we're using
   * @param regionInfo region to be deleted from META
   */
  public static void deleteRegionInfo(Connection connection, RegionInfo regionInfo)
      throws IOException {
    Delete delete = new Delete(regionInfo.getRegionName());
    delete.addFamily(getCatalogFamily(), HConstants.LATEST_TIMESTAMP);
    deleteFromMetaTable(connection, delete);
    LOG.info("Deleted " + regionInfo.getRegionNameAsString());
  }

  /**
   * Deletes the specified regions from META.
   * @param connection connection we're using
   * @param regionsInfo list of regions to be deleted from META
   */
  public static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo)
      throws IOException {
    deleteRegionInfos(connection, regionsInfo, EnvironmentEdgeManager.currentTime());
  }

  /**
   * Deletes the specified regions from META.
   * @param connection connection we're using
   * @param regionsInfo list of regions to be deleted from META
   */
  private static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo,
      long ts) throws IOException {
    List<Delete> deletes = new ArrayList<>(regionsInfo.size());
    for (RegionInfo hri : regionsInfo) {
      Delete e = new Delete(hri.getRegionName());
      e.addFamily(getCatalogFamily(), ts);
      deletes.add(e);
    }
    deleteFromMetaTable(connection, deletes);
    LOG.info("Deleted {} regions from META", regionsInfo.size());
    LOG.debug("Deleted regions: {}", regionsInfo);
  }

  /**
   * Overwrites the specified regions in hbase:meta. Deletes the old rows for the given regions
   * and adds new ones. Regions added back have state CLOSED.
   * @param connection connection we're using
   * @param regionInfos list of regions to be added to META
   */
  public static void overwriteRegions(Connection connection, List<RegionInfo> regionInfos,
      int regionReplication) throws IOException {
    // Use master time for the delete markers and the Puts.
    long now = EnvironmentEdgeManager.currentTime();
    deleteRegionInfos(connection, regionInfos, now);
    // Why the now + 1 (formerly a sleep)? This is the easiest way to ensure that the preceding
    // deletes do not eclipse the following puts, which could otherwise happen at the same ts on
    // the server. See HBASE-9906 and HBASE-9879. Once either HBASE-9879 or HBASE-8770 is fixed,
    // or HBASE-9905 is fixed and meta uses seqIds, we do not need this.
    //
    // HBASE-13875 uses the master timestamp for the mutations; the old 20ms sleep is not needed.
    addRegionsToMeta(connection, regionInfos, regionReplication, now + 1);
    LOG.info("Overwrote {} regions in meta", regionInfos.size());
    LOG.debug("Overwritten regions: {} ", regionInfos);
  }

  /**
   * Deletes merge qualifiers for the specified merged region.
   * @param connection connection we're using
   * @param mergeRegion the merged region
   */
  public static void deleteMergeQualifiers(Connection connection, final RegionInfo mergeRegion)
      throws IOException {
    Delete delete = new Delete(mergeRegion.getRegionName());
    // NOTE: We are doing a new hbase:meta read here.
    Cell[] cells = getRegionResult(connection, mergeRegion.getRegionName()).rawCells();
    if (cells == null || cells.length == 0) {
      return;
    }
    List<byte[]> qualifiers = new ArrayList<>();
    for (Cell cell : cells) {
      if (!isMergeQualifierPrefix(cell)) {
        continue;
      }
      byte[] qualifier = CellUtil.cloneQualifier(cell);
      qualifiers.add(qualifier);
      delete.addColumns(getCatalogFamily(), qualifier, HConstants.LATEST_TIMESTAMP);
    }

    // There is a race condition: a GCMultipleMergedRegionsProcedure can be scheduled while a
    // previous GCMultipleMergedRegionsProcedure is still going on. In that case, the second
    // GCMultipleMergedRegionsProcedure could delete the merged region by accident!
    if (qualifiers.isEmpty()) {
      LOG.info("No merged qualifiers for region " + mergeRegion.getRegionNameAsString() +
        " in meta table; they have already been cleaned up. Skipping.");
      return;
    }

    deleteFromMetaTable(connection, delete);
    LOG.info("Deleted merge references in " + mergeRegion.getRegionNameAsString() +
      ", deleted qualifiers " +
      qualifiers.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")));
  }

  public static Put addRegionInfo(final Put p, final RegionInfo hri) throws IOException {
    p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(p.getRow())
      .setFamily(getCatalogFamily())
      .setQualifier(HConstants.REGIONINFO_QUALIFIER)
      .setTimestamp(p.getTimestamp())
      .setType(Type.Put)
      // Serialize the default replica RegionInfo, otherwise a scan of hbase:meta shows an
      // info:regioninfo value with an encoded name and region name that differ from those of
      // the hbase:meta row.
      .setValue(RegionInfo.toByteArray(RegionReplicaUtil.getRegionInfoForDefaultReplica(hri)))
      .build());
    return p;
  }

  public static Put addLocation(Put p, ServerName sn, long openSeqNum, int replicaId)
      throws IOException {
    CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
    return p.add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getServerColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Cell.Type.Put)
        .setValue(Bytes.toBytes(sn.getAddress().toString()))
        .build())
      .add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getStartCodeColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Cell.Type.Put)
        .setValue(Bytes.toBytes(sn.getStartcode()))
        .build())
      .add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getSeqNumColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Type.Put)
        .setValue(Bytes.toBytes(openSeqNum))
        .build());
  }

  private static void writeRegionName(ByteArrayOutputStream out, byte[] regionName) {
    for (byte b : regionName) {
      if (b == ESCAPE_BYTE) {
        out.write(ESCAPE_BYTE);
      }
      out.write(b);
    }
  }

  @VisibleForTesting
  public static byte[] getParentsBytes(List<RegionInfo> parents) {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    Iterator<RegionInfo> iter = parents.iterator();
    writeRegionName(bos, iter.next().getRegionName());
    while (iter.hasNext()) {
      bos.write(ESCAPE_BYTE);
      bos.write(SEPARATED_BYTE);
      writeRegionName(bos, iter.next().getRegionName());
    }
    return bos.toByteArray();
  }

  private static List<byte[]> parseParentsBytes(byte[] bytes) {
    List<byte[]> parents = new ArrayList<>();
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    for (int i = 0; i < bytes.length; i++) {
      if (bytes[i] == ESCAPE_BYTE) {
        i++;
        if (bytes[i] == SEPARATED_BYTE) {
          parents.add(bos.toByteArray());
          bos.reset();
          continue;
        }
        // Fall through to append the escaped byte.
      }
      bos.write(bytes[i]);
    }
    if (bos.size() > 0) {
      parents.add(bos.toByteArray());
    }
    return parents;
  }

  private static void addReplicationParent(Put put, List<RegionInfo> parents) throws IOException {
    byte[] value = getParentsBytes(parents);
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
      .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(REPLICATION_PARENT_QUALIFIER)
      .setTimestamp(put.getTimestamp()).setType(Type.Put).setValue(value).build());
  }
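
  // Encoding sketch (editor's illustration): getParentsBytes joins parent region names with the
  // two-byte sequence ESCAPE_BYTE + SEPARATED_BYTE, and any ESCAPE_BYTE occurring inside a region
  // name is written twice; parseParentsBytes reverses this. Conceptually, for two parents:
  //
  //   byte[] value = getParentsBytes(Arrays.asList(parentA, parentB));
  //   // value = name(parentA) + ESCAPE_BYTE + SEPARATED_BYTE + name(parentB),
  //   // with every ESCAPE_BYTE inside the names doubled.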

  public static Put makePutForReplicationBarrier(RegionInfo regionInfo, long openSeqNum, long ts)
      throws IOException {
    Put put = new Put(regionInfo.getRegionName(), ts);
    addReplicationBarrier(put, openSeqNum);
    return put;
  }

  /**
   * See class comment on SerialReplicationChecker
   */
  public static void addReplicationBarrier(Put put, long openSeqNum) throws IOException {
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(put.getRow())
      .setFamily(HConstants.REPLICATION_BARRIER_FAMILY)
      .setQualifier(HConstants.SEQNUM_QUALIFIER)
      .setTimestamp(put.getTimestamp())
      .setType(Type.Put)
      .setValue(Bytes.toBytes(openSeqNum))
      .build());
  }

  private static Put addEmptyLocation(Put p, int replicaId) throws IOException {
    CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
    return p.add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getServerColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Type.Put)
        .build())
      .add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getStartCodeColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Cell.Type.Put)
        .build())
      .add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getSeqNumColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Cell.Type.Put)
        .build());
  }

  public static final class ReplicationBarrierResult {
    private final long[] barriers;
    private final RegionState.State state;
    private final List<byte[]> parentRegionNames;

    ReplicationBarrierResult(long[] barriers, State state, List<byte[]> parentRegionNames) {
      this.barriers = barriers;
      this.state = state;
      this.parentRegionNames = parentRegionNames;
    }

    public long[] getBarriers() {
      return barriers;
    }

    public RegionState.State getState() {
      return state;
    }

    public List<byte[]> getParentRegionNames() {
      return parentRegionNames;
    }

    @Override
    public String toString() {
      return "ReplicationBarrierResult [barriers=" + Arrays.toString(barriers) + ", state=" +
        state + ", parentRegionNames=" +
        parentRegionNames.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")) +
        "]";
    }
  }

  private static long getReplicationBarrier(Cell c) {
    return Bytes.toLong(c.getValueArray(), c.getValueOffset(), c.getValueLength());
  }

  public static long[] getReplicationBarriers(Result result) {
    return result
      .getColumnCells(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER)
      .stream().mapToLong(MetaTableAccessor::getReplicationBarrier).sorted().distinct().toArray();
  }
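
  // Usage sketch (editor's illustration): extracting all barrier sequence numbers for a region
  // from a Result that was fetched with readAllVersions() on the replication barrier family. The
  // returned array is sorted and de-duplicated. 'result' is assumed to exist in the caller.
  //
  //   long[] barriers = MetaTableAccessor.getReplicationBarriers(result);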

  private static ReplicationBarrierResult getReplicationBarrierResult(Result result) {
    long[] barriers = getReplicationBarriers(result);
    byte[] stateBytes = result.getValue(getCatalogFamily(), getRegionStateColumn());
    RegionState.State state =
      stateBytes != null ? RegionState.State.valueOf(Bytes.toString(stateBytes)) : null;
    byte[] parentRegionsBytes =
      result.getValue(HConstants.REPLICATION_BARRIER_FAMILY, REPLICATION_PARENT_QUALIFIER);
    List<byte[]> parentRegionNames =
      parentRegionsBytes != null ? parseParentsBytes(parentRegionsBytes) : Collections.emptyList();
    return new ReplicationBarrierResult(barriers, state, parentRegionNames);
  }

  public static ReplicationBarrierResult getReplicationBarrierResult(Connection conn,
      TableName tableName, byte[] row, byte[] encodedRegionName) throws IOException {
    byte[] metaStartKey = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
    byte[] metaStopKey =
      RegionInfo.createRegionName(tableName, HConstants.EMPTY_START_ROW, "", false);
    Scan scan = new Scan().withStartRow(metaStartKey).withStopRow(metaStopKey)
      .addColumn(getCatalogFamily(), getRegionStateColumn())
      .addFamily(HConstants.REPLICATION_BARRIER_FAMILY).readAllVersions().setReversed(true)
      .setCaching(10);
    try (Table table = getMetaHTable(conn); ResultScanner scanner = table.getScanner(scan)) {
      for (Result result;;) {
        result = scanner.next();
        if (result == null) {
          return new ReplicationBarrierResult(new long[0], null, Collections.emptyList());
        }
        byte[] regionName = result.getRow();
        // TODO: we may look up a region which has already been split or merged, so we need to
        // check whether the encoded name matches. Need to find a way to quit earlier when there
        // is no record for the given region; for now it will scan to the end of the table.
        if (!Bytes.equals(encodedRegionName,
          Bytes.toBytes(RegionInfo.encodeRegionName(regionName)))) {
          continue;
        }
        return getReplicationBarrierResult(result);
      }
    }
  }

  public static long[] getReplicationBarrier(Connection conn, byte[] regionName)
      throws IOException {
    try (Table table = getMetaHTable(conn)) {
      Result result = table.get(new Get(regionName)
        .addColumn(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER)
        .readAllVersions());
      return getReplicationBarriers(result);
    }
  }

  public static List<Pair<String, Long>> getTableEncodedRegionNameAndLastBarrier(Connection conn,
      TableName tableName) throws IOException {
    List<Pair<String, Long>> list = new ArrayList<>();
    scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION),
      getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, r -> {
        byte[] value =
          r.getValue(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER);
        if (value == null) {
          return true;
        }
        long lastBarrier = Bytes.toLong(value);
        String encodedRegionName = RegionInfo.encodeRegionName(r.getRow());
        list.add(Pair.newPair(encodedRegionName, lastBarrier));
        return true;
      });
    return list;
  }

  public static List<String> getTableEncodedRegionNamesForSerialReplication(Connection conn,
      TableName tableName) throws IOException {
    List<String> list = new ArrayList<>();
    scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION),
      getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION,
      new FirstKeyOnlyFilter(), Integer.MAX_VALUE, r -> {
        list.add(RegionInfo.encodeRegionName(r.getRow()));
        return true;
      });
    return list;
  }
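
  // Usage sketch (editor's illustration): listing the last replication barrier per region of a
  // table with the method above, as serial replication bookkeeping does. 'conn' is assumed to
  // exist in the caller; the table name is made up.
  //
  //   for (Pair<String, Long> p : MetaTableAccessor.getTableEncodedRegionNameAndLastBarrier(conn,
  //       TableName.valueOf("example_table"))) {
  //     System.out.println(p.getFirst() + " => " + p.getSecond());
  //   }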

  private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException {
    if (!METALOG.isDebugEnabled()) {
      return;
    }
    // Logging each mutation on a separate line makes it easier to see the differences between
    // them visually, because of the common starting indentation.
    for (Mutation mutation : mutations) {
      debugLogMutation(mutation);
    }
  }

  private static void debugLogMutation(Mutation p) throws IOException {
    METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON());
  }

  private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException {
    return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(p.getRow())
      .setFamily(HConstants.CATALOG_FAMILY)
      .setQualifier(getSeqNumColumn(replicaId))
      .setTimestamp(p.getTimestamp())
      .setType(Type.Put)
      .setValue(Bytes.toBytes(openSeqNum))
      .build());
  }
}