/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell.Type;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Consistency;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
import org.apache.hadoop.hbase.ipc.ServerRpcController;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MultiRowMutationService;
import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ExceptionUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.base.Throwables;

/**
 * <p>
 * Read/write operations on <code>hbase:meta</code> region as well as assignment information stored
 * to <code>hbase:meta</code>.
 * </p>
 * <p>
 * Some of the methods of this class take ZooKeeperWatcher as a param. The only reason for this is
 * that when this class is used on the client side (e.g. HBaseAdmin), we want to use a short-lived
 * connection (opened before each operation, closed right after), while when used on HM or HRS
 * (like in AssignmentManager) we want a permanent connection.
 * </p>
 * <p>
 * HBASE-10070 adds a replicaId to HRI, meaning more than one HRI can be defined for the same table
 * range (table, startKey, endKey). For every range, there will be at least one HRI defined, which
 * is called the default replica.
 * </p>
 * <p>
 * <h2>Meta layout</h2>
 *
 * <pre>
 * For each table there is a single row named for the table with a 'table' column family.
 * The column family currently has one column in it, the 'state' column:
 *
 * table:state => contains table state
 *
 * Then for each table range ('Region'), there is a single row, formatted as:
 * <tableName>,<startKey>,<regionId>,<encodedRegionName>.
 * This row is the serialized regionName of the default region replica.
 * Columns are:
 * info:regioninfo => contains serialized HRI for the default region replica
 * info:server => contains hostname:port (in string form) for the server hosting
 *   the default regionInfo replica
 * info:server_<replicaId> => contains hostname:port (in string form) for the server hosting
 *   the regionInfo replica with replicaId
 * info:serverstartcode => contains server start code (in binary long form) for the server
 *   hosting the default regionInfo replica
 * info:serverstartcode_<replicaId> => contains server start code (in binary long form) for
 *   the server hosting the regionInfo replica with replicaId
 * info:seqnumDuringOpen => contains seqNum (in binary long form) for the region at the time
 *   the server opened the region with default replicaId
 * info:seqnumDuringOpen_<replicaId> => contains seqNum (in binary long form) for the region
 *   at the time the server opened the region with replicaId
 * info:splitA => contains a serialized HRI for the first daughter region if the
 *   region is split
 * info:splitB => contains a serialized HRI for the second daughter region if the
 *   region is split
 * info:merge* => contains a serialized HRI for a merge parent region. There will be two
 *   or more of these columns in a row. A row that has these columns is undergoing a merge
 *   and is the result of the merge. Columns listed in merge* columns are the parents of
 *   this merged region. Example columns: info:merge0001, info:merge0002. You may also see
 *   'mergeA' and 'mergeB'. This is the old form, replaced by the new format that allows
 *   for more than two parents to be merged at a time.
 * TODO: Add rep_barrier for serial replication explanation. See SerialReplicationChecker.
 * </pre>
 * </p>
 * <p>
 * The actual layout of meta should be encapsulated inside MetaTableAccessor methods, and should
 * not leak out of it (through Result objects, etc.)
 * </p>
 */
@InterfaceAudience.Private
public class MetaTableAccessor {

  private static final Logger LOG = LoggerFactory.getLogger(MetaTableAccessor.class);
  private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META");

  @VisibleForTesting
  public static final byte[] REPLICATION_PARENT_QUALIFIER = Bytes.toBytes("parent");

  private static final byte ESCAPE_BYTE = (byte) 0xFF;

  private static final byte SEPARATED_BYTE = 0x00;

  @InterfaceAudience.Private
  public enum QueryType {
    ALL(HConstants.TABLE_FAMILY, HConstants.CATALOG_FAMILY),
    REGION(HConstants.CATALOG_FAMILY),
    TABLE(HConstants.TABLE_FAMILY),
    REPLICATION(HConstants.REPLICATION_BARRIER_FAMILY);

    private final byte[][] families;

    QueryType(byte[]... families) {
      this.families = families;
    }

    byte[][] getFamilies() {
      return this.families;
    }
  }

  /** The delimiter for meta columns for replicaIds > 0 */
  static final char META_REPLICA_ID_DELIMITER = '_';

  /** A regex for parsing server columns from meta. See above javadoc for meta layout */
  private static final Pattern SERVER_COLUMN_PATTERN =
    Pattern.compile("^server(_[0-9a-fA-F]{4})?$");

  ////////////////////////
  // Reading operations //
  ////////////////////////

  /**
   * Performs a full scan of <code>hbase:meta</code> for regions.
   * @param connection connection we're using
   * @param visitor Visitor invoked against each row in regions family.
   */
  public static void fullScanRegions(Connection connection, final Visitor visitor)
      throws IOException {
    scanMeta(connection, null, null, QueryType.REGION, visitor);
  }

  /**
   * Performs a full scan of <code>hbase:meta</code> for regions.
   * @param connection connection we're using
   */
  public static List<Result> fullScanRegions(Connection connection) throws IOException {
    return fullScan(connection, QueryType.REGION);
  }

  /**
   * Performs a full scan of <code>hbase:meta</code> for tables.
   * @param connection connection we're using
   * @param visitor Visitor invoked against each row in tables family.
   */
  public static void fullScanTables(Connection connection, final Visitor visitor)
      throws IOException {
    scanMeta(connection, null, null, QueryType.TABLE, visitor);
  }

  /**
   * Performs a full scan of <code>hbase:meta</code>.
   * @param connection connection we're using
   * @param type scanned part of meta
   * @return List of {@link Result}
   */
  private static List<Result> fullScan(Connection connection, QueryType type) throws IOException {
    CollectAllVisitor v = new CollectAllVisitor();
    scanMeta(connection, null, null, type, v);
    return v.getResults();
  }

  /**
   * Callers should call close on the returned {@link Table} instance.
   * @param connection connection we're using to access Meta
   * @return A {@link Table} for <code>hbase:meta</code>
   */
  public static Table getMetaHTable(final Connection connection)
      throws IOException {
    // We used to pass whole CatalogTracker in here, now we just pass in Connection
    if (connection == null) {
      throw new NullPointerException("No connection");
    } else if (connection.isClosed()) {
      throw new IOException("connection is closed");
    }
    return connection.getTable(TableName.META_TABLE_NAME);
  }

  /**
   * @param t Table to use (will be closed when done).
   * @param g Get to run
   */
  private static Result get(final Table t, final Get g) throws IOException {
    if (t == null) return null;
    try {
      return t.get(g);
    } finally {
      t.close();
    }
  }

  /**
   * Gets the region info and assignment for the specified region.
   * @param connection connection we're using
   * @param regionName Region to lookup.
   * @return Location and RegionInfo for <code>regionName</code>
   * @deprecated use {@link #getRegionLocation(Connection, byte[])} instead
   */
  @Deprecated
  public static Pair<RegionInfo, ServerName> getRegion(Connection connection, byte [] regionName)
      throws IOException {
    HRegionLocation location = getRegionLocation(connection, regionName);
    return location == null
      ? null
      : new Pair<>(location.getRegionInfo(), location.getServerName());
  }

  /**
   * Returns the HRegionLocation from meta for the given region
   * @param connection connection we're using
   * @param regionName region we're looking for
   * @return HRegionLocation for the given region
   */
  public static HRegionLocation getRegionLocation(Connection connection, byte[] regionName)
      throws IOException {
    byte[] row = regionName;
    RegionInfo parsedInfo = null;
    try {
      parsedInfo = parseRegionInfoFromRegionName(regionName);
      row = getMetaKeyForRegion(parsedInfo);
    } catch (Exception parseEx) {
      // Ignore. This is used with tableName passed as regionName.
    }
    Get get = new Get(row);
    get.addFamily(HConstants.CATALOG_FAMILY);
    Result r = get(getMetaHTable(connection), get);
    RegionLocations locations = getRegionLocations(r);
    return locations == null ? null
      : locations.getRegionLocation(parsedInfo == null ? 0 : parsedInfo.getReplicaId());
  }

  /**
   * Returns the HRegionLocation from meta for the given region
   * @param connection connection we're using
   * @param regionInfo region information
   * @return HRegionLocation for the given region
   */
  public static HRegionLocation getRegionLocation(Connection connection, RegionInfo regionInfo)
      throws IOException {
    byte[] row = getMetaKeyForRegion(regionInfo);
    Get get = new Get(row);
    get.addFamily(HConstants.CATALOG_FAMILY);
    Result r = get(getMetaHTable(connection), get);
    return getRegionLocation(r, regionInfo, regionInfo.getReplicaId());
  }

  /** Returns the row key to use for this regionInfo */
  public static byte[] getMetaKeyForRegion(RegionInfo regionInfo) {
    return RegionReplicaUtil.getRegionInfoForDefaultReplica(regionInfo).getRegionName();
  }

  /** Returns an HRI parsed from this regionName. Not all the fields of the HRI
   * are stored in the name, so the returned object should only be used for the fields
   * in the regionName.
   */
  public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException {
    byte[][] fields = RegionInfo.parseRegionName(regionName);
    long regionId = Long.parseLong(Bytes.toString(fields[2]));
    int replicaId = fields.length > 3 ? Integer.parseInt(Bytes.toString(fields[3]), 16) : 0;
    return RegionInfoBuilder.newBuilder(TableName.valueOf(fields[0]))
      .setStartKey(fields[1])
      .setEndKey(fields[2])
      .setSplit(false)
      .setRegionId(regionId)
      .setReplicaId(replicaId)
      .build();
  }

  /**
   * Gets the result in hbase:meta for the specified region.
   * @param connection connection we're using
   * @param regionName region we're looking for
   * @return result of the specified region
   */
  public static Result getRegionResult(Connection connection,
      byte[] regionName) throws IOException {
    Get get = new Get(regionName);
    get.addFamily(HConstants.CATALOG_FAMILY);
    return get(getMetaHTable(connection), get);
  }

  /**
   * Scans META table for a row whose key contains the specified <B>regionEncodedName</B>,
   * returning a single related <code>Result</code> instance if any row is found, null otherwise.
   *
   * @param connection the connection to query META table.
   * @param regionEncodedName the region encoded name to look for at META.
   * @return <code>Result</code> instance with the row related info in META, null otherwise.
   * @throws IOException if any errors occur while querying META.
   */
  public static Result scanByRegionEncodedName(Connection connection,
      String regionEncodedName) throws IOException {
    RowFilter rowFilter = new RowFilter(CompareOperator.EQUAL,
      new SubstringComparator(regionEncodedName));
    Scan scan = getMetaScan(connection, 1);
    scan.setFilter(rowFilter);
    // Close the table and scanner when done so we do not leak them.
    try (Table metaTable = getMetaHTable(connection);
        ResultScanner resultScanner = metaTable.getScanner(scan)) {
      return resultScanner.next();
    }
  }

  /**
   * @return Return all regioninfos listed in the 'info:merge*' columns of
   *   the <code>regionName</code> row.
   */
  @Nullable
  public static List<RegionInfo> getMergeRegions(Connection connection, byte[] regionName)
      throws IOException {
    return getMergeRegions(getRegionResult(connection, regionName).rawCells());
  }

  /**
   * @return Deserialized regioninfo values taken from column values that match
   *   the regex 'info:merge.*' in array of <code>cells</code>.
   */
  @Nullable
  public static List<RegionInfo> getMergeRegions(Cell [] cells) {
    if (cells == null) {
      return null;
    }
    List<RegionInfo> regionsToMerge = null;
    for (Cell cell: cells) {
      if (!isMergeQualifierPrefix(cell)) {
        continue;
      }
      // Ok. This cell is that of an info:merge* column.
      RegionInfo ri = RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(),
        cell.getValueLength());
      if (ri != null) {
        if (regionsToMerge == null) {
          regionsToMerge = new ArrayList<>();
        }
        regionsToMerge.add(ri);
      }
    }
    return regionsToMerge;
  }

  /**
   * @return True if any merge regions present in <code>cells</code>; i.e.
   *   the column in <code>cell</code> matches the regex 'info:merge.*'.
   */
  public static boolean hasMergeRegions(Cell [] cells) {
    for (Cell cell: cells) {
      if (!isMergeQualifierPrefix(cell)) {
        continue;
      }
      return true;
    }
    return false;
  }

  /**
   * @return True if the column in <code>cell</code> matches the regex 'info:merge.*'.
   */
  private static boolean isMergeQualifierPrefix(Cell cell) {
    // Check that the cell is in the catalog family and that its qualifier starts with the merge
    // qualifier prefix 'merge'.
    return CellUtil.matchingFamily(cell, HConstants.CATALOG_FAMILY) &&
      PrivateCellUtil.qualifierStartsWith(cell, HConstants.MERGE_QUALIFIER_PREFIX);
  }

  /**
   * Checks if the specified table exists. Looks at the hbase:meta table.
   * @param connection connection we're using
   * @param tableName table to check
   * @return true if the table exists in meta, false if not
   */
  public static boolean tableExists(Connection connection,
      final TableName tableName)
      throws IOException {
    // Catalog tables always exist.
    return tableName.equals(TableName.META_TABLE_NAME) ||
      getTableState(connection, tableName) != null;
  }

  /**
   * Lists all of the regions currently in META.
   *
   * @param connection to connect with
   * @param excludeOfflinedSplitParents if true, offlined split parent regions are left out of
   *   the returned list; if false, they are included
   * @return List of all user-space regions.
   */
  @VisibleForTesting
  public static List<RegionInfo> getAllRegions(Connection connection,
      boolean excludeOfflinedSplitParents)
      throws IOException {
    List<Pair<RegionInfo, ServerName>> result;

    result = getTableRegionsAndLocations(connection, null,
      excludeOfflinedSplitParents);

    return getListOfRegionInfos(result);
  }

  /**
   * Gets all of the regions of the specified table. Do not use this method
   * to get meta table regions, use methods in MetaTableLocator instead.
   * @param connection connection we're using
   * @param tableName table we're looking for
   * @return Ordered list of {@link RegionInfo}.
   */
  public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName)
      throws IOException {
    return getTableRegions(connection, tableName, false);
  }

  /**
   * Gets all of the regions of the specified table. Do not use this method
   * to get meta table regions, use methods in MetaTableLocator instead.
   * @param connection connection we're using
   * @param tableName table we're looking for
   * @param excludeOfflinedSplitParents If true, do not include offlined split
   *   parents in the return.
   * @return Ordered list of {@link RegionInfo}.
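   * <p>
   * Illustrative usage (the connection and table name are placeholders, not part of this API):
   * <pre>
   * List&lt;RegionInfo&gt; regions =
   *   MetaTableAccessor.getTableRegions(connection, TableName.valueOf("t1"), true);
   * </pre>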
   */
  public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName,
      final boolean excludeOfflinedSplitParents) throws IOException {
    List<Pair<RegionInfo, ServerName>> result =
      getTableRegionsAndLocations(connection, tableName, excludeOfflinedSplitParents);
    return getListOfRegionInfos(result);
  }

  private static List<RegionInfo> getListOfRegionInfos(
      final List<Pair<RegionInfo, ServerName>> pairs) {
    if (pairs == null || pairs.isEmpty()) {
      return Collections.emptyList();
    }
    List<RegionInfo> result = new ArrayList<>(pairs.size());
    for (Pair<RegionInfo, ServerName> pair : pairs) {
      result.add(pair.getFirst());
    }
    return result;
  }

  /**
   * @param tableName table we're working with
   * @param type scanned part of meta
   * @return start row for scanning META according to query type
   */
  public static byte[] getTableStartRowForMeta(TableName tableName, QueryType type) {
    if (tableName == null) {
      return null;
    }
    switch (type) {
      case REGION:
        byte[] startRow = new byte[tableName.getName().length + 2];
        System.arraycopy(tableName.getName(), 0, startRow, 0, tableName.getName().length);
        startRow[startRow.length - 2] = HConstants.DELIMITER;
        startRow[startRow.length - 1] = HConstants.DELIMITER;
        return startRow;
      case ALL:
      case TABLE:
      default:
        return tableName.getName();
    }
  }

  /**
   * @param tableName table we're working with
   * @param type scanned part of meta
   * @return stop row for scanning META according to query type
   */
  public static byte[] getTableStopRowForMeta(TableName tableName, QueryType type) {
    if (tableName == null) {
      return null;
    }
    final byte[] stopRow;
    switch (type) {
      case REGION:
        stopRow = new byte[tableName.getName().length + 3];
        System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length);
        stopRow[stopRow.length - 3] = ' ';
        stopRow[stopRow.length - 2] = HConstants.DELIMITER;
        stopRow[stopRow.length - 1] = HConstants.DELIMITER;
        break;
      case ALL:
      case TABLE:
      default:
        stopRow = new byte[tableName.getName().length + 1];
        System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length);
        stopRow[stopRow.length - 1] = ' ';
        break;
    }
    return stopRow;
  }

  /**
   * This method creates a Scan object that will only scan catalog rows that
   * belong to the specified table. It doesn't specify any columns.
   * This is a better alternative to just using a start row and scanning until
   * it hits a new table, since that requires parsing the HRI to get the table
   * name.
   * @param tableName bytes of table's name
   * @return configured Scan object
   */
  @Deprecated
  public static Scan getScanForTableName(Connection connection, TableName tableName) {
    // Start key is just the table name with delimiters
    byte[] startKey = getTableStartRowForMeta(tableName, QueryType.REGION);
    // Stop key appends the smallest possible char to the table name
    byte[] stopKey = getTableStopRowForMeta(tableName, QueryType.REGION);

    Scan scan = getMetaScan(connection, -1);
    scan.setStartRow(startKey);
    scan.setStopRow(stopKey);
    return scan;
  }

  private static Scan getMetaScan(Connection connection, int rowUpperLimit) {
    Scan scan = new Scan();
    int scannerCaching = connection.getConfiguration()
      .getInt(HConstants.HBASE_META_SCANNER_CACHING,
        HConstants.DEFAULT_HBASE_META_SCANNER_CACHING);
    if (connection.getConfiguration().getBoolean(HConstants.USE_META_REPLICAS,
        HConstants.DEFAULT_USE_META_REPLICAS)) {
      scan.setConsistency(Consistency.TIMELINE);
    }
    if (rowUpperLimit > 0) {
      scan.setLimit(rowUpperLimit);
      scan.setReadType(Scan.ReadType.PREAD);
    }
    scan.setCaching(scannerCaching);
    return scan;
  }

  /**
   * Do not use this method to get meta table regions, use methods in MetaTableLocator instead.
   * @param connection connection we're using
   * @param tableName table we're looking for
   * @return List of regioninfos and server names.
   */
  public static List<Pair<RegionInfo, ServerName>>
      getTableRegionsAndLocations(Connection connection, TableName tableName)
      throws IOException {
    return getTableRegionsAndLocations(connection, tableName, true);
  }

  /**
   * Do not use this method to get meta table regions, use methods in MetaTableLocator instead.
   * @param connection connection we're using
   * @param tableName table to work with, can be null for getting all regions
   * @param excludeOfflinedSplitParents don't return split parents
   * @return Return list of regioninfos and server addresses.
   */
  // What happens here when 1M regions in hbase:meta? This won't scale?
  public static List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations(
      Connection connection, @Nullable final TableName tableName,
      final boolean excludeOfflinedSplitParents) throws IOException {
    if (tableName != null && tableName.equals(TableName.META_TABLE_NAME)) {
      throw new IOException("This method can't be used to locate meta regions;"
        + " use MetaTableLocator instead");
    }
    // Make a version of CollectingVisitor that collects RegionInfo and ServerAddress
    CollectingVisitor<Pair<RegionInfo, ServerName>> visitor =
      new CollectingVisitor<Pair<RegionInfo, ServerName>>() {
        private RegionLocations current = null;

        @Override
        public boolean visit(Result r) throws IOException {
          current = getRegionLocations(r);
          if (current == null || current.getRegionLocation().getRegion() == null) {
            LOG.warn("No serialized RegionInfo in " + r);
            return true;
          }
          RegionInfo hri = current.getRegionLocation().getRegion();
          if (excludeOfflinedSplitParents && hri.isSplitParent()) return true;
          // Else call super and add this Result to the collection.
          return super.visit(r);
        }

        @Override
        void add(Result r) {
          if (current == null) {
            return;
          }
          for (HRegionLocation loc : current.getRegionLocations()) {
            if (loc != null) {
              this.results.add(new Pair<>(loc.getRegion(), loc.getServerName()));
            }
          }
        }
      };
    scanMeta(connection,
      getTableStartRowForMeta(tableName, QueryType.REGION),
      getTableStopRowForMeta(tableName, QueryType.REGION),
      QueryType.REGION, visitor);
    return visitor.getResults();
  }

  /**
   * @param connection connection we're using
   * @param serverName server whose regions we're interested in
   * @return List of user regions installed on this server (does not include
   *   catalog regions).
   * @throws IOException
   */
  public static NavigableMap<RegionInfo, Result>
      getServerUserRegions(Connection connection, final ServerName serverName)
      throws IOException {
    final NavigableMap<RegionInfo, Result> hris = new TreeMap<>();
    // Fill the above hris map with entries from hbase:meta that have the passed
    // servername.
    CollectingVisitor<Result> v = new CollectingVisitor<Result>() {
      @Override
      void add(Result r) {
        if (r == null || r.isEmpty()) return;
        RegionLocations locations = getRegionLocations(r);
        if (locations == null) return;
        for (HRegionLocation loc : locations.getRegionLocations()) {
          if (loc != null) {
            if (loc.getServerName() != null && loc.getServerName().equals(serverName)) {
              hris.put(loc.getRegion(), r);
            }
          }
        }
      }
    };
    scanMeta(connection, null, null, QueryType.REGION, v);
    return hris;
  }

  public static void fullScanMetaAndPrint(Connection connection)
      throws IOException {
    Visitor v = r -> {
      if (r == null || r.isEmpty()) {
        return true;
      }
      LOG.info("fullScanMetaAndPrint.Current Meta Row: " + r);
      TableState state = getTableState(r);
      if (state != null) {
        LOG.info("fullScanMetaAndPrint.Table State={}", state);
      } else {
        RegionLocations locations = getRegionLocations(r);
        if (locations == null) {
          return true;
        }
        for (HRegionLocation loc : locations.getRegionLocations()) {
          if (loc != null) {
            LOG.info("fullScanMetaAndPrint.HRI Print={}", loc.getRegion());
          }
        }
      }
      return true;
    };
    scanMeta(connection, null, null, QueryType.ALL, v);
  }

  public static void scanMetaForTableRegions(Connection connection, Visitor visitor,
      TableName tableName) throws IOException {
    scanMeta(connection, tableName, QueryType.REGION, Integer.MAX_VALUE, visitor);
  }

  private static void scanMeta(Connection connection, TableName table, QueryType type, int maxRows,
      final Visitor visitor) throws IOException {
    scanMeta(connection, getTableStartRowForMeta(table, type), getTableStopRowForMeta(table, type),
      type, maxRows, visitor);
  }

  private static void scanMeta(Connection connection, @Nullable final byte[] startRow,
      @Nullable final byte[] stopRow, QueryType type, final Visitor visitor) throws IOException {
    scanMeta(connection, startRow, stopRow, type, Integer.MAX_VALUE, visitor);
  }

  /**
   * Performs a scan of META table for given table starting from given row.
   * @param connection connection we're using
   * @param visitor visitor to call
   * @param tableName table within which we scan
   * @param row start scan from this row
   * @param rowLimit max number of rows to return
   */
  public static void scanMeta(Connection connection, final Visitor visitor,
      final TableName tableName, final byte[] row, final int rowLimit) throws IOException {
    byte[] startRow = null;
    byte[] stopRow = null;
    if (tableName != null) {
      startRow = getTableStartRowForMeta(tableName, QueryType.REGION);
      if (row != null) {
        RegionInfo closestRi = getClosestRegionInfo(connection, tableName, row);
        startRow =
          RegionInfo.createRegionName(tableName, closestRi.getStartKey(), HConstants.ZEROES, false);
      }
      stopRow = getTableStopRowForMeta(tableName, QueryType.REGION);
    }
    scanMeta(connection, startRow, stopRow, QueryType.REGION, rowLimit, visitor);
  }

  /**
   * Performs a scan of META table.
   * @param connection connection we're using
   * @param startRow Where to start the scan. Pass null if you want to begin the scan
   *   at the first row.
   * @param stopRow Where to stop the scan. Pass null if you want to scan all rows
   *   from the start one.
   * @param type scanned part of meta
   * @param maxRows maximum rows to return
   * @param visitor Visitor invoked against each row.
   */
  static void scanMeta(Connection connection, @Nullable final byte[] startRow,
      @Nullable final byte[] stopRow, QueryType type, int maxRows, final Visitor visitor)
      throws IOException {
    scanMeta(connection, startRow, stopRow, type, null, maxRows, visitor);
  }

  private static void scanMeta(Connection connection, @Nullable final byte[] startRow,
      @Nullable final byte[] stopRow, QueryType type, @Nullable Filter filter, int maxRows,
      final Visitor visitor) throws IOException {
    int rowUpperLimit = maxRows > 0 ? maxRows : Integer.MAX_VALUE;
    Scan scan = getMetaScan(connection, rowUpperLimit);

    for (byte[] family : type.getFamilies()) {
      scan.addFamily(family);
    }
    if (startRow != null) {
      scan.withStartRow(startRow);
    }
    if (stopRow != null) {
      scan.withStopRow(stopRow);
    }
    if (filter != null) {
      scan.setFilter(filter);
    }

    if (LOG.isTraceEnabled()) {
      LOG.trace("Scanning META" + " starting at row=" + Bytes.toStringBinary(startRow) +
        " stopping at row=" + Bytes.toStringBinary(stopRow) + " for max=" + rowUpperLimit +
        " with caching=" + scan.getCaching());
    }

    int currentRow = 0;
    try (Table metaTable = getMetaHTable(connection)) {
      try (ResultScanner scanner = metaTable.getScanner(scan)) {
        Result data;
        while ((data = scanner.next()) != null) {
          if (data.isEmpty()) continue;
          // Break if visit returns false.
          if (!visitor.visit(data)) break;
          if (++currentRow >= rowUpperLimit) break;
        }
      }
    }
    if (visitor instanceof Closeable) {
      try {
        ((Closeable) visitor).close();
      } catch (Throwable t) {
        ExceptionUtil.rethrowIfInterrupt(t);
        LOG.debug("Got exception in closing the meta scanner visitor", t);
      }
    }
  }

  /**
   * @return Get closest metatable region row to passed <code>row</code>
   */
  @NonNull
  private static RegionInfo getClosestRegionInfo(Connection connection,
      @NonNull final TableName tableName, @NonNull final byte[] row) throws IOException {
    byte[] searchRow = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
    Scan scan = getMetaScan(connection, 1);
    scan.setReversed(true);
    scan.withStartRow(searchRow);
    try (ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan)) {
      Result result = resultScanner.next();
      if (result == null) {
        throw new TableNotFoundException("Cannot find row in META " +
          " for table: " + tableName + ", row=" + Bytes.toStringBinary(row));
      }
      RegionInfo regionInfo = getRegionInfo(result);
      if (regionInfo == null) {
        throw new IOException("RegionInfo was null or empty in Meta for " +
          tableName + ", row=" + Bytes.toStringBinary(row));
      }
      return regionInfo;
    }
  }

  /**
   * Returns the column family used for meta columns.
   * @return HConstants.CATALOG_FAMILY.
   */
  public static byte[] getCatalogFamily() {
    return HConstants.CATALOG_FAMILY;
  }

  /**
   * Returns the column family used for table columns.
   * @return HConstants.TABLE_FAMILY.
   */
  private static byte[] getTableFamily() {
    return HConstants.TABLE_FAMILY;
  }

  /**
   * Returns the column qualifier for serialized region info
   * @return HConstants.REGIONINFO_QUALIFIER
   */
  public static byte[] getRegionInfoColumn() {
    return HConstants.REGIONINFO_QUALIFIER;
  }

  /**
   * Returns the column qualifier for serialized table state
   * @return HConstants.TABLE_STATE_QUALIFIER
   */
  private static byte[] getTableStateColumn() {
    return HConstants.TABLE_STATE_QUALIFIER;
  }

  /**
   * Returns the column qualifier for serialized region state
   * @return HConstants.STATE_QUALIFIER
   */
  private static byte[] getRegionStateColumn() {
    return HConstants.STATE_QUALIFIER;
  }

  /**
   * Returns the column qualifier for serialized region state
   * @param replicaId the replicaId of the region
   * @return a byte[] for state qualifier
   */
  @VisibleForTesting
  static byte[] getRegionStateColumn(int replicaId) {
    return replicaId == 0 ? HConstants.STATE_QUALIFIER
      : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
  }

  /**
   * Returns the column qualifier for the server name column for replicaId
   * @param replicaId the replicaId of the region
   * @return a byte[] for sn column qualifier
   */
  @VisibleForTesting
  static byte[] getServerNameColumn(int replicaId) {
    return replicaId == 0 ? HConstants.SERVERNAME_QUALIFIER
      : Bytes.toBytes(HConstants.SERVERNAME_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
  }

  /**
   * Returns the column qualifier for server column for replicaId
   * @param replicaId the replicaId of the region
   * @return a byte[] for server column qualifier
   */
  @VisibleForTesting
  public static byte[] getServerColumn(int replicaId) {
    return replicaId == 0
      ? HConstants.SERVER_QUALIFIER
      : Bytes.toBytes(HConstants.SERVER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
  }

  /**
   * Returns the column qualifier for server start code column for replicaId
   * @param replicaId the replicaId of the region
   * @return a byte[] for server start code column qualifier
   */
  @VisibleForTesting
  public static byte[] getStartCodeColumn(int replicaId) {
    return replicaId == 0
      ? HConstants.STARTCODE_QUALIFIER
      : Bytes.toBytes(HConstants.STARTCODE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
  }

  /**
   * Returns the column qualifier for seqNum column for replicaId
   * @param replicaId the replicaId of the region
   * @return a byte[] for seqNum column qualifier
   */
  @VisibleForTesting
  public static byte[] getSeqNumColumn(int replicaId) {
    return replicaId == 0
      ? HConstants.SEQNUM_QUALIFIER
      : Bytes.toBytes(HConstants.SEQNUM_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
  }

  /**
   * Parses the replicaId from the server column qualifier. See top of the class javadoc
   * for the actual meta layout
   * @param serverColumn the column qualifier
   * @return an int for the replicaId
   */
  @VisibleForTesting
  static int parseReplicaIdFromServerColumn(byte[] serverColumn) {
    String serverStr = Bytes.toString(serverColumn);

    Matcher matcher = SERVER_COLUMN_PATTERN.matcher(serverStr);
    if (matcher.matches() && matcher.groupCount() > 0) {
      String group = matcher.group(1);
      if (group != null && group.length() > 0) {
        return Integer.parseInt(group.substring(1), 16);
      } else {
        return 0;
      }
    }
    return -1;
  }

  /**
   * Returns a {@link ServerName} from catalog table {@link Result}.
   * @param r Result to pull from
   * @return A ServerName instance or null if necessary fields not found or empty.
   */
  @Nullable
  @InterfaceAudience.Private // for use by HMaster#getTableRegionRow which is used for testing only
  public static ServerName getServerName(final Result r, final int replicaId) {
    byte[] serverColumn = getServerColumn(replicaId);
    Cell cell = r.getColumnLatestCell(getCatalogFamily(), serverColumn);
    if (cell == null || cell.getValueLength() == 0) return null;
    String hostAndPort = Bytes.toString(
      cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
    byte[] startcodeColumn = getStartCodeColumn(replicaId);
    cell = r.getColumnLatestCell(getCatalogFamily(), startcodeColumn);
    if (cell == null || cell.getValueLength() == 0) return null;
    try {
      return ServerName.valueOf(hostAndPort,
        Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
    } catch (IllegalArgumentException e) {
      LOG.error("Ignoring invalid region for server " + hostAndPort + "; cell=" + cell, e);
      return null;
    }
  }

  /**
   * The latest seqnum that the server writing to meta observed when opening the region.
   * E.g. the seqNum when the result of {@link #getServerName(Result, int)} was written.
   * @param r Result to pull the seqNum from
   * @return SeqNum, or HConstants.NO_SEQNUM if there's no value written.
   */
  private static long getSeqNumDuringOpen(final Result r, final int replicaId) {
    Cell cell = r.getColumnLatestCell(getCatalogFamily(), getSeqNumColumn(replicaId));
    if (cell == null || cell.getValueLength() == 0) return HConstants.NO_SEQNUM;
    return Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
  }

  /**
   * Returns the daughter regions by reading the corresponding columns of the catalog table
   * Result.
   * @param data a Result object from the catalog table scan
   * @return pair of RegionInfo or PairOfSameType(null, null) if region is not a split parent
   */
  public static PairOfSameType<RegionInfo> getDaughterRegions(Result data) {
    RegionInfo splitA = getRegionInfo(data, HConstants.SPLITA_QUALIFIER);
    RegionInfo splitB = getRegionInfo(data, HConstants.SPLITB_QUALIFIER);
    return new PairOfSameType<>(splitA, splitB);
  }

  /**
   * Returns an HRegionLocationList extracted from the result.
   * @return an HRegionLocationList containing all locations for the region range or null if
   *   we can't deserialize the result.
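   * <p>
   * Illustrative usage (assumes <code>result</code> is a meta {@link Result} for a region row):
   * <pre>
   * RegionLocations locs = MetaTableAccessor.getRegionLocations(result);
   * HRegionLocation primary = locs == null ? null : locs.getDefaultRegionLocation();
   * </pre>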
   */
  @Nullable
  public static RegionLocations getRegionLocations(final Result r) {
    if (r == null) return null;
    RegionInfo regionInfo = getRegionInfo(r, getRegionInfoColumn());
    if (regionInfo == null) return null;

    List<HRegionLocation> locations = new ArrayList<>(1);
    NavigableMap<byte[], NavigableMap<byte[], byte[]>> familyMap = r.getNoVersionMap();

    locations.add(getRegionLocation(r, regionInfo, 0));

    NavigableMap<byte[], byte[]> infoMap = familyMap.get(getCatalogFamily());
    if (infoMap == null) return new RegionLocations(locations);

    // iterate until all serverName columns are seen
    int replicaId = 0;
    byte[] serverColumn = getServerColumn(replicaId);
    SortedMap<byte[], byte[]> serverMap;
    serverMap = infoMap.tailMap(serverColumn, false);

    if (serverMap.isEmpty()) return new RegionLocations(locations);

    for (Map.Entry<byte[], byte[]> entry : serverMap.entrySet()) {
      replicaId = parseReplicaIdFromServerColumn(entry.getKey());
      if (replicaId < 0) {
        break;
      }
      HRegionLocation location = getRegionLocation(r, regionInfo, replicaId);
      // In case the region replica is newly created, its location might be null. We usually do
      // not have HRLs in a RegionLocations object with a null ServerName. They are handled as
      // null HRLs.
      if (location.getServerName() == null) {
        locations.add(null);
      } else {
        locations.add(location);
      }
    }

    return new RegionLocations(locations);
  }

  /**
   * Returns the HRegionLocation parsed from the given meta row Result
   * for the given regionInfo and replicaId. The regionInfo can be the default region info
   * for the replica.
   * @param r the meta row result
   * @param regionInfo RegionInfo for default replica
   * @param replicaId the replicaId for the HRegionLocation
   * @return HRegionLocation parsed from the given meta row Result for the given replicaId
   */
  private static HRegionLocation getRegionLocation(final Result r, final RegionInfo regionInfo,
      final int replicaId) {
    ServerName serverName = getServerName(r, replicaId);
    long seqNum = getSeqNumDuringOpen(r, replicaId);
    RegionInfo replicaInfo = RegionReplicaUtil.getRegionInfoForReplica(regionInfo, replicaId);
    return new HRegionLocation(replicaInfo, serverName, seqNum);
  }

  /**
   * Returns RegionInfo object from the column
   * HConstants.CATALOG_FAMILY:HConstants.REGIONINFO_QUALIFIER of the catalog
   * table Result.
   * @param data a Result object from the catalog table scan
   * @return RegionInfo or null
   */
  public static RegionInfo getRegionInfo(Result data) {
    return getRegionInfo(data, HConstants.REGIONINFO_QUALIFIER);
  }

  /**
   * Returns the RegionInfo object from the column {@link HConstants#CATALOG_FAMILY} and
   * <code>qualifier</code> of the catalog table result.
   * @param r a Result object from the catalog table scan
   * @param qualifier Column family qualifier
   * @return A RegionInfo instance or null.
   */
  @Nullable
  public static RegionInfo getRegionInfo(final Result r, byte [] qualifier) {
    Cell cell = r.getColumnLatestCell(getCatalogFamily(), qualifier);
    if (cell == null) return null;
    return RegionInfo.parseFromOrNull(cell.getValueArray(),
      cell.getValueOffset(), cell.getValueLength());
  }

  /**
   * Fetch table state for given table from META table
   * @param conn connection to use
   * @param tableName table to fetch state for
   */
  @Nullable
  public static TableState getTableState(Connection conn, TableName tableName)
      throws IOException {
    if (tableName.equals(TableName.META_TABLE_NAME)) {
      return new TableState(tableName, TableState.State.ENABLED);
    }
    Table metaHTable = getMetaHTable(conn);
    Get get = new Get(tableName.getName()).addColumn(getTableFamily(), getTableStateColumn());
    Result result = metaHTable.get(get);
    return getTableState(result);
  }

  /**
   * Fetch table states from META table
   * @param conn connection to use
   * @return map {tableName -> state}
   */
  public static Map<TableName, TableState> getTableStates(Connection conn)
      throws IOException {
    final Map<TableName, TableState> states = new LinkedHashMap<>();
    Visitor collector = r -> {
      TableState state = getTableState(r);
      if (state != null) {
        states.put(state.getTableName(), state);
      }
      return true;
    };
    fullScanTables(conn, collector);
    return states;
  }

  /**
   * Updates state in META
   * @param conn connection to use
   * @param tableName table to look for
   */
  public static void updateTableState(Connection conn, TableName tableName,
      TableState.State actual) throws IOException {
    updateTableState(conn, new TableState(tableName, actual));
  }

  /**
   * Decode table state from META Result.
   * Should contain cell from HConstants.TABLE_FAMILY
   * @return null if not found
   */
  @Nullable
  public static TableState getTableState(Result r) throws IOException {
    Cell cell = r.getColumnLatestCell(getTableFamily(), getTableStateColumn());
    if (cell == null) {
      return null;
    }
    try {
      return TableState.parseFrom(TableName.valueOf(r.getRow()),
        Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(),
          cell.getValueOffset() + cell.getValueLength()));
    } catch (DeserializationException e) {
      throw new IOException(e);
    }
  }

  /**
   * Implementations 'visit' a catalog table row.
   */
  public interface Visitor {
    /**
     * Visit the catalog table row.
     * @param r A row from catalog table
     * @return True if we are to proceed scanning the table, else false if
     *   we are to stop now.
     */
    boolean visit(final Result r) throws IOException;
  }

  /**
   * Implementations 'visit' a catalog table row but with close() at the end.
   */
  public interface CloseableVisitor extends Visitor, Closeable {
  }

  /**
   * A {@link Visitor} that collects content out of passed {@link Result}.
   */
  static abstract class CollectingVisitor<T> implements Visitor {
    final List<T> results = new ArrayList<>();

    @Override
    public boolean visit(Result r) throws IOException {
      if (r != null && !r.isEmpty()) {
        add(r);
      }
      return true;
    }

    abstract void add(Result r);

    /**
     * @return Collected results; wait till visits complete to collect all
     *   possible results
     */
    List<T> getResults() {
      return this.results;
    }
  }

  /**
   * Collects all returned.
   */
  static class CollectAllVisitor extends CollectingVisitor<Result> {
    @Override
    void add(Result r) {
      this.results.add(r);
    }
  }

  /**
   * A Visitor that skips offline regions and split parents
   */
  public static abstract class DefaultVisitorBase implements Visitor {

    DefaultVisitorBase() {
      super();
    }

    public abstract boolean visitInternal(Result rowResult) throws IOException;

    @Override
    public boolean visit(Result rowResult) throws IOException {
      RegionInfo info = getRegionInfo(rowResult);
      if (info == null) {
        return true;
      }

      // skip over offline and split regions
      if (!(info.isOffline() || info.isSplit())) {
        return visitInternal(rowResult);
      }
      return true;
    }
  }

  /**
   * A Visitor for a table. Provides a consistent view of the table's
   * hbase:meta entries during concurrent splits (see HBASE-5986 for details). This class
   * does not guarantee ordered traversal of meta entries, and can block until the
   * hbase:meta entries for daughters are available during splits.
   */
  public static abstract class TableVisitorBase extends DefaultVisitorBase {
    private TableName tableName;

    public TableVisitorBase(TableName tableName) {
      super();
      this.tableName = tableName;
    }

    @Override
    public final boolean visit(Result rowResult) throws IOException {
      RegionInfo info = getRegionInfo(rowResult);
      if (info == null) {
        return true;
      }
      if (!(info.getTable().equals(tableName))) {
        return false;
      }
      return super.visit(rowResult);
    }
  }

  /**
   * Count regions in <code>hbase:meta</code> for passed table.
   * @param c Configuration object
   * @param tableName table name to count regions for
   * @return Count of regions in table <code>tableName</code>
   */
  public static int getRegionCount(final Configuration c, final TableName tableName)
      throws IOException {
    try (Connection connection = ConnectionFactory.createConnection(c)) {
      return getRegionCount(connection, tableName);
    }
  }

  /**
   * Count regions in <code>hbase:meta</code> for passed table.
   * @param connection Connection object
   * @param tableName table name to count regions for
   * @return Count of regions in table <code>tableName</code>
   */
  public static int getRegionCount(final Connection connection, final TableName tableName)
      throws IOException {
    try (RegionLocator locator = connection.getRegionLocator(tableName)) {
      List<HRegionLocation> locations = locator.getAllRegionLocations();
      return locations == null ? 0 : locations.size();
    }
  }

  ////////////////////////
  // Editing operations //
  ////////////////////////

  /**
   * Generates and returns a Put containing the region info for the catalog table.
   */
  public static Put makePutFromRegionInfo(RegionInfo regionInfo, long ts) throws IOException {
    Put put = new Put(regionInfo.getRegionName(), ts);
    addRegionInfo(put, regionInfo);
    return put;
  }

  /**
   * Generates and returns a Delete containing the region info for the catalog table
   */
  private static Delete makeDeleteFromRegionInfo(RegionInfo regionInfo, long ts) {
    if (regionInfo == null) {
      throw new IllegalArgumentException("Can't make a delete for null region");
    }
    Delete delete = new Delete(regionInfo.getRegionName());
    delete.addFamily(getCatalogFamily(), ts);
    return delete;
  }

  /**
   * Adds split daughters to the Put
   */
  private static Put addDaughtersToPut(Put put, RegionInfo splitA, RegionInfo splitB)
      throws IOException {
    if (splitA != null) {
      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
        .setRow(put.getRow())
        .setFamily(HConstants.CATALOG_FAMILY)
        .setQualifier(HConstants.SPLITA_QUALIFIER)
        .setTimestamp(put.getTimestamp())
        .setType(Type.Put)
        .setValue(RegionInfo.toByteArray(splitA))
        .build());
    }
    if (splitB != null) {
      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
        .setRow(put.getRow())
        .setFamily(HConstants.CATALOG_FAMILY)
        .setQualifier(HConstants.SPLITB_QUALIFIER)
        .setTimestamp(put.getTimestamp())
        .setType(Type.Put)
        .setValue(RegionInfo.toByteArray(splitB))
        .build());
    }
    return put;
  }

  /**
   * Put the passed <code>p</code> to the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param p Put to add to hbase:meta
   */
  private static void putToMetaTable(Connection connection, Put p) throws IOException {
    try (Table table = getMetaHTable(connection)) {
      put(table, p);
    }
  }

  /**
   * @param t Table to use
   * @param p put to make
   */
  private static void put(Table t, Put p) throws IOException {
    debugLogMutation(p);
    t.put(p);
  }

  /**
   * Put the passed <code>ps</code> to the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param ps Puts to add to hbase:meta
   */
  public static void putsToMetaTable(final Connection connection, final List<Put> ps)
      throws IOException {
    if (ps.isEmpty()) {
      return;
    }
    try (Table t = getMetaHTable(connection)) {
      debugLogMutations(ps);
      // the implementation for putting a single Put is much simpler so here we do a check first.
      if (ps.size() == 1) {
        t.put(ps.get(0));
      } else {
        t.put(ps);
      }
    }
  }

  /**
   * Delete the passed <code>d</code> from the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param d Delete to add to hbase:meta
   */
  private static void deleteFromMetaTable(final Connection connection, final Delete d)
      throws IOException {
    List<Delete> dels = new ArrayList<>(1);
    dels.add(d);
    deleteFromMetaTable(connection, dels);
  }

  /**
   * Delete the passed <code>deletes</code> from the <code>hbase:meta</code> table.
   * @param connection connection we're using
   * @param deletes Deletes to add to hbase:meta. This list should support #remove.
   */
  private static void deleteFromMetaTable(final Connection connection, final List<Delete> deletes)
      throws IOException {
    try (Table t = getMetaHTable(connection)) {
      debugLogMutations(deletes);
      t.delete(deletes);
    }
  }

  /**
   * Deletes some replica columns corresponding to replicas for the passed rows
   * @param metaRows rows in hbase:meta
   * @param replicaIndexToDeleteFrom the replica ID we would start deleting from
   * @param numReplicasToRemove how many replicas to remove
   * @param connection connection we're using to access meta table
   */
  public static void removeRegionReplicasFromMeta(Set<byte[]> metaRows,
      int replicaIndexToDeleteFrom, int numReplicasToRemove, Connection connection)
      throws IOException {
    int absoluteIndex = replicaIndexToDeleteFrom + numReplicasToRemove;
    for (byte[] row : metaRows) {
      long now = EnvironmentEdgeManager.currentTime();
      Delete deleteReplicaLocations = new Delete(row);
      for (int i = replicaIndexToDeleteFrom; i < absoluteIndex; i++) {
        deleteReplicaLocations.addColumns(getCatalogFamily(),
          getServerColumn(i), now);
        deleteReplicaLocations.addColumns(getCatalogFamily(),
          getSeqNumColumn(i), now);
        deleteReplicaLocations.addColumns(getCatalogFamily(),
          getStartCodeColumn(i), now);
        deleteReplicaLocations.addColumns(getCatalogFamily(), getServerNameColumn(i), now);
        deleteReplicaLocations.addColumns(getCatalogFamily(), getRegionStateColumn(i), now);
      }

      deleteFromMetaTable(connection, deleteReplicaLocations);
    }
  }

  private static Put addRegionStateToPut(Put put, RegionState.State state) throws IOException {
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(put.getRow())
      .setFamily(HConstants.CATALOG_FAMILY)
      .setQualifier(getRegionStateColumn())
      .setTimestamp(put.getTimestamp())
      .setType(Cell.Type.Put)
      .setValue(Bytes.toBytes(state.name()))
      .build());
    return put;
  }

  /**
   * Update state column in hbase:meta.
   */
  public static void updateRegionState(Connection connection, RegionInfo ri,
      RegionState.State state) throws IOException {
    Put put = new Put(RegionReplicaUtil.getRegionInfoForDefaultReplica(ri).getRegionName());
    MetaTableAccessor.putsToMetaTable(connection,
      Collections.singletonList(addRegionStateToPut(put, state)));
  }

  /**
   * Adds daughter region infos to hbase:meta row for the specified region. Note that this does
   * not add its daughters as different rows, but adds information about the daughters in the
   * same row as the parent. Use
   * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)}
   * if you want to do that.
   * @param connection connection we're using
   * @param regionInfo RegionInfo of parent region
   * @param splitA first split daughter of the parent regionInfo
   * @param splitB second split daughter of the parent regionInfo
   * @throws IOException if problem connecting or updating meta
   */
  public static void addSplitsToParent(Connection connection, RegionInfo regionInfo,
      RegionInfo splitA, RegionInfo splitB) throws IOException {
    try (Table meta = getMetaHTable(connection)) {
      Put put = makePutFromRegionInfo(regionInfo, EnvironmentEdgeManager.currentTime());
      addDaughtersToPut(put, splitA, splitB);
      meta.put(put);
      debugLogMutation(put);
      LOG.debug("Added region {}", regionInfo.getRegionNameAsString());
    }
  }

  /**
   * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this
   * does not add its daughters as different rows, but adds information about the daughters
   * in the same row as the parent. Use
   * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)}
   * if you want to do that.
   * @param connection connection we're using
   * @param regionInfo region information
   * @throws IOException if problem connecting or updating meta
   */
  @VisibleForTesting
  public static void addRegionToMeta(Connection connection, RegionInfo regionInfo)
      throws IOException {
    addRegionsToMeta(connection, Collections.singletonList(regionInfo), 1);
  }

  /**
   * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions
   * is CLOSED.
   * @param connection connection we're using
   * @param regionInfos region information list
   * @throws IOException if problem connecting or updating meta
   */
  public static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos,
      int regionReplication) throws IOException {
    addRegionsToMeta(connection, regionInfos, regionReplication,
      EnvironmentEdgeManager.currentTime());
  }

  /**
   * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions
   * is CLOSED.
   * @param connection connection we're using
   * @param regionInfos region information list
   * @param ts desired timestamp
   * @throws IOException if problem connecting or updating meta
   */
  private static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos,
      int regionReplication, long ts) throws IOException {
    List<Put> puts = new ArrayList<>();
    for (RegionInfo regionInfo : regionInfos) {
      if (RegionReplicaUtil.isDefaultReplica(regionInfo)) {
        Put put = makePutFromRegionInfo(regionInfo, ts);
        // New regions are added with initial state of CLOSED.
        addRegionStateToPut(put, RegionState.State.CLOSED);
        // Add empty locations for region replicas so that number of replicas can be cached
        // whenever the primary region is looked up from meta
        for (int i = 1; i < regionReplication; i++) {
          addEmptyLocation(put, i);
        }
        puts.add(put);
      }
    }
    putsToMetaTable(connection, puts);
    LOG.info("Added {} regions to meta.", puts.size());
  }

  static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException {
    int limit = 10000; // Arbitrary limit. No room in our formatted 'task0000' below for more.
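    // The merge parent qualifiers written below are MERGE_QUALIFIER_PREFIX_STR plus a four-digit
    // counter (info:merge0000, info:merge0001, ...), so at most 10000 parents fit the format.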
  static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException {
    int limit = 10000; // Arbitrary limit. No room in the "%04d"-formatted qualifier below for more.
    int max = mergeRegions.size();
    if (max > limit) {
      // Should never happen!!!!! But just in case.
      throw new RuntimeException("Can't merge " + max + " regions in one go; " + limit +
        " is upper-limit.");
    }
    int counter = 0;
    for (RegionInfo ri: mergeRegions) {
      String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++);
      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
        .setRow(put.getRow())
        .setFamily(HConstants.CATALOG_FAMILY)
        .setQualifier(Bytes.toBytes(qualifier))
        .setTimestamp(put.getTimestamp())
        .setType(Type.Put)
        .setValue(RegionInfo.toByteArray(ri))
        .build());
    }
    return put;
  }

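  // Editorial illustration only; not part of the original class. Shows the qualifier naming
  // scheme that addMergeRegions() uses: each merge parent is stored in the catalog family under
  // its own qualifier, named by the merge prefix plus a zero-padded counter (index 0 and 1 for
  // the first two parents). Values are the serialized RegionInfo of each parent.
  private static String exampleMergeQualifierName(int parentIndex) {
    return String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", parentIndex);
  }
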
  /**
   * Merge regions into one in an atomic operation. Deletes the merging regions in
   * hbase:meta and adds the merged region.
   * @param connection connection we're using
   * @param mergedRegion the merged region
   * @param parentSeqNum map from each parent region to merge to its next open sequence id, used
   *   by serial replication; use -1 if not needed by this table
   * @param sn the location of the region
   * @param regionReplication desired region replication for the table, including the primary
   */
  public static void mergeRegions(Connection connection, RegionInfo mergedRegion,
    Map<RegionInfo, Long> parentSeqNum, ServerName sn, int regionReplication)
    throws IOException {
    try (Table meta = getMetaHTable(connection)) {
      long time = HConstants.LATEST_TIMESTAMP;
      List<Mutation> mutations = new ArrayList<>();
      List<RegionInfo> replicationParents = new ArrayList<>();
      for (Map.Entry<RegionInfo, Long> e: parentSeqNum.entrySet()) {
        RegionInfo ri = e.getKey();
        long seqNum = e.getValue();
        // Deletes for merging regions
        mutations.add(makeDeleteFromRegionInfo(ri, time));
        if (seqNum > 0) {
          mutations.add(makePutForReplicationBarrier(ri, seqNum, time));
          replicationParents.add(ri);
        }
      }
      // Put for parent
      Put putOfMerged = makePutFromRegionInfo(mergedRegion, time);
      putOfMerged = addMergeRegions(putOfMerged, parentSeqNum.keySet());
      // Set initial state to CLOSED.
      // NOTE: If initial state is not set to CLOSED then merged region gets added with the
      // default OFFLINE state. If Master gets restarted after this step, start up sequence of
      // master tries to assign this offline region. This is followed by re-assignments of the
      // merged region from resumed {@link MergeTableRegionsProcedure}
      addRegionStateToPut(putOfMerged, RegionState.State.CLOSED);
      mutations.add(putOfMerged);
      // The merged is a new region, openSeqNum = 1 is fine. ServerName may be null
      // if crash after merge happened but before we got to here.. means in-memory
      // locations of offlined merged, now-closed, regions is lost. Should be ok. We
      // assign the merged region later.
      if (sn != null) {
        addLocation(putOfMerged, sn, 1, mergedRegion.getReplicaId());
      }

      // Add empty locations for region replicas of the merged region so that number of replicas
      // can be cached whenever the primary region is looked up from meta
      for (int i = 1; i < regionReplication; i++) {
        addEmptyLocation(putOfMerged, i);
      }
      // add parent reference for serial replication
      if (!replicationParents.isEmpty()) {
        addReplicationParent(putOfMerged, replicationParents);
      }
      byte[] tableRow = Bytes.toBytes(mergedRegion.getRegionNameAsString() + HConstants.DELIMITER);
      multiMutate(meta, tableRow, mutations);
    }
  }

  /**
   * Splits the region into two in an atomic operation. Offlines the parent region with the
   * information that it is split into two, and also adds the daughter regions. Does not add the
   * location information to the daughter regions since they are not open yet.
   * @param connection connection we're using
   * @param parent the parent region which is split
   * @param parentOpenSeqNum the next open sequence id for parent region, used by serial
   *   replication. -1 if not necessary.
   * @param splitA Split daughter region A
   * @param splitB Split daughter region B
   * @param sn the location of the region
   * @param regionReplication desired region replication for the table, including the primary
   */
  public static void splitRegion(Connection connection, RegionInfo parent, long parentOpenSeqNum,
    RegionInfo splitA, RegionInfo splitB, ServerName sn, int regionReplication)
    throws IOException {
    try (Table meta = getMetaHTable(connection)) {
      long time = EnvironmentEdgeManager.currentTime();
      // Put for parent
      Put putParent = makePutFromRegionInfo(RegionInfoBuilder.newBuilder(parent)
        .setOffline(true)
        .setSplit(true).build(), time);
      addDaughtersToPut(putParent, splitA, splitB);

      // Puts for daughters
      Put putA = makePutFromRegionInfo(splitA, time);
      Put putB = makePutFromRegionInfo(splitB, time);
      if (parentOpenSeqNum > 0) {
        addReplicationBarrier(putParent, parentOpenSeqNum);
        addReplicationParent(putA, Collections.singletonList(parent));
        addReplicationParent(putB, Collections.singletonList(parent));
      }
      // Set initial state to CLOSED
      // NOTE: If initial state is not set to CLOSED then daughter regions get added with the
      // default OFFLINE state. If Master gets restarted after this step, start up sequence of
      // master tries to assign these offline regions. This is followed by re-assignments of the
      // daughter regions from resumed {@link SplitTableRegionProcedure}
      addRegionStateToPut(putA, RegionState.State.CLOSED);
      addRegionStateToPut(putB, RegionState.State.CLOSED);

      addSequenceNum(putA, 1, splitA.getReplicaId()); // new regions, openSeqNum = 1 is fine.
      addSequenceNum(putB, 1, splitB.getReplicaId());

      // Add empty locations for region replicas of daughters so that number of replicas can be
      // cached whenever the primary region is looked up from meta
      for (int i = 1; i < regionReplication; i++) {
        addEmptyLocation(putA, i);
        addEmptyLocation(putB, i);
      }

      byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER);
      multiMutate(meta, tableRow, putParent, putA, putB);
    }
  }

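  // Editorial illustration only; not part of the original class. A sketch of how the split path
  // might record a finished split in hbase:meta via splitRegion() above: the parent row is
  // rewritten as offline/split with daughter references, and daughter rows are added in state
  // CLOSED. The parameter values (openSeqNum of -1, replication of 1) are assumptions.
  private static void exampleRecordSplitInMeta(Connection connection, RegionInfo parent,
    RegionInfo daughterA, RegionInfo daughterB, ServerName parentLocation) throws IOException {
    // -1 means the table does not use serial replication, so no replication barrier is written.
    splitRegion(connection, parent, -1, daughterA, daughterB, parentLocation, 1);
  }
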
  /**
   * Update state of the table in meta.
   * @param connection what we use for update
   * @param state new state
   */
  private static void updateTableState(Connection connection, TableState state)
    throws IOException {
    Put put = makePutFromTableState(state, EnvironmentEdgeManager.currentTime());
    putToMetaTable(connection, put);
    LOG.info("Updated {} in hbase:meta", state);
  }

  /**
   * Construct PUT for given state.
   * @param state new state
   * @param ts timestamp to use for the Put
   */
  public static Put makePutFromTableState(TableState state, long ts) {
    Put put = new Put(state.getTableName().getName(), ts);
    put.addColumn(getTableFamily(), getTableStateColumn(), state.convert().toByteArray());
    return put;
  }

  /**
   * Remove state for table from meta.
   * @param connection to use for deletion
   * @param table to delete state for
   */
  public static void deleteTableState(Connection connection, TableName table)
    throws IOException {
    long time = EnvironmentEdgeManager.currentTime();
    Delete delete = new Delete(table.getName());
    delete.addColumns(getTableFamily(), getTableStateColumn(), time);
    deleteFromMetaTable(connection, delete);
    LOG.info("Deleted table " + table + " state from META");
  }

  private static void multiMutate(Table table, byte[] row, Mutation... mutations)
    throws IOException {
    multiMutate(table, row, Arrays.asList(mutations));
  }

  /**
   * Performs an atomic multi-mutate operation against the given table.
   */
  private static void multiMutate(final Table table, byte[] row, final List<Mutation> mutations)
    throws IOException {
    debugLogMutations(mutations);
    Batch.Call<MultiRowMutationService, MutateRowsResponse> callable =
      new Batch.Call<MultiRowMutationService, MutateRowsResponse>() {

        @Override
        public MutateRowsResponse call(MultiRowMutationService instance) throws IOException {
          MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder();
          for (Mutation mutation : mutations) {
            if (mutation instanceof Put) {
              builder.addMutationRequest(
                ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.PUT, mutation));
            } else if (mutation instanceof Delete) {
              builder.addMutationRequest(
                ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.DELETE, mutation));
            } else {
              throw new DoNotRetryIOException(
                "multi in MetaEditor doesn't support " + mutation.getClass().getName());
            }
          }
          ServerRpcController controller = new ServerRpcController();
          CoprocessorRpcUtils.BlockingRpcCallback<MutateRowsResponse> rpcCallback =
            new CoprocessorRpcUtils.BlockingRpcCallback<>();
          instance.mutateRows(controller, builder.build(), rpcCallback);
          MutateRowsResponse resp = rpcCallback.get();
          if (controller.failedOnException()) {
            throw controller.getFailedOn();
          }
          return resp;
        }
      };
    try {
      table.coprocessorService(MultiRowMutationService.class, row, row, callable);
    } catch (Throwable e) {
      Throwables.propagateIfPossible(e, IOException.class);
      throw new IOException(e);
    }
  }

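  // Editorial illustration only; not part of the original class. A sketch of flipping a table's
  // state to DISABLED in hbase:meta using the helpers above, assuming TableState exposes a
  // (TableName, State) constructor as in current client code.
  private static void exampleMarkTableDisabled(Connection connection, TableName tableName)
    throws IOException {
    // Writes the serialized state into the table-state column of the table's row.
    updateTableState(connection, new TableState(tableName, TableState.State.DISABLED));
  }
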
  /**
   * Updates the location of the specified region in hbase:meta to be the specified server
   * hostname and startcode.
   * <p>
   * Uses passed catalog tracker to get a connection to the server hosting hbase:meta and makes
   * edits to that region.
   * @param connection connection we're using
   * @param regionInfo region to update location of
   * @param sn Server name
   * @param openSeqNum the latest sequence number obtained when the region was open
   * @param masterSystemTime wall clock time from master if passed in the open region RPC
   */
  @VisibleForTesting
  public static void updateRegionLocation(Connection connection, RegionInfo regionInfo,
    ServerName sn, long openSeqNum, long masterSystemTime) throws IOException {
    updateLocation(connection, regionInfo, sn, openSeqNum, masterSystemTime);
  }

  /**
   * Updates the location of the specified region to be the specified server.
   * <p>
   * Connects to the specified server which should be hosting the specified catalog region name
   * to perform the edit.
   * @param connection connection we're using
   * @param regionInfo region to update location of
   * @param sn Server name
   * @param openSeqNum the latest sequence number obtained when the region was open
   * @param masterSystemTime wall clock time from master if passed in the open region RPC
   * @throws IOException In particular could throw {@link java.net.ConnectException} if the server
   *   is down on other end.
   */
  private static void updateLocation(Connection connection, RegionInfo regionInfo, ServerName sn,
    long openSeqNum, long masterSystemTime) throws IOException {
    // region replicas are kept in the primary region's row
    Put put = new Put(getMetaKeyForRegion(regionInfo), masterSystemTime);
    addRegionInfo(put, regionInfo);
    addLocation(put, sn, openSeqNum, regionInfo.getReplicaId());
    putToMetaTable(connection, put);
    LOG.info("Updated row {} with server={}", regionInfo.getRegionNameAsString(), sn);
  }

  /**
   * Deletes the specified region from META.
   * @param connection connection we're using
   * @param regionInfo region to be deleted from META
   */
  public static void deleteRegionInfo(Connection connection, RegionInfo regionInfo)
    throws IOException {
    Delete delete = new Delete(regionInfo.getRegionName());
    delete.addFamily(getCatalogFamily(), HConstants.LATEST_TIMESTAMP);
    deleteFromMetaTable(connection, delete);
    LOG.info("Deleted " + regionInfo.getRegionNameAsString());
  }

  /**
   * Deletes the specified regions from META.
   * @param connection connection we're using
   * @param regionsInfo list of regions to be deleted from META
   */
  public static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo)
    throws IOException {
    deleteRegionInfos(connection, regionsInfo, EnvironmentEdgeManager.currentTime());
  }

  /**
   * Deletes the specified regions from META.
   * @param connection connection we're using
   * @param regionsInfo list of regions to be deleted from META
   * @param ts timestamp to use for the delete markers
   */
  private static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo,
    long ts) throws IOException {
    List<Delete> deletes = new ArrayList<>(regionsInfo.size());
    for (RegionInfo hri : regionsInfo) {
      Delete e = new Delete(hri.getRegionName());
      e.addFamily(getCatalogFamily(), ts);
      deletes.add(e);
    }
    deleteFromMetaTable(connection, deletes);
    LOG.info("Deleted {} regions from META", regionsInfo.size());
    LOG.debug("Deleted regions: {}", regionsInfo);
  }

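  // Editorial illustration only; not part of the original class. A sketch of removing the
  // hbase:meta rows of regions that no longer exist (for example after a failed create), using
  // the public deleteRegionInfos() overload above; delete markers use the master's current time.
  private static void exampleDropStaleRegionRows(Connection connection,
    List<RegionInfo> staleRegions) throws IOException {
    deleteRegionInfos(connection, staleRegions);
  }
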
  /**
   * Overwrites the specified regions in hbase:meta. Deletes old rows for the given regions and
   * adds new ones. Regions added back have state CLOSED.
   * @param connection connection we're using
   * @param regionInfos list of regions to be added to META
   * @param regionReplication desired region replication for the table, including the primary
   */
  public static void overwriteRegions(Connection connection, List<RegionInfo> regionInfos,
    int regionReplication) throws IOException {
    // use master time for delete marker and the Put
    long now = EnvironmentEdgeManager.currentTime();
    deleteRegionInfos(connection, regionInfos, now);
    // Why sleep? This is the easiest way to ensure that the previous deletes do not
    // eclipse the following puts, that might happen in the same ts from the server.
    // See HBASE-9906, and HBASE-9879. Once either HBASE-9879, HBASE-8770 is fixed,
    // or HBASE-9905 is fixed and meta uses seqIds, we do not need the sleep.
    //
    // HBASE-13875 uses master timestamp for the mutations. The 20ms sleep is not needed
    addRegionsToMeta(connection, regionInfos, regionReplication, now + 1);
    LOG.info("Overwritten " + regionInfos.size() + " regions to Meta");
    LOG.debug("Overwritten regions: {} ", regionInfos);
  }

  /**
   * Deletes merge qualifiers for the specified merge region.
   * @param connection connection we're using
   * @param mergeRegion the merged region
   */
  public static void deleteMergeQualifiers(Connection connection, final RegionInfo mergeRegion)
    throws IOException {
    Delete delete = new Delete(mergeRegion.getRegionName());
    // NOTE: We are doing a new hbase:meta read here.
    Cell[] cells = getRegionResult(connection, mergeRegion.getRegionName()).rawCells();
    if (cells == null || cells.length == 0) {
      return;
    }
    List<byte[]> qualifiers = new ArrayList<>();
    for (Cell cell : cells) {
      if (!isMergeQualifierPrefix(cell)) {
        continue;
      }
      byte[] qualifier = CellUtil.cloneQualifier(cell);
      qualifiers.add(qualifier);
      delete.addColumns(getCatalogFamily(), qualifier, HConstants.LATEST_TIMESTAMP);
    }
    deleteFromMetaTable(connection, delete);
    LOG.info("Deleted merge references in " + mergeRegion.getRegionNameAsString() +
      ", deleted qualifiers " + qualifiers.stream().map(Bytes::toStringBinary).
      collect(Collectors.joining(", ")));
  }

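  // Editorial illustration only; not part of the original class. A sketch of the cleanup path
  // for a merged region: once nothing depends on the merge parents any more, the merge
  // qualifiers written by addMergeRegions() can be removed from the merged region's row.
  private static void exampleCleanupMergeReferences(Connection connection,
    RegionInfo mergedRegion) throws IOException {
    deleteMergeQualifiers(connection, mergedRegion);
  }
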
  public static Put addRegionInfo(final Put p, final RegionInfo hri)
    throws IOException {
    p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(p.getRow())
      .setFamily(getCatalogFamily())
      .setQualifier(HConstants.REGIONINFO_QUALIFIER)
      .setTimestamp(p.getTimestamp())
      .setType(Type.Put)
      // Serialize the Default Replica HRI otherwise scan of hbase:meta
      // shows an info:regioninfo value with encoded name and region
      // name that differs from that of the hbase:meta row.
      .setValue(RegionInfo.toByteArray(RegionReplicaUtil.getRegionInfoForDefaultReplica(hri)))
      .build());
    return p;
  }

  public static Put addLocation(Put p, ServerName sn, long openSeqNum, int replicaId)
    throws IOException {
    CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
    return p.add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getServerColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Cell.Type.Put)
        .setValue(Bytes.toBytes(sn.getAddress().toString()))
        .build())
      .add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getStartCodeColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Cell.Type.Put)
        .setValue(Bytes.toBytes(sn.getStartcode()))
        .build())
      .add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getSeqNumColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Type.Put)
        .setValue(Bytes.toBytes(openSeqNum))
        .build());
  }

  // Escape every occurrence of ESCAPE_BYTE in the region name by doubling it, so that
  // ESCAPE_BYTE + SEPARATED_BYTE can be used unambiguously as a separator between names.
  private static void writeRegionName(ByteArrayOutputStream out, byte[] regionName) {
    for (byte b : regionName) {
      if (b == ESCAPE_BYTE) {
        out.write(ESCAPE_BYTE);
      }
      out.write(b);
    }
  }

  @VisibleForTesting
  public static byte[] getParentsBytes(List<RegionInfo> parents) {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    Iterator<RegionInfo> iter = parents.iterator();
    writeRegionName(bos, iter.next().getRegionName());
    while (iter.hasNext()) {
      bos.write(ESCAPE_BYTE);
      bos.write(SEPARATED_BYTE);
      writeRegionName(bos, iter.next().getRegionName());
    }
    return bos.toByteArray();
  }

  private static List<byte[]> parseParentsBytes(byte[] bytes) {
    List<byte[]> parents = new ArrayList<>();
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    for (int i = 0; i < bytes.length; i++) {
      if (bytes[i] == ESCAPE_BYTE) {
        i++;
        if (bytes[i] == SEPARATED_BYTE) {
          parents.add(bos.toByteArray());
          bos.reset();
          continue;
        }
        // fall through to append the byte
      }
      bos.write(bytes[i]);
    }
    if (bos.size() > 0) {
      parents.add(bos.toByteArray());
    }
    return parents;
  }

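  // Editorial illustration only; not part of the original class. Demonstrates the escaping
  // scheme used by getParentsBytes()/parseParentsBytes(): region names are joined with an
  // ESCAPE_BYTE + SEPARATED_BYTE delimiter and any ESCAPE_BYTE inside a name is doubled, so the
  // round trip below recovers the original region names. Assumes a non-empty parents list.
  private static boolean exampleParentsBytesRoundTrip(List<RegionInfo> parents) {
    byte[] encoded = getParentsBytes(parents);
    List<byte[]> decoded = parseParentsBytes(encoded);
    if (decoded.size() != parents.size()) {
      return false;
    }
    for (int i = 0; i < parents.size(); i++) {
      if (!Bytes.equals(parents.get(i).getRegionName(), decoded.get(i))) {
        return false;
      }
    }
    return true;
  }
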
  private static void addReplicationParent(Put put, List<RegionInfo> parents) throws IOException {
    byte[] value = getParentsBytes(parents);
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
      .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(REPLICATION_PARENT_QUALIFIER)
      .setTimestamp(put.getTimestamp()).setType(Type.Put).setValue(value).build());
  }

  public static Put makePutForReplicationBarrier(RegionInfo regionInfo, long openSeqNum, long ts)
    throws IOException {
    Put put = new Put(regionInfo.getRegionName(), ts);
    addReplicationBarrier(put, openSeqNum);
    return put;
  }

  /**
   * See class comment on SerialReplicationChecker
   */
  public static void addReplicationBarrier(Put put, long openSeqNum) throws IOException {
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(put.getRow())
      .setFamily(HConstants.REPLICATION_BARRIER_FAMILY)
      .setQualifier(HConstants.SEQNUM_QUALIFIER)
      .setTimestamp(put.getTimestamp())
      .setType(Type.Put)
      .setValue(Bytes.toBytes(openSeqNum))
      .build());
  }

  private static Put addEmptyLocation(Put p, int replicaId) throws IOException {
    CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
    return p.add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getServerColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Type.Put)
        .build())
      .add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getStartCodeColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Cell.Type.Put)
        .build())
      .add(builder.clear()
        .setRow(p.getRow())
        .setFamily(getCatalogFamily())
        .setQualifier(getSeqNumColumn(replicaId))
        .setTimestamp(p.getTimestamp())
        .setType(Cell.Type.Put)
        .build());
  }

  public static final class ReplicationBarrierResult {
    private final long[] barriers;
    private final RegionState.State state;
    private final List<byte[]> parentRegionNames;

    ReplicationBarrierResult(long[] barriers, State state, List<byte[]> parentRegionNames) {
      this.barriers = barriers;
      this.state = state;
      this.parentRegionNames = parentRegionNames;
    }

    public long[] getBarriers() {
      return barriers;
    }

    public RegionState.State getState() {
      return state;
    }

    public List<byte[]> getParentRegionNames() {
      return parentRegionNames;
    }

    @Override
    public String toString() {
      return "ReplicationBarrierResult [barriers=" + Arrays.toString(barriers) + ", state=" +
        state + ", parentRegionNames=" +
        parentRegionNames.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")) +
        "]";
    }
  }

  private static long getReplicationBarrier(Cell c) {
    return Bytes.toLong(c.getValueArray(), c.getValueOffset(), c.getValueLength());
  }

  public static long[] getReplicationBarriers(Result result) {
    return result.getColumnCells(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER)
      .stream().mapToLong(MetaTableAccessor::getReplicationBarrier).sorted().distinct().toArray();
  }

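  // Editorial illustration only; not part of the original class. A sketch of building a
  // replication-barrier Put for a region at a given open sequence number, using
  // makePutForReplicationBarrier() above; using the current time as the timestamp is an
  // assumption made for this example.
  private static Put exampleBarrierPut(RegionInfo regionInfo, long openSeqNum) throws IOException {
    return makePutForReplicationBarrier(regionInfo, openSeqNum,
      EnvironmentEdgeManager.currentTime());
  }
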
  private static ReplicationBarrierResult getReplicationBarrierResult(Result result) {
    long[] barriers = getReplicationBarriers(result);
    byte[] stateBytes = result.getValue(getCatalogFamily(), getRegionStateColumn());
    RegionState.State state =
      stateBytes != null ? RegionState.State.valueOf(Bytes.toString(stateBytes)) : null;
    byte[] parentRegionsBytes =
      result.getValue(HConstants.REPLICATION_BARRIER_FAMILY, REPLICATION_PARENT_QUALIFIER);
    List<byte[]> parentRegionNames =
      parentRegionsBytes != null ? parseParentsBytes(parentRegionsBytes) : Collections.emptyList();
    return new ReplicationBarrierResult(barriers, state, parentRegionNames);
  }

  public static ReplicationBarrierResult getReplicationBarrierResult(Connection conn,
    TableName tableName, byte[] row, byte[] encodedRegionName) throws IOException {
    byte[] metaStartKey = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
    byte[] metaStopKey =
      RegionInfo.createRegionName(tableName, HConstants.EMPTY_START_ROW, "", false);
    Scan scan = new Scan().withStartRow(metaStartKey).withStopRow(metaStopKey)
      .addColumn(getCatalogFamily(), getRegionStateColumn())
      .addFamily(HConstants.REPLICATION_BARRIER_FAMILY).readAllVersions().setReversed(true)
      .setCaching(10);
    try (Table table = getMetaHTable(conn); ResultScanner scanner = table.getScanner(scan)) {
      for (Result result;;) {
        result = scanner.next();
        if (result == null) {
          return new ReplicationBarrierResult(new long[0], null, Collections.emptyList());
        }
        byte[] regionName = result.getRow();
        // TODO: we may look up a region which has already been split or merged so we need to
        // check whether the encoded name matches. Need to find a way to quit earlier when there
        // is no record for the given region, for now it will scan to the end of the table.
        if (!Bytes.equals(encodedRegionName,
          Bytes.toBytes(RegionInfo.encodeRegionName(regionName)))) {
          continue;
        }
        return getReplicationBarrierResult(result);
      }
    }
  }

  public static long[] getReplicationBarrier(Connection conn, byte[] regionName)
    throws IOException {
    try (Table table = getMetaHTable(conn)) {
      Result result = table.get(new Get(regionName)
        .addColumn(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER)
        .readAllVersions());
      return getReplicationBarriers(result);
    }
  }

  public static List<Pair<String, Long>> getTableEncodedRegionNameAndLastBarrier(Connection conn,
    TableName tableName) throws IOException {
    List<Pair<String, Long>> list = new ArrayList<>();
    scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION),
      getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, r -> {
        byte[] value =
          r.getValue(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER);
        if (value == null) {
          return true;
        }
        long lastBarrier = Bytes.toLong(value);
        String encodedRegionName = RegionInfo.encodeRegionName(r.getRow());
        list.add(Pair.newPair(encodedRegionName, lastBarrier));
        return true;
      });
    return list;
  }

  public static List<String> getTableEncodedRegionNamesForSerialReplication(Connection conn,
    TableName tableName) throws IOException {
    List<String> list = new ArrayList<>();
    scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION),
      getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION,
      new FirstKeyOnlyFilter(), Integer.MAX_VALUE, r -> {
        list.add(RegionInfo.encodeRegionName(r.getRow()));
        return true;
      });
    return list;
  }

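  // Editorial illustration only; not part of the original class. A sketch of how a serial
  // replication checker might dump the last replication barrier recorded for every region of a
  // table, using getTableEncodedRegionNameAndLastBarrier() above.
  private static void exampleLogLastBarriers(Connection conn, TableName tableName)
    throws IOException {
    for (Pair<String, Long> p : getTableEncodedRegionNameAndLastBarrier(conn, tableName)) {
      LOG.debug("Region {} last barrier {}", p.getFirst(), p.getSecond());
    }
  }
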
  private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException {
    if (!METALOG.isDebugEnabled()) {
      return;
    }
    // Logging each mutation in separate line makes it easier to see diff between them visually
    // because of common starting indentation.
    for (Mutation mutation : mutations) {
      debugLogMutation(mutation);
    }
  }

  private static void debugLogMutation(Mutation p) throws IOException {
    METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON());
  }

  private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException {
    return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(p.getRow())
      .setFamily(HConstants.CATALOG_FAMILY)
      .setQualifier(getSeqNumColumn(replicaId))
      .setTimestamp(p.getTimestamp())
      .setType(Type.Put)
      .setValue(Bytes.toBytes(openSeqNum))
      .build());
  }
}