001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase; 019 020import edu.umd.cs.findbugs.annotations.NonNull; 021import edu.umd.cs.findbugs.annotations.Nullable; 022import java.io.ByteArrayOutputStream; 023import java.io.Closeable; 024import java.io.IOException; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.Collection; 028import java.util.Collections; 029import java.util.Iterator; 030import java.util.LinkedHashMap; 031import java.util.List; 032import java.util.Map; 033import java.util.NavigableMap; 034import java.util.Set; 035import java.util.SortedMap; 036import java.util.TreeMap; 037import java.util.regex.Matcher; 038import java.util.regex.Pattern; 039import java.util.stream.Collectors; 040import org.apache.hadoop.conf.Configuration; 041import org.apache.hadoop.hbase.Cell.Type; 042import org.apache.hadoop.hbase.client.Connection; 043import org.apache.hadoop.hbase.client.ConnectionFactory; 044import org.apache.hadoop.hbase.client.Consistency; 045import org.apache.hadoop.hbase.client.Delete; 046import org.apache.hadoop.hbase.client.Get; 047import org.apache.hadoop.hbase.client.Mutation; 048import 
org.apache.hadoop.hbase.client.Put; 049import org.apache.hadoop.hbase.client.RegionInfo; 050import org.apache.hadoop.hbase.client.RegionInfoBuilder; 051import org.apache.hadoop.hbase.client.RegionLocator; 052import org.apache.hadoop.hbase.client.RegionReplicaUtil; 053import org.apache.hadoop.hbase.client.RegionServerCallable; 054import org.apache.hadoop.hbase.client.Result; 055import org.apache.hadoop.hbase.client.ResultScanner; 056import org.apache.hadoop.hbase.client.Scan; 057import org.apache.hadoop.hbase.client.Table; 058import org.apache.hadoop.hbase.client.TableState; 059import org.apache.hadoop.hbase.exceptions.DeserializationException; 060import org.apache.hadoop.hbase.filter.Filter; 061import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; 062import org.apache.hadoop.hbase.filter.RowFilter; 063import org.apache.hadoop.hbase.filter.SubstringComparator; 064import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel; 065import org.apache.hadoop.hbase.master.RegionState; 066import org.apache.hadoop.hbase.master.RegionState.State; 067import org.apache.hadoop.hbase.protobuf.ProtobufUtil; 068import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; 069import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier; 070import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionSpecifier.RegionSpecifierType; 071import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos; 072import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest; 073import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse; 074import org.apache.hadoop.hbase.util.Bytes; 075import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 076import org.apache.hadoop.hbase.util.ExceptionUtil; 077import org.apache.hadoop.hbase.util.Pair; 078import org.apache.hadoop.hbase.util.PairOfSameType; 079import org.apache.yetus.audience.InterfaceAudience; 080import org.slf4j.Logger; 081import 
org.slf4j.LoggerFactory; 082 083import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 084 085/** 086 * <p> 087 * Read/write operations on <code>hbase:meta</code> region as well as assignment information stored 088 * to <code>hbase:meta</code>. 089 * </p> 090 * <p> 091 * Some of the methods of this class take ZooKeeperWatcher as a param. The only reason for this is 092 * when this class is used on client-side (e.g. HBaseAdmin), we want to use short-lived connection 093 * (opened before each operation, closed right after), while when used on HM or HRS (like in 094 * AssignmentManager) we want permanent connection. 095 * </p> 096 * <p> 097 * HBASE-10070 adds a replicaId to HRI, meaning more than one HRI can be defined for the same table 098 * range (table, startKey, endKey). For every range, there will be at least one HRI defined which is 099 * called default replica. 100 * </p> 101 * <p> 102 * <h2>Meta layout</h2> 103 * 104 * <pre> 105 * For each table there is single row named for the table with a 'table' column family. 106 * The column family currently has one column in it, the 'state' column: 107 * 108 * table:state => contains table state 109 * 110 * Then for each table range ('Region'), there is a single row, formatted as: 111 * <tableName>,<startKey>,<regionId>,<encodedRegionName>. 112 * This row is the serialized regionName of the default region replica. 
 * Columns are:
 * info:regioninfo         => contains serialized HRI for the default region replica
 * info:server             => contains hostname:port (in string form) for the server hosting
 *                            the default regionInfo replica
 * info:server_<replicaId> => contains hostname:port (in string form) for the server hosting
 *                            the regionInfo replica with replicaId
 * info:serverstartcode    => contains server start code (in binary long form) for the server
 *                            hosting the default regionInfo replica
 * info:serverstartcode_<replicaId> => contains server start code (in binary long form) for
 *                                     the server hosting the regionInfo replica with
 *                                     replicaId
 * info:seqnumDuringOpen   => contains seqNum (in binary long form) for the region at the time
 *                            the server opened the region with default replicaId
 * info:seqnumDuringOpen_<replicaId> => contains seqNum (in binary long form) for the region
 *                                      at the time the server opened the region with
 *                                      replicaId
 * info:splitA             => contains a serialized HRI for the first daughter region if the
 *                            region is split
 * info:splitB             => contains a serialized HRI for the second daughter region if the
 *                            region is split
 * info:merge*             => contains a serialized HRI for a merge parent region. There will be two
 *                            or more of these columns in a row. A row that has these columns is
 *                            undergoing a merge and is the result of the merge. Columns listed
 *                            in merge* columns are the parents of this merged region. Example
 *                            columns: info:merge0001, info:merge0002. You may also see 'mergeA',
 *                            and 'mergeB'. This is old form replaced by the new format that allows
 *                            for more than two parents to be merged at a time.
 * TODO: Add rep_barrier for serial replication explanation.
141 * </pre> 142 * </p> 143 * <p> 144 * The actual layout of meta should be encapsulated inside MetaTableAccessor methods, and should not 145 * leak out of it (through Result objects, etc) 146 * </p> 147 */ 148@InterfaceAudience.Private 149public class MetaTableAccessor { 150 151 private static final Logger LOG = LoggerFactory.getLogger(MetaTableAccessor.class); 152 private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META"); 153 154 @VisibleForTesting 155 public static final byte[] REPLICATION_PARENT_QUALIFIER = Bytes.toBytes("parent"); 156 157 private static final byte ESCAPE_BYTE = (byte) 0xFF; 158 159 private static final byte SEPARATED_BYTE = 0x00; 160 161 @InterfaceAudience.Private 162 public enum QueryType { 163 ALL(HConstants.TABLE_FAMILY, HConstants.CATALOG_FAMILY), 164 REGION(HConstants.CATALOG_FAMILY), 165 TABLE(HConstants.TABLE_FAMILY), 166 REPLICATION(HConstants.REPLICATION_BARRIER_FAMILY); 167 168 private final byte[][] families; 169 170 QueryType(byte[]... families) { 171 this.families = families; 172 } 173 174 byte[][] getFamilies() { 175 return this.families; 176 } 177 } 178 179 /** The delimiter for meta columns for replicaIds > 0 */ 180 static final char META_REPLICA_ID_DELIMITER = '_'; 181 182 /** A regex for parsing server columns from meta. See above javadoc for meta layout */ 183 private static final Pattern SERVER_COLUMN_PATTERN 184 = Pattern.compile("^server(_[0-9a-fA-F]{4})?$"); 185 186 //////////////////////// 187 // Reading operations // 188 //////////////////////// 189 190 /** 191 * Performs a full scan of <code>hbase:meta</code> for regions. 192 * @param connection connection we're using 193 * @param visitor Visitor invoked against each row in regions family. 
194 */ 195 public static void fullScanRegions(Connection connection, final Visitor visitor) 196 throws IOException { 197 scanMeta(connection, null, null, QueryType.REGION, visitor); 198 } 199 200 /** 201 * Performs a full scan of <code>hbase:meta</code> for regions. 202 * @param connection connection we're using 203 */ 204 public static List<Result> fullScanRegions(Connection connection) throws IOException { 205 return fullScan(connection, QueryType.REGION); 206 } 207 208 /** 209 * Performs a full scan of <code>hbase:meta</code> for tables. 210 * @param connection connection we're using 211 * @param visitor Visitor invoked against each row in tables family. 212 */ 213 public static void fullScanTables(Connection connection, final Visitor visitor) 214 throws IOException { 215 scanMeta(connection, null, null, QueryType.TABLE, visitor); 216 } 217 218 /** 219 * Performs a full scan of <code>hbase:meta</code>. 220 * @param connection connection we're using 221 * @param type scanned part of meta 222 * @return List of {@link Result} 223 */ 224 private static List<Result> fullScan(Connection connection, QueryType type) throws IOException { 225 CollectAllVisitor v = new CollectAllVisitor(); 226 scanMeta(connection, null, null, type, v); 227 return v.getResults(); 228 } 229 230 /** 231 * Callers should call close on the returned {@link Table} instance. 232 * @param connection connection we're using to access Meta 233 * @return An {@link Table} for <code>hbase:meta</code> 234 */ 235 public static Table getMetaHTable(final Connection connection) 236 throws IOException { 237 // We used to pass whole CatalogTracker in here, now we just pass in Connection 238 if (connection == null) { 239 throw new NullPointerException("No connection"); 240 } else if (connection.isClosed()) { 241 throw new IOException("connection is closed"); 242 } 243 return connection.getTable(TableName.META_TABLE_NAME); 244 } 245 246 /** 247 * @param t Table to use (will be closed when done). 
248 * @param g Get to run 249 */ 250 private static Result get(final Table t, final Get g) throws IOException { 251 if (t == null) return null; 252 try { 253 return t.get(g); 254 } finally { 255 t.close(); 256 } 257 } 258 259 /** 260 * Gets the region info and assignment for the specified region. 261 * @param connection connection we're using 262 * @param regionName Region to lookup. 263 * @return Location and RegionInfo for <code>regionName</code> 264 * @deprecated use {@link #getRegionLocation(Connection, byte[])} instead 265 */ 266 @Deprecated 267 public static Pair<RegionInfo, ServerName> getRegion(Connection connection, byte [] regionName) 268 throws IOException { 269 HRegionLocation location = getRegionLocation(connection, regionName); 270 return location == null 271 ? null 272 : new Pair<>(location.getRegionInfo(), location.getServerName()); 273 } 274 275 /** 276 * Returns the HRegionLocation from meta for the given region 277 * @param connection connection we're using 278 * @param regionName region we're looking for 279 * @return HRegionLocation for the given region 280 */ 281 public static HRegionLocation getRegionLocation(Connection connection, byte[] regionName) 282 throws IOException { 283 byte[] row = regionName; 284 RegionInfo parsedInfo = null; 285 try { 286 parsedInfo = parseRegionInfoFromRegionName(regionName); 287 row = getMetaKeyForRegion(parsedInfo); 288 } catch (Exception parseEx) { 289 // Ignore. This is used with tableName passed as regionName. 290 } 291 Get get = new Get(row); 292 get.addFamily(HConstants.CATALOG_FAMILY); 293 Result r = get(getMetaHTable(connection), get); 294 RegionLocations locations = getRegionLocations(r); 295 return locations == null ? null 296 : locations.getRegionLocation(parsedInfo == null ? 
0 : parsedInfo.getReplicaId()); 297 } 298 299 /** 300 * Returns the HRegionLocation from meta for the given region 301 * @param connection connection we're using 302 * @param regionInfo region information 303 * @return HRegionLocation for the given region 304 */ 305 public static HRegionLocation getRegionLocation(Connection connection, RegionInfo regionInfo) 306 throws IOException { 307 byte[] row = getMetaKeyForRegion(regionInfo); 308 Get get = new Get(row); 309 get.addFamily(HConstants.CATALOG_FAMILY); 310 Result r = get(getMetaHTable(connection), get); 311 return getRegionLocation(r, regionInfo, regionInfo.getReplicaId()); 312 } 313 314 /** Returns the row key to use for this regionInfo */ 315 public static byte[] getMetaKeyForRegion(RegionInfo regionInfo) { 316 return RegionReplicaUtil.getRegionInfoForDefaultReplica(regionInfo).getRegionName(); 317 } 318 319 /** Returns an HRI parsed from this regionName. Not all the fields of the HRI 320 * is stored in the name, so the returned object should only be used for the fields 321 * in the regionName. 322 */ 323 public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException { 324 byte[][] fields = RegionInfo.parseRegionName(regionName); 325 long regionId = Long.parseLong(Bytes.toString(fields[2])); 326 int replicaId = fields.length > 3 ? Integer.parseInt(Bytes.toString(fields[3]), 16) : 0; 327 return RegionInfoBuilder.newBuilder(TableName.valueOf(fields[0])) 328 .setStartKey(fields[1]) 329 .setEndKey(fields[2]) 330 .setSplit(false) 331 .setRegionId(regionId) 332 .setReplicaId(replicaId) 333 .build(); 334 } 335 336 /** 337 * Gets the result in hbase:meta for the specified region. 
338 * @param connection connection we're using 339 * @param regionName region we're looking for 340 * @return result of the specified region 341 */ 342 public static Result getRegionResult(Connection connection, 343 byte[] regionName) throws IOException { 344 Get get = new Get(regionName); 345 get.addFamily(HConstants.CATALOG_FAMILY); 346 return get(getMetaHTable(connection), get); 347 } 348 349 /** 350 * Scans META table for a row whose key contains the specified <B>regionEncodedName</B>, 351 * returning a single related <code>Result</code> instance if any row is found, null otherwise. 352 * 353 * @param connection the connection to query META table. 354 * @param regionEncodedName the region encoded name to look for at META. 355 * @return <code>Result</code> instance with the row related info in META, null otherwise. 356 * @throws IOException if any errors occur while querying META. 357 */ 358 public static Result scanByRegionEncodedName(Connection connection, 359 String regionEncodedName) throws IOException { 360 RowFilter rowFilter = new RowFilter(CompareOperator.EQUAL, 361 new SubstringComparator(regionEncodedName)); 362 Scan scan = getMetaScan(connection, 1); 363 scan.setFilter(rowFilter); 364 ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan); 365 return resultScanner.next(); 366 } 367 368 /** 369 * @return Return all regioninfos listed in the 'info:merge*' columns of 370 * the <code>regionName</code> row. 371 */ 372 @Nullable 373 public static List<RegionInfo> getMergeRegions(Connection connection, byte[] regionName) 374 throws IOException { 375 return getMergeRegions(getRegionResult(connection, regionName).rawCells()); 376 } 377 378 /** 379 * @return Deserialized regioninfo values taken from column values that match 380 * the regex 'info:merge.*' in array of <code>cells</code>. 
381 */ 382 @Nullable 383 public static List<RegionInfo> getMergeRegions(Cell [] cells) { 384 if (cells == null) { 385 return null; 386 } 387 List<RegionInfo> regionsToMerge = null; 388 for (Cell cell: cells) { 389 if (!isMergeQualifierPrefix(cell)) { 390 continue; 391 } 392 // Ok. This cell is that of a info:merge* column. 393 RegionInfo ri = RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(), 394 cell.getValueLength()); 395 if (ri != null) { 396 if (regionsToMerge == null) { 397 regionsToMerge = new ArrayList<>(); 398 } 399 regionsToMerge.add(ri); 400 } 401 } 402 return regionsToMerge; 403 } 404 405 /** 406 * @return True if any merge regions present in <code>cells</code>; i.e. 407 * the column in <code>cell</code> matches the regex 'info:merge.*'. 408 */ 409 public static boolean hasMergeRegions(Cell [] cells) { 410 for (Cell cell: cells) { 411 if (!isMergeQualifierPrefix(cell)) { 412 continue; 413 } 414 return true; 415 } 416 return false; 417 } 418 419 /** 420 * @return True if the column in <code>cell</code> matches the regex 'info:merge.*'. 421 */ 422 private static boolean isMergeQualifierPrefix(Cell cell) { 423 // Check to see if has family and that qualifier starts with the merge qualifier 'merge' 424 return CellUtil.matchingFamily(cell, HConstants.CATALOG_FAMILY) && 425 PrivateCellUtil.qualifierStartsWith(cell, HConstants.MERGE_QUALIFIER_PREFIX); 426 } 427 428 /** 429 * Checks if the specified table exists. Looks at the hbase:meta table hosted on 430 * the specified server. 431 * @param connection connection we're using 432 * @param tableName table to check 433 * @return true if the table exists in meta, false if not 434 */ 435 public static boolean tableExists(Connection connection, 436 final TableName tableName) 437 throws IOException { 438 // Catalog tables always exist. 
439 return tableName.equals(TableName.META_TABLE_NAME) || 440 getTableState(connection, tableName) != null; 441 } 442 443 /** 444 * Lists all of the regions currently in META. 445 * 446 * @param connection to connect with 447 * @param excludeOfflinedSplitParents False if we are to include offlined/splitparents regions, 448 * true and we'll leave out offlined regions from returned list 449 * @return List of all user-space regions. 450 */ 451 @VisibleForTesting 452 public static List<RegionInfo> getAllRegions(Connection connection, 453 boolean excludeOfflinedSplitParents) 454 throws IOException { 455 List<Pair<RegionInfo, ServerName>> result; 456 457 result = getTableRegionsAndLocations(connection, null, 458 excludeOfflinedSplitParents); 459 460 return getListOfRegionInfos(result); 461 462 } 463 464 /** 465 * Gets all of the regions of the specified table. Do not use this method 466 * to get meta table regions, use methods in MetaTableLocator instead. 467 * @param connection connection we're using 468 * @param tableName table we're looking for 469 * @return Ordered list of {@link RegionInfo}. 470 */ 471 public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName) 472 throws IOException { 473 return getTableRegions(connection, tableName, false); 474 } 475 476 /** 477 * Gets all of the regions of the specified table. Do not use this method 478 * to get meta table regions, use methods in MetaTableLocator instead. 479 * @param connection connection we're using 480 * @param tableName table we're looking for 481 * @param excludeOfflinedSplitParents If true, do not include offlined split 482 * parents in the return. 483 * @return Ordered list of {@link RegionInfo}. 
484 */ 485 public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName, 486 final boolean excludeOfflinedSplitParents) throws IOException { 487 List<Pair<RegionInfo, ServerName>> result = 488 getTableRegionsAndLocations(connection, tableName, excludeOfflinedSplitParents); 489 return getListOfRegionInfos(result); 490 } 491 492 private static List<RegionInfo> getListOfRegionInfos( 493 final List<Pair<RegionInfo, ServerName>> pairs) { 494 if (pairs == null || pairs.isEmpty()) { 495 return Collections.emptyList(); 496 } 497 List<RegionInfo> result = new ArrayList<>(pairs.size()); 498 for (Pair<RegionInfo, ServerName> pair : pairs) { 499 result.add(pair.getFirst()); 500 } 501 return result; 502 } 503 504 /** 505 * @param tableName table we're working with 506 * @return start row for scanning META according to query type 507 */ 508 public static byte[] getTableStartRowForMeta(TableName tableName, QueryType type) { 509 if (tableName == null) { 510 return null; 511 } 512 switch (type) { 513 case REGION: 514 byte[] startRow = new byte[tableName.getName().length + 2]; 515 System.arraycopy(tableName.getName(), 0, startRow, 0, tableName.getName().length); 516 startRow[startRow.length - 2] = HConstants.DELIMITER; 517 startRow[startRow.length - 1] = HConstants.DELIMITER; 518 return startRow; 519 case ALL: 520 case TABLE: 521 default: 522 return tableName.getName(); 523 } 524 } 525 526 /** 527 * @param tableName table we're working with 528 * @return stop row for scanning META according to query type 529 */ 530 public static byte[] getTableStopRowForMeta(TableName tableName, QueryType type) { 531 if (tableName == null) { 532 return null; 533 } 534 final byte[] stopRow; 535 switch (type) { 536 case REGION: 537 stopRow = new byte[tableName.getName().length + 3]; 538 System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length); 539 stopRow[stopRow.length - 3] = ' '; 540 stopRow[stopRow.length - 2] = HConstants.DELIMITER; 541 
stopRow[stopRow.length - 1] = HConstants.DELIMITER; 542 break; 543 case ALL: 544 case TABLE: 545 default: 546 stopRow = new byte[tableName.getName().length + 1]; 547 System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length); 548 stopRow[stopRow.length - 1] = ' '; 549 break; 550 } 551 return stopRow; 552 } 553 554 /** 555 * This method creates a Scan object that will only scan catalog rows that 556 * belong to the specified table. It doesn't specify any columns. 557 * This is a better alternative to just using a start row and scan until 558 * it hits a new table since that requires parsing the HRI to get the table 559 * name. 560 * @param tableName bytes of table's name 561 * @return configured Scan object 562 */ 563 @Deprecated 564 public static Scan getScanForTableName(Connection connection, TableName tableName) { 565 // Start key is just the table name with delimiters 566 byte[] startKey = getTableStartRowForMeta(tableName, QueryType.REGION); 567 // Stop key appends the smallest possible char to the table name 568 byte[] stopKey = getTableStopRowForMeta(tableName, QueryType.REGION); 569 570 Scan scan = getMetaScan(connection, -1); 571 scan.setStartRow(startKey); 572 scan.setStopRow(stopKey); 573 return scan; 574 } 575 576 private static Scan getMetaScan(Connection connection, int rowUpperLimit) { 577 Scan scan = new Scan(); 578 int scannerCaching = connection.getConfiguration() 579 .getInt(HConstants.HBASE_META_SCANNER_CACHING, 580 HConstants.DEFAULT_HBASE_META_SCANNER_CACHING); 581 if (connection.getConfiguration().getBoolean(HConstants.USE_META_REPLICAS, 582 HConstants.DEFAULT_USE_META_REPLICAS)) { 583 scan.setConsistency(Consistency.TIMELINE); 584 } 585 if (rowUpperLimit > 0) { 586 scan.setLimit(rowUpperLimit); 587 scan.setReadType(Scan.ReadType.PREAD); 588 } 589 scan.setCaching(scannerCaching); 590 return scan; 591 } 592 /** 593 * Do not use this method to get meta table regions, use methods in MetaTableLocator instead. 
594 * @param connection connection we're using 595 * @param tableName table we're looking for 596 * @return Return list of regioninfos and server. 597 */ 598 public static List<Pair<RegionInfo, ServerName>> 599 getTableRegionsAndLocations(Connection connection, TableName tableName) 600 throws IOException { 601 return getTableRegionsAndLocations(connection, tableName, true); 602 } 603 604 /** 605 * Do not use this method to get meta table regions, use methods in MetaTableLocator instead. 606 * @param connection connection we're using 607 * @param tableName table to work with, can be null for getting all regions 608 * @param excludeOfflinedSplitParents don't return split parents 609 * @return Return list of regioninfos and server addresses. 610 */ 611 public static List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations( 612 Connection connection, @Nullable final TableName tableName, 613 final boolean excludeOfflinedSplitParents) throws IOException { 614 if (tableName != null && tableName.equals(TableName.META_TABLE_NAME)) { 615 throw new IOException("This method can't be used to locate meta regions;" 616 + " use MetaTableLocator instead"); 617 } 618 // Make a version of CollectingVisitor that collects RegionInfo and ServerAddress 619 CollectingVisitor<Pair<RegionInfo, ServerName>> visitor = 620 new CollectingVisitor<Pair<RegionInfo, ServerName>>() { 621 private RegionLocations current = null; 622 623 @Override 624 public boolean visit(Result r) throws IOException { 625 current = getRegionLocations(r); 626 if (current == null || current.getRegionLocation().getRegion() == null) { 627 LOG.warn("No serialized RegionInfo in " + r); 628 return true; 629 } 630 RegionInfo hri = current.getRegionLocation().getRegion(); 631 if (excludeOfflinedSplitParents && hri.isSplitParent()) return true; 632 // Else call super and add this Result to the collection. 
633 return super.visit(r); 634 } 635 636 @Override 637 void add(Result r) { 638 if (current == null) { 639 return; 640 } 641 for (HRegionLocation loc : current.getRegionLocations()) { 642 if (loc != null) { 643 this.results.add(new Pair<>(loc.getRegion(), loc.getServerName())); 644 } 645 } 646 } 647 }; 648 scanMeta(connection, 649 getTableStartRowForMeta(tableName, QueryType.REGION), 650 getTableStopRowForMeta(tableName, QueryType.REGION), 651 QueryType.REGION, visitor); 652 return visitor.getResults(); 653 } 654 655 /** 656 * @param connection connection we're using 657 * @param serverName server whose regions we're interested in 658 * @return List of user regions installed on this server (does not include 659 * catalog regions). 660 * @throws IOException 661 */ 662 public static NavigableMap<RegionInfo, Result> 663 getServerUserRegions(Connection connection, final ServerName serverName) 664 throws IOException { 665 final NavigableMap<RegionInfo, Result> hris = new TreeMap<>(); 666 // Fill the above hris map with entries from hbase:meta that have the passed 667 // servername. 
668 CollectingVisitor<Result> v = new CollectingVisitor<Result>() { 669 @Override 670 void add(Result r) { 671 if (r == null || r.isEmpty()) return; 672 RegionLocations locations = getRegionLocations(r); 673 if (locations == null) return; 674 for (HRegionLocation loc : locations.getRegionLocations()) { 675 if (loc != null) { 676 if (loc.getServerName() != null && loc.getServerName().equals(serverName)) { 677 hris.put(loc.getRegion(), r); 678 } 679 } 680 } 681 } 682 }; 683 scanMeta(connection, null, null, QueryType.REGION, v); 684 return hris; 685 } 686 687 public static void fullScanMetaAndPrint(Connection connection) 688 throws IOException { 689 Visitor v = r -> { 690 if (r == null || r.isEmpty()) { 691 return true; 692 } 693 LOG.info("fullScanMetaAndPrint.Current Meta Row: " + r); 694 TableState state = getTableState(r); 695 if (state != null) { 696 LOG.info("fullScanMetaAndPrint.Table State={}" + state); 697 } else { 698 RegionLocations locations = getRegionLocations(r); 699 if (locations == null) { 700 return true; 701 } 702 for (HRegionLocation loc : locations.getRegionLocations()) { 703 if (loc != null) { 704 LOG.info("fullScanMetaAndPrint.HRI Print={}", loc.getRegion()); 705 } 706 } 707 } 708 return true; 709 }; 710 scanMeta(connection, null, null, QueryType.ALL, v); 711 } 712 713 public static void scanMetaForTableRegions(Connection connection, Visitor visitor, 714 TableName tableName) throws IOException { 715 scanMeta(connection, tableName, QueryType.REGION, Integer.MAX_VALUE, visitor); 716 } 717 718 private static void scanMeta(Connection connection, TableName table, QueryType type, int maxRows, 719 final Visitor visitor) throws IOException { 720 scanMeta(connection, getTableStartRowForMeta(table, type), getTableStopRowForMeta(table, type), 721 type, maxRows, visitor); 722 } 723 724 private static void scanMeta(Connection connection, @Nullable final byte[] startRow, 725 @Nullable final byte[] stopRow, QueryType type, final Visitor visitor) throws 
IOException { 726 scanMeta(connection, startRow, stopRow, type, Integer.MAX_VALUE, visitor); 727 } 728 729 /** 730 * Performs a scan of META table for given table starting from given row. 731 * @param connection connection we're using 732 * @param visitor visitor to call 733 * @param tableName table withing we scan 734 * @param row start scan from this row 735 * @param rowLimit max number of rows to return 736 */ 737 public static void scanMeta(Connection connection, final Visitor visitor, 738 final TableName tableName, final byte[] row, final int rowLimit) throws IOException { 739 byte[] startRow = null; 740 byte[] stopRow = null; 741 if (tableName != null) { 742 startRow = getTableStartRowForMeta(tableName, QueryType.REGION); 743 if (row != null) { 744 RegionInfo closestRi = getClosestRegionInfo(connection, tableName, row); 745 startRow = 746 RegionInfo.createRegionName(tableName, closestRi.getStartKey(), HConstants.ZEROES, false); 747 } 748 stopRow = getTableStopRowForMeta(tableName, QueryType.REGION); 749 } 750 scanMeta(connection, startRow, stopRow, QueryType.REGION, rowLimit, visitor); 751 } 752 753 /** 754 * Performs a scan of META table. 755 * @param connection connection we're using 756 * @param startRow Where to start the scan. Pass null if want to begin scan 757 * at first row. 758 * @param stopRow Where to stop the scan. Pass null if want to scan all rows 759 * from the start one 760 * @param type scanned part of meta 761 * @param maxRows maximum rows to return 762 * @param visitor Visitor invoked against each row. 
763 */ 764 static void scanMeta(Connection connection, @Nullable final byte[] startRow, 765 @Nullable final byte[] stopRow, QueryType type, int maxRows, final Visitor visitor) 766 throws IOException { 767 scanMeta(connection, startRow, stopRow, type, null, maxRows, visitor); 768 } 769 770 private static void scanMeta(Connection connection, @Nullable final byte[] startRow, 771 @Nullable final byte[] stopRow, QueryType type, @Nullable Filter filter, int maxRows, 772 final Visitor visitor) throws IOException { 773 int rowUpperLimit = maxRows > 0 ? maxRows : Integer.MAX_VALUE; 774 Scan scan = getMetaScan(connection, rowUpperLimit); 775 776 for (byte[] family : type.getFamilies()) { 777 scan.addFamily(family); 778 } 779 if (startRow != null) { 780 scan.withStartRow(startRow); 781 } 782 if (stopRow != null) { 783 scan.withStopRow(stopRow); 784 } 785 if (filter != null) { 786 scan.setFilter(filter); 787 } 788 789 if (LOG.isTraceEnabled()) { 790 LOG.trace("Scanning META" + " starting at row=" + Bytes.toStringBinary(startRow) + 791 " stopping at row=" + Bytes.toStringBinary(stopRow) + " for max=" + rowUpperLimit + 792 " with caching=" + scan.getCaching()); 793 } 794 795 int currentRow = 0; 796 try (Table metaTable = getMetaHTable(connection)) { 797 try (ResultScanner scanner = metaTable.getScanner(scan)) { 798 Result data; 799 while ((data = scanner.next()) != null) { 800 if (data.isEmpty()) continue; 801 // Break if visit returns false. 
802 if (!visitor.visit(data)) break; 803 if (++currentRow >= rowUpperLimit) break; 804 } 805 } 806 } 807 if (visitor instanceof Closeable) { 808 try { 809 ((Closeable) visitor).close(); 810 } catch (Throwable t) { 811 ExceptionUtil.rethrowIfInterrupt(t); 812 LOG.debug("Got exception in closing the meta scanner visitor", t); 813 } 814 } 815 } 816 817 /** 818 * @return Get closest metatable region row to passed <code>row</code> 819 */ 820 @NonNull 821 private static RegionInfo getClosestRegionInfo(Connection connection, 822 @NonNull final TableName tableName, @NonNull final byte[] row) throws IOException { 823 byte[] searchRow = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false); 824 Scan scan = getMetaScan(connection, 1); 825 scan.setReversed(true); 826 scan.withStartRow(searchRow); 827 try (ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan)) { 828 Result result = resultScanner.next(); 829 if (result == null) { 830 throw new TableNotFoundException("Cannot find row in META " + 831 " for table: " + tableName + ", row=" + Bytes.toStringBinary(row)); 832 } 833 RegionInfo regionInfo = getRegionInfo(result); 834 if (regionInfo == null) { 835 throw new IOException("RegionInfo was null or empty in Meta for " + 836 tableName + ", row=" + Bytes.toStringBinary(row)); 837 } 838 return regionInfo; 839 } 840 } 841 842 /** 843 * Returns the column family used for meta columns. 844 * @return HConstants.CATALOG_FAMILY. 845 */ 846 public static byte[] getCatalogFamily() { 847 return HConstants.CATALOG_FAMILY; 848 } 849 850 /** 851 * Returns the column family used for table columns. 852 * @return HConstants.TABLE_FAMILY. 
853 */ 854 private static byte[] getTableFamily() { 855 return HConstants.TABLE_FAMILY; 856 } 857 858 /** 859 * Returns the column qualifier for serialized region info 860 * @return HConstants.REGIONINFO_QUALIFIER 861 */ 862 public static byte[] getRegionInfoColumn() { 863 return HConstants.REGIONINFO_QUALIFIER; 864 } 865 866 /** 867 * Returns the column qualifier for serialized table state 868 * @return HConstants.TABLE_STATE_QUALIFIER 869 */ 870 private static byte[] getTableStateColumn() { 871 return HConstants.TABLE_STATE_QUALIFIER; 872 } 873 874 /** 875 * Returns the column qualifier for serialized region state 876 * @return HConstants.STATE_QUALIFIER 877 */ 878 private static byte[] getRegionStateColumn() { 879 return HConstants.STATE_QUALIFIER; 880 } 881 882 /** 883 * Returns the column qualifier for serialized region state 884 * @param replicaId the replicaId of the region 885 * @return a byte[] for state qualifier 886 */ 887 @VisibleForTesting 888 static byte[] getRegionStateColumn(int replicaId) { 889 return replicaId == 0 ? HConstants.STATE_QUALIFIER 890 : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 891 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 892 } 893 894 /** 895 * Returns the column qualifier for serialized region state 896 * @param replicaId the replicaId of the region 897 * @return a byte[] for sn column qualifier 898 */ 899 @VisibleForTesting 900 static byte[] getServerNameColumn(int replicaId) { 901 return replicaId == 0 ? HConstants.SERVERNAME_QUALIFIER 902 : Bytes.toBytes(HConstants.SERVERNAME_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 903 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 904 } 905 906 /** 907 * Returns the column qualifier for server column for replicaId 908 * @param replicaId the replicaId of the region 909 * @return a byte[] for server column qualifier 910 */ 911 @VisibleForTesting 912 public static byte[] getServerColumn(int replicaId) { 913 return replicaId == 0 914 ? 
HConstants.SERVER_QUALIFIER 915 : Bytes.toBytes(HConstants.SERVER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 916 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 917 } 918 919 /** 920 * Returns the column qualifier for server start code column for replicaId 921 * @param replicaId the replicaId of the region 922 * @return a byte[] for server start code column qualifier 923 */ 924 @VisibleForTesting 925 public static byte[] getStartCodeColumn(int replicaId) { 926 return replicaId == 0 927 ? HConstants.STARTCODE_QUALIFIER 928 : Bytes.toBytes(HConstants.STARTCODE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 929 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 930 } 931 932 /** 933 * Returns the column qualifier for seqNum column for replicaId 934 * @param replicaId the replicaId of the region 935 * @return a byte[] for seqNum column qualifier 936 */ 937 @VisibleForTesting 938 public static byte[] getSeqNumColumn(int replicaId) { 939 return replicaId == 0 940 ? HConstants.SEQNUM_QUALIFIER 941 : Bytes.toBytes(HConstants.SEQNUM_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 942 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 943 } 944 945 /** 946 * Parses the replicaId from the server column qualifier. See top of the class javadoc 947 * for the actual meta layout 948 * @param serverColumn the column qualifier 949 * @return an int for the replicaId 950 */ 951 @VisibleForTesting 952 static int parseReplicaIdFromServerColumn(byte[] serverColumn) { 953 String serverStr = Bytes.toString(serverColumn); 954 955 Matcher matcher = SERVER_COLUMN_PATTERN.matcher(serverStr); 956 if (matcher.matches() && matcher.groupCount() > 0) { 957 String group = matcher.group(1); 958 if (group != null && group.length() > 0) { 959 return Integer.parseInt(group.substring(1), 16); 960 } else { 961 return 0; 962 } 963 } 964 return -1; 965 } 966 967 /** 968 * Returns a {@link ServerName} from catalog table {@link Result}. 
969 * @param r Result to pull from 970 * @return A ServerName instance or null if necessary fields not found or empty. 971 */ 972 @Nullable 973 @InterfaceAudience.Private // for use by HMaster#getTableRegionRow which is used for testing only 974 public static ServerName getServerName(final Result r, final int replicaId) { 975 byte[] serverColumn = getServerColumn(replicaId); 976 Cell cell = r.getColumnLatestCell(getCatalogFamily(), serverColumn); 977 if (cell == null || cell.getValueLength() == 0) return null; 978 String hostAndPort = Bytes.toString( 979 cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 980 byte[] startcodeColumn = getStartCodeColumn(replicaId); 981 cell = r.getColumnLatestCell(getCatalogFamily(), startcodeColumn); 982 if (cell == null || cell.getValueLength() == 0) return null; 983 try { 984 return ServerName.valueOf(hostAndPort, 985 Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength())); 986 } catch (IllegalArgumentException e) { 987 LOG.error("Ignoring invalid region for server " + hostAndPort + "; cell=" + cell, e); 988 return null; 989 } 990 } 991 992 /** 993 * The latest seqnum that the server writing to meta observed when opening the region. 994 * E.g. the seqNum when the result of {@link #getServerName(Result, int)} was written. 995 * @param r Result to pull the seqNum from 996 * @return SeqNum, or HConstants.NO_SEQNUM if there's no value written. 997 */ 998 private static long getSeqNumDuringOpen(final Result r, final int replicaId) { 999 Cell cell = r.getColumnLatestCell(getCatalogFamily(), getSeqNumColumn(replicaId)); 1000 if (cell == null || cell.getValueLength() == 0) return HConstants.NO_SEQNUM; 1001 return Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 1002 } 1003 1004 /** 1005 * Returns the daughter regions by reading the corresponding columns of the catalog table 1006 * Result. 
1007 * @param data a Result object from the catalog table scan 1008 * @return pair of RegionInfo or PairOfSameType(null, null) if region is not a split parent 1009 */ 1010 public static PairOfSameType<RegionInfo> getDaughterRegions(Result data) { 1011 RegionInfo splitA = getRegionInfo(data, HConstants.SPLITA_QUALIFIER); 1012 RegionInfo splitB = getRegionInfo(data, HConstants.SPLITB_QUALIFIER); 1013 return new PairOfSameType<>(splitA, splitB); 1014 } 1015 1016 /** 1017 * Returns an HRegionLocationList extracted from the result. 1018 * @return an HRegionLocationList containing all locations for the region range or null if 1019 * we can't deserialize the result. 1020 */ 1021 @Nullable 1022 public static RegionLocations getRegionLocations(final Result r) { 1023 if (r == null) return null; 1024 RegionInfo regionInfo = getRegionInfo(r, getRegionInfoColumn()); 1025 if (regionInfo == null) return null; 1026 1027 List<HRegionLocation> locations = new ArrayList<>(1); 1028 NavigableMap<byte[],NavigableMap<byte[],byte[]>> familyMap = r.getNoVersionMap(); 1029 1030 locations.add(getRegionLocation(r, regionInfo, 0)); 1031 1032 NavigableMap<byte[], byte[]> infoMap = familyMap.get(getCatalogFamily()); 1033 if (infoMap == null) return new RegionLocations(locations); 1034 1035 // iterate until all serverName columns are seen 1036 int replicaId = 0; 1037 byte[] serverColumn = getServerColumn(replicaId); 1038 SortedMap<byte[], byte[]> serverMap; 1039 serverMap = infoMap.tailMap(serverColumn, false); 1040 1041 if (serverMap.isEmpty()) return new RegionLocations(locations); 1042 1043 for (Map.Entry<byte[], byte[]> entry : serverMap.entrySet()) { 1044 replicaId = parseReplicaIdFromServerColumn(entry.getKey()); 1045 if (replicaId < 0) { 1046 break; 1047 } 1048 HRegionLocation location = getRegionLocation(r, regionInfo, replicaId); 1049 // In case the region replica is newly created, it's location might be null. 
We usually do not 1050 // have HRL's in RegionLocations object with null ServerName. They are handled as null HRLs. 1051 if (location.getServerName() == null) { 1052 locations.add(null); 1053 } else { 1054 locations.add(location); 1055 } 1056 } 1057 1058 return new RegionLocations(locations); 1059 } 1060 1061 /** 1062 * Returns the HRegionLocation parsed from the given meta row Result 1063 * for the given regionInfo and replicaId. The regionInfo can be the default region info 1064 * for the replica. 1065 * @param r the meta row result 1066 * @param regionInfo RegionInfo for default replica 1067 * @param replicaId the replicaId for the HRegionLocation 1068 * @return HRegionLocation parsed from the given meta row Result for the given replicaId 1069 */ 1070 private static HRegionLocation getRegionLocation(final Result r, final RegionInfo regionInfo, 1071 final int replicaId) { 1072 ServerName serverName = getServerName(r, replicaId); 1073 long seqNum = getSeqNumDuringOpen(r, replicaId); 1074 RegionInfo replicaInfo = RegionReplicaUtil.getRegionInfoForReplica(regionInfo, replicaId); 1075 return new HRegionLocation(replicaInfo, serverName, seqNum); 1076 } 1077 1078 /** 1079 * Returns RegionInfo object from the column 1080 * HConstants.CATALOG_FAMILY:HConstants.REGIONINFO_QUALIFIER of the catalog 1081 * table Result. 1082 * @param data a Result object from the catalog table scan 1083 * @return RegionInfo or null 1084 */ 1085 public static RegionInfo getRegionInfo(Result data) { 1086 return getRegionInfo(data, HConstants.REGIONINFO_QUALIFIER); 1087 } 1088 1089 /** 1090 * Returns the RegionInfo object from the column {@link HConstants#CATALOG_FAMILY} and 1091 * <code>qualifier</code> of the catalog table result. 1092 * @param r a Result object from the catalog table scan 1093 * @param qualifier Column family qualifier 1094 * @return An RegionInfo instance or null. 
1095 */ 1096 @Nullable 1097 public static RegionInfo getRegionInfo(final Result r, byte [] qualifier) { 1098 Cell cell = r.getColumnLatestCell(getCatalogFamily(), qualifier); 1099 if (cell == null) return null; 1100 return RegionInfo.parseFromOrNull(cell.getValueArray(), 1101 cell.getValueOffset(), cell.getValueLength()); 1102 } 1103 1104 /** 1105 * Fetch table state for given table from META table 1106 * @param conn connection to use 1107 * @param tableName table to fetch state for 1108 */ 1109 @Nullable 1110 public static TableState getTableState(Connection conn, TableName tableName) 1111 throws IOException { 1112 if (tableName.equals(TableName.META_TABLE_NAME)) { 1113 return new TableState(tableName, TableState.State.ENABLED); 1114 } 1115 Table metaHTable = getMetaHTable(conn); 1116 Get get = new Get(tableName.getName()).addColumn(getTableFamily(), getTableStateColumn()); 1117 Result result = metaHTable.get(get); 1118 return getTableState(result); 1119 } 1120 1121 /** 1122 * Fetch table states from META table 1123 * @param conn connection to use 1124 * @return map {tableName -> state} 1125 */ 1126 public static Map<TableName, TableState> getTableStates(Connection conn) 1127 throws IOException { 1128 final Map<TableName, TableState> states = new LinkedHashMap<>(); 1129 Visitor collector = r -> { 1130 TableState state = getTableState(r); 1131 if (state != null) { 1132 states.put(state.getTableName(), state); 1133 } 1134 return true; 1135 }; 1136 fullScanTables(conn, collector); 1137 return states; 1138 } 1139 1140 /** 1141 * Updates state in META 1142 * @param conn connection to use 1143 * @param tableName table to look for 1144 */ 1145 public static void updateTableState(Connection conn, TableName tableName, 1146 TableState.State actual) throws IOException { 1147 updateTableState(conn, new TableState(tableName, actual)); 1148 } 1149 1150 /** 1151 * Decode table state from META Result. 
1152 * Should contain cell from HConstants.TABLE_FAMILY 1153 * @return null if not found 1154 */ 1155 @Nullable 1156 public static TableState getTableState(Result r) throws IOException { 1157 Cell cell = r.getColumnLatestCell(getTableFamily(), getTableStateColumn()); 1158 if (cell == null) { 1159 return null; 1160 } 1161 try { 1162 return TableState.parseFrom(TableName.valueOf(r.getRow()), 1163 Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(), 1164 cell.getValueOffset() + cell.getValueLength())); 1165 } catch (DeserializationException e) { 1166 throw new IOException(e); 1167 } 1168 } 1169 1170 /** 1171 * Implementations 'visit' a catalog table row. 1172 */ 1173 public interface Visitor { 1174 /** 1175 * Visit the catalog table row. 1176 * @param r A row from catalog table 1177 * @return True if we are to proceed scanning the table, else false if 1178 * we are to stop now. 1179 */ 1180 boolean visit(final Result r) throws IOException; 1181 } 1182 1183 /** 1184 * Implementations 'visit' a catalog table row but with close() at the end. 1185 */ 1186 public interface CloseableVisitor extends Visitor, Closeable { 1187 } 1188 1189 /** 1190 * A {@link Visitor} that collects content out of passed {@link Result}. 1191 */ 1192 static abstract class CollectingVisitor<T> implements Visitor { 1193 final List<T> results = new ArrayList<>(); 1194 @Override 1195 public boolean visit(Result r) throws IOException { 1196 if (r != null && !r.isEmpty()) { 1197 add(r); 1198 } 1199 return true; 1200 } 1201 1202 abstract void add(Result r); 1203 1204 /** 1205 * @return Collected results; wait till visits complete to collect all 1206 * possible results 1207 */ 1208 List<T> getResults() { 1209 return this.results; 1210 } 1211 } 1212 1213 /** 1214 * Collects all returned. 
1215 */ 1216 static class CollectAllVisitor extends CollectingVisitor<Result> { 1217 @Override 1218 void add(Result r) { 1219 this.results.add(r); 1220 } 1221 } 1222 1223 /** 1224 * A Visitor that skips offline regions and split parents 1225 */ 1226 public static abstract class DefaultVisitorBase implements Visitor { 1227 1228 public DefaultVisitorBase() { 1229 super(); 1230 } 1231 1232 public abstract boolean visitInternal(Result rowResult) throws IOException; 1233 1234 @Override 1235 public boolean visit(Result rowResult) throws IOException { 1236 RegionInfo info = getRegionInfo(rowResult); 1237 if (info == null) { 1238 return true; 1239 } 1240 1241 //skip over offline and split regions 1242 if (!(info.isOffline() || info.isSplit())) { 1243 return visitInternal(rowResult); 1244 } 1245 return true; 1246 } 1247 } 1248 1249 /** 1250 * A Visitor for a table. Provides a consistent view of the table's 1251 * hbase:meta entries during concurrent splits (see HBASE-5986 for details). This class 1252 * does not guarantee ordered traversal of meta entries, and can block until the 1253 * hbase:meta entries for daughters are available during splits. 1254 */ 1255 public static abstract class TableVisitorBase extends DefaultVisitorBase { 1256 private TableName tableName; 1257 1258 public TableVisitorBase(TableName tableName) { 1259 super(); 1260 this.tableName = tableName; 1261 } 1262 1263 @Override 1264 public final boolean visit(Result rowResult) throws IOException { 1265 RegionInfo info = getRegionInfo(rowResult); 1266 if (info == null) { 1267 return true; 1268 } 1269 if (!(info.getTable().equals(tableName))) { 1270 return false; 1271 } 1272 return super.visit(rowResult); 1273 } 1274 } 1275 1276 /** 1277 * Count regions in <code>hbase:meta</code> for passed table. 
1278 * @param c Configuration object 1279 * @param tableName table name to count regions for 1280 * @return Count or regions in table <code>tableName</code> 1281 */ 1282 public static int getRegionCount(final Configuration c, final TableName tableName) 1283 throws IOException { 1284 try (Connection connection = ConnectionFactory.createConnection(c)) { 1285 return getRegionCount(connection, tableName); 1286 } 1287 } 1288 1289 /** 1290 * Count regions in <code>hbase:meta</code> for passed table. 1291 * @param connection Connection object 1292 * @param tableName table name to count regions for 1293 * @return Count or regions in table <code>tableName</code> 1294 */ 1295 public static int getRegionCount(final Connection connection, final TableName tableName) 1296 throws IOException { 1297 try (RegionLocator locator = connection.getRegionLocator(tableName)) { 1298 List<HRegionLocation> locations = locator.getAllRegionLocations(); 1299 return locations == null ? 0 : locations.size(); 1300 } 1301 } 1302 1303 //////////////////////// 1304 // Editing operations // 1305 //////////////////////// 1306 /** 1307 * Generates and returns a Put containing the region into for the catalog table 1308 */ 1309 public static Put makePutFromRegionInfo(RegionInfo regionInfo, long ts) throws IOException { 1310 Put put = new Put(regionInfo.getRegionName(), ts); 1311 addRegionInfo(put, regionInfo); 1312 return put; 1313 } 1314 1315 /** 1316 * Generates and returns a Delete containing the region info for the catalog table 1317 */ 1318 private static Delete makeDeleteFromRegionInfo(RegionInfo regionInfo, long ts) { 1319 if (regionInfo == null) { 1320 throw new IllegalArgumentException("Can't make a delete for null region"); 1321 } 1322 Delete delete = new Delete(regionInfo.getRegionName()); 1323 delete.addFamily(getCatalogFamily(), ts); 1324 return delete; 1325 } 1326 1327 /** 1328 * Adds split daughters to the Put 1329 */ 1330 private static Put addDaughtersToPut(Put put, RegionInfo splitA, 
RegionInfo splitB) 1331 throws IOException { 1332 if (splitA != null) { 1333 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1334 .setRow(put.getRow()) 1335 .setFamily(HConstants.CATALOG_FAMILY) 1336 .setQualifier(HConstants.SPLITA_QUALIFIER) 1337 .setTimestamp(put.getTimestamp()) 1338 .setType(Type.Put) 1339 .setValue(RegionInfo.toByteArray(splitA)) 1340 .build()); 1341 } 1342 if (splitB != null) { 1343 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1344 .setRow(put.getRow()) 1345 .setFamily(HConstants.CATALOG_FAMILY) 1346 .setQualifier(HConstants.SPLITB_QUALIFIER) 1347 .setTimestamp(put.getTimestamp()) 1348 .setType(Type.Put) 1349 .setValue(RegionInfo.toByteArray(splitB)) 1350 .build()); 1351 } 1352 return put; 1353 } 1354 1355 /** 1356 * Put the passed <code>p</code> to the <code>hbase:meta</code> table. 1357 * @param connection connection we're using 1358 * @param p Put to add to hbase:meta 1359 */ 1360 private static void putToMetaTable(Connection connection, Put p) throws IOException { 1361 try (Table table = getMetaHTable(connection)) { 1362 put(table, p); 1363 } 1364 } 1365 1366 /** 1367 * @param t Table to use 1368 * @param p put to make 1369 */ 1370 private static void put(Table t, Put p) throws IOException { 1371 debugLogMutation(p); 1372 t.put(p); 1373 } 1374 1375 /** 1376 * Put the passed <code>ps</code> to the <code>hbase:meta</code> table. 1377 * @param connection connection we're using 1378 * @param ps Put to add to hbase:meta 1379 */ 1380 public static void putsToMetaTable(final Connection connection, final List<Put> ps) 1381 throws IOException { 1382 if (ps.isEmpty()) { 1383 return; 1384 } 1385 try (Table t = getMetaHTable(connection)) { 1386 debugLogMutations(ps); 1387 // the implementation for putting a single Put is much simpler so here we do a check first. 
1388 if (ps.size() == 1) { 1389 t.put(ps.get(0)); 1390 } else { 1391 t.put(ps); 1392 } 1393 } 1394 } 1395 1396 /** 1397 * Delete the passed <code>d</code> from the <code>hbase:meta</code> table. 1398 * @param connection connection we're using 1399 * @param d Delete to add to hbase:meta 1400 */ 1401 private static void deleteFromMetaTable(final Connection connection, final Delete d) 1402 throws IOException { 1403 List<Delete> dels = new ArrayList<>(1); 1404 dels.add(d); 1405 deleteFromMetaTable(connection, dels); 1406 } 1407 1408 /** 1409 * Delete the passed <code>deletes</code> from the <code>hbase:meta</code> table. 1410 * @param connection connection we're using 1411 * @param deletes Deletes to add to hbase:meta This list should support #remove. 1412 */ 1413 private static void deleteFromMetaTable(final Connection connection, final List<Delete> deletes) 1414 throws IOException { 1415 try (Table t = getMetaHTable(connection)) { 1416 debugLogMutations(deletes); 1417 t.delete(deletes); 1418 } 1419 } 1420 1421 /** 1422 * Deletes some replica columns corresponding to replicas for the passed rows 1423 * @param metaRows rows in hbase:meta 1424 * @param replicaIndexToDeleteFrom the replica ID we would start deleting from 1425 * @param numReplicasToRemove how many replicas to remove 1426 * @param connection connection we're using to access meta table 1427 */ 1428 public static void removeRegionReplicasFromMeta(Set<byte[]> metaRows, 1429 int replicaIndexToDeleteFrom, int numReplicasToRemove, Connection connection) 1430 throws IOException { 1431 int absoluteIndex = replicaIndexToDeleteFrom + numReplicasToRemove; 1432 for (byte[] row : metaRows) { 1433 long now = EnvironmentEdgeManager.currentTime(); 1434 Delete deleteReplicaLocations = new Delete(row); 1435 for (int i = replicaIndexToDeleteFrom; i < absoluteIndex; i++) { 1436 deleteReplicaLocations.addColumns(getCatalogFamily(), 1437 getServerColumn(i), now); 1438 deleteReplicaLocations.addColumns(getCatalogFamily(), 1439 
getSeqNumColumn(i), now); 1440 deleteReplicaLocations.addColumns(getCatalogFamily(), 1441 getStartCodeColumn(i), now); 1442 } 1443 deleteFromMetaTable(connection, deleteReplicaLocations); 1444 } 1445 } 1446 1447 private static void addRegionStateToPut(Put put, RegionState.State state) throws IOException { 1448 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY) 1449 .setRow(put.getRow()) 1450 .setFamily(HConstants.CATALOG_FAMILY) 1451 .setQualifier(getRegionStateColumn()) 1452 .setTimestamp(put.getTimestamp()) 1453 .setType(Cell.Type.Put) 1454 .setValue(Bytes.toBytes(state.name())) 1455 .build()); 1456 } 1457 1458 /** 1459 * Adds daughter region infos to hbase:meta row for the specified region. Note that this does not 1460 * add its daughter's as different rows, but adds information about the daughters in the same row 1461 * as the parent. Use 1462 * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} 1463 * if you want to do that. 1464 * @param connection connection we're using 1465 * @param regionInfo RegionInfo of parent region 1466 * @param splitA first split daughter of the parent regionInfo 1467 * @param splitB second split daughter of the parent regionInfo 1468 * @throws IOException if problem connecting or updating meta 1469 */ 1470 public static void addSplitsToParent(Connection connection, RegionInfo regionInfo, 1471 RegionInfo splitA, RegionInfo splitB) throws IOException { 1472 try (Table meta = getMetaHTable(connection)) { 1473 Put put = makePutFromRegionInfo(regionInfo, EnvironmentEdgeManager.currentTime()); 1474 addDaughtersToPut(put, splitA, splitB); 1475 meta.put(put); 1476 debugLogMutation(put); 1477 LOG.debug("Added region {}", regionInfo.getRegionNameAsString()); 1478 } 1479 } 1480 1481 /** 1482 * Adds a (single) hbase:meta row for the specified new region and its daughters. 
Note that this 1483 * does not add its daughter's as different rows, but adds information about the daughters 1484 * in the same row as the parent. Use 1485 * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} 1486 * if you want to do that. 1487 * @param connection connection we're using 1488 * @param regionInfo region information 1489 * @throws IOException if problem connecting or updating meta 1490 */ 1491 @VisibleForTesting 1492 public static void addRegionToMeta(Connection connection, RegionInfo regionInfo) 1493 throws IOException { 1494 addRegionsToMeta(connection, Collections.singletonList(regionInfo), 1); 1495 } 1496 1497 /** 1498 * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions 1499 * is CLOSED. 1500 * @param connection connection we're using 1501 * @param regionInfos region information list 1502 * @throws IOException if problem connecting or updating meta 1503 */ 1504 public static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos, 1505 int regionReplication) throws IOException { 1506 addRegionsToMeta(connection, regionInfos, regionReplication, 1507 EnvironmentEdgeManager.currentTime()); 1508 } 1509 1510 /** 1511 * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions 1512 * is CLOSED. 1513 * @param connection connection we're using 1514 * @param regionInfos region information list 1515 * @param ts desired timestamp 1516 * @throws IOException if problem connecting or updating meta 1517 */ 1518 private static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos, 1519 int regionReplication, long ts) throws IOException { 1520 List<Put> puts = new ArrayList<>(); 1521 for (RegionInfo regionInfo : regionInfos) { 1522 if (RegionReplicaUtil.isDefaultReplica(regionInfo)) { 1523 Put put = makePutFromRegionInfo(regionInfo, ts); 1524 // New regions are added with initial state of CLOSED. 
1525 addRegionStateToPut(put, RegionState.State.CLOSED); 1526 // Add empty locations for region replicas so that number of replicas can be cached 1527 // whenever the primary region is looked up from meta 1528 for (int i = 1; i < regionReplication; i++) { 1529 addEmptyLocation(put, i); 1530 } 1531 puts.add(put); 1532 } 1533 } 1534 putsToMetaTable(connection, puts); 1535 LOG.info("Added {} regions to meta.", puts.size()); 1536 } 1537 1538 static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException { 1539 int limit = 10000; // Arbitrary limit. No room in our formatted 'task0000' below for more. 1540 int max = mergeRegions.size(); 1541 if (max > limit) { 1542 // Should never happen!!!!! But just in case. 1543 throw new RuntimeException("Can't merge " + max + " regions in one go; " + limit + 1544 " is upper-limit."); 1545 } 1546 int counter = 0; 1547 for (RegionInfo ri: mergeRegions) { 1548 String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++); 1549 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY). 1550 setRow(put.getRow()). 1551 setFamily(HConstants.CATALOG_FAMILY). 1552 setQualifier(Bytes.toBytes(qualifier)). 1553 setTimestamp(put.getTimestamp()). 1554 setType(Type.Put). 1555 setValue(RegionInfo.toByteArray(ri)). 1556 build()); 1557 } 1558 return put; 1559 } 1560 1561 /** 1562 * Merge regions into one in an atomic operation. Deletes the merging regions in 1563 * hbase:meta and adds the merged region. 1564 * @param connection connection we're using 1565 * @param mergedRegion the merged region 1566 * @param parentSeqNum Parent regions to merge and their next open sequence id used 1567 * by serial replication. Set to -1 if not needed by this table. 
1568 * @param sn the location of the region 1569 */ 1570 public static void mergeRegions(Connection connection, RegionInfo mergedRegion, 1571 Map<RegionInfo, Long> parentSeqNum, ServerName sn, int regionReplication) 1572 throws IOException { 1573 try (Table meta = getMetaHTable(connection)) { 1574 long time = HConstants.LATEST_TIMESTAMP; 1575 List<Mutation> mutations = new ArrayList<>(); 1576 List<RegionInfo> replicationParents = new ArrayList<>(); 1577 for (Map.Entry<RegionInfo, Long> e: parentSeqNum.entrySet()) { 1578 RegionInfo ri = e.getKey(); 1579 long seqNum = e.getValue(); 1580 // Deletes for merging regions 1581 mutations.add(makeDeleteFromRegionInfo(ri, time)); 1582 if (seqNum > 0) { 1583 mutations.add(makePutForReplicationBarrier(ri, seqNum, time)); 1584 replicationParents.add(ri); 1585 } 1586 } 1587 // Put for parent 1588 Put putOfMerged = makePutFromRegionInfo(mergedRegion, time); 1589 putOfMerged = addMergeRegions(putOfMerged, parentSeqNum.keySet()); 1590 // Set initial state to CLOSED. 1591 // NOTE: If initial state is not set to CLOSED then merged region gets added with the 1592 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 1593 // master tries to assign this offline region. This is followed by re-assignments of the 1594 // merged region from resumed {@link MergeTableRegionsProcedure} 1595 addRegionStateToPut(putOfMerged, RegionState.State.CLOSED); 1596 mutations.add(putOfMerged); 1597 // The merged is a new region, openSeqNum = 1 is fine. ServerName may be null 1598 // if crash after merge happened but before we got to here.. means in-memory 1599 // locations of offlined merged, now-closed, regions is lost. Should be ok. We 1600 // assign the merged region later. 
1601 if (sn != null) { 1602 addLocation(putOfMerged, sn, 1, mergedRegion.getReplicaId()); 1603 } 1604 1605 // Add empty locations for region replicas of the merged region so that number of replicas 1606 // can be cached whenever the primary region is looked up from meta 1607 for (int i = 1; i < regionReplication; i++) { 1608 addEmptyLocation(putOfMerged, i); 1609 } 1610 // add parent reference for serial replication 1611 if (!replicationParents.isEmpty()) { 1612 addReplicationParent(putOfMerged, replicationParents); 1613 } 1614 byte[] tableRow = Bytes.toBytes(mergedRegion.getRegionNameAsString() + HConstants.DELIMITER); 1615 multiMutate(connection, meta, tableRow, mutations); 1616 } 1617 } 1618 1619 /** 1620 * Splits the region into two in an atomic operation. Offlines the parent region with the 1621 * information that it is split into two, and also adds the daughter regions. Does not add the 1622 * location information to the daughter regions since they are not open yet. 1623 * @param connection connection we're using 1624 * @param parent the parent region which is split 1625 * @param parentOpenSeqNum the next open sequence id for parent region, used by serial 1626 * replication. -1 if not necessary. 
1627 * @param splitA Split daughter region A 1628 * @param splitB Split daughter region B 1629 * @param sn the location of the region 1630 */ 1631 public static void splitRegion(Connection connection, RegionInfo parent, long parentOpenSeqNum, 1632 RegionInfo splitA, RegionInfo splitB, ServerName sn, int regionReplication) 1633 throws IOException { 1634 try (Table meta = getMetaHTable(connection)) { 1635 long time = EnvironmentEdgeManager.currentTime(); 1636 // Put for parent 1637 Put putParent = makePutFromRegionInfo(RegionInfoBuilder.newBuilder(parent) 1638 .setOffline(true) 1639 .setSplit(true).build(), time); 1640 addDaughtersToPut(putParent, splitA, splitB); 1641 1642 // Puts for daughters 1643 Put putA = makePutFromRegionInfo(splitA, time); 1644 Put putB = makePutFromRegionInfo(splitB, time); 1645 if (parentOpenSeqNum > 0) { 1646 addReplicationBarrier(putParent, parentOpenSeqNum); 1647 addReplicationParent(putA, Collections.singletonList(parent)); 1648 addReplicationParent(putB, Collections.singletonList(parent)); 1649 } 1650 // Set initial state to CLOSED 1651 // NOTE: If initial state is not set to CLOSED then daughter regions get added with the 1652 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 1653 // master tries to assign these offline regions. This is followed by re-assignments of the 1654 // daughter regions from resumed {@link SplitTableRegionProcedure} 1655 addRegionStateToPut(putA, RegionState.State.CLOSED); 1656 addRegionStateToPut(putB, RegionState.State.CLOSED); 1657 1658 addSequenceNum(putA, 1, splitA.getReplicaId()); // new regions, openSeqNum = 1 is fine. 
1659 addSequenceNum(putB, 1, splitB.getReplicaId()); 1660 1661 // Add empty locations for region replicas of daughters so that number of replicas can be 1662 // cached whenever the primary region is looked up from meta 1663 for (int i = 1; i < regionReplication; i++) { 1664 addEmptyLocation(putA, i); 1665 addEmptyLocation(putB, i); 1666 } 1667 1668 byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER); 1669 multiMutate(connection, meta, tableRow, putParent, putA, putB); 1670 } 1671 } 1672 1673 /** 1674 * Update state of the table in meta. 1675 * @param connection what we use for update 1676 * @param state new state 1677 */ 1678 private static void updateTableState(Connection connection, TableState state) throws IOException { 1679 Put put = makePutFromTableState(state, EnvironmentEdgeManager.currentTime()); 1680 putToMetaTable(connection, put); 1681 LOG.info("Updated {} in hbase:meta", state); 1682 } 1683 1684 /** 1685 * Construct PUT for given state 1686 * @param state new state 1687 */ 1688 public static Put makePutFromTableState(TableState state, long ts) { 1689 Put put = new Put(state.getTableName().getName(), ts); 1690 put.addColumn(getTableFamily(), getTableStateColumn(), state.convert().toByteArray()); 1691 return put; 1692 } 1693 1694 /** 1695 * Remove state for table from meta 1696 * @param connection to use for deletion 1697 * @param table to delete state for 1698 */ 1699 public static void deleteTableState(Connection connection, TableName table) 1700 throws IOException { 1701 long time = EnvironmentEdgeManager.currentTime(); 1702 Delete delete = new Delete(table.getName()); 1703 delete.addColumns(getTableFamily(), getTableStateColumn(), time); 1704 deleteFromMetaTable(connection, delete); 1705 LOG.info("Deleted table " + table + " state from META"); 1706 } 1707 1708 private static void multiMutate(Connection connection, Table table, byte[] row, 1709 Mutation... 
mutations) throws IOException { 1710 multiMutate(connection, table, row, Arrays.asList(mutations)); 1711 } 1712 1713 /** 1714 * Performs an atomic multi-mutate operation against the given table. 1715 */ 1716 private static void multiMutate(Connection connection, final Table table, byte[] row, 1717 final List<Mutation> mutations) 1718 throws IOException { 1719 debugLogMutations(mutations); 1720 // TODO: Need rollback!!!! 1721 // TODO: Need Retry!!! 1722 // TODO: What for a timeout? Default write timeout? GET FROM HTABLE? 1723 // TODO: Review when we come through with ProcedureV2. 1724 RegionServerCallable<MutateRowsResponse, 1725 MultiRowMutationProtos.MultiRowMutationService.BlockingInterface> callable = 1726 new RegionServerCallable<MutateRowsResponse, 1727 MultiRowMutationProtos.MultiRowMutationService.BlockingInterface>( 1728 connection, table.getName(), row, null/*RpcController not used in this CPEP!*/) { 1729 @Override 1730 protected MutateRowsResponse rpcCall() throws Exception { 1731 final MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder(); 1732 for (Mutation mutation : mutations) { 1733 if (mutation instanceof Put) { 1734 builder.addMutationRequest(ProtobufUtil.toMutation( 1735 ClientProtos.MutationProto.MutationType.PUT, mutation)); 1736 } else if (mutation instanceof Delete) { 1737 builder.addMutationRequest(ProtobufUtil.toMutation( 1738 ClientProtos.MutationProto.MutationType.DELETE, mutation)); 1739 } else { 1740 throw new DoNotRetryIOException("multi in MetaEditor doesn't support " 1741 + mutation.getClass().getName()); 1742 } 1743 } 1744 // The call to #prepare that ran before this invocation will have populated HRegionLocation. 1745 HRegionLocation hrl = getLocation(); 1746 RegionSpecifier region = ProtobufUtil.buildRegionSpecifier( 1747 RegionSpecifierType.REGION_NAME, hrl.getRegion().getRegionName()); 1748 builder.setRegion(region); 1749 // The rpcController here is awkward. 
The Coprocessor Endpoint wants an instance of a 1750 // com.google.protobuf but we are going over an rpc that is all shaded protobuf so it 1751 // wants a org.apache.h.h.shaded.com.google.protobuf.RpcController. Set up a factory 1752 // that makes com.google.protobuf.RpcController and then copy into it configs. 1753 return getStub().mutateRows(null, builder.build()); 1754 } 1755 1756 @Override 1757 // Called on the end of the super.prepare call. Set the stub. 1758 protected void setStubByServiceName(ServerName serviceName/*Ignored*/) throws IOException { 1759 CoprocessorRpcChannel channel = table.coprocessorService(getRow()); 1760 setStub(MultiRowMutationProtos.MultiRowMutationService.newBlockingStub(channel)); 1761 } 1762 }; 1763 int writeTimeout = connection.getConfiguration().getInt(HConstants.HBASE_RPC_WRITE_TIMEOUT_KEY, 1764 connection.getConfiguration().getInt(HConstants.HBASE_RPC_TIMEOUT_KEY, 1765 HConstants.DEFAULT_HBASE_RPC_TIMEOUT)); 1766 // The region location should be cached in connection. Call prepare so this callable picks 1767 // up the region location (see super.prepare method). 1768 callable.prepare(false); 1769 callable.call(writeTimeout); 1770 } 1771 1772 /** 1773 * Updates the location of the specified region in hbase:meta to be the specified server hostname 1774 * and startcode. 1775 * <p> 1776 * Uses passed catalog tracker to get a connection to the server hosting hbase:meta and makes 1777 * edits to that region. 
1778 * @param connection connection we're using 1779 * @param regionInfo region to update location of 1780 * @param openSeqNum the latest sequence number obtained when the region was open 1781 * @param sn Server name 1782 * @param masterSystemTime wall clock time from master if passed in the open region RPC 1783 */ 1784 @VisibleForTesting 1785 public static void updateRegionLocation(Connection connection, RegionInfo regionInfo, 1786 ServerName sn, long openSeqNum, long masterSystemTime) throws IOException { 1787 updateLocation(connection, regionInfo, sn, openSeqNum, masterSystemTime); 1788 } 1789 1790 /** 1791 * Updates the location of the specified region to be the specified server. 1792 * <p> 1793 * Connects to the specified server which should be hosting the specified catalog region name to 1794 * perform the edit. 1795 * @param connection connection we're using 1796 * @param regionInfo region to update location of 1797 * @param sn Server name 1798 * @param openSeqNum the latest sequence number obtained when the region was open 1799 * @param masterSystemTime wall clock time from master if passed in the open region RPC 1800 * @throws IOException In particular could throw {@link java.net.ConnectException} if the server 1801 * is down on other end. 1802 */ 1803 private static void updateLocation(Connection connection, RegionInfo regionInfo, ServerName sn, 1804 long openSeqNum, long masterSystemTime) throws IOException { 1805 // region replicas are kept in the primary region's row 1806 Put put = new Put(getMetaKeyForRegion(regionInfo), masterSystemTime); 1807 addRegionInfo(put, regionInfo); 1808 addLocation(put, sn, openSeqNum, regionInfo.getReplicaId()); 1809 putToMetaTable(connection, put); 1810 LOG.info("Updated row {} with server=", regionInfo.getRegionNameAsString(), sn); 1811 } 1812 1813 /** 1814 * Deletes the specified region from META. 
1815 * @param connection connection we're using 1816 * @param regionInfo region to be deleted from META 1817 */ 1818 public static void deleteRegionInfo(Connection connection, RegionInfo regionInfo) 1819 throws IOException { 1820 Delete delete = new Delete(regionInfo.getRegionName()); 1821 delete.addFamily(getCatalogFamily(), HConstants.LATEST_TIMESTAMP); 1822 deleteFromMetaTable(connection, delete); 1823 LOG.info("Deleted " + regionInfo.getRegionNameAsString()); 1824 } 1825 1826 /** 1827 * Deletes the specified regions from META. 1828 * @param connection connection we're using 1829 * @param regionsInfo list of regions to be deleted from META 1830 */ 1831 public static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo) 1832 throws IOException { 1833 deleteRegionInfos(connection, regionsInfo, EnvironmentEdgeManager.currentTime()); 1834 } 1835 1836 /** 1837 * Deletes the specified regions from META. 1838 * @param connection connection we're using 1839 * @param regionsInfo list of regions to be deleted from META 1840 */ 1841 private static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo, 1842 long ts) 1843 throws IOException { 1844 List<Delete> deletes = new ArrayList<>(regionsInfo.size()); 1845 for (RegionInfo hri : regionsInfo) { 1846 Delete e = new Delete(hri.getRegionName()); 1847 e.addFamily(getCatalogFamily(), ts); 1848 deletes.add(e); 1849 } 1850 deleteFromMetaTable(connection, deletes); 1851 LOG.info("Deleted {} regions from META", regionsInfo.size()); 1852 LOG.debug("Deleted regions: {}", regionsInfo); 1853 } 1854 1855 /** 1856 * Overwrites the specified regions from hbase:meta. Deletes old rows for the given regions and 1857 * adds new ones. Regions added back have state CLOSED. 
* @param connection connection we're using
   * @param regionInfos list of regions to be added to META
   * @param regionReplication forwarded unchanged to {@code addRegionsToMeta}
   */
  public static void overwriteRegions(Connection connection, List<RegionInfo> regionInfos,
      int regionReplication) throws IOException {
    // use master time for delete marker and the Put
    long now = EnvironmentEdgeManager.currentTime();
    deleteRegionInfos(connection, regionInfos, now);
    // The delete markers above carry timestamp `now`; the Puts below are written at `now + 1` so
    // that the markers cannot eclipse them even if both land in the same millisecond on the
    // server. See HBASE-9906 and HBASE-9879 for the original problem, and HBASE-13875 for the
    // switch to explicit master timestamps, which removed the need for a sleep between the calls.
    addRegionsToMeta(connection, regionInfos, regionReplication, now + 1);
    LOG.info("Overwritten " + regionInfos.size() + " regions to Meta");
    LOG.debug("Overwritten regions: {} ", regionInfos);
  }

  /**
   * Deletes merge qualifiers for the specified merge region.
   * <p>
   * Reads the region's row from hbase:meta, collects every cell accepted by
   * {@code isMergeQualifierPrefix} and deletes exactly those columns. If the row has no cells, or
   * none of them is a merge qualifier, nothing is written.
   * @param connection connection we're using
   * @param mergeRegion the merged region
   */
  public static void deleteMergeQualifiers(Connection connection, final RegionInfo mergeRegion)
      throws IOException {
    Delete delete = new Delete(mergeRegion.getRegionName());
    // NOTE: We are doing a new hbase:meta read here.
    Cell[] cells = getRegionResult(connection, mergeRegion.getRegionName()).rawCells();
    if (cells == null || cells.length == 0) {
      return;
    }
    List<byte[]> qualifiers = new ArrayList<>();
    for (Cell cell : cells) {
      if (!isMergeQualifierPrefix(cell)) {
        continue;
      }
      byte[] qualifier = CellUtil.cloneQualifier(cell);
      qualifiers.add(qualifier);
      delete.addColumns(getCatalogFamily(), qualifier, HConstants.LATEST_TIMESTAMP);
    }
    if (qualifiers.isEmpty()) {
      // Nothing was added to the Delete. A Delete that names no family or column applies to the
      // entire row, so submitting it would remove the merged region's row from hbase:meta.
      LOG.info("No merge qualifiers found for " + mergeRegion.getRegionNameAsString() +
        " in hbase:meta; nothing to delete");
      return;
    }
    deleteFromMetaTable(connection, delete);
    LOG.info("Deleted merge references in " + mergeRegion.getRegionNameAsString() +
      ", deleted qualifiers " + qualifiers.stream().map(Bytes::toStringBinary).
      collect(Collectors.joining(", ")));
  }

  /**
   * Adds the serialized region info cell (catalog family, regioninfo qualifier) to the Put, using
   * the Put's own row and timestamp.
   * @param p the Put to add to
   * @param hri the region; its default-replica form is what gets serialized
   * @return the same Put, for chaining
   */
  public static Put addRegionInfo(final Put p, final RegionInfo hri)
      throws IOException {
    p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(p.getRow())
      .setFamily(getCatalogFamily())
      .setQualifier(HConstants.REGIONINFO_QUALIFIER)
      .setTimestamp(p.getTimestamp())
      .setType(Type.Put)
      // Serialize the Default Replica HRI otherwise scan of hbase:meta
      // shows an info:regioninfo value with encoded name and region
      // name that differs from that of the hbase:meta row.
      .setValue(RegionInfo.toByteArray(RegionReplicaUtil.getRegionInfoForDefaultReplica(hri)))
      .build());
    return p;
  }

  /**
   * Adds the three location cells for a replica to the Put: server address, server start code and
   * open sequence number, each in the catalog family under the replica-specific qualifier.
   * @param p the Put to add to
   * @param sn server whose address and start code are written
   * @param openSeqNum sequence number written to the seqnum column
   * @param replicaId replica whose columns are written
   * @return the same Put, for chaining
   */
  public static Put addLocation(Put p, ServerName sn, long openSeqNum, int replicaId)
      throws IOException {
    CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
    return p.add(builder.clear()
              .setRow(p.getRow())
              .setFamily(getCatalogFamily())
              .setQualifier(getServerColumn(replicaId))
              .setTimestamp(p.getTimestamp())
              .setType(Cell.Type.Put)
              .setValue(Bytes.toBytes(sn.getAddress().toString()))
              .build())
            .add(builder.clear()
              .setRow(p.getRow())
              .setFamily(getCatalogFamily())
              .setQualifier(getStartCodeColumn(replicaId))
              .setTimestamp(p.getTimestamp())
              .setType(Cell.Type.Put)
              .setValue(Bytes.toBytes(sn.getStartcode()))
              .build())
            .add(builder.clear()
              .setRow(p.getRow())
              .setFamily(getCatalogFamily())
              .setQualifier(getSeqNumColumn(replicaId))
              .setTimestamp(p.getTimestamp())
              .setType(Type.Put)
              .setValue(Bytes.toBytes(openSeqNum))
              .build());
  }

  /**
   * Writes a region name to the stream, doubling every {@code ESCAPE_BYTE} so that the name can
   * later be told apart from the escape + separator pair used between names.
   */
  private static void writeRegionName(ByteArrayOutputStream out, byte[] regionName) {
    for (byte b : regionName) {
      if (b == ESCAPE_BYTE) {
        out.write(ESCAPE_BYTE);
      }
      out.write(b);
    }
  }

  /**
   * Serializes the region names of the given parents into one byte array. Names are escaped via
   * {@link #writeRegionName} and joined with {@code ESCAPE_BYTE} followed by
   * {@code SEPARATED_BYTE}.
   * @param parents parent regions; must contain at least one element, since the first element is
   *          read unconditionally
   * @return the encoded parent region names
   */
  @VisibleForTesting
  public static byte[] getParentsBytes(List<RegionInfo> parents) {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    Iterator<RegionInfo> iter = parents.iterator();
    writeRegionName(bos, iter.next().getRegionName());
    while (iter.hasNext()) {
      bos.write(ESCAPE_BYTE);
      bos.write(SEPARATED_BYTE);
      writeRegionName(bos, iter.next().getRegionName());
    }
    return bos.toByteArray();
  }

  /**
   * Decodes a value produced by {@link #getParentsBytes}: an escape byte followed by the separator
   * byte ends the current name, an escape byte followed by anything else yields that following
   * byte literally.
   * <p>
   * NOTE(review): the byte after an escape is read without a bounds check, so a value ending in a
   * single dangling escape byte would index past the end of the array. Values written by
   * {@link #getParentsBytes} never end that way.
   * @param bytes the encoded parent region names
   * @return the decoded region names, in encounter order
   */
  private static List<byte[]> parseParentsBytes(byte[] bytes) {
    List<byte[]> parents = new ArrayList<>();
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    for (int i = 0; i < bytes.length; i++) {
      if (bytes[i] == ESCAPE_BYTE) {
        i++;
        if (bytes[i] == SEPARATED_BYTE) {
          parents.add(bos.toByteArray());
          bos.reset();
          continue;
        }
        // fall through to append the byte
      }
      bos.write(bytes[i]);
    }
    if (bos.size() > 0) {
      parents.add(bos.toByteArray());
    }
    return parents;
  }

  /**
   * Adds a cell holding the encoded parent region names to the Put, in the replication barrier
   * family under the replication-parent qualifier.
   */
  private static void addReplicationParent(Put put, List<RegionInfo> parents) throws IOException {
    byte[] value = getParentsBytes(parents);
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
      .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(REPLICATION_PARENT_QUALIFIER)
      .setTimestamp(put.getTimestamp()).setType(Type.Put).setValue(value).build());
  }

  /**
   * Builds a Put on the region's row that contains only a replication barrier cell.
   * @param regionInfo region whose name is used as the row
   * @param openSeqNum barrier value
   * @param ts timestamp of the Put
   */
  public static Put makePutForReplicationBarrier(RegionInfo regionInfo, long openSeqNum, long ts)
      throws IOException {
    Put put = new Put(regionInfo.getRegionName(), ts);
    addReplicationBarrier(put, openSeqNum);
    return put;
  }

  /**
   * Adds a replication barrier cell (replication barrier family, seqnum qualifier) with the given
   * sequence number to the Put, using the Put's own row and timestamp.
   */
  public static void addReplicationBarrier(Put put, long openSeqNum) throws IOException {
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
      .setRow(put.getRow())
      .setFamily(HConstants.REPLICATION_BARRIER_FAMILY)
      .setQualifier(HConstants.SEQNUM_QUALIFIER)
      .setTimestamp(put.getTimestamp())
      .setType(Type.Put)
      .setValue(Bytes.toBytes(openSeqNum))
      .build());
  }

  /**
   * Adds server, start code and seqnum cells for the given replica to the Put without setting a
   * value on any of them.
   * @return the same Put, for chaining
   */
  private static Put addEmptyLocation(Put p, int replicaId) throws IOException {
    CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
    return p.add(builder.clear()
                .setRow(p.getRow())
                .setFamily(getCatalogFamily())
                .setQualifier(getServerColumn(replicaId))
                .setTimestamp(p.getTimestamp())
                .setType(Type.Put)
                .build())
            .add(builder.clear()
                .setRow(p.getRow())
                .setFamily(getCatalogFamily())
                .setQualifier(getStartCodeColumn(replicaId))
                .setTimestamp(p.getTimestamp())
                .setType(Cell.Type.Put)
                .build())
            .add(builder.clear()
                .setRow(p.getRow())
                .setFamily(getCatalogFamily())
                .setQualifier(getSeqNumColumn(replicaId))
                .setTimestamp(p.getTimestamp())
                .setType(Cell.Type.Put)
                .build());
  }

  /**
   * Holder for what a meta row says about a region's replication barriers: the barrier values,
   * the region state (may be null when the row has no state cell) and the names of the parent
   * regions. The arrays and list are stored and returned as given, without copying.
   */
  public static final class ReplicationBarrierResult {
    private final long[] barriers;
    private final RegionState.State state;
    private final List<byte[]> parentRegionNames;

    ReplicationBarrierResult(long[] barriers, State state, List<byte[]> parentRegionNames) {
      this.barriers = barriers;
      this.state = state;
      this.parentRegionNames = parentRegionNames;
    }

    public long[] getBarriers() {
      return barriers;
    }

    public RegionState.State getState() {
      return state;
    }

    public List<byte[]> getParentRegionNames() {
      return parentRegionNames;
    }

    @Override
    public String toString() {
      return "ReplicationBarrierResult [barriers=" + Arrays.toString(barriers) + ", state=" +
        state + ", parentRegionNames=" +
        parentRegionNames.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")) +
        "]";
    }
  }

  /** Decodes the value of a barrier cell as a long. */
  private static long getReplicationBarrier(Cell c) {
    return Bytes.toLong(c.getValueArray(), c.getValueOffset(), c.getValueLength());
  }

  /**
   * Extracts all replication barrier values present in the result.
   * @return the barrier values, sorted ascending with duplicates removed
   */
  public static long[] getReplicationBarriers(Result result) {
    return result.getColumnCells(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER)
      .stream().mapToLong(MetaTableAccessor::getReplicationBarrier).sorted().distinct().toArray();
  }

  /**
   * Builds a {@link ReplicationBarrierResult} from one meta row. A missing state cell gives a
   * null state; a missing parent cell gives an empty parent list.
   */
  private static ReplicationBarrierResult getReplicationBarrierResult(Result result) {
    long[] barriers = getReplicationBarriers(result);
    byte[] stateBytes = result.getValue(getCatalogFamily(), getRegionStateColumn());
    RegionState.State state =
      stateBytes != null ? RegionState.State.valueOf(Bytes.toString(stateBytes)) : null;
    byte[] parentRegionsBytes =
      result.getValue(HConstants.REPLICATION_BARRIER_FAMILY, REPLICATION_PARENT_QUALIFIER);
    List<byte[]> parentRegionNames =
      parentRegionsBytes != null ? parseParentsBytes(parentRegionsBytes) : Collections.emptyList();
    return new ReplicationBarrierResult(barriers, state, parentRegionNames);
  }

  /**
   * Looks up the replication barrier information of a region by reverse-scanning hbase:meta from
   * the position of {@code row} in {@code tableName} towards the start of the table, and returning
   * the first row whose encoded region name equals {@code encodedRegionName}.
   * @param conn connection used to reach hbase:meta
   * @param tableName table the region belongs to
   * @param row a row used to position the start of the scan
   * @param encodedRegionName encoded name of the wanted region, as bytes
   * @return the barrier result of the matching row, or an empty result (no barriers, null state,
   *         no parents) when the scan ends without a match
   */
  public static ReplicationBarrierResult getReplicationBarrierResult(Connection conn,
      TableName tableName, byte[] row, byte[] encodedRegionName) throws IOException {
    byte[] metaStartKey = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
    byte[] metaStopKey =
      RegionInfo.createRegionName(tableName, HConstants.EMPTY_START_ROW, "", false);
    Scan scan = new Scan().withStartRow(metaStartKey).withStopRow(metaStopKey)
      .addColumn(getCatalogFamily(), getRegionStateColumn())
      .addFamily(HConstants.REPLICATION_BARRIER_FAMILY).readAllVersions().setReversed(true)
      .setCaching(10);
    try (Table table = getMetaHTable(conn); ResultScanner scanner = table.getScanner(scan)) {
      for (Result result;;) {
        result = scanner.next();
        if (result == null) {
          return new ReplicationBarrierResult(new long[0], null, Collections.emptyList());
        }
        byte[] regionName = result.getRow();
        // TODO: we may look up a region which has already been split or merged so we need to check
        // whether the encoded name matches. Need to find a way to quit earlier when there is no
        // record for the given region, for now it will scan to the end of the table.
        if (!Bytes.equals(encodedRegionName,
          Bytes.toBytes(RegionInfo.encodeRegionName(regionName)))) {
          continue;
        }
        return getReplicationBarrierResult(result);
      }
    }
  }

  /**
   * Reads all versions of the replication barrier column of the given region row.
   * @param conn connection used to reach hbase:meta
   * @param regionName region name, used as the row key
   * @return the barrier values, sorted ascending with duplicates removed
   */
  public static long[] getReplicationBarrier(Connection conn, byte[] regionName)
      throws IOException {
    try (Table table = getMetaHTable(conn)) {
      Result result = table.get(new Get(regionName)
        .addColumn(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER)
        .readAllVersions());
      return getReplicationBarriers(result);
    }
  }

  /**
   * Scans the replication rows of a table in hbase:meta and returns, for every row that has a
   * barrier value, the encoded region name paired with that value. Rows without a barrier value
   * are skipped.
   * <p>
   * NOTE(review): the value comes from {@code Result#getValue}, presumably the newest barrier
   * version - confirm against the scan set up by {@code scanMeta}.
   */
  public static List<Pair<String, Long>> getTableEncodedRegionNameAndLastBarrier(Connection conn,
      TableName tableName) throws IOException {
    List<Pair<String, Long>> list = new ArrayList<>();
    scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION),
      getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, r -> {
        byte[] value =
          r.getValue(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER);
        if (value == null) {
          return true;
        }
        long lastBarrier = Bytes.toLong(value);
        String encodedRegionName = RegionInfo.encodeRegionName(r.getRow());
        list.add(Pair.newPair(encodedRegionName, lastBarrier));
        return true;
      });
    return list;
  }

  /**
   * Scans the replication rows of a table in hbase:meta with a {@link FirstKeyOnlyFilter} and
   * returns the encoded region name of every row visited.
   */
  public static List<String> getTableEncodedRegionNamesForSerialReplication(Connection conn,
      TableName tableName) throws IOException {
    List<String> list = new ArrayList<>();
    scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION),
      getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION,
      new FirstKeyOnlyFilter(), Integer.MAX_VALUE, r -> {
        list.add(RegionInfo.encodeRegionName(r.getRow()));
        return true;
      });
    return list;
  }

  /**
   * Logs every mutation at debug level on the meta logger; does nothing when debug is disabled.
   */
  private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException {
    if (!METALOG.isDebugEnabled()) {
      return;
    }
    // Logging each mutation in separate line makes it easier to see diff between them visually
    // because of common starting indentation.
    for (Mutation mutation : mutations) {
      debugLogMutation(mutation);
    }
  }

  /** Logs one mutation as its simple class name followed by its JSON form. */
  private static void debugLogMutation(Mutation p) throws IOException {
    METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON());
  }

  /**
   * Adds the open sequence number cell for a replica to the Put (catalog family, replica-specific
   * seqnum qualifier), using the Put's own row and timestamp.
   * @return the same Put, for chaining
   */
  private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException {
    return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY)
              .setRow(p.getRow())
              .setFamily(HConstants.CATALOG_FAMILY)
              .setQualifier(getSeqNumColumn(replicaId))
              .setTimestamp(p.getTimestamp())
              .setType(Type.Put)
              .setValue(Bytes.toBytes(openSeqNum))
              .build());
  }
}