/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.assignment;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.CatalogFamilyFormat;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.Cell.Type;
import org.apache.hadoop.hbase.CellBuilderFactory;
import org.apache.hadoop.hbase.CellBuilderType;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.ClientMetaTableAccessor;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.AsyncTable;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.master.region.MasterRegion;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.procedure2.util.StringUtils;
import org.apache.hadoop.hbase.replication.ReplicationBarrierFamilyFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FutureUtils;
import org.apache.hadoop.hbase.wal.WALSplitUtil;
import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MultiRowMutationService;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse;

/**
 * Store Region State to hbase:meta table.
 * <p/>
 * Rows for the meta table's own regions are written to the master local region (and, for the
 * first meta region, mirrored to ZooKeeper); rows for every other table are written to
 * hbase:meta.
 */
@InterfaceAudience.Private
public class RegionStateStore {
  private static final Logger LOG = LoggerFactory.getLogger(RegionStateStore.class);
  private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META");

  /** The delimiter for meta columns for replicaIds &gt; 0 */
  protected static final char META_REPLICA_ID_DELIMITER = '_';

  private final MasterServices master;

  private final MasterRegion masterRegion;

  public RegionStateStore(MasterServices master, MasterRegion masterRegion) {
    this.master = master;
    this.masterRegion = masterRegion;
  }

  /**
   * Callback invoked once per region replica found in a catalog row.
   */
  @FunctionalInterface
  public interface RegionStateVisitor {
    void visitRegionState(Result result, RegionInfo regionInfo, State state,
      ServerName regionLocation, ServerName lastHost, long openSeqNum);
  }

  /**
   * Scans all region rows through {@link MetaTableAccessor#fullScanRegions} and hands every
   * non-empty row to {@link #visitMetaEntry(RegionStateVisitor, Result)}.
   * @param visitor callback receiving the decoded state of each region replica
   * @throws IOException if the scan or the decoding of a row fails
   */
  public void visitMeta(final RegionStateVisitor visitor) throws IOException {
    MetaTableAccessor.fullScanRegions(master.getConnection(),
      new ClientMetaTableAccessor.Visitor() {
        @Override
        public boolean visit(final Result r) throws IOException {
          if (r == null || r.isEmpty()) {
            LOG.debug("NULL result from meta - ignoring but this is strange.");
            return true;
          }
          // Capture the trace flag once so that the start time and the log statement always
          // agree, even if the log level is changed while the row is being processed.
          final boolean traceEnabled = LOG.isTraceEnabled();
          final long startTime = traceEnabled ? EnvironmentEdgeManager.currentTime() : 0;
          visitMetaEntry(visitor, r);
          if (traceEnabled) {
            long endTime = EnvironmentEdgeManager.currentTime();
            LOG.trace("[T] LOAD META PERF {}", StringUtils.humanTimeDiff(endTime - startTime));
          }
          return true;
        }
      });
  }

  /**
   * Queries META table for the passed region encoded name, delegating action upon results to the
   * {@code RegionStateVisitor} passed as second parameter.
   * @param regionEncodedName encoded name for the Region we want to query META for.
   * @param visitor           The {@code RegionStateVisitor} instance to react over the query
   *                          results.
   * @throws IOException If some error occurs while querying META or parsing results.
   */
  public void visitMetaForRegion(final String regionEncodedName, final RegionStateVisitor visitor)
    throws IOException {
    Result result =
      MetaTableAccessor.scanByRegionEncodedName(master.getConnection(), regionEncodedName);
    if (result != null) {
      visitMetaEntry(visitor, result);
    }
  }

  /**
   * Decodes one catalog row and invokes {@code visitor} once for every region replica location
   * found in it. Rows without locations, and null location / region info slots, are skipped.
   * @param visitor callback receiving the decoded state of each region replica
   * @param result  the catalog row to decode
   * @throws IOException declared for callers; propagated from the decoding helpers
   */
  public static void visitMetaEntry(final RegionStateVisitor visitor, final Result result)
    throws IOException {
    final RegionLocations rl = CatalogFamilyFormat.getRegionLocations(result);
    if (rl == null) {
      return;
    }

    final HRegionLocation[] locations = rl.getRegionLocations();
    if (locations == null) {
      return;
    }

    for (HRegionLocation hrl : locations) {
      if (hrl == null) {
        continue;
      }

      final RegionInfo regionInfo = hrl.getRegion();
      if (regionInfo == null) {
        continue;
      }

      final int replicaId = regionInfo.getReplicaId();
      final State state = getRegionState(result, regionInfo);

      final ServerName lastHost = hrl.getServerName();
      final ServerName regionLocation = MetaTableAccessor.getTargetServerName(result, replicaId);
      final long openSeqNum = hrl.getSeqNum();

      LOG.debug(
        "Load hbase:meta entry region={}, regionState={}, lastHost={}, "
          + "regionLocation={}, openSeqNum={}",
        regionInfo.getEncodedName(), state, lastHost, regionLocation, openSeqNum);
      visitor.visitRegionState(result, regionInfo, state, regionLocation, lastHost, openSeqNum);
    }
  }

  /**
   * Builds the {@link Put} that persists the current state (and, where applicable, the location,
   * open sequence number and replication barrier) of the given region state node.
   * @param regionStateNode the in-memory node whose state should be persisted
   * @return the Put to apply to the catalog row of the region
   * @throws IOException if building the cells or looking up the table descriptor fails
   */
  private Put generateUpdateRegionLocationPut(RegionStateNode regionStateNode) throws IOException {
    final long time = EnvironmentEdgeManager.currentTime();
    // Read the state exactly once so that openSeqNum and the persisted state are derived from the
    // same snapshot of the node.
    final State state = regionStateNode.getState();
    final long openSeqNum =
      state == State.OPEN ? regionStateNode.getOpenSeqNum() : HConstants.NO_SEQNUM;
    final RegionInfo regionInfo = regionStateNode.getRegionInfo();
    final ServerName regionLocation = regionStateNode.getRegionLocation();
    final TransitRegionStateProcedure rit = regionStateNode.getProcedure();
    final long pid = rit != null ? rit.getProcId() : Procedure.NO_PROC_ID;
    final int replicaId = regionInfo.getReplicaId();
    final Put put = new Put(CatalogFamilyFormat.getMetaKeyForRegion(regionInfo), time);
    MetaTableAccessor.addRegionInfo(put, regionInfo);
    final StringBuilder info =
      new StringBuilder("pid=").append(pid).append(" updating hbase:meta row=")
        .append(regionInfo.getEncodedName()).append(", regionState=").append(state);
    if (openSeqNum >= 0) {
      Preconditions.checkArgument(state == State.OPEN && regionLocation != null,
        "Open region should be on a server");
      MetaTableAccessor.addLocation(put, regionLocation, openSeqNum, replicaId);
      // only update replication barrier for default replica
      if (
        replicaId == RegionInfo.DEFAULT_REPLICA_ID
          && hasGlobalReplicationScope(regionInfo.getTable())
      ) {
        ReplicationBarrierFamilyFormat.addReplicationBarrier(put, openSeqNum);
        info.append(", repBarrier=").append(openSeqNum);
      }
      info.append(", openSeqNum=").append(openSeqNum);
      info.append(", regionLocation=").append(regionLocation);
    } else if (regionLocation != null) {
      // Ideally, if no regionLocation, write null to the hbase:meta but this will confuse clients
      // currently; they want a server to hit. TODO: Make clients wait if no location.
      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
        .setFamily(HConstants.CATALOG_FAMILY)
        .setQualifier(CatalogFamilyFormat.getServerNameColumn(replicaId))
        .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
        .setValue(Bytes.toBytes(regionLocation.getServerName())).build());
      info.append(", regionLocation=").append(regionLocation);
    }
    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
      .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getStateColumn(replicaId))
      .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(state.name()))
      .build());
    LOG.info(info.toString());
    return put;
  }

  /**
   * Persists the state of the given region state node. For the first meta region the location is
   * additionally mirrored to ZooKeeper once the local write has succeeded.
   * @param regionStateNode the node to persist
   * @return a future completed when the write is done; completed exceptionally on failure
   */
  CompletableFuture<Void> updateRegionLocation(RegionStateNode regionStateNode) {
    Put put;
    try {
      put = generateUpdateRegionLocationPut(regionStateNode);
    } catch (IOException e) {
      return FutureUtils.failedFuture(e);
    }
    RegionInfo regionInfo = regionStateNode.getRegionInfo();
    State state = regionStateNode.getState();
    CompletableFuture<Void> future = updateRegionLocation(regionInfo, state, put);
    if (regionInfo.isMetaRegion() && regionInfo.isFirst()) {
      // mirror the meta location to zookeeper
      // we store meta location in master local region which means the above method is
      // synchronous(we just wrap the result with a CompletableFuture to make it look like
      // asynchronous), so it is OK to just call this method directly here
      assert future.isDone();
      if (!future.isCompletedExceptionally()) {
        try {
          mirrorMetaLocation(regionInfo, regionStateNode.getRegionLocation(), state);
        } catch (IOException e) {
          return FutureUtils.failedFuture(e);
        }
      }
    }
    return future;
  }

  /**
   * Writes the meta region location and state to ZooKeeper, wrapping {@link KeeperException} in
   * an {@link IOException}.
   */
  private void mirrorMetaLocation(RegionInfo regionInfo, ServerName serverName, State state)
    throws IOException {
    try {
      MetaTableLocator.setMetaLocation(master.getZooKeeper(), serverName, regionInfo.getReplicaId(),
        state);
    } catch (KeeperException e) {
      throw new IOException(e);
    }
  }

  /**
   * Deletes the mirrored ZooKeeper locations of the meta replicas with ids in
   * {@code [newReplicaCount, oldReplicaCount)}, wrapping {@link KeeperException} in an
   * {@link IOException}.
   */
  private void removeMirrorMetaLocation(int oldReplicaCount, int newReplicaCount)
    throws IOException {
    try {
      for (int i = newReplicaCount; i < oldReplicaCount; i++) {
        MetaTableLocator.deleteMetaLocation(master.getZooKeeper(), i);
      }
    } catch (KeeperException e) {
      throw new IOException(e);
    }
  }

  /**
   * Applies {@code put} to the master local region (for meta regions, synchronously) or to
   * hbase:meta (for all other regions, asynchronously). A failed write aborts the master.
   * @param regionInfo the region being updated; only used for routing and logging
   * @param state      the state being persisted; only used for logging
   * @param put        the mutation to apply
   * @return a future completed when the write is done
   */
  private CompletableFuture<Void> updateRegionLocation(RegionInfo regionInfo, State state,
    Put put) {
    CompletableFuture<Void> future;
    if (regionInfo.isMetaRegion()) {
      try {
        masterRegion.update(r -> r.put(put));
        future = CompletableFuture.completedFuture(null);
      } catch (Exception e) {
        future = FutureUtils.failedFuture(e);
      }
    } else {
      AsyncTable<?> table = master.getAsyncConnection().getTable(TableName.META_TABLE_NAME);
      future = table.put(put);
    }
    FutureUtils.addListener(future, (r, e) -> {
      if (e != null) {
        // TODO: Revist!!!! Means that if a server is loaded, then we will abort our host!
        // In tests we abort the Master!
        String msg = String.format("FAILED persisting region=%s state=%s",
          regionInfo.getShortNameToLog(), state);
        LOG.error(msg, e);
        master.abort(msg, e);
      }
    });
    return future;
  }

  /**
   * Returns the sequence id to record as replication barrier for a parent region: the max region
   * sequence id found by {@link WALSplitUtil} plus one, or {@link HConstants#NO_SEQNUM} when no
   * positive sequence id is found.
   */
  private long getOpenSeqNumForParentRegion(RegionInfo region) throws IOException {
    MasterFileSystem fs = master.getMasterFileSystem();
    long maxSeqId = WALSplitUtil.getMaxRegionSequenceId(master.getConfiguration(), region,
      fs::getFileSystem, fs::getWALFileSystem);
    return maxSeqId > 0 ? maxSeqId + 1 : HConstants.NO_SEQNUM;
  }

  /**
   * Performs an atomic multi-mutate operation against the given table. Used by the likes of merge
   * and split as these want to make atomic mutations across multiple rows.
   * @param ri        region whose default-replica name is used to locate the hosting meta region
   * @param mutations the Puts and Deletes to apply atomically
   * @throws DoNotRetryIOException if a mutation is neither a Put nor a Delete
   * @throws IOException           if the coprocessor call fails
   */
  private void multiMutate(RegionInfo ri, List<Mutation> mutations) throws IOException {
    debugLogMutations(mutations);
    byte[] row =
      Bytes.toBytes(RegionReplicaUtil.getRegionInfoForDefaultReplica(ri).getRegionNameAsString()
        + HConstants.DELIMITER);
    MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder();
    for (Mutation mutation : mutations) {
      if (mutation instanceof Put) {
        builder.addMutationRequest(
          ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.PUT, mutation));
      } else if (mutation instanceof Delete) {
        builder.addMutationRequest(
          ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.DELETE, mutation));
      } else {
        throw new DoNotRetryIOException(
          "multi in MetaEditor doesn't support " + mutation.getClass().getName());
      }
    }
    MutateRowsRequest request = builder.build();
    AsyncTable<?> table =
      master.getConnection().toAsyncConnection().getTable(TableName.META_TABLE_NAME);
    CompletableFuture<MutateRowsResponse> future = table.<MultiRowMutationService,
      MutateRowsResponse> coprocessorService(MultiRowMutationService::newStub,
        (stub, controller, done) -> stub.mutateRows(controller, request, done), row);
    FutureUtils.get(future);
  }

  /** Returns a new {@link Table} handle for hbase:meta; the caller must close it. */
  private Table getMetaTable() throws IOException {
    return master.getConnection().getTable(TableName.META_TABLE_NAME);
  }

  /** Fetches the catalog family of the hbase:meta row for the given region. */
  private Result getRegionCatalogResult(RegionInfo region) throws IOException {
    Get get =
      new Get(CatalogFamilyFormat.getMetaKeyForRegion(region)).addFamily(HConstants.CATALOG_FAMILY);
    try (Table table = getMetaTable()) {
      return table.get(get);
    }
  }

  /** Adds the open sequence number cell for the given replica to {@code p} and returns it. */
  private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException {
    return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow())
      .setFamily(HConstants.CATALOG_FAMILY)
      .setQualifier(CatalogFamilyFormat.getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp())
      .setType(Type.Put).setValue(Bytes.toBytes(openSeqNum)).build());
  }

  // ============================================================================================
  // Update Region Splitting State helpers
  // ============================================================================================
  /**
   * Splits the region into two in an atomic operation. Offlines the parent region with the
   * information that it is split into two, and also adds the daughter regions. Does not add the
   * location information to the daughter regions since they are not open yet.
   * @param parent     the region being split
   * @param splitA     first daughter region
   * @param splitB     second daughter region
   * @param serverName not used by this method; kept for interface compatibility
   * @param htd        descriptor of the table the regions belong to
   * @throws IOException if reading the parent sequence id or writing to hbase:meta fails
   */
  public void splitRegion(RegionInfo parent, RegionInfo splitA, RegionInfo splitB,
    ServerName serverName, TableDescriptor htd) throws IOException {
    long parentOpenSeqNum = HConstants.NO_SEQNUM;
    if (htd.hasGlobalReplicationScope()) {
      parentOpenSeqNum = getOpenSeqNumForParentRegion(parent);
    }
    long time = EnvironmentEdgeManager.currentTime();
    // Put for parent
    Put putParent = MetaTableAccessor.makePutFromRegionInfo(
      RegionInfoBuilder.newBuilder(parent).setOffline(true).setSplit(true).build(), time);
    MetaTableAccessor.addDaughtersToPut(putParent, splitA, splitB);

    // Puts for daughters
    Put putA = MetaTableAccessor.makePutFromRegionInfo(splitA, time);
    Put putB = MetaTableAccessor.makePutFromRegionInfo(splitB, time);
    if (parentOpenSeqNum > 0) {
      ReplicationBarrierFamilyFormat.addReplicationBarrier(putParent, parentOpenSeqNum);
      ReplicationBarrierFamilyFormat.addReplicationParent(putA, Collections.singletonList(parent));
      ReplicationBarrierFamilyFormat.addReplicationParent(putB, Collections.singletonList(parent));
    }
    // Set initial state to CLOSED
    // NOTE: If initial state is not set to CLOSED then daughter regions get added with the
    // default OFFLINE state. If Master gets restarted after this step, start up sequence of
    // master tries to assign these offline regions. This is followed by re-assignments of the
    // daughter regions from resumed {@link SplitTableRegionProcedure}
    MetaTableAccessor.addRegionStateToPut(putA, RegionInfo.DEFAULT_REPLICA_ID,
      RegionState.State.CLOSED);
    MetaTableAccessor.addRegionStateToPut(putB, RegionInfo.DEFAULT_REPLICA_ID,
      RegionState.State.CLOSED);

    // new regions, openSeqNum = 1 is fine.
    addSequenceNum(putA, 1, splitA.getReplicaId());
    addSequenceNum(putB, 1, splitB.getReplicaId());

    // Add empty locations for region replicas of daughters so that number of replicas can be
    // cached whenever the primary region is looked up from meta
    int regionReplication = getRegionReplication(htd);
    for (int i = 1; i < regionReplication; i++) {
      MetaTableAccessor.addEmptyLocation(putA, i);
      MetaTableAccessor.addEmptyLocation(putB, i);
    }

    multiMutate(parent, Arrays.asList(putParent, putA, putB));
  }

  // ============================================================================================
  // Update Region Merging State helpers
  // ============================================================================================
  /**
   * Merges regions in an atomic operation: deletes the rows of the merging regions and adds the
   * row of the merged region, including references back to the merging regions.
   * @param child      the merged (result) region
   * @param parents    the regions being merged
   * @param serverName location to record for the merged region; may be null
   * @param htd        descriptor of the table the regions belong to
   * @throws IOException if reading the parents' sequence ids or writing to hbase:meta fails
   */
  public void mergeRegions(RegionInfo child, RegionInfo[] parents, ServerName serverName,
    TableDescriptor htd) throws IOException {
    boolean globalScope = htd.hasGlobalReplicationScope();
    long time = HConstants.LATEST_TIMESTAMP;
    List<Mutation> mutations = new ArrayList<>();
    List<RegionInfo> replicationParents = new ArrayList<>();
    for (RegionInfo ri : parents) {
      long seqNum = globalScope ? getOpenSeqNumForParentRegion(ri) : -1;
      // Deletes for merging regions
      mutations.add(MetaTableAccessor.makeDeleteFromRegionInfo(ri, time));
      if (seqNum > 0) {
        mutations
          .add(ReplicationBarrierFamilyFormat.makePutForReplicationBarrier(ri, seqNum, time));
        replicationParents.add(ri);
      }
    }
    // Put for the merged region
    Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(child, time);
    putOfMerged = addMergeRegions(putOfMerged, Arrays.asList(parents));
    // Set initial state to CLOSED.
    // NOTE: If initial state is not set to CLOSED then merged region gets added with the
    // default OFFLINE state. If Master gets restarted after this step, start up sequence of
    // master tries to assign this offline region. This is followed by re-assignments of the
    // merged region from resumed {@link MergeTableRegionsProcedure}
    MetaTableAccessor.addRegionStateToPut(putOfMerged, RegionInfo.DEFAULT_REPLICA_ID,
      RegionState.State.CLOSED);
    mutations.add(putOfMerged);
    // The merged is a new region, openSeqNum = 1 is fine. ServerName may be null
    // if crash after merge happened but before we got to here.. means in-memory
    // locations of offlined merged, now-closed, regions is lost. Should be ok. We
    // assign the merged region later.
    if (serverName != null) {
      MetaTableAccessor.addLocation(putOfMerged, serverName, 1, child.getReplicaId());
    }

    // Add empty locations for region replicas of the merged region so that number of replicas
    // can be cached whenever the primary region is looked up from meta
    int regionReplication = getRegionReplication(htd);
    for (int i = 1; i < regionReplication; i++) {
      MetaTableAccessor.addEmptyLocation(putOfMerged, i);
    }
    // add parent reference for serial replication
    if (!replicationParents.isEmpty()) {
      ReplicationBarrierFamilyFormat.addReplicationParent(putOfMerged, replicationParents);
    }
    multiMutate(child, mutations);
  }

  /**
   * Check whether the given {@code region} has any 'info:merge*' columns.
   */
  public boolean hasMergeRegions(RegionInfo region) throws IOException {
    return CatalogFamilyFormat.hasMergeRegions(getRegionCatalogResult(region).rawCells());
  }

  /**
   * Returns all regioninfos listed in the 'info:merge*' columns of the given {@code region}.
   */
  public List<RegionInfo> getMergeRegions(RegionInfo region) throws IOException {
    return CatalogFamilyFormat.getMergeRegions(getRegionCatalogResult(region).rawCells());
  }

  /**
   * Deletes merge qualifiers for the specified merge region.
   * @param mergeRegion the merged region
   * @throws IOException if reading from or writing to hbase:meta fails
   */
  public void deleteMergeQualifiers(RegionInfo mergeRegion) throws IOException {
    // NOTE: We are doing a new hbase:meta read here.
    Cell[] cells = getRegionCatalogResult(mergeRegion).rawCells();
    if (cells == null || cells.length == 0) {
      return;
    }
    Delete delete = new Delete(mergeRegion.getRegionName());
    List<byte[]> qualifiers = new ArrayList<>();
    for (Cell cell : cells) {
      if (!CatalogFamilyFormat.isMergeQualifierPrefix(cell)) {
        continue;
      }
      byte[] qualifier = CellUtil.cloneQualifier(cell);
      qualifiers.add(qualifier);
      delete.addColumns(HConstants.CATALOG_FAMILY, qualifier, HConstants.LATEST_TIMESTAMP);
    }

    // There will be race condition that a GCMultipleMergedRegionsProcedure is scheduled while
    // the previous GCMultipleMergedRegionsProcedure is still going on, in this case, the second
    // GCMultipleMergedRegionsProcedure could delete the merged region by accident!
    // (A Delete without any column would remove the whole row, so never submit an empty one.)
    if (qualifiers.isEmpty()) {
      LOG.info("No merged qualifiers for region {} in meta table, they are cleaned up already, "
        + "Skip.", mergeRegion.getRegionNameAsString());
      return;
    }
    try (Table table = getMetaTable()) {
      table.delete(delete);
    }
    LOG.info("Deleted merge references in {}, deleted qualifiers {}",
      mergeRegion.getRegionNameAsString(),
      qualifiers.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")));
  }

  /**
   * Adds one 'info:merge*' cell per region in {@code mergeRegions} to {@code put}. Qualifiers are
   * the merge prefix followed by a zero-padded four digit counter.
   * @param put          the Put to add the cells to
   * @param mergeRegions the regions to record as merge parents
   * @return the passed in {@code put}
   * @throws RuntimeException if more than 10000 regions are passed
   */
  static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException {
    int limit = 10000; // Arbitrary limit. No room in our formatted 'task0000' below for more.
    int max = mergeRegions.size();
    if (max > limit) {
      // Should never happen!!!!! But just in case.
      throw new RuntimeException(
        "Can't merge " + max + " regions in one go; " + limit + " is upper-limit.");
    }
    int counter = 0;
    for (RegionInfo ri : mergeRegions) {
      String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++);
      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
        .setFamily(HConstants.CATALOG_FAMILY).setQualifier(Bytes.toBytes(qualifier))
        .setTimestamp(put.getTimestamp()).setType(Type.Put).setValue(RegionInfo.toByteArray(ri))
        .build());
    }
    return put;
  }

  // ============================================================================================
  // Delete Region State helpers
  // ============================================================================================
  /**
   * Deletes the specified region.
   */
  public void deleteRegion(final RegionInfo regionInfo) throws IOException {
    deleteRegions(Collections.singletonList(regionInfo));
  }

  /**
   * Deletes the specified regions.
   */
  public void deleteRegions(final List<RegionInfo> regions) throws IOException {
    deleteRegions(regions, EnvironmentEdgeManager.currentTime());
  }

  /**
   * Deletes the catalog family of the hbase:meta rows of the given regions, using {@code ts} as
   * the timestamp of the delete markers.
   */
  private void deleteRegions(List<RegionInfo> regions, long ts) throws IOException {
    List<Delete> deletes = new ArrayList<>(regions.size());
    for (RegionInfo hri : regions) {
      Delete e = new Delete(hri.getRegionName());
      e.addFamily(HConstants.CATALOG_FAMILY, ts);
      deletes.add(e);
    }
    try (Table table = getMetaTable()) {
      debugLogMutations(deletes);
      table.delete(deletes);
    }
    LOG.info("Deleted {} regions from META", regions.size());
    LOG.debug("Deleted regions: {}", regions);
  }

  /**
   * Overwrites the specified regions from hbase:meta. Deletes old rows for the given regions and
   * adds new ones. Regions added back have state CLOSED.
   * @param regionInfos       list of regions to be added to META
   * @param regionReplication the region replication passed on when adding the regions back
   * @throws IOException if deleting from or writing to hbase:meta fails
   */
  public void overwriteRegions(List<RegionInfo> regionInfos, int regionReplication)
    throws IOException {
    // use master time for delete marker and the Put
    long now = EnvironmentEdgeManager.currentTime();
    deleteRegions(regionInfos, now);
    // Why sleep? This is the easiest way to ensure that the previous deletes does not
    // eclipse the following puts, that might happen in the same ts from the server.
    // See HBASE-9906, and HBASE-9879. Once either HBASE-9879, HBASE-8770 is fixed,
    // or HBASE-9905 is fixed and meta uses seqIds, we do not need the sleep.
    //
    // HBASE-13875 uses master timestamp for the mutations. The 20ms sleep is not needed
    MetaTableAccessor.addRegionsToMeta(master.getConnection(), regionInfos, regionReplication,
      now + 1);
    LOG.info("Overwritten {} regions to Meta", regionInfos.size());
    LOG.debug("Overwritten regions: {} ", regionInfos);
  }

  /**
   * Builds the scan used to find the region info cells of all regions of {@code tableName}.
   */
  private Scan getScanForUpdateRegionReplicas(TableName tableName) {
    Scan scan;
    if (TableName.isMetaTableName(tableName)) {
      // Notice that, we do not use MetaCellComparator for master local region, so we can not use
      // the same logic to set start key and end key for scanning meta table when locating entries
      // in master local region. And since there is only one table in master local region(the record
      // for meta table), so we do not need set start key and end key.
      scan = new Scan();
    } else {
      scan = MetaTableAccessor.getScanForTableName(master.getConfiguration(), tableName);
    }
    return scan.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
  }

  /**
   * Builds, for every non-split region row returned by {@code scanner}, a Delete removing the
   * location and state columns of the replicas with ids in
   * {@code [newReplicaCount, oldReplicaCount)}.
   * <p/>
   * Callers must ensure {@code newReplicaCount < oldReplicaCount}; otherwise the returned Deletes
   * would carry no columns.
   */
  private List<Delete> deleteRegionReplicas(ResultScanner scanner, int oldReplicaCount,
    int newReplicaCount, long now) throws IOException {
    List<Delete> deletes = new ArrayList<>();
    for (;;) {
      Result result = scanner.next();
      if (result == null) {
        break;
      }
      RegionInfo primaryRegionInfo = CatalogFamilyFormat.getRegionInfo(result);
      if (primaryRegionInfo == null || primaryRegionInfo.isSplit()) {
        continue;
      }
      Delete delete = new Delete(result.getRow());
      for (int i = newReplicaCount; i < oldReplicaCount; i++) {
        delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(i), now);
        delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getSeqNumColumn(i), now);
        delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getStartCodeColumn(i),
          now);
        delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerNameColumn(i),
          now);
        delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getRegionStateColumn(i),
          now);
      }
      deletes.add(delete);
    }
    return deletes;
  }

  /**
   * Removes the catalog columns of the region replicas with ids in
   * {@code [newReplicaCount, oldReplicaCount)} for all regions of the given table. For the meta
   * table the mirrored ZooKeeper locations are removed as well. Does nothing when
   * {@code newReplicaCount >= oldReplicaCount}.
   * @param tableName       the table whose replica count was reduced
   * @param oldReplicaCount the previous number of replicas
   * @param newReplicaCount the new, smaller number of replicas
   * @throws IOException if scanning or updating the catalog fails
   */
  public void removeRegionReplicas(TableName tableName, int oldReplicaCount, int newReplicaCount)
    throws IOException {
    if (newReplicaCount >= oldReplicaCount) {
      // Nothing to remove. Bail out early: a Delete without any column removes the whole row,
      // so submitting the (column-less) Deletes built for an empty replica range would wipe the
      // catalog rows of every region of the table.
      LOG.debug("No region replicas to remove for {}, oldReplicaCount={}, newReplicaCount={}",
        tableName, oldReplicaCount, newReplicaCount);
      return;
    }
    Scan scan = getScanForUpdateRegionReplicas(tableName);
    long now = EnvironmentEdgeManager.currentTime();
    if (TableName.isMetaTableName(tableName)) {
      List<Delete> deletes;
      try (ResultScanner scanner = masterRegion.getScanner(scan)) {
        deletes = deleteRegionReplicas(scanner, oldReplicaCount, newReplicaCount, now);
      }
      debugLogMutations(deletes);
      masterRegion.update(r -> {
        for (Delete d : deletes) {
          r.delete(d);
        }
      });
      // also delete the mirrored location on zk
      removeMirrorMetaLocation(oldReplicaCount, newReplicaCount);
    } else {
      try (Table metaTable = getMetaTable(); ResultScanner scanner = metaTable.getScanner(scan)) {
        List<Delete> deletes = deleteRegionReplicas(scanner, oldReplicaCount, newReplicaCount, now);
        debugLogMutations(deletes);
        metaTable.delete(deletes);
      }
    }
  }

  // ==========================================================================
  // Table Descriptors helpers
  // ==========================================================================
  private boolean hasGlobalReplicationScope(TableName tableName) throws IOException {
    return hasGlobalReplicationScope(getDescriptor(tableName));
  }

  /** Returns false for a null descriptor. */
  private boolean hasGlobalReplicationScope(TableDescriptor htd) {
    return htd != null && htd.hasGlobalReplicationScope();
  }

  /** Returns 1 for a null descriptor. */
  private int getRegionReplication(TableDescriptor htd) {
    return htd != null ? htd.getRegionReplication() : 1;
  }

  private TableDescriptor getDescriptor(TableName tableName) throws IOException {
    return master.getTableDescriptors().get(tableName);
  }

  // ==========================================================================
  // Region State
  // ==========================================================================

  /**
   * Pull the region state from a catalog table {@link Result}.
   * @param r          the catalog row
   * @param regionInfo the region (replica) whose state column should be read
   * @return the region state, or null if unknown (no state cell, empty value, or a value that is
   *         not a valid {@link State} name).
   */
  public static State getRegionState(final Result r, RegionInfo regionInfo) {
    Cell cell =
      r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getStateColumn(regionInfo.getReplicaId()));
    if (cell == null || cell.getValueLength() == 0) {
      return null;
    }

    String state =
      Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
    try {
      return State.valueOf(state);
    } catch (IllegalArgumentException e) {
      LOG.warn(
        "BAD value {} in hbase:meta info:state column for region {} , "
          + "Consider using HBCK2 setRegionState ENCODED_REGION_NAME STATE",
        state, regionInfo.getEncodedName());
      return null;
    }
  }

  /**
   * Returns the qualifier of the state column for the given replica: the plain state qualifier
   * for replica 0, otherwise the state qualifier followed by
   * {@link #META_REPLICA_ID_DELIMITER} and the formatted replica id.
   */
  public static byte[] getStateColumn(int replicaId) {
    return replicaId == 0
      ? HConstants.STATE_QUALIFIER
      : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
  }

  /** Logs every mutation at debug level on the META logger; a no-op when debug is disabled. */
  private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException {
    if (!METALOG.isDebugEnabled()) {
      return;
    }
    // Logging each mutation in separate line makes it easier to see diff between them visually
    // because of common starting indentation.
    for (Mutation mutation : mutations) {
      debugLogMutation(mutation);
    }
  }

  private static void debugLogMutation(Mutation p) throws IOException {
    METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON());
  }
}