001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.Collections; 025import java.util.List; 026import java.util.concurrent.CompletableFuture; 027import java.util.stream.Collectors; 028import org.apache.hadoop.hbase.CatalogFamilyFormat; 029import org.apache.hadoop.hbase.Cell; 030import org.apache.hadoop.hbase.Cell.Type; 031import org.apache.hadoop.hbase.CellBuilderFactory; 032import org.apache.hadoop.hbase.CellBuilderType; 033import org.apache.hadoop.hbase.CellUtil; 034import org.apache.hadoop.hbase.ClientMetaTableAccessor; 035import org.apache.hadoop.hbase.DoNotRetryIOException; 036import org.apache.hadoop.hbase.HConstants; 037import org.apache.hadoop.hbase.HRegionLocation; 038import org.apache.hadoop.hbase.MetaTableAccessor; 039import org.apache.hadoop.hbase.RegionLocations; 040import org.apache.hadoop.hbase.ServerName; 041import org.apache.hadoop.hbase.TableName; 042import org.apache.hadoop.hbase.client.AsyncTable; 043import org.apache.hadoop.hbase.client.Delete; 044import org.apache.hadoop.hbase.client.Get; 045import org.apache.hadoop.hbase.client.Mutation; 046import org.apache.hadoop.hbase.client.Put; 047import org.apache.hadoop.hbase.client.RegionInfo; 048import org.apache.hadoop.hbase.client.RegionInfoBuilder; 049import org.apache.hadoop.hbase.client.RegionReplicaUtil; 050import org.apache.hadoop.hbase.client.Result; 051import org.apache.hadoop.hbase.client.ResultScanner; 052import org.apache.hadoop.hbase.client.Scan; 053import org.apache.hadoop.hbase.client.Table; 054import org.apache.hadoop.hbase.client.TableDescriptor; 055import org.apache.hadoop.hbase.master.MasterFileSystem; 056import org.apache.hadoop.hbase.master.MasterServices; 057import org.apache.hadoop.hbase.master.RegionState; 058import org.apache.hadoop.hbase.master.RegionState.State; 059import org.apache.hadoop.hbase.master.region.MasterRegion; 060import org.apache.hadoop.hbase.procedure2.Procedure; 061import org.apache.hadoop.hbase.procedure2.util.StringUtils; 062import org.apache.hadoop.hbase.replication.ReplicationBarrierFamilyFormat; 063import org.apache.hadoop.hbase.util.Bytes; 064import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 065import org.apache.hadoop.hbase.util.FutureUtils; 066import org.apache.hadoop.hbase.wal.WALSplitUtil; 067import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 068import org.apache.yetus.audience.InterfaceAudience; 069import org.apache.zookeeper.KeeperException; 070import org.slf4j.Logger; 071import org.slf4j.LoggerFactory; 072 073import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 074 075import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 076import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos; 077import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MultiRowMutationService; 078import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest; 079import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse; 080 081/** 082 * Store Region State to hbase:meta table. 083 */ 084@InterfaceAudience.Private 085public class RegionStateStore { 086 private static final Logger LOG = LoggerFactory.getLogger(RegionStateStore.class); 087 private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META"); 088 089 /** The delimiter for meta columns for replicaIds > 0 */ 090 protected static final char META_REPLICA_ID_DELIMITER = '_'; 091 092 private final MasterServices master; 093 094 private final MasterRegion masterRegion; 095 096 public RegionStateStore(MasterServices master, MasterRegion masterRegion) { 097 this.master = master; 098 this.masterRegion = masterRegion; 099 } 100 101 @FunctionalInterface 102 public interface RegionStateVisitor { 103 void visitRegionState(Result result, RegionInfo regionInfo, State state, 104 ServerName regionLocation, ServerName lastHost, long openSeqNum); 105 } 106 107 public void visitMeta(final RegionStateVisitor visitor) throws IOException { 108 MetaTableAccessor.fullScanRegions(master.getConnection(), 109 new ClientMetaTableAccessor.Visitor() { 110 final boolean isDebugEnabled = LOG.isDebugEnabled(); 111 112 @Override 113 public boolean visit(final Result r) throws IOException { 114 if (r != null && !r.isEmpty()) { 115 long st = 0; 116 if (LOG.isTraceEnabled()) { 117 st = EnvironmentEdgeManager.currentTime(); 118 } 119 visitMetaEntry(visitor, r); 120 if (LOG.isTraceEnabled()) { 121 long et = EnvironmentEdgeManager.currentTime(); 122 LOG.trace("[T] LOAD META PERF " + StringUtils.humanTimeDiff(et - st)); 123 } 124 } else if (isDebugEnabled) { 125 LOG.debug("NULL result from meta - ignoring but this is strange."); 126 } 127 return true; 128 } 129 }); 130 } 131 132 /** 133 * Queries META table for the passed region encoded name, delegating action upon results to the 134 * {@code RegionStateVisitor} passed as second parameter. 135 * @param regionEncodedName encoded name for the Region we want to query META for. 136 * @param visitor The {@code RegionStateVisitor} instance to react over the query 137 * results. 138 * @throws IOException If some error occurs while querying META or parsing results. 139 */ 140 public void visitMetaForRegion(final String regionEncodedName, final RegionStateVisitor visitor) 141 throws IOException { 142 Result result = 143 MetaTableAccessor.scanByRegionEncodedName(master.getConnection(), regionEncodedName); 144 if (result != null) { 145 visitMetaEntry(visitor, result); 146 } 147 } 148 149 public static void visitMetaEntry(final RegionStateVisitor visitor, final Result result) 150 throws IOException { 151 final RegionLocations rl = CatalogFamilyFormat.getRegionLocations(result); 152 if (rl == null) return; 153 154 final HRegionLocation[] locations = rl.getRegionLocations(); 155 if (locations == null) return; 156 157 for (int i = 0; i < locations.length; ++i) { 158 final HRegionLocation hrl = locations[i]; 159 if (hrl == null) continue; 160 161 final RegionInfo regionInfo = hrl.getRegion(); 162 if (regionInfo == null) continue; 163 164 final int replicaId = regionInfo.getReplicaId(); 165 final State state = getRegionState(result, regionInfo); 166 167 final ServerName lastHost = hrl.getServerName(); 168 ServerName regionLocation = MetaTableAccessor.getTargetServerName(result, replicaId); 169 final long openSeqNum = hrl.getSeqNum(); 170 171 LOG.debug( 172 "Load {} entry region={}, regionState={}, lastHost={}, " 173 + "regionLocation={}, openSeqNum={}", 174 TableName.META_TABLE_NAME, regionInfo.getEncodedName(), state, lastHost, regionLocation, 175 openSeqNum); 176 visitor.visitRegionState(result, regionInfo, state, regionLocation, lastHost, openSeqNum); 177 } 178 } 179 180 private Put generateUpdateRegionLocationPut(RegionStateNode regionStateNode) throws IOException { 181 long time = EnvironmentEdgeManager.currentTime(); 182 long openSeqNum = regionStateNode.getState() == State.OPEN 183 ? regionStateNode.getOpenSeqNum() 184 : HConstants.NO_SEQNUM; 185 RegionInfo regionInfo = regionStateNode.getRegionInfo(); 186 State state = regionStateNode.getState(); 187 ServerName regionLocation = regionStateNode.getRegionLocation(); 188 TransitRegionStateProcedure rit = regionStateNode.getProcedure(); 189 long pid = rit != null ? rit.getProcId() : Procedure.NO_PROC_ID; 190 final int replicaId = regionInfo.getReplicaId(); 191 final Put put = new Put(CatalogFamilyFormat.getMetaKeyForRegion(regionInfo), time); 192 MetaTableAccessor.addRegionInfo(put, regionInfo); 193 final StringBuilder info = 194 new StringBuilder("pid=").append(pid).append(" updating ").append(TableName.META_TABLE_NAME) 195 .append(" row=").append(regionInfo.getEncodedName()).append(", regionState=").append(state); 196 if (openSeqNum >= 0) { 197 Preconditions.checkArgument(state == State.OPEN && regionLocation != null, 198 "Open region should be on a server"); 199 MetaTableAccessor.addLocation(put, regionLocation, openSeqNum, replicaId); 200 // only update replication barrier for default replica 201 if ( 202 regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID 203 && hasGlobalReplicationScope(regionInfo.getTable()) 204 ) { 205 ReplicationBarrierFamilyFormat.addReplicationBarrier(put, openSeqNum); 206 info.append(", repBarrier=").append(openSeqNum); 207 } 208 info.append(", openSeqNum=").append(openSeqNum); 209 info.append(", regionLocation=").append(regionLocation); 210 } else if (regionLocation != null) { 211 // Ideally, if no regionLocation, write null to the hbase:meta but this will confuse clients 212 // currently; they want a server to hit. TODO: Make clients wait if no location. 213 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 214 .setFamily(HConstants.CATALOG_FAMILY) 215 .setQualifier(CatalogFamilyFormat.getServerNameColumn(replicaId)) 216 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put) 217 .setValue(Bytes.toBytes(regionLocation.getServerName())).build()); 218 info.append(", regionLocation=").append(regionLocation); 219 } 220 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 221 .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getStateColumn(replicaId)) 222 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(state.name())) 223 .build()); 224 LOG.info(info.toString()); 225 return put; 226 } 227 228 CompletableFuture<Void> updateRegionLocation(RegionStateNode regionStateNode) { 229 Put put; 230 try { 231 put = generateUpdateRegionLocationPut(regionStateNode); 232 } catch (IOException e) { 233 return FutureUtils.failedFuture(e); 234 } 235 RegionInfo regionInfo = regionStateNode.getRegionInfo(); 236 State state = regionStateNode.getState(); 237 CompletableFuture<Void> future = updateRegionLocation(regionInfo, state, put); 238 if (regionInfo.isMetaRegion() && regionInfo.isFirst()) { 239 // mirror the meta location to zookeeper 240 // we store meta location in master local region which means the above method is 241 // synchronous(we just wrap the result with a CompletableFuture to make it look like 242 // asynchronous), so it is OK to just call this method directly here 243 assert future.isDone(); 244 if (!future.isCompletedExceptionally()) { 245 try { 246 mirrorMetaLocation(regionInfo, regionStateNode.getRegionLocation(), state); 247 } catch (IOException e) { 248 return FutureUtils.failedFuture(e); 249 } 250 } 251 } 252 return future; 253 } 254 255 private void mirrorMetaLocation(RegionInfo regionInfo, ServerName serverName, State state) 256 throws IOException { 257 try { 258 MetaTableLocator.setMetaLocation(master.getZooKeeper(), serverName, regionInfo.getReplicaId(), 259 state); 260 } catch (KeeperException e) { 261 throw new IOException(e); 262 } 263 } 264 265 private void removeMirrorMetaLocation(int oldReplicaCount, int newReplicaCount) 266 throws IOException { 267 try { 268 for (int i = newReplicaCount; i < oldReplicaCount; i++) { 269 MetaTableLocator.deleteMetaLocation(master.getZooKeeper(), i); 270 } 271 } catch (KeeperException e) { 272 throw new IOException(e); 273 } 274 } 275 276 private CompletableFuture<Void> updateRegionLocation(RegionInfo regionInfo, State state, 277 Put put) { 278 CompletableFuture<Void> future; 279 if (regionInfo.isMetaRegion()) { 280 try { 281 masterRegion.update(r -> r.put(put)); 282 future = CompletableFuture.completedFuture(null); 283 } catch (Exception e) { 284 future = FutureUtils.failedFuture(e); 285 } 286 } else { 287 AsyncTable<?> table = master.getAsyncConnection().getTable(TableName.META_TABLE_NAME); 288 future = table.put(put); 289 } 290 FutureUtils.addListener(future, (r, e) -> { 291 if (e != null) { 292 // TODO: Revist!!!! Means that if a server is loaded, then we will abort our host! 293 // In tests we abort the Master! 294 String msg = String.format("FAILED persisting region=%s state=%s", 295 regionInfo.getShortNameToLog(), state); 296 LOG.error(msg, e); 297 master.abort(msg, e); 298 } 299 }); 300 return future; 301 } 302 303 private long getOpenSeqNumForParentRegion(RegionInfo region) throws IOException { 304 MasterFileSystem fs = master.getMasterFileSystem(); 305 long maxSeqId = WALSplitUtil.getMaxRegionSequenceId(master.getConfiguration(), region, 306 fs::getFileSystem, fs::getWALFileSystem); 307 return maxSeqId > 0 ? maxSeqId + 1 : HConstants.NO_SEQNUM; 308 } 309 310 /** 311 * Performs an atomic multi-mutate operation against the given table. Used by the likes of merge 312 * and split as these want to make atomic mutations across multiple rows. 313 */ 314 private void multiMutate(RegionInfo ri, List<Mutation> mutations) throws IOException { 315 debugLogMutations(mutations); 316 byte[] row = 317 Bytes.toBytes(RegionReplicaUtil.getRegionInfoForDefaultReplica(ri).getRegionNameAsString() 318 + HConstants.DELIMITER); 319 MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder(); 320 for (Mutation mutation : mutations) { 321 if (mutation instanceof Put) { 322 builder.addMutationRequest( 323 ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.PUT, mutation)); 324 } else if (mutation instanceof Delete) { 325 builder.addMutationRequest( 326 ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.DELETE, mutation)); 327 } else { 328 throw new DoNotRetryIOException( 329 "multi in MetaEditor doesn't support " + mutation.getClass().getName()); 330 } 331 } 332 MutateRowsRequest request = builder.build(); 333 AsyncTable<?> table = 334 master.getConnection().toAsyncConnection().getTable(TableName.META_TABLE_NAME); 335 CompletableFuture<MutateRowsResponse> future = table.<MultiRowMutationService, 336 MutateRowsResponse> coprocessorService(MultiRowMutationService::newStub, 337 (stub, controller, done) -> stub.mutateRows(controller, request, done), row); 338 FutureUtils.get(future); 339 } 340 341 private Table getMetaTable() throws IOException { 342 return master.getConnection().getTable(TableName.META_TABLE_NAME); 343 } 344 345 private Result getRegionCatalogResult(RegionInfo region) throws IOException { 346 Get get = 347 new Get(CatalogFamilyFormat.getMetaKeyForRegion(region)).addFamily(HConstants.CATALOG_FAMILY); 348 try (Table table = getMetaTable()) { 349 return table.get(get); 350 } 351 } 352 353 private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException { 354 return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow()) 355 .setFamily(HConstants.CATALOG_FAMILY) 356 .setQualifier(CatalogFamilyFormat.getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp()) 357 .setType(Type.Put).setValue(Bytes.toBytes(openSeqNum)).build()); 358 } 359 360 // ============================================================================================ 361 // Update Region Splitting State helpers 362 // ============================================================================================ 363 /** 364 * Splits the region into two in an atomic operation. Offlines the parent region with the 365 * information that it is split into two, and also adds the daughter regions. Does not add the 366 * location information to the daughter regions since they are not open yet. 367 */ 368 public void splitRegion(RegionInfo parent, RegionInfo splitA, RegionInfo splitB, 369 ServerName serverName, TableDescriptor htd) throws IOException { 370 long parentOpenSeqNum = HConstants.NO_SEQNUM; 371 if (htd.hasGlobalReplicationScope()) { 372 parentOpenSeqNum = getOpenSeqNumForParentRegion(parent); 373 } 374 long time = EnvironmentEdgeManager.currentTime(); 375 // Put for parent 376 Put putParent = MetaTableAccessor.makePutFromRegionInfo( 377 RegionInfoBuilder.newBuilder(parent).setOffline(true).setSplit(true).build(), time); 378 MetaTableAccessor.addDaughtersToPut(putParent, splitA, splitB); 379 380 // Puts for daughters 381 Put putA = MetaTableAccessor.makePutFromRegionInfo(splitA, time); 382 Put putB = MetaTableAccessor.makePutFromRegionInfo(splitB, time); 383 if (parentOpenSeqNum > 0) { 384 ReplicationBarrierFamilyFormat.addReplicationBarrier(putParent, parentOpenSeqNum); 385 ReplicationBarrierFamilyFormat.addReplicationParent(putA, Collections.singletonList(parent)); 386 ReplicationBarrierFamilyFormat.addReplicationParent(putB, Collections.singletonList(parent)); 387 } 388 // Set initial state to CLOSED 389 // NOTE: If initial state is not set to CLOSED then daughter regions get added with the 390 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 391 // master tries to assign these offline regions. This is followed by re-assignments of the 392 // daughter regions from resumed {@link SplitTableRegionProcedure} 393 MetaTableAccessor.addRegionStateToPut(putA, RegionInfo.DEFAULT_REPLICA_ID, 394 RegionState.State.CLOSED); 395 MetaTableAccessor.addRegionStateToPut(putB, RegionInfo.DEFAULT_REPLICA_ID, 396 RegionState.State.CLOSED); 397 398 // new regions, openSeqNum = 1 is fine. 399 addSequenceNum(putA, 1, splitA.getReplicaId()); 400 addSequenceNum(putB, 1, splitB.getReplicaId()); 401 402 // Add empty locations for region replicas of daughters so that number of replicas can be 403 // cached whenever the primary region is looked up from meta 404 int regionReplication = getRegionReplication(htd); 405 for (int i = 1; i < regionReplication; i++) { 406 MetaTableAccessor.addEmptyLocation(putA, i); 407 MetaTableAccessor.addEmptyLocation(putB, i); 408 } 409 410 multiMutate(parent, Arrays.asList(putParent, putA, putB)); 411 } 412 413 // ============================================================================================ 414 // Update Region Merging State helpers 415 // ============================================================================================ 416 public void mergeRegions(RegionInfo child, RegionInfo[] parents, ServerName serverName, 417 TableDescriptor htd) throws IOException { 418 boolean globalScope = htd.hasGlobalReplicationScope(); 419 long time = EnvironmentEdgeManager.currentTime(); 420 List<Mutation> mutations = new ArrayList<>(); 421 List<RegionInfo> replicationParents = new ArrayList<>(); 422 for (RegionInfo ri : parents) { 423 long seqNum = globalScope ? getOpenSeqNumForParentRegion(ri) : -1; 424 // Deletes for merging regions 425 mutations.add(MetaTableAccessor.makeDeleteFromRegionInfo(ri, time)); 426 if (seqNum > 0) { 427 mutations 428 .add(ReplicationBarrierFamilyFormat.makePutForReplicationBarrier(ri, seqNum, time)); 429 replicationParents.add(ri); 430 } 431 } 432 // Put for parent 433 Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(child, time); 434 putOfMerged = addMergeRegions(putOfMerged, Arrays.asList(parents)); 435 // Set initial state to CLOSED. 436 // NOTE: If initial state is not set to CLOSED then merged region gets added with the 437 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 438 // master tries to assign this offline region. This is followed by re-assignments of the 439 // merged region from resumed {@link MergeTableRegionsProcedure} 440 MetaTableAccessor.addRegionStateToPut(putOfMerged, RegionInfo.DEFAULT_REPLICA_ID, 441 RegionState.State.CLOSED); 442 mutations.add(putOfMerged); 443 // The merged is a new region, openSeqNum = 1 is fine. ServerName may be null 444 // if crash after merge happened but before we got to here.. means in-memory 445 // locations of offlined merged, now-closed, regions is lost. Should be ok. We 446 // assign the merged region later. 447 if (serverName != null) { 448 MetaTableAccessor.addLocation(putOfMerged, serverName, 1, child.getReplicaId()); 449 } 450 451 // Add empty locations for region replicas of the merged region so that number of replicas 452 // can be cached whenever the primary region is looked up from meta 453 int regionReplication = getRegionReplication(htd); 454 for (int i = 1; i < regionReplication; i++) { 455 MetaTableAccessor.addEmptyLocation(putOfMerged, i); 456 } 457 // add parent reference for serial replication 458 if (!replicationParents.isEmpty()) { 459 ReplicationBarrierFamilyFormat.addReplicationParent(putOfMerged, replicationParents); 460 } 461 multiMutate(child, mutations); 462 } 463 464 /** 465 * Check whether the given {@code region} has any 'info:merge*' columns. 466 */ 467 public boolean hasMergeRegions(RegionInfo region) throws IOException { 468 return CatalogFamilyFormat.hasMergeRegions(getRegionCatalogResult(region).rawCells()); 469 } 470 471 /** 472 * Returns Return all regioninfos listed in the 'info:merge*' columns of the given {@code region}. 473 */ 474 public List<RegionInfo> getMergeRegions(RegionInfo region) throws IOException { 475 return CatalogFamilyFormat.getMergeRegions(getRegionCatalogResult(region).rawCells()); 476 } 477 478 /** 479 * Deletes merge qualifiers for the specified merge region. 480 * @param connection connection we're using 481 * @param mergeRegion the merged region 482 */ 483 public void deleteMergeQualifiers(RegionInfo mergeRegion) throws IOException { 484 // NOTE: We are doing a new hbase:meta read here. 485 Cell[] cells = getRegionCatalogResult(mergeRegion).rawCells(); 486 if (cells == null || cells.length == 0) { 487 return; 488 } 489 Delete delete = new Delete(mergeRegion.getRegionName()); 490 List<byte[]> qualifiers = new ArrayList<>(); 491 for (Cell cell : cells) { 492 if (!CatalogFamilyFormat.isMergeQualifierPrefix(cell)) { 493 continue; 494 } 495 byte[] qualifier = CellUtil.cloneQualifier(cell); 496 qualifiers.add(qualifier); 497 delete.addColumns(HConstants.CATALOG_FAMILY, qualifier, HConstants.LATEST_TIMESTAMP); 498 } 499 500 // There will be race condition that a GCMultipleMergedRegionsProcedure is scheduled while 501 // the previous GCMultipleMergedRegionsProcedure is still going on, in this case, the second 502 // GCMultipleMergedRegionsProcedure could delete the merged region by accident! 503 if (qualifiers.isEmpty()) { 504 LOG.info("No merged qualifiers for region " + mergeRegion.getRegionNameAsString() 505 + " in meta table, they are cleaned up already, Skip."); 506 return; 507 } 508 try (Table table = master.getConnection().getTable(TableName.META_TABLE_NAME)) { 509 table.delete(delete); 510 } 511 LOG.info( 512 "Deleted merge references in " + mergeRegion.getRegionNameAsString() + ", deleted qualifiers " 513 + qualifiers.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", "))); 514 } 515 516 static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException { 517 int limit = 10000; // Arbitrary limit. No room in our formatted 'task0000' below for more. 518 int max = mergeRegions.size(); 519 if (max > limit) { 520 // Should never happen!!!!! But just in case. 521 throw new RuntimeException( 522 "Can't merge " + max + " regions in one go; " + limit + " is upper-limit."); 523 } 524 int counter = 0; 525 for (RegionInfo ri : mergeRegions) { 526 String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++); 527 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 528 .setFamily(HConstants.CATALOG_FAMILY).setQualifier(Bytes.toBytes(qualifier)) 529 .setTimestamp(put.getTimestamp()).setType(Type.Put).setValue(RegionInfo.toByteArray(ri)) 530 .build()); 531 } 532 return put; 533 } 534 535 // ============================================================================================ 536 // Delete Region State helpers 537 // ============================================================================================ 538 /** 539 * Deletes the specified region. 540 */ 541 public void deleteRegion(final RegionInfo regionInfo) throws IOException { 542 deleteRegions(Collections.singletonList(regionInfo)); 543 } 544 545 /** 546 * Deletes the specified regions. 547 */ 548 public void deleteRegions(final List<RegionInfo> regions) throws IOException { 549 deleteRegions(regions, EnvironmentEdgeManager.currentTime()); 550 } 551 552 private void deleteRegions(List<RegionInfo> regions, long ts) throws IOException { 553 List<Delete> deletes = new ArrayList<>(regions.size()); 554 for (RegionInfo hri : regions) { 555 Delete e = new Delete(hri.getRegionName()); 556 e.addFamily(HConstants.CATALOG_FAMILY, ts); 557 deletes.add(e); 558 } 559 try (Table table = getMetaTable()) { 560 debugLogMutations(deletes); 561 table.delete(deletes); 562 } 563 LOG.info("Deleted {} regions from META", regions.size()); 564 LOG.debug("Deleted regions: {}", regions); 565 } 566 567 /** 568 * Overwrites the specified regions from hbase:meta. Deletes old rows for the given regions and 569 * adds new ones. Regions added back have state CLOSED. 570 * @param connection connection we're using 571 * @param regionInfos list of regions to be added to META 572 */ 573 public void overwriteRegions(List<RegionInfo> regionInfos, int regionReplication) 574 throws IOException { 575 // use master time for delete marker and the Put 576 long now = EnvironmentEdgeManager.currentTime(); 577 deleteRegions(regionInfos, now); 578 // Why sleep? This is the easiest way to ensure that the previous deletes does not 579 // eclipse the following puts, that might happen in the same ts from the server. 580 // See HBASE-9906, and HBASE-9879. Once either HBASE-9879, HBASE-8770 is fixed, 581 // or HBASE-9905 is fixed and meta uses seqIds, we do not need the sleep. 582 // 583 // HBASE-13875 uses master timestamp for the mutations. The 20ms sleep is not needed 584 MetaTableAccessor.addRegionsToMeta(master.getConnection(), regionInfos, regionReplication, 585 now + 1); 586 LOG.info("Overwritten " + regionInfos.size() + " regions to Meta"); 587 LOG.debug("Overwritten regions: {} ", regionInfos); 588 } 589 590 private Scan getScanForUpdateRegionReplicas(TableName tableName) { 591 Scan scan; 592 if (TableName.isMetaTableName(tableName)) { 593 // Notice that, we do not use MetaCellComparator for master local region, so we can not use 594 // the same logic to set start key and end key for scanning meta table when locating entries 595 // in master local region. And since there is only one table in master local region(the record 596 // for meta table), so we do not need set start key and end key. 597 scan = new Scan(); 598 } else { 599 scan = MetaTableAccessor.getScanForTableName(master.getConfiguration(), tableName); 600 } 601 return scan.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); 602 } 603 604 private List<Delete> deleteRegionReplicas(ResultScanner scanner, int oldReplicaCount, 605 int newReplicaCount, long now) throws IOException { 606 List<Delete> deletes = new ArrayList<>(); 607 for (;;) { 608 Result result = scanner.next(); 609 if (result == null) { 610 break; 611 } 612 RegionInfo primaryRegionInfo = CatalogFamilyFormat.getRegionInfo(result); 613 if (primaryRegionInfo == null || primaryRegionInfo.isSplit()) { 614 continue; 615 } 616 Delete delete = new Delete(result.getRow()); 617 for (int i = newReplicaCount; i < oldReplicaCount; i++) { 618 delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(i), now); 619 delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getSeqNumColumn(i), now); 620 delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getStartCodeColumn(i), 621 now); 622 delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerNameColumn(i), 623 now); 624 delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getRegionStateColumn(i), 625 now); 626 } 627 deletes.add(delete); 628 } 629 return deletes; 630 } 631 632 public void removeRegionReplicas(TableName tableName, int oldReplicaCount, int newReplicaCount) 633 throws IOException { 634 Scan scan = getScanForUpdateRegionReplicas(tableName); 635 long now = EnvironmentEdgeManager.currentTime(); 636 if (TableName.isMetaTableName(tableName)) { 637 List<Delete> deletes; 638 try (ResultScanner scanner = masterRegion.getScanner(scan)) { 639 deletes = deleteRegionReplicas(scanner, oldReplicaCount, newReplicaCount, now); 640 } 641 debugLogMutations(deletes); 642 masterRegion.update(r -> { 643 for (Delete d : deletes) { 644 r.delete(d); 645 } 646 }); 647 // also delete the mirrored location on zk 648 removeMirrorMetaLocation(oldReplicaCount, newReplicaCount); 649 } else { 650 try (Table metaTable = getMetaTable(); ResultScanner scanner = metaTable.getScanner(scan)) { 651 List<Delete> deletes = deleteRegionReplicas(scanner, oldReplicaCount, newReplicaCount, now); 652 debugLogMutations(deletes); 653 metaTable.delete(deletes); 654 } 655 } 656 } 657 658 // ========================================================================== 659 // Table Descriptors helpers 660 // ========================================================================== 661 private boolean hasGlobalReplicationScope(TableName tableName) throws IOException { 662 return hasGlobalReplicationScope(getDescriptor(tableName)); 663 } 664 665 private boolean hasGlobalReplicationScope(TableDescriptor htd) { 666 return htd != null ? htd.hasGlobalReplicationScope() : false; 667 } 668 669 private int getRegionReplication(TableDescriptor htd) { 670 return htd != null ? htd.getRegionReplication() : 1; 671 } 672 673 private TableDescriptor getDescriptor(TableName tableName) throws IOException { 674 return master.getTableDescriptors().get(tableName); 675 } 676 677 // ========================================================================== 678 // Region State 679 // ========================================================================== 680 681 /** 682 * Pull the region state from a catalog table {@link Result}. 683 * @return the region state, or null if unknown. 684 */ 685 public static State getRegionState(final Result r, RegionInfo regionInfo) { 686 Cell cell = 687 r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getStateColumn(regionInfo.getReplicaId())); 688 if (cell == null || cell.getValueLength() == 0) { 689 return null; 690 } 691 692 String state = 693 Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 694 try { 695 return State.valueOf(state); 696 } catch (IllegalArgumentException e) { 697 LOG.warn( 698 "BAD value {} in " + TableName.META_TABLE_NAME + " info:state column for region {} , " 699 + "Consider using HBCK2 setRegionState ENCODED_REGION_NAME STATE", 700 state, regionInfo.getEncodedName()); 701 return null; 702 } 703 } 704 705 public static byte[] getStateColumn(int replicaId) { 706 return replicaId == 0 707 ? HConstants.STATE_QUALIFIER 708 : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 709 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 710 } 711 712 private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException { 713 if (!METALOG.isDebugEnabled()) { 714 return; 715 } 716 // Logging each mutation in separate line makes it easier to see diff between them visually 717 // because of common starting indentation. 718 for (Mutation mutation : mutations) { 719 debugLogMutation(mutation); 720 } 721 } 722 723 private static void debugLogMutation(Mutation p) throws IOException { 724 METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON()); 725 } 726}