001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.assignment; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Arrays; 023import java.util.Collection; 024import java.util.Collections; 025import java.util.List; 026import java.util.concurrent.CompletableFuture; 027import java.util.stream.Collectors; 028import org.apache.hadoop.hbase.CatalogFamilyFormat; 029import org.apache.hadoop.hbase.Cell; 030import org.apache.hadoop.hbase.Cell.Type; 031import org.apache.hadoop.hbase.CellBuilderFactory; 032import org.apache.hadoop.hbase.CellBuilderType; 033import org.apache.hadoop.hbase.CellUtil; 034import org.apache.hadoop.hbase.ClientMetaTableAccessor; 035import org.apache.hadoop.hbase.DoNotRetryIOException; 036import org.apache.hadoop.hbase.HConstants; 037import org.apache.hadoop.hbase.HRegionLocation; 038import org.apache.hadoop.hbase.MetaTableAccessor; 039import org.apache.hadoop.hbase.RegionLocations; 040import org.apache.hadoop.hbase.ServerName; 041import org.apache.hadoop.hbase.TableName; 042import org.apache.hadoop.hbase.client.AsyncTable; 043import org.apache.hadoop.hbase.client.Delete; 044import org.apache.hadoop.hbase.client.Get; 045import org.apache.hadoop.hbase.client.Mutation; 046import org.apache.hadoop.hbase.client.Put; 047import org.apache.hadoop.hbase.client.RegionInfo; 048import org.apache.hadoop.hbase.client.RegionInfoBuilder; 049import org.apache.hadoop.hbase.client.RegionReplicaUtil; 050import org.apache.hadoop.hbase.client.Result; 051import org.apache.hadoop.hbase.client.ResultScanner; 052import org.apache.hadoop.hbase.client.Scan; 053import org.apache.hadoop.hbase.client.Table; 054import org.apache.hadoop.hbase.client.TableDescriptor; 055import org.apache.hadoop.hbase.master.MasterFileSystem; 056import org.apache.hadoop.hbase.master.MasterServices; 057import org.apache.hadoop.hbase.master.RegionState; 058import org.apache.hadoop.hbase.master.RegionState.State; 059import org.apache.hadoop.hbase.master.region.MasterRegion; 060import org.apache.hadoop.hbase.procedure2.Procedure; 061import org.apache.hadoop.hbase.procedure2.util.StringUtils; 062import org.apache.hadoop.hbase.replication.ReplicationBarrierFamilyFormat; 063import org.apache.hadoop.hbase.util.Bytes; 064import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 065import org.apache.hadoop.hbase.util.FutureUtils; 066import org.apache.hadoop.hbase.wal.WALSplitUtil; 067import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 068import org.apache.yetus.audience.InterfaceAudience; 069import org.apache.zookeeper.KeeperException; 070import org.slf4j.Logger; 071import org.slf4j.LoggerFactory; 072 073import org.apache.hbase.thirdparty.com.google.common.base.Preconditions; 074 075import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 076import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos; 077import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MultiRowMutationService; 078import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest; 079import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse; 080 081/** 082 * Store Region State to hbase:meta table. 083 */ 084@InterfaceAudience.Private 085public class RegionStateStore { 086 private static final Logger LOG = LoggerFactory.getLogger(RegionStateStore.class); 087 private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META"); 088 089 /** The delimiter for meta columns for replicaIds > 0 */ 090 protected static final char META_REPLICA_ID_DELIMITER = '_'; 091 092 private final MasterServices master; 093 094 private final MasterRegion masterRegion; 095 096 public RegionStateStore(MasterServices master, MasterRegion masterRegion) { 097 this.master = master; 098 this.masterRegion = masterRegion; 099 } 100 101 @FunctionalInterface 102 public interface RegionStateVisitor { 103 void visitRegionState(Result result, RegionInfo regionInfo, State state, 104 ServerName regionLocation, ServerName lastHost, long openSeqNum); 105 } 106 107 public void visitMeta(final RegionStateVisitor visitor) throws IOException { 108 MetaTableAccessor.fullScanRegions(master.getConnection(), 109 new ClientMetaTableAccessor.Visitor() { 110 final boolean isDebugEnabled = LOG.isDebugEnabled(); 111 112 @Override 113 public boolean visit(final Result r) throws IOException { 114 if (r != null && !r.isEmpty()) { 115 long st = 0; 116 if (LOG.isTraceEnabled()) { 117 st = EnvironmentEdgeManager.currentTime(); 118 } 119 visitMetaEntry(visitor, r); 120 if (LOG.isTraceEnabled()) { 121 long et = EnvironmentEdgeManager.currentTime(); 122 LOG.trace("[T] LOAD META PERF " + StringUtils.humanTimeDiff(et - st)); 123 } 124 } else if (isDebugEnabled) { 125 LOG.debug("NULL result from meta - ignoring but this is strange."); 126 } 127 return true; 128 } 129 }); 130 } 131 132 /** 133 * Queries META table for the passed region encoded name, delegating action upon results to the 134 * <code>RegionStateVisitor</code> passed as second parameter. 135 * @param regionEncodedName encoded name for the Region we want to query META for. 136 * @param visitor The <code>RegionStateVisitor</code> instance to react over the query 137 * results. 138 * @throws IOException If some error occurs while querying META or parsing results. 139 */ 140 public void visitMetaForRegion(final String regionEncodedName, final RegionStateVisitor visitor) 141 throws IOException { 142 Result result = 143 MetaTableAccessor.scanByRegionEncodedName(master.getConnection(), regionEncodedName); 144 if (result != null) { 145 visitMetaEntry(visitor, result); 146 } 147 } 148 149 public static void visitMetaEntry(final RegionStateVisitor visitor, final Result result) 150 throws IOException { 151 final RegionLocations rl = CatalogFamilyFormat.getRegionLocations(result); 152 if (rl == null) return; 153 154 final HRegionLocation[] locations = rl.getRegionLocations(); 155 if (locations == null) return; 156 157 for (int i = 0; i < locations.length; ++i) { 158 final HRegionLocation hrl = locations[i]; 159 if (hrl == null) continue; 160 161 final RegionInfo regionInfo = hrl.getRegion(); 162 if (regionInfo == null) continue; 163 164 final int replicaId = regionInfo.getReplicaId(); 165 final State state = getRegionState(result, regionInfo); 166 167 final ServerName lastHost = hrl.getServerName(); 168 ServerName regionLocation = MetaTableAccessor.getTargetServerName(result, replicaId); 169 final long openSeqNum = hrl.getSeqNum(); 170 171 LOG.debug( 172 "Load hbase:meta entry region={}, regionState={}, lastHost={}, " 173 + "regionLocation={}, openSeqNum={}", 174 regionInfo.getEncodedName(), state, lastHost, regionLocation, openSeqNum); 175 visitor.visitRegionState(result, regionInfo, state, regionLocation, lastHost, openSeqNum); 176 } 177 } 178 179 void updateRegionLocation(RegionStateNode regionStateNode) throws IOException { 180 long time = EnvironmentEdgeManager.currentTime(); 181 long openSeqNum = regionStateNode.getState() == State.OPEN 182 ? regionStateNode.getOpenSeqNum() 183 : HConstants.NO_SEQNUM; 184 RegionInfo regionInfo = regionStateNode.getRegionInfo(); 185 State state = regionStateNode.getState(); 186 ServerName regionLocation = regionStateNode.getRegionLocation(); 187 TransitRegionStateProcedure rit = regionStateNode.getProcedure(); 188 long pid = rit != null ? rit.getProcId() : Procedure.NO_PROC_ID; 189 final int replicaId = regionInfo.getReplicaId(); 190 final Put put = new Put(CatalogFamilyFormat.getMetaKeyForRegion(regionInfo), time); 191 MetaTableAccessor.addRegionInfo(put, regionInfo); 192 final StringBuilder info = 193 new StringBuilder("pid=").append(pid).append(" updating hbase:meta row=") 194 .append(regionInfo.getEncodedName()).append(", regionState=").append(state); 195 if (openSeqNum >= 0) { 196 Preconditions.checkArgument(state == State.OPEN && regionLocation != null, 197 "Open region should be on a server"); 198 MetaTableAccessor.addLocation(put, regionLocation, openSeqNum, replicaId); 199 // only update replication barrier for default replica 200 if ( 201 regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID 202 && hasGlobalReplicationScope(regionInfo.getTable()) 203 ) { 204 ReplicationBarrierFamilyFormat.addReplicationBarrier(put, openSeqNum); 205 info.append(", repBarrier=").append(openSeqNum); 206 } 207 info.append(", openSeqNum=").append(openSeqNum); 208 info.append(", regionLocation=").append(regionLocation); 209 } else if (regionLocation != null) { 210 // Ideally, if no regionLocation, write null to the hbase:meta but this will confuse clients 211 // currently; they want a server to hit. TODO: Make clients wait if no location. 212 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 213 .setFamily(HConstants.CATALOG_FAMILY) 214 .setQualifier(CatalogFamilyFormat.getServerNameColumn(replicaId)) 215 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put) 216 .setValue(Bytes.toBytes(regionLocation.getServerName())).build()); 217 info.append(", regionLocation=").append(regionLocation); 218 } 219 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 220 .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getStateColumn(replicaId)) 221 .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(state.name())) 222 .build()); 223 LOG.info(info.toString()); 224 updateRegionLocation(regionInfo, state, put); 225 if (regionInfo.isMetaRegion() && regionInfo.isFirst()) { 226 // mirror the meta location to zookeeper 227 mirrorMetaLocation(regionInfo, regionLocation, state); 228 } 229 } 230 231 private void mirrorMetaLocation(RegionInfo regionInfo, ServerName serverName, State state) 232 throws IOException { 233 try { 234 MetaTableLocator.setMetaLocation(master.getZooKeeper(), serverName, regionInfo.getReplicaId(), 235 state); 236 } catch (KeeperException e) { 237 throw new IOException(e); 238 } 239 } 240 241 private void removeMirrorMetaLocation(int oldReplicaCount, int newReplicaCount) 242 throws IOException { 243 try { 244 for (int i = newReplicaCount; i < oldReplicaCount; i++) { 245 MetaTableLocator.deleteMetaLocation(master.getZooKeeper(), i); 246 } 247 } catch (KeeperException e) { 248 throw new IOException(e); 249 } 250 } 251 252 private void updateRegionLocation(RegionInfo regionInfo, State state, Put put) 253 throws IOException { 254 try { 255 if (regionInfo.isMetaRegion()) { 256 masterRegion.update(r -> r.put(put)); 257 } else { 258 try (Table table = master.getConnection().getTable(TableName.META_TABLE_NAME)) { 259 table.put(put); 260 } 261 } 262 } catch (IOException e) { 263 // TODO: Revist!!!! Means that if a server is loaded, then we will abort our host! 264 // In tests we abort the Master! 265 String msg = String.format("FAILED persisting region=%s state=%s", 266 regionInfo.getShortNameToLog(), state); 267 LOG.error(msg, e); 268 master.abort(msg, e); 269 throw e; 270 } 271 } 272 273 private long getOpenSeqNumForParentRegion(RegionInfo region) throws IOException { 274 MasterFileSystem fs = master.getMasterFileSystem(); 275 long maxSeqId = WALSplitUtil.getMaxRegionSequenceId(master.getConfiguration(), region, 276 fs::getFileSystem, fs::getWALFileSystem); 277 return maxSeqId > 0 ? maxSeqId + 1 : HConstants.NO_SEQNUM; 278 } 279 280 /** 281 * Performs an atomic multi-mutate operation against the given table. Used by the likes of merge 282 * and split as these want to make atomic mutations across multiple rows. 283 */ 284 private void multiMutate(RegionInfo ri, List<Mutation> mutations) throws IOException { 285 debugLogMutations(mutations); 286 byte[] row = 287 Bytes.toBytes(RegionReplicaUtil.getRegionInfoForDefaultReplica(ri).getRegionNameAsString() 288 + HConstants.DELIMITER); 289 MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder(); 290 for (Mutation mutation : mutations) { 291 if (mutation instanceof Put) { 292 builder.addMutationRequest( 293 ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.PUT, mutation)); 294 } else if (mutation instanceof Delete) { 295 builder.addMutationRequest( 296 ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.DELETE, mutation)); 297 } else { 298 throw new DoNotRetryIOException( 299 "multi in MetaEditor doesn't support " + mutation.getClass().getName()); 300 } 301 } 302 MutateRowsRequest request = builder.build(); 303 AsyncTable<?> table = 304 master.getConnection().toAsyncConnection().getTable(TableName.META_TABLE_NAME); 305 CompletableFuture<MutateRowsResponse> future = table.<MultiRowMutationService, 306 MutateRowsResponse> coprocessorService(MultiRowMutationService::newStub, 307 (stub, controller, done) -> stub.mutateRows(controller, request, done), row); 308 FutureUtils.get(future); 309 } 310 311 private Table getMetaTable() throws IOException { 312 return master.getConnection().getTable(TableName.META_TABLE_NAME); 313 } 314 315 private Result getRegionCatalogResult(RegionInfo region) throws IOException { 316 Get get = 317 new Get(CatalogFamilyFormat.getMetaKeyForRegion(region)).addFamily(HConstants.CATALOG_FAMILY); 318 try (Table table = getMetaTable()) { 319 return table.get(get); 320 } 321 } 322 323 private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException { 324 return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow()) 325 .setFamily(HConstants.CATALOG_FAMILY) 326 .setQualifier(CatalogFamilyFormat.getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp()) 327 .setType(Type.Put).setValue(Bytes.toBytes(openSeqNum)).build()); 328 } 329 330 // ============================================================================================ 331 // Update Region Splitting State helpers 332 // ============================================================================================ 333 /** 334 * Splits the region into two in an atomic operation. Offlines the parent region with the 335 * information that it is split into two, and also adds the daughter regions. Does not add the 336 * location information to the daughter regions since they are not open yet. 337 */ 338 public void splitRegion(RegionInfo parent, RegionInfo splitA, RegionInfo splitB, 339 ServerName serverName, TableDescriptor htd) throws IOException { 340 long parentOpenSeqNum = HConstants.NO_SEQNUM; 341 if (htd.hasGlobalReplicationScope()) { 342 parentOpenSeqNum = getOpenSeqNumForParentRegion(parent); 343 } 344 long time = EnvironmentEdgeManager.currentTime(); 345 // Put for parent 346 Put putParent = MetaTableAccessor.makePutFromRegionInfo( 347 RegionInfoBuilder.newBuilder(parent).setOffline(true).setSplit(true).build(), time); 348 MetaTableAccessor.addDaughtersToPut(putParent, splitA, splitB); 349 350 // Puts for daughters 351 Put putA = MetaTableAccessor.makePutFromRegionInfo(splitA, time); 352 Put putB = MetaTableAccessor.makePutFromRegionInfo(splitB, time); 353 if (parentOpenSeqNum > 0) { 354 ReplicationBarrierFamilyFormat.addReplicationBarrier(putParent, parentOpenSeqNum); 355 ReplicationBarrierFamilyFormat.addReplicationParent(putA, Collections.singletonList(parent)); 356 ReplicationBarrierFamilyFormat.addReplicationParent(putB, Collections.singletonList(parent)); 357 } 358 // Set initial state to CLOSED 359 // NOTE: If initial state is not set to CLOSED then daughter regions get added with the 360 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 361 // master tries to assign these offline regions. This is followed by re-assignments of the 362 // daughter regions from resumed {@link SplitTableRegionProcedure} 363 MetaTableAccessor.addRegionStateToPut(putA, RegionState.State.CLOSED); 364 MetaTableAccessor.addRegionStateToPut(putB, RegionState.State.CLOSED); 365 366 // new regions, openSeqNum = 1 is fine. 367 addSequenceNum(putA, 1, splitA.getReplicaId()); 368 addSequenceNum(putB, 1, splitB.getReplicaId()); 369 370 // Add empty locations for region replicas of daughters so that number of replicas can be 371 // cached whenever the primary region is looked up from meta 372 int regionReplication = getRegionReplication(htd); 373 for (int i = 1; i < regionReplication; i++) { 374 MetaTableAccessor.addEmptyLocation(putA, i); 375 MetaTableAccessor.addEmptyLocation(putB, i); 376 } 377 378 multiMutate(parent, Arrays.asList(putParent, putA, putB)); 379 } 380 381 // ============================================================================================ 382 // Update Region Merging State helpers 383 // ============================================================================================ 384 public void mergeRegions(RegionInfo child, RegionInfo[] parents, ServerName serverName, 385 TableDescriptor htd) throws IOException { 386 boolean globalScope = htd.hasGlobalReplicationScope(); 387 long time = HConstants.LATEST_TIMESTAMP; 388 List<Mutation> mutations = new ArrayList<>(); 389 List<RegionInfo> replicationParents = new ArrayList<>(); 390 for (RegionInfo ri : parents) { 391 long seqNum = globalScope ? getOpenSeqNumForParentRegion(ri) : -1; 392 // Deletes for merging regions 393 mutations.add(MetaTableAccessor.makeDeleteFromRegionInfo(ri, time)); 394 if (seqNum > 0) { 395 mutations 396 .add(ReplicationBarrierFamilyFormat.makePutForReplicationBarrier(ri, seqNum, time)); 397 replicationParents.add(ri); 398 } 399 } 400 // Put for parent 401 Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(child, time); 402 putOfMerged = addMergeRegions(putOfMerged, Arrays.asList(parents)); 403 // Set initial state to CLOSED. 404 // NOTE: If initial state is not set to CLOSED then merged region gets added with the 405 // default OFFLINE state. If Master gets restarted after this step, start up sequence of 406 // master tries to assign this offline region. This is followed by re-assignments of the 407 // merged region from resumed {@link MergeTableRegionsProcedure} 408 MetaTableAccessor.addRegionStateToPut(putOfMerged, RegionState.State.CLOSED); 409 mutations.add(putOfMerged); 410 // The merged is a new region, openSeqNum = 1 is fine. ServerName may be null 411 // if crash after merge happened but before we got to here.. means in-memory 412 // locations of offlined merged, now-closed, regions is lost. Should be ok. We 413 // assign the merged region later. 414 if (serverName != null) { 415 MetaTableAccessor.addLocation(putOfMerged, serverName, 1, child.getReplicaId()); 416 } 417 418 // Add empty locations for region replicas of the merged region so that number of replicas 419 // can be cached whenever the primary region is looked up from meta 420 int regionReplication = getRegionReplication(htd); 421 for (int i = 1; i < regionReplication; i++) { 422 MetaTableAccessor.addEmptyLocation(putOfMerged, i); 423 } 424 // add parent reference for serial replication 425 if (!replicationParents.isEmpty()) { 426 ReplicationBarrierFamilyFormat.addReplicationParent(putOfMerged, replicationParents); 427 } 428 multiMutate(child, mutations); 429 } 430 431 /** 432 * Check whether the given {@code region} has any 'info:merge*' columns. 433 */ 434 public boolean hasMergeRegions(RegionInfo region) throws IOException { 435 return CatalogFamilyFormat.hasMergeRegions(getRegionCatalogResult(region).rawCells()); 436 } 437 438 /** 439 * @return Return all regioninfos listed in the 'info:merge*' columns of the given {@code region}. 440 */ 441 public List<RegionInfo> getMergeRegions(RegionInfo region) throws IOException { 442 return CatalogFamilyFormat.getMergeRegions(getRegionCatalogResult(region).rawCells()); 443 } 444 445 /** 446 * Deletes merge qualifiers for the specified merge region. 447 * @param connection connection we're using 448 * @param mergeRegion the merged region 449 */ 450 public void deleteMergeQualifiers(RegionInfo mergeRegion) throws IOException { 451 // NOTE: We are doing a new hbase:meta read here. 452 Cell[] cells = getRegionCatalogResult(mergeRegion).rawCells(); 453 if (cells == null || cells.length == 0) { 454 return; 455 } 456 Delete delete = new Delete(mergeRegion.getRegionName()); 457 List<byte[]> qualifiers = new ArrayList<>(); 458 for (Cell cell : cells) { 459 if (!CatalogFamilyFormat.isMergeQualifierPrefix(cell)) { 460 continue; 461 } 462 byte[] qualifier = CellUtil.cloneQualifier(cell); 463 qualifiers.add(qualifier); 464 delete.addColumns(HConstants.CATALOG_FAMILY, qualifier, HConstants.LATEST_TIMESTAMP); 465 } 466 467 // There will be race condition that a GCMultipleMergedRegionsProcedure is scheduled while 468 // the previous GCMultipleMergedRegionsProcedure is still going on, in this case, the second 469 // GCMultipleMergedRegionsProcedure could delete the merged region by accident! 470 if (qualifiers.isEmpty()) { 471 LOG.info("No merged qualifiers for region " + mergeRegion.getRegionNameAsString() 472 + " in meta table, they are cleaned up already, Skip."); 473 return; 474 } 475 try (Table table = master.getConnection().getTable(TableName.META_TABLE_NAME)) { 476 table.delete(delete); 477 } 478 LOG.info( 479 "Deleted merge references in " + mergeRegion.getRegionNameAsString() + ", deleted qualifiers " 480 + qualifiers.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", "))); 481 } 482 483 static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException { 484 int limit = 10000; // Arbitrary limit. No room in our formatted 'task0000' below for more. 485 int max = mergeRegions.size(); 486 if (max > limit) { 487 // Should never happen!!!!! But just in case. 488 throw new RuntimeException( 489 "Can't merge " + max + " regions in one go; " + limit + " is upper-limit."); 490 } 491 int counter = 0; 492 for (RegionInfo ri : mergeRegions) { 493 String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++); 494 put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow()) 495 .setFamily(HConstants.CATALOG_FAMILY).setQualifier(Bytes.toBytes(qualifier)) 496 .setTimestamp(put.getTimestamp()).setType(Type.Put).setValue(RegionInfo.toByteArray(ri)) 497 .build()); 498 } 499 return put; 500 } 501 502 // ============================================================================================ 503 // Delete Region State helpers 504 // ============================================================================================ 505 /** 506 * Deletes the specified region. 507 */ 508 public void deleteRegion(final RegionInfo regionInfo) throws IOException { 509 deleteRegions(Collections.singletonList(regionInfo)); 510 } 511 512 /** 513 * Deletes the specified regions. 514 */ 515 public void deleteRegions(final List<RegionInfo> regions) throws IOException { 516 deleteRegions(regions, EnvironmentEdgeManager.currentTime()); 517 } 518 519 private void deleteRegions(List<RegionInfo> regions, long ts) throws IOException { 520 List<Delete> deletes = new ArrayList<>(regions.size()); 521 for (RegionInfo hri : regions) { 522 Delete e = new Delete(hri.getRegionName()); 523 e.addFamily(HConstants.CATALOG_FAMILY, ts); 524 deletes.add(e); 525 } 526 try (Table table = getMetaTable()) { 527 debugLogMutations(deletes); 528 table.delete(deletes); 529 } 530 LOG.info("Deleted {} regions from META", regions.size()); 531 LOG.debug("Deleted regions: {}", regions); 532 } 533 534 /** 535 * Overwrites the specified regions from hbase:meta. Deletes old rows for the given regions and 536 * adds new ones. Regions added back have state CLOSED. 537 * @param connection connection we're using 538 * @param regionInfos list of regions to be added to META 539 */ 540 public void overwriteRegions(List<RegionInfo> regionInfos, int regionReplication) 541 throws IOException { 542 // use master time for delete marker and the Put 543 long now = EnvironmentEdgeManager.currentTime(); 544 deleteRegions(regionInfos, now); 545 // Why sleep? This is the easiest way to ensure that the previous deletes does not 546 // eclipse the following puts, that might happen in the same ts from the server. 547 // See HBASE-9906, and HBASE-9879. Once either HBASE-9879, HBASE-8770 is fixed, 548 // or HBASE-9905 is fixed and meta uses seqIds, we do not need the sleep. 549 // 550 // HBASE-13875 uses master timestamp for the mutations. The 20ms sleep is not needed 551 MetaTableAccessor.addRegionsToMeta(master.getConnection(), regionInfos, regionReplication, 552 now + 1); 553 LOG.info("Overwritten " + regionInfos.size() + " regions to Meta"); 554 LOG.debug("Overwritten regions: {} ", regionInfos); 555 } 556 557 private Scan getScanForUpdateRegionReplicas(TableName tableName) { 558 Scan scan; 559 if (TableName.isMetaTableName(tableName)) { 560 // Notice that, we do not use MetaCellComparator for master local region, so we can not use 561 // the same logic to set start key and end key for scanning meta table when locating entries 562 // in master local region. And since there is only one table in master local region(the record 563 // for meta table), so we do not need set start key and end key. 564 scan = new Scan(); 565 } else { 566 scan = MetaTableAccessor.getScanForTableName(master.getConfiguration(), tableName); 567 } 568 return scan.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER); 569 } 570 571 private List<Delete> deleteRegionReplicas(ResultScanner scanner, int oldReplicaCount, 572 int newReplicaCount, long now) throws IOException { 573 List<Delete> deletes = new ArrayList<>(); 574 for (;;) { 575 Result result = scanner.next(); 576 if (result == null) { 577 break; 578 } 579 RegionInfo primaryRegionInfo = CatalogFamilyFormat.getRegionInfo(result); 580 if (primaryRegionInfo == null || primaryRegionInfo.isSplit()) { 581 continue; 582 } 583 Delete delete = new Delete(result.getRow()); 584 for (int i = newReplicaCount; i < oldReplicaCount; i++) { 585 delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(i), now); 586 delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getSeqNumColumn(i), now); 587 delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getStartCodeColumn(i), 588 now); 589 delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerNameColumn(i), 590 now); 591 delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getRegionStateColumn(i), 592 now); 593 } 594 deletes.add(delete); 595 } 596 return deletes; 597 } 598 599 public void removeRegionReplicas(TableName tableName, int oldReplicaCount, int newReplicaCount) 600 throws IOException { 601 Scan scan = getScanForUpdateRegionReplicas(tableName); 602 long now = EnvironmentEdgeManager.currentTime(); 603 if (TableName.isMetaTableName(tableName)) { 604 List<Delete> deletes; 605 try (ResultScanner scanner = masterRegion.getScanner(scan)) { 606 deletes = deleteRegionReplicas(scanner, oldReplicaCount, newReplicaCount, now); 607 } 608 debugLogMutations(deletes); 609 masterRegion.update(r -> { 610 for (Delete d : deletes) { 611 r.delete(d); 612 } 613 }); 614 // also delete the mirrored location on zk 615 removeMirrorMetaLocation(oldReplicaCount, newReplicaCount); 616 } else { 617 try (Table metaTable = getMetaTable(); ResultScanner scanner = metaTable.getScanner(scan)) { 618 List<Delete> deletes = deleteRegionReplicas(scanner, oldReplicaCount, newReplicaCount, now); 619 debugLogMutations(deletes); 620 metaTable.delete(deletes); 621 } 622 } 623 } 624 625 // ========================================================================== 626 // Table Descriptors helpers 627 // ========================================================================== 628 private boolean hasGlobalReplicationScope(TableName tableName) throws IOException { 629 return hasGlobalReplicationScope(getDescriptor(tableName)); 630 } 631 632 private boolean hasGlobalReplicationScope(TableDescriptor htd) { 633 return htd != null ? htd.hasGlobalReplicationScope() : false; 634 } 635 636 private int getRegionReplication(TableDescriptor htd) { 637 return htd != null ? htd.getRegionReplication() : 1; 638 } 639 640 private TableDescriptor getDescriptor(TableName tableName) throws IOException { 641 return master.getTableDescriptors().get(tableName); 642 } 643 644 // ========================================================================== 645 // Region State 646 // ========================================================================== 647 648 /** 649 * Pull the region state from a catalog table {@link Result}. 650 * @return the region state, or null if unknown. 651 */ 652 public static State getRegionState(final Result r, RegionInfo regionInfo) { 653 Cell cell = 654 r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getStateColumn(regionInfo.getReplicaId())); 655 if (cell == null || cell.getValueLength() == 0) { 656 return null; 657 } 658 659 String state = 660 Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()); 661 try { 662 return State.valueOf(state); 663 } catch (IllegalArgumentException e) { 664 LOG.warn( 665 "BAD value {} in hbase:meta info:state column for region {} , " 666 + "Consider using HBCK2 setRegionState ENCODED_REGION_NAME STATE", 667 state, regionInfo.getEncodedName()); 668 return null; 669 } 670 } 671 672 public static byte[] getStateColumn(int replicaId) { 673 return replicaId == 0 674 ? HConstants.STATE_QUALIFIER 675 : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER 676 + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId)); 677 } 678 679 private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException { 680 if (!METALOG.isDebugEnabled()) { 681 return; 682 } 683 // Logging each mutation in separate line makes it easier to see diff between them visually 684 // because of common starting indentation. 685 for (Mutation mutation : mutations) { 686 debugLogMutation(mutation); 687 } 688 } 689 690 private static void debugLogMutation(Mutation p) throws IOException { 691 METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON()); 692 } 693}