001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.procedure; 019 020import com.google.errorprone.annotations.RestrictedApi; 021import java.io.IOException; 022import java.util.ArrayList; 023import java.util.HashMap; 024import java.util.Iterator; 025import java.util.List; 026import java.util.Map; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.FileSystem; 029import org.apache.hadoop.fs.Path; 030import org.apache.hadoop.hbase.DoNotRetryIOException; 031import org.apache.hadoop.hbase.HBaseIOException; 032import org.apache.hadoop.hbase.MetaTableAccessor; 033import org.apache.hadoop.hbase.TableName; 034import org.apache.hadoop.hbase.TableNotFoundException; 035import org.apache.hadoop.hbase.client.Connection; 036import org.apache.hadoop.hbase.client.RegionInfo; 037import org.apache.hadoop.hbase.client.RegionReplicaUtil; 038import org.apache.hadoop.hbase.client.TableDescriptor; 039import org.apache.hadoop.hbase.errorhandling.ForeignException; 040import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher; 041import org.apache.hadoop.hbase.favored.FavoredNodesManager; 042import org.apache.hadoop.hbase.master.MasterFileSystem; 043import org.apache.hadoop.hbase.master.MetricsSnapshot; 044import org.apache.hadoop.hbase.master.RegionState; 045import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 046import org.apache.hadoop.hbase.monitoring.MonitoredTask; 047import org.apache.hadoop.hbase.monitoring.TaskMonitor; 048import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer; 049import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; 050import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper; 051import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 052import org.apache.hadoop.hbase.snapshot.SnapshotManifest; 053import org.apache.hadoop.hbase.util.Pair; 054import org.apache.yetus.audience.InterfaceAudience; 055import org.slf4j.Logger; 056import org.slf4j.LoggerFactory; 057 058import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 059import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; 060import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RestoreParentToChildRegionsPair; 061import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RestoreSnapshotState; 062import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RestoreSnapshotStateData; 063import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 064 065@InterfaceAudience.Private 066public class RestoreSnapshotProcedure 067 extends AbstractStateMachineTableProcedure<RestoreSnapshotState> { 068 private static final Logger LOG = LoggerFactory.getLogger(RestoreSnapshotProcedure.class); 069 070 private TableDescriptor modifiedTableDescriptor; 071 private List<RegionInfo> regionsToRestore = null; 072 private List<RegionInfo> regionsToRemove = null; 073 private List<RegionInfo> regionsToAdd = null; 074 private Map<String, Pair<String, String>> parentsToChildrenPairMap = new HashMap<>(); 075 076 private SnapshotDescription snapshot; 077 private boolean restoreAcl; 078 079 // Monitor 080 private MonitoredTask monitorStatus = null; 081 082 /** 083 * Constructor (for failover) 084 */ 085 public RestoreSnapshotProcedure() { 086 } 087 088 public RestoreSnapshotProcedure(final MasterProcedureEnv env, 089 final TableDescriptor tableDescriptor, final SnapshotDescription snapshot) 090 throws HBaseIOException { 091 this(env, tableDescriptor, snapshot, false); 092 } 093 094 /** 095 * Constructor 096 * @param env MasterProcedureEnv 097 * @param tableDescriptor the table to operate on 098 * @param snapshot snapshot to restore from n 099 */ 100 public RestoreSnapshotProcedure(final MasterProcedureEnv env, 101 final TableDescriptor tableDescriptor, final SnapshotDescription snapshot, 102 final boolean restoreAcl) throws HBaseIOException { 103 super(env); 104 // This is the new schema we are going to write out as this modification. 105 this.modifiedTableDescriptor = tableDescriptor; 106 preflightChecks(env, null/* Table can be online when restore is called? */); 107 // Snapshot information 108 this.snapshot = snapshot; 109 this.restoreAcl = restoreAcl; 110 111 // Monitor 112 getMonitorStatus(); 113 } 114 115 /** 116 * Set up monitor status if it is not created. 117 */ 118 private MonitoredTask getMonitorStatus() { 119 if (monitorStatus == null) { 120 monitorStatus = TaskMonitor.get().createStatus( 121 "Restoring snapshot '" + snapshot.getName() + "' to table " + getTableName()); 122 } 123 return monitorStatus; 124 } 125 126 @Override 127 protected Flow executeFromState(final MasterProcedureEnv env, final RestoreSnapshotState state) 128 throws InterruptedException { 129 LOG.trace("{} execute state={}", this, state); 130 131 // Make sure that the monitor status is set up 132 getMonitorStatus(); 133 134 try { 135 switch (state) { 136 case RESTORE_SNAPSHOT_PRE_OPERATION: 137 // Verify if we can restore the table 138 prepareRestore(env); 139 setNextState(RestoreSnapshotState.RESTORE_SNAPSHOT_UPDATE_TABLE_DESCRIPTOR); 140 break; 141 case RESTORE_SNAPSHOT_UPDATE_TABLE_DESCRIPTOR: 142 updateTableDescriptor(env); 143 setNextState(RestoreSnapshotState.RESTORE_SNAPSHOT_WRITE_FS_LAYOUT); 144 break; 145 case RESTORE_SNAPSHOT_WRITE_FS_LAYOUT: 146 restoreSnapshot(env); 147 setNextState(RestoreSnapshotState.RESTORE_SNAPSHOT_UPDATE_META); 148 break; 149 case RESTORE_SNAPSHOT_UPDATE_META: 150 updateMETA(env); 151 setNextState(RestoreSnapshotState.RESTORE_SNAPSHOT_RESTORE_ACL); 152 break; 153 case RESTORE_SNAPSHOT_RESTORE_ACL: 154 restoreSnapshotAcl(env); 155 return Flow.NO_MORE_STATE; 156 default: 157 throw new UnsupportedOperationException("unhandled state=" + state); 158 } 159 } catch (IOException e) { 160 if (isRollbackSupported(state)) { 161 setFailure("master-restore-snapshot", e); 162 } else { 163 LOG.warn("Retriable error trying to restore snapshot=" + snapshot.getName() + " to table=" 164 + getTableName() + " (in state=" + state + ")", e); 165 } 166 } 167 return Flow.HAS_MORE_STATE; 168 } 169 170 @Override 171 protected void rollbackState(final MasterProcedureEnv env, final RestoreSnapshotState state) 172 throws IOException { 173 if (state == RestoreSnapshotState.RESTORE_SNAPSHOT_PRE_OPERATION) { 174 // nothing to rollback 175 return; 176 } 177 178 // The restore snapshot doesn't have a rollback. The execution will succeed, at some point. 179 throw new UnsupportedOperationException("unhandled state=" + state); 180 } 181 182 @Override 183 protected boolean isRollbackSupported(final RestoreSnapshotState state) { 184 switch (state) { 185 case RESTORE_SNAPSHOT_PRE_OPERATION: 186 return true; 187 default: 188 return false; 189 } 190 } 191 192 @Override 193 protected RestoreSnapshotState getState(final int stateId) { 194 return RestoreSnapshotState.valueOf(stateId); 195 } 196 197 @Override 198 protected int getStateId(final RestoreSnapshotState state) { 199 return state.getNumber(); 200 } 201 202 @Override 203 protected RestoreSnapshotState getInitialState() { 204 return RestoreSnapshotState.RESTORE_SNAPSHOT_PRE_OPERATION; 205 } 206 207 @Override 208 public TableName getTableName() { 209 return modifiedTableDescriptor.getTableName(); 210 } 211 212 @Override 213 public TableOperationType getTableOperationType() { 214 return TableOperationType.EDIT; // Restore is modifying a table 215 } 216 217 @Override 218 public boolean abort(final MasterProcedureEnv env) { 219 // TODO: We may be able to abort if the procedure is not started yet. 220 return false; 221 } 222 223 @Override 224 public void toStringClassDetails(StringBuilder sb) { 225 sb.append(getClass().getSimpleName()); 226 sb.append(" (table="); 227 sb.append(getTableName()); 228 sb.append(" snapshot="); 229 sb.append(snapshot); 230 sb.append(")"); 231 } 232 233 @Override 234 protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException { 235 super.serializeStateData(serializer); 236 237 RestoreSnapshotStateData.Builder restoreSnapshotMsg = RestoreSnapshotStateData.newBuilder() 238 .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser())).setSnapshot(this.snapshot) 239 .setModifiedTableSchema(ProtobufUtil.toTableSchema(modifiedTableDescriptor)); 240 241 if (regionsToRestore != null) { 242 for (RegionInfo hri : regionsToRestore) { 243 restoreSnapshotMsg.addRegionInfoForRestore(ProtobufUtil.toRegionInfo(hri)); 244 } 245 } 246 if (regionsToRemove != null) { 247 for (RegionInfo hri : regionsToRemove) { 248 restoreSnapshotMsg.addRegionInfoForRemove(ProtobufUtil.toRegionInfo(hri)); 249 } 250 } 251 if (regionsToAdd != null) { 252 for (RegionInfo hri : regionsToAdd) { 253 restoreSnapshotMsg.addRegionInfoForAdd(ProtobufUtil.toRegionInfo(hri)); 254 } 255 } 256 if (!parentsToChildrenPairMap.isEmpty()) { 257 final Iterator<Map.Entry<String, Pair<String, String>>> it = 258 parentsToChildrenPairMap.entrySet().iterator(); 259 while (it.hasNext()) { 260 final Map.Entry<String, Pair<String, String>> entry = it.next(); 261 262 RestoreParentToChildRegionsPair.Builder parentToChildrenPair = 263 RestoreParentToChildRegionsPair.newBuilder().setParentRegionName(entry.getKey()) 264 .setChild1RegionName(entry.getValue().getFirst()) 265 .setChild2RegionName(entry.getValue().getSecond()); 266 restoreSnapshotMsg.addParentToChildRegionsPairList(parentToChildrenPair); 267 } 268 } 269 restoreSnapshotMsg.setRestoreAcl(restoreAcl); 270 serializer.serialize(restoreSnapshotMsg.build()); 271 } 272 273 @Override 274 protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException { 275 super.deserializeStateData(serializer); 276 277 RestoreSnapshotStateData restoreSnapshotMsg = 278 serializer.deserialize(RestoreSnapshotStateData.class); 279 setUser(MasterProcedureUtil.toUserInfo(restoreSnapshotMsg.getUserInfo())); 280 snapshot = restoreSnapshotMsg.getSnapshot(); 281 modifiedTableDescriptor = 282 ProtobufUtil.toTableDescriptor(restoreSnapshotMsg.getModifiedTableSchema()); 283 284 if (restoreSnapshotMsg.getRegionInfoForRestoreCount() == 0) { 285 regionsToRestore = null; 286 } else { 287 regionsToRestore = new ArrayList<>(restoreSnapshotMsg.getRegionInfoForRestoreCount()); 288 for (HBaseProtos.RegionInfo hri : restoreSnapshotMsg.getRegionInfoForRestoreList()) { 289 regionsToRestore.add(ProtobufUtil.toRegionInfo(hri)); 290 } 291 } 292 if (restoreSnapshotMsg.getRegionInfoForRemoveCount() == 0) { 293 regionsToRemove = null; 294 } else { 295 regionsToRemove = new ArrayList<>(restoreSnapshotMsg.getRegionInfoForRemoveCount()); 296 for (HBaseProtos.RegionInfo hri : restoreSnapshotMsg.getRegionInfoForRemoveList()) { 297 regionsToRemove.add(ProtobufUtil.toRegionInfo(hri)); 298 } 299 } 300 if (restoreSnapshotMsg.getRegionInfoForAddCount() == 0) { 301 regionsToAdd = null; 302 } else { 303 regionsToAdd = new ArrayList<>(restoreSnapshotMsg.getRegionInfoForAddCount()); 304 for (HBaseProtos.RegionInfo hri : restoreSnapshotMsg.getRegionInfoForAddList()) { 305 regionsToAdd.add(ProtobufUtil.toRegionInfo(hri)); 306 } 307 } 308 if (restoreSnapshotMsg.getParentToChildRegionsPairListCount() > 0) { 309 for (RestoreParentToChildRegionsPair parentToChildrenPair : restoreSnapshotMsg 310 .getParentToChildRegionsPairListList()) { 311 parentsToChildrenPairMap.put(parentToChildrenPair.getParentRegionName(), new Pair<>( 312 parentToChildrenPair.getChild1RegionName(), parentToChildrenPair.getChild2RegionName())); 313 } 314 } 315 if (restoreSnapshotMsg.hasRestoreAcl()) { 316 restoreAcl = restoreSnapshotMsg.getRestoreAcl(); 317 } 318 } 319 320 /** 321 * Action before any real action of restoring from snapshot. 322 * @param env MasterProcedureEnv n 323 */ 324 private void prepareRestore(final MasterProcedureEnv env) throws IOException { 325 final TableName tableName = getTableName(); 326 // Checks whether the table exists 327 if (!env.getMasterServices().getTableDescriptors().exists(tableName)) { 328 throw new TableNotFoundException(tableName); 329 } 330 331 // Check whether table is disabled. 332 env.getMasterServices().checkTableModifiable(tableName); 333 334 // Check that we have at least 1 CF 335 if (modifiedTableDescriptor.getColumnFamilyCount() == 0) { 336 throw new DoNotRetryIOException( 337 "Table " + getTableName().toString() + " should have at least one column family."); 338 } 339 340 if (!getTableName().isSystemTable()) { 341 // Table already exist. Check and update the region quota for this table namespace. 342 final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); 343 SnapshotManifest manifest = 344 SnapshotManifest.open(env.getMasterConfiguration(), mfs.getFileSystem(), 345 SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, mfs.getRootDir()), snapshot); 346 int snapshotRegionCount = manifest.getRegionManifestsMap().size(); 347 int tableRegionCount = 348 ProcedureSyncWait.getMasterQuotaManager(env).getRegionCountOfTable(tableName); 349 350 if (snapshotRegionCount > 0 && tableRegionCount != snapshotRegionCount) { 351 ProcedureSyncWait.getMasterQuotaManager(env).checkAndUpdateNamespaceRegionQuota(tableName, 352 snapshotRegionCount); 353 } 354 } 355 } 356 357 /** 358 * Update descriptor 359 * @param env MasterProcedureEnv n 360 **/ 361 private void updateTableDescriptor(final MasterProcedureEnv env) throws IOException { 362 env.getMasterServices().getTableDescriptors().update(modifiedTableDescriptor); 363 } 364 365 /** 366 * Execute the on-disk Restore 367 * @param env MasterProcedureEnv n 368 **/ 369 private void restoreSnapshot(final MasterProcedureEnv env) throws IOException { 370 MasterFileSystem fileSystemManager = env.getMasterServices().getMasterFileSystem(); 371 FileSystem fs = fileSystemManager.getFileSystem(); 372 Path rootDir = fileSystemManager.getRootDir(); 373 final ForeignExceptionDispatcher monitorException = new ForeignExceptionDispatcher(); 374 final Configuration conf = new Configuration(env.getMasterConfiguration()); 375 376 LOG.info("Starting restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)); 377 try { 378 Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir); 379 SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshot); 380 RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper(conf, fs, manifest, 381 modifiedTableDescriptor, rootDir, monitorException, getMonitorStatus()); 382 383 RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions(); 384 regionsToRestore = metaChanges.getRegionsToRestore(); 385 regionsToRemove = metaChanges.getRegionsToRemove(); 386 regionsToAdd = metaChanges.getRegionsToAdd(); 387 parentsToChildrenPairMap = metaChanges.getParentToChildrenPairMap(); 388 } catch (IOException e) { 389 String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) 390 + " failed in on-disk restore. Try re-running the restore command."; 391 LOG.error(msg, e); 392 monitorException 393 .receive(new ForeignException(env.getMasterServices().getServerName().toString(), e)); 394 throw new IOException(msg, e); 395 } 396 } 397 398 /** 399 * Apply changes to hbase:meta 400 * @param env MasterProcedureEnv n 401 **/ 402 private void updateMETA(final MasterProcedureEnv env) throws IOException { 403 try { 404 Connection conn = env.getMasterServices().getConnection(); 405 int regionReplication = modifiedTableDescriptor.getRegionReplication(); 406 407 // 1. Prepare to restore 408 getMonitorStatus().setStatus("Preparing to restore each region"); 409 410 // 2. Applies changes to hbase:meta and in-memory states 411 // (2.1). Removes the current set of regions from META and in-memory states 412 // 413 // By removing also the regions to restore (the ones present both in the snapshot 414 // and in the current state) we ensure that no extra fields are present in META 415 // e.g. with a simple add addRegionToMeta() the splitA and splitB attributes 416 // not overwritten/removed, so you end up with old informations 417 // that are not correct after the restore. 418 if (regionsToRemove != null) { 419 MetaTableAccessor.deleteRegionInfos(conn, regionsToRemove); 420 deleteRegionsFromInMemoryStates(regionsToRemove, env, regionReplication); 421 } 422 423 // (2.2). Add the new set of regions to META and in-memory states 424 // 425 // At this point the old regions are no longer present in META. 426 // and the set of regions present in the snapshot will be written to META. 427 // All the information in hbase:meta are coming from the .regioninfo of each region present 428 // in the snapshot folder. 429 if (regionsToAdd != null) { 430 MetaTableAccessor.addRegionsToMeta(conn, regionsToAdd, regionReplication); 431 addRegionsToInMemoryStates(regionsToAdd, env, regionReplication); 432 } 433 434 if (regionsToRestore != null) { 435 MetaTableAccessor.overwriteRegions(conn, regionsToRestore, regionReplication); 436 437 deleteRegionsFromInMemoryStates(regionsToRestore, env, regionReplication); 438 addRegionsToInMemoryStates(regionsToRestore, env, regionReplication); 439 } 440 441 RestoreSnapshotHelper.RestoreMetaChanges metaChanges = 442 new RestoreSnapshotHelper.RestoreMetaChanges(modifiedTableDescriptor, 443 parentsToChildrenPairMap); 444 metaChanges.updateMetaParentRegions(conn, regionsToAdd); 445 446 // At this point the restore is complete. 447 LOG.info("Restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) 448 + " on table=" + getTableName() + " completed!"); 449 } catch (IOException e) { 450 final ForeignExceptionDispatcher monitorException = new ForeignExceptionDispatcher(); 451 String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) 452 + " failed in meta update. Try re-running the restore command."; 453 LOG.error(msg, e); 454 monitorException 455 .receive(new ForeignException(env.getMasterServices().getServerName().toString(), e)); 456 throw new IOException(msg, e); 457 } 458 459 monitorStatus.markComplete("Restore snapshot '" + snapshot.getName() + "'!"); 460 MetricsSnapshot metricsSnapshot = new MetricsSnapshot(); 461 metricsSnapshot 462 .addSnapshotRestore(monitorStatus.getCompletionTimestamp() - monitorStatus.getStartTime()); 463 } 464 465 /** 466 * Delete regions from in-memory states 467 * @param regionInfos regions to delete 468 * @param env MasterProcedureEnv 469 * @param regionReplication the number of region replications 470 */ 471 private void deleteRegionsFromInMemoryStates(List<RegionInfo> regionInfos, MasterProcedureEnv env, 472 int regionReplication) { 473 FavoredNodesManager fnm = env.getMasterServices().getFavoredNodesManager(); 474 475 env.getAssignmentManager().getRegionStates().deleteRegions(regionInfos); 476 env.getMasterServices().getServerManager().removeRegions(regionInfos); 477 if (fnm != null) { 478 fnm.deleteFavoredNodesForRegions(regionInfos); 479 } 480 481 // For region replicas 482 if (regionReplication > 1) { 483 for (RegionInfo regionInfo : regionInfos) { 484 for (int i = 1; i < regionReplication; i++) { 485 RegionInfo regionInfoForReplica = 486 RegionReplicaUtil.getRegionInfoForReplica(regionInfo, i); 487 env.getAssignmentManager().getRegionStates().deleteRegion(regionInfoForReplica); 488 env.getMasterServices().getServerManager().removeRegion(regionInfoForReplica); 489 if (fnm != null) { 490 fnm.deleteFavoredNodesForRegion(regionInfoForReplica); 491 } 492 } 493 } 494 } 495 } 496 497 /** 498 * Add regions to in-memory states 499 * @param regionInfos regions to add 500 * @param env MasterProcedureEnv 501 * @param regionReplication the number of region replications 502 */ 503 private void addRegionsToInMemoryStates(List<RegionInfo> regionInfos, MasterProcedureEnv env, 504 int regionReplication) { 505 AssignmentManager am = env.getAssignmentManager(); 506 for (RegionInfo regionInfo : regionInfos) { 507 if (regionInfo.isSplit()) { 508 am.getRegionStates().updateRegionState(regionInfo, RegionState.State.SPLIT); 509 } else { 510 am.getRegionStates().updateRegionState(regionInfo, RegionState.State.CLOSED); 511 512 // For region replicas 513 for (int i = 1; i < regionReplication; i++) { 514 RegionInfo regionInfoForReplica = 515 RegionReplicaUtil.getRegionInfoForReplica(regionInfo, i); 516 am.getRegionStates().updateRegionState(regionInfoForReplica, RegionState.State.CLOSED); 517 } 518 } 519 } 520 } 521 522 private void restoreSnapshotAcl(final MasterProcedureEnv env) throws IOException { 523 if ( 524 restoreAcl && snapshot.hasUsersAndPermissions() && snapshot.getUsersAndPermissions() != null 525 && SnapshotDescriptionUtils.isSecurityAvailable(env.getMasterServices().getConfiguration()) 526 ) { 527 // restore acl of snapshot to table. 528 RestoreSnapshotHelper.restoreSnapshotAcl(snapshot, TableName.valueOf(snapshot.getTable()), 529 env.getMasterServices().getConfiguration()); 530 } 531 } 532 533 /** 534 * Exposed for Testing: HBASE-26462 535 */ 536 @RestrictedApi(explanation = "Should only be called in tests", link = "", 537 allowedOnPath = ".*/src/test/.*") 538 public boolean getRestoreAcl() { 539 return restoreAcl; 540 } 541}