/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.master.procedure;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.errorhandling.ForeignException;
import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
import org.apache.hadoop.hbase.favored.FavoredNodesManager;
import org.apache.hadoop.hbase.master.MasterFileSystem;
import org.apache.hadoop.hbase.master.MetricsSnapshot;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
import org.apache.hadoop.hbase.monitoring.MonitoredTask;
import org.apache.hadoop.hbase.monitoring.TaskMonitor;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.RestoreSnapshotState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;

/**
 * State-machine procedure that restores an existing table from a snapshot.
 * <p>
 * The states are walked in this order (see {@link #executeFromState}):
 * pre-operation checks, table descriptor update, on-disk (filesystem) restore,
 * hbase:meta / in-memory region state update, and finally the optional ACL restore.
 * Only the pre-operation state can be rolled back; an {@link IOException} in any later
 * state is logged and the state is executed again.
 */
@InterfaceAudience.Private
public class RestoreSnapshotProcedure
    extends AbstractStateMachineTableProcedure<RestoreSnapshotState> {
  private static final Logger LOG = LoggerFactory.getLogger(RestoreSnapshotProcedure.class);

  /** Descriptor that is written out for the table as part of the restore. */
  private TableDescriptor modifiedTableDescriptor;

  // Region sets computed by the on-disk restore step and consumed by the meta update step.
  // They are persisted with the procedure state; null means "nothing to do" for that set.
  private List<RegionInfo> regionsToRestore = null;
  private List<RegionInfo> regionsToRemove = null;
  private List<RegionInfo> regionsToAdd = null;
  /** Parent region name mapped to its (child1, child2) region names. */
  private Map<String, Pair<String, String>> parentsToChildrenPairMap = new HashMap<>();

  private SnapshotDescription snapshot;

  // NOTE(review): restoreAcl is neither written by serializeStateData() nor read by
  // deserializeStateData(). A procedure re-created through the no-arg constructor therefore
  // sees restoreAcl == false and restoreSnapshotAcl() does nothing. Persisting the flag would
  // need a field on RestoreSnapshotStateData - confirm whether the proto offers one.
  private boolean restoreAcl;

  // Monitor
  private MonitoredTask monitorStatus = null;

  /**
   * Constructor (for failover)
   */
  public RestoreSnapshotProcedure() {
  }

  /**
   * Constructor that does not restore the snapshot ACLs.
   * @param env MasterProcedureEnv
   * @param tableDescriptor the table to operate on
   * @param snapshot snapshot to restore from
   * @throws HBaseIOException propagated from the four-argument constructor
   */
  public RestoreSnapshotProcedure(final MasterProcedureEnv env,
      final TableDescriptor tableDescriptor, final SnapshotDescription snapshot)
      throws HBaseIOException {
    this(env, tableDescriptor, snapshot, false);
  }

  /**
   * Constructor
   * @param env MasterProcedureEnv
   * @param tableDescriptor the table to operate on
   * @param snapshot snapshot to restore from
   * @param restoreAcl whether the ACLs stored in the snapshot should be applied to the table
   *          in the final state
   * @throws HBaseIOException declared by this constructor; the only call in the body that is
   *           not a plain assignment or the monitor setup is {@code preflightChecks}
   */
  public RestoreSnapshotProcedure(
      final MasterProcedureEnv env,
      final TableDescriptor tableDescriptor,
      final SnapshotDescription snapshot,
      final boolean restoreAcl)
      throws HBaseIOException {
    super(env);
    // This is the new schema we are going to write out as this modification.
    this.modifiedTableDescriptor = tableDescriptor;
    preflightChecks(env, null/*Table can be online when restore is called?*/);
    // Snapshot information
    this.snapshot = snapshot;
    this.restoreAcl = restoreAcl;

    // Monitor. Must come after the snapshot is assigned: the status text uses its name.
    getMonitorStatus();
  }

  /**
   * Set up monitor status if it is not created.
   * @return the (possibly newly created) monitored task for this restore
   */
  private MonitoredTask getMonitorStatus() {
    if (monitorStatus == null) {
      monitorStatus = TaskMonitor.get().createStatus("Restoring snapshot '" + snapshot.getName()
        + "' to table " + getTableName());
    }
    return monitorStatus;
  }

  /**
   * Runs the work for one state and selects the next one.
   * <p>
   * An {@link IOException} fails the procedure only while rollback is still supported for the
   * current state; otherwise it is logged and {@link Flow#HAS_MORE_STATE} is returned without
   * advancing, so the same state runs again.
   */
  @Override
  protected Flow executeFromState(final MasterProcedureEnv env, final RestoreSnapshotState state)
      throws InterruptedException {
    LOG.trace("{} execute state={}", this, state);

    // Make sure that the monitor status is set up
    getMonitorStatus();

    try {
      switch (state) {
        case RESTORE_SNAPSHOT_PRE_OPERATION:
          // Verify if we can restore the table
          prepareRestore(env);
          setNextState(RestoreSnapshotState.RESTORE_SNAPSHOT_UPDATE_TABLE_DESCRIPTOR);
          break;
        case RESTORE_SNAPSHOT_UPDATE_TABLE_DESCRIPTOR:
          updateTableDescriptor(env);
          setNextState(RestoreSnapshotState.RESTORE_SNAPSHOT_WRITE_FS_LAYOUT);
          break;
        case RESTORE_SNAPSHOT_WRITE_FS_LAYOUT:
          restoreSnapshot(env);
          setNextState(RestoreSnapshotState.RESTORE_SNAPSHOT_UPDATE_META);
          break;
        case RESTORE_SNAPSHOT_UPDATE_META:
          updateMETA(env);
          setNextState(RestoreSnapshotState.RESTORE_SNAPSHOT_RESTORE_ACL);
          break;
        case RESTORE_SNAPSHOT_RESTORE_ACL:
          restoreSnapshotAcl(env);
          return Flow.NO_MORE_STATE;
        default:
          throw new UnsupportedOperationException("unhandled state=" + state);
      }
    } catch (IOException e) {
      if (isRollbackSupported(state)) {
        setFailure("master-restore-snapshot", e);
      } else {
        // The throwable is passed as the trailing argument so SLF4J logs its stack trace.
        LOG.warn("Retriable error trying to restore snapshot={} to table={} (in state={})",
          snapshot.getName(), getTableName(), state, e);
      }
    }
    return Flow.HAS_MORE_STATE;
  }

  /**
   * Rolls back a single state. Only the pre-operation state is accepted (and needs no work);
   * any other state raises {@link UnsupportedOperationException}.
   */
  @Override
  protected void rollbackState(final MasterProcedureEnv env, final RestoreSnapshotState state)
      throws IOException {
    if (state == RestoreSnapshotState.RESTORE_SNAPSHOT_PRE_OPERATION) {
      // nothing to rollback
      return;
    }

    // The restore snapshot doesn't have a rollback. The execution will succeed, at some point.
    throw new UnsupportedOperationException("unhandled state=" + state);
  }

  /**
   * @return true only for the pre-operation state
   */
  @Override
  protected boolean isRollbackSupported(final RestoreSnapshotState state) {
    switch (state) {
      case RESTORE_SNAPSHOT_PRE_OPERATION:
        return true;
      default:
        return false;
    }
  }

  @Override
  protected RestoreSnapshotState getState(final int stateId) {
    return RestoreSnapshotState.valueOf(stateId);
  }

  @Override
  protected int getStateId(final RestoreSnapshotState state) {
    return state.getNumber();
  }

  @Override
  protected RestoreSnapshotState getInitialState() {
    return RestoreSnapshotState.RESTORE_SNAPSHOT_PRE_OPERATION;
  }

  /**
   * @return the table name taken from the descriptor this procedure writes out
   */
  @Override
  public TableName getTableName() {
    return modifiedTableDescriptor.getTableName();
  }

  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.EDIT; // Restore is modifying a table
  }

  /**
   * Abort is not supported; this always returns false.
   */
  @Override
  public boolean abort(final MasterProcedureEnv env) {
    // TODO: We may be able to abort if the procedure is not started yet.
    return false;
  }

  @Override
  public void toStringClassDetails(StringBuilder sb) {
    sb.append(getClass().getSimpleName());
    sb.append(" (table=");
    sb.append(getTableName());
    sb.append(" snapshot=");
    sb.append(snapshot);
    sb.append(")");
  }

  /**
   * Persists the user, snapshot, table schema, the three region lists (when non-null) and the
   * parent-to-children map (when non-empty). {@code restoreAcl} is not written here.
   */
  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.serializeStateData(serializer);

    MasterProcedureProtos.RestoreSnapshotStateData.Builder restoreSnapshotMsg =
      MasterProcedureProtos.RestoreSnapshotStateData.newBuilder()
        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
        .setSnapshot(this.snapshot)
        .setModifiedTableSchema(ProtobufUtil.toTableSchema(modifiedTableDescriptor));

    if (regionsToRestore != null) {
      for (RegionInfo hri: regionsToRestore) {
        restoreSnapshotMsg.addRegionInfoForRestore(ProtobufUtil.toRegionInfo(hri));
      }
    }
    if (regionsToRemove != null) {
      for (RegionInfo hri: regionsToRemove) {
        restoreSnapshotMsg.addRegionInfoForRemove(ProtobufUtil.toRegionInfo(hri));
      }
    }
    if (regionsToAdd != null) {
      for (RegionInfo hri: regionsToAdd) {
        restoreSnapshotMsg.addRegionInfoForAdd(ProtobufUtil.toRegionInfo(hri));
      }
    }
    if (!parentsToChildrenPairMap.isEmpty()) {
      final Iterator<Map.Entry<String, Pair<String, String>>> it =
        parentsToChildrenPairMap.entrySet().iterator();
      while (it.hasNext()) {
        final Map.Entry<String, Pair<String, String>> entry = it.next();

        MasterProcedureProtos.RestoreParentToChildRegionsPair.Builder parentToChildrenPair =
          MasterProcedureProtos.RestoreParentToChildRegionsPair.newBuilder()
            .setParentRegionName(entry.getKey())
            .setChild1RegionName(entry.getValue().getFirst())
            .setChild2RegionName(entry.getValue().getSecond());
        restoreSnapshotMsg.addParentToChildRegionsPairList(parentToChildrenPair);
      }
    }
    serializer.serialize(restoreSnapshotMsg.build());
  }

  /**
   * Reloads the fields written by {@link #serializeStateData}. An empty repeated field is
   * mapped back to a null region list; parent-to-children entries are added to the existing
   * map. {@code restoreAcl} is not read here and keeps its current value.
   */
  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer)
      throws IOException {
    super.deserializeStateData(serializer);

    MasterProcedureProtos.RestoreSnapshotStateData restoreSnapshotMsg =
      serializer.deserialize(MasterProcedureProtos.RestoreSnapshotStateData.class);
    setUser(MasterProcedureUtil.toUserInfo(restoreSnapshotMsg.getUserInfo()));
    snapshot = restoreSnapshotMsg.getSnapshot();
    modifiedTableDescriptor =
      ProtobufUtil.toTableDescriptor(restoreSnapshotMsg.getModifiedTableSchema());

    if (restoreSnapshotMsg.getRegionInfoForRestoreCount() == 0) {
      regionsToRestore = null;
    } else {
      regionsToRestore = new ArrayList<>(restoreSnapshotMsg.getRegionInfoForRestoreCount());
      for (HBaseProtos.RegionInfo hri: restoreSnapshotMsg.getRegionInfoForRestoreList()) {
        regionsToRestore.add(ProtobufUtil.toRegionInfo(hri));
      }
    }
    if (restoreSnapshotMsg.getRegionInfoForRemoveCount() == 0) {
      regionsToRemove = null;
    } else {
      regionsToRemove = new ArrayList<>(restoreSnapshotMsg.getRegionInfoForRemoveCount());
      for (HBaseProtos.RegionInfo hri: restoreSnapshotMsg.getRegionInfoForRemoveList()) {
        regionsToRemove.add(ProtobufUtil.toRegionInfo(hri));
      }
    }
    if (restoreSnapshotMsg.getRegionInfoForAddCount() == 0) {
      regionsToAdd = null;
    } else {
      regionsToAdd = new ArrayList<>(restoreSnapshotMsg.getRegionInfoForAddCount());
      for (HBaseProtos.RegionInfo hri: restoreSnapshotMsg.getRegionInfoForAddList()) {
        regionsToAdd.add(ProtobufUtil.toRegionInfo(hri));
      }
    }
    if (restoreSnapshotMsg.getParentToChildRegionsPairListCount() > 0) {
      for (MasterProcedureProtos.RestoreParentToChildRegionsPair parentToChildrenPair:
        restoreSnapshotMsg.getParentToChildRegionsPairListList()) {
        parentsToChildrenPairMap.put(
          parentToChildrenPair.getParentRegionName(),
          new Pair<>(
            parentToChildrenPair.getChild1RegionName(),
            parentToChildrenPair.getChild2RegionName()));
      }
    }
  }

  /**
   * Action before any real action of restoring from snapshot.
   * <p>
   * Verifies that the table exists in meta, that the master considers it modifiable, and that
   * the descriptor has at least one column family. For non-system tables it additionally
   * compares the snapshot's region count with the table's and, when they differ, asks the
   * quota manager to check and update the namespace region quota.
   * @param env MasterProcedureEnv
   * @throws IOException if the table is missing ({@link TableNotFoundException}), has no
   *           column family ({@link DoNotRetryIOException}), or one of the checks fails
   */
  private void prepareRestore(final MasterProcedureEnv env) throws IOException {
    final TableName tableName = getTableName();
    // Checks whether the table exists
    if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
      throw new TableNotFoundException(tableName);
    }

    // Check whether table is disabled.
    env.getMasterServices().checkTableModifiable(tableName);

    // Check that we have at least 1 CF
    if (modifiedTableDescriptor.getColumnFamilyCount() == 0) {
      throw new DoNotRetryIOException("Table " + getTableName().toString() +
        " should have at least one column family.");
    }

    if (!getTableName().isSystemTable()) {
      // Table already exist. Check and update the region quota for this table namespace.
      final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
      SnapshotManifest manifest = SnapshotManifest.open(
        env.getMasterConfiguration(),
        mfs.getFileSystem(),
        SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, mfs.getRootDir()),
        snapshot);
      int snapshotRegionCount = manifest.getRegionManifestsMap().size();
      int tableRegionCount =
        ProcedureSyncWait.getMasterQuotaManager(env).getRegionCountOfTable(tableName);

      if (snapshotRegionCount > 0 && tableRegionCount != snapshotRegionCount) {
        ProcedureSyncWait.getMasterQuotaManager(env).checkAndUpdateNamespaceRegionQuota(
          tableName, snapshotRegionCount);
      }
    }
  }

  /**
   * Update descriptor
   * @param env MasterProcedureEnv
   * @throws IOException if the descriptor cannot be added to the master's table descriptors
   **/
  private void updateTableDescriptor(final MasterProcedureEnv env) throws IOException {
    env.getMasterServices().getTableDescriptors().add(modifiedTableDescriptor);
  }

  /**
   * Execute the on-disk Restore
   * <p>
   * Opens the completed snapshot's manifest, runs {@link RestoreSnapshotHelper} against it and
   * records the resulting region sets and parent-to-children map on this procedure for the
   * meta update step.
   * @param env MasterProcedureEnv
   * @throws IOException wrapping the original failure, after it has been logged and handed to
   *           the local exception dispatcher
   **/
  private void restoreSnapshot(final MasterProcedureEnv env) throws IOException {
    MasterFileSystem fileSystemManager = env.getMasterServices().getMasterFileSystem();
    FileSystem fs = fileSystemManager.getFileSystem();
    Path rootDir = fileSystemManager.getRootDir();
    final ForeignExceptionDispatcher monitorException = new ForeignExceptionDispatcher();

    LOG.info("Starting restore snapshot={}", ClientSnapshotDescriptionUtils.toString(snapshot));
    try {
      Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
      SnapshotManifest manifest = SnapshotManifest.open(
        env.getMasterServices().getConfiguration(), fs, snapshotDir, snapshot);
      RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper(
        env.getMasterServices().getConfiguration(),
        fs,
        manifest,
        modifiedTableDescriptor,
        rootDir,
        monitorException,
        getMonitorStatus());

      RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions();
      regionsToRestore = metaChanges.getRegionsToRestore();
      regionsToRemove = metaChanges.getRegionsToRemove();
      regionsToAdd = metaChanges.getRegionsToAdd();
      parentsToChildrenPairMap = metaChanges.getParentToChildrenPairMap();
    } catch (IOException e) {
      String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)
        + " failed in on-disk restore. Try re-running the restore command.";
      LOG.error(msg, e);
      monitorException.receive(
        new ForeignException(env.getMasterServices().getServerName().toString(), e));
      throw new IOException(msg, e);
    }
  }

  /**
   * Apply changes to hbase:meta
   * <p>
   * Applies the region sets computed by the on-disk restore to hbase:meta and to the master's
   * in-memory region states, then marks the monitored task complete and records the elapsed
   * time in the snapshot metrics.
   * @param env MasterProcedureEnv
   * @throws IOException wrapping the original failure, after it has been logged
   **/
  private void updateMETA(final MasterProcedureEnv env) throws IOException {
    try {
      Connection conn = env.getMasterServices().getConnection();
      int regionReplication = modifiedTableDescriptor.getRegionReplication();

      // 1. Prepare to restore
      getMonitorStatus().setStatus("Preparing to restore each region");

      // 2. Applies changes to hbase:meta and in-memory states
      // (2.1). Removes the current set of regions from META and in-memory states
      //
      // By removing also the regions to restore (the ones present both in the snapshot
      // and in the current state) we ensure that no extra fields are present in META
      // e.g. with a simple add addRegionToMeta() the splitA and splitB attributes
      // not overwritten/removed, so you end up with old informations
      // that are not correct after the restore.
      if (regionsToRemove != null) {
        MetaTableAccessor.deleteRegionInfos(conn, regionsToRemove);
        deleteRegionsFromInMemoryStates(regionsToRemove, env, regionReplication);
      }

      // (2.2). Add the new set of regions to META and in-memory states
      //
      // At this point the old regions are no longer present in META.
      // and the set of regions present in the snapshot will be written to META.
      // All the information in hbase:meta are coming from the .regioninfo of each region present
      // in the snapshot folder.
      if (regionsToAdd != null) {
        MetaTableAccessor.addRegionsToMeta(conn, regionsToAdd, regionReplication);
        addRegionsToInMemoryStates(regionsToAdd, env, regionReplication);
      }

      if (regionsToRestore != null) {
        MetaTableAccessor.overwriteRegions(conn, regionsToRestore, regionReplication);

        deleteRegionsFromInMemoryStates(regionsToRestore, env, regionReplication);
        addRegionsToInMemoryStates(regionsToRestore, env, regionReplication);
      }

      RestoreSnapshotHelper.RestoreMetaChanges metaChanges =
        new RestoreSnapshotHelper.RestoreMetaChanges(
          modifiedTableDescriptor, parentsToChildrenPairMap);
      metaChanges.updateMetaParentRegions(conn, regionsToAdd);

      // At this point the restore is complete.
      LOG.info("Restore snapshot={} on table={} completed!",
        ClientSnapshotDescriptionUtils.toString(snapshot), getTableName());
    } catch (IOException e) {
      final ForeignExceptionDispatcher monitorException = new ForeignExceptionDispatcher();
      String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)
        + " failed in meta update. Try re-running the restore command.";
      LOG.error(msg, e);
      monitorException.receive(
        new ForeignException(env.getMasterServices().getServerName().toString(), e));
      throw new IOException(msg, e);
    }

    // Go through the lazy accessor, like the setStatus() call above, instead of reading the
    // field directly.
    final MonitoredTask status = getMonitorStatus();
    status.markComplete("Restore snapshot '"+ snapshot.getName() +"'!");
    MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
    metricsSnapshot.addSnapshotRestore(
      status.getCompletionTimestamp() - status.getStartTime());
  }

  /**
   * Delete regions from in-memory states
   * <p>
   * Removes the given regions from the assignment manager's region states, from the server
   * manager and, when a favored nodes manager is present, from it as well. When
   * {@code regionReplication > 1} the same is done for replicas 1..regionReplication-1 of
   * every region.
   * @param regionInfos regions to delete
   * @param env MasterProcedureEnv
   * @param regionReplication the number of region replications
   */
  private void deleteRegionsFromInMemoryStates(List<RegionInfo> regionInfos,
      MasterProcedureEnv env, int regionReplication) {
    FavoredNodesManager fnm = env.getMasterServices().getFavoredNodesManager();

    env.getAssignmentManager().getRegionStates().deleteRegions(regionInfos);
    env.getMasterServices().getServerManager().removeRegions(regionInfos);
    if (fnm != null) {
      fnm.deleteFavoredNodesForRegions(regionInfos);
    }

    // For region replicas
    if (regionReplication > 1) {
      for (RegionInfo regionInfo : regionInfos) {
        for (int i = 1; i < regionReplication; i++) {
          RegionInfo regionInfoForReplica =
            RegionReplicaUtil.getRegionInfoForReplica(regionInfo, i);
          env.getAssignmentManager().getRegionStates().deleteRegion(regionInfoForReplica);
          env.getMasterServices().getServerManager().removeRegion(regionInfoForReplica);
          if (fnm != null) {
            fnm.deleteFavoredNodesForRegion(regionInfoForReplica);
          }
        }
      }
    }
  }

  /**
   * Add regions to in-memory states
   * <p>
   * Split regions are recorded as SPLIT and get no replica entries; every other region and its
   * replicas 1..regionReplication-1 are recorded as CLOSED.
   * @param regionInfos regions to add
   * @param env MasterProcedureEnv
   * @param regionReplication the number of region replications
   */
  private void addRegionsToInMemoryStates(List<RegionInfo> regionInfos, MasterProcedureEnv env,
      int regionReplication) {
    AssignmentManager am = env.getAssignmentManager();
    for (RegionInfo regionInfo : regionInfos) {
      if (regionInfo.isSplit()) {
        am.getRegionStates().updateRegionState(regionInfo, RegionState.State.SPLIT);
      } else {
        am.getRegionStates().updateRegionState(regionInfo, RegionState.State.CLOSED);

        // For region replicas
        for (int i = 1; i < regionReplication; i++) {
          RegionInfo regionInfoForReplica =
            RegionReplicaUtil.getRegionInfoForReplica(regionInfo, i);
          am.getRegionStates().updateRegionState(regionInfoForReplica, RegionState.State.CLOSED);
        }
      }
    }
  }

  /**
   * Applies the ACLs stored in the snapshot to the snapshot's table. This is a no-op unless
   * {@code restoreAcl} is set, the snapshot carries users-and-permissions data, and security
   * is reported as available for the master configuration.
   * @param env MasterProcedureEnv
   * @throws IOException if restoring the ACLs fails
   */
  private void restoreSnapshotAcl(final MasterProcedureEnv env) throws IOException {
    if (restoreAcl && snapshot.hasUsersAndPermissions() && snapshot.getUsersAndPermissions() != null
        && SnapshotDescriptionUtils
            .isSecurityAvailable(env.getMasterServices().getConfiguration())) {
      // restore acl of snapshot to table.
      RestoreSnapshotHelper.restoreSnapshotAcl(snapshot, TableName.valueOf(snapshot.getTable()),
        env.getMasterServices().getConfiguration());
    }
  }
}