001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.snapshot; 019 020import java.io.IOException; 021import java.util.HashSet; 022import java.util.List; 023import java.util.Set; 024 025import org.apache.hadoop.hbase.ServerName; 026import org.apache.hadoop.hbase.client.RegionInfo; 027import org.apache.hadoop.hbase.client.RegionReplicaUtil; 028import org.apache.hadoop.hbase.errorhandling.ForeignException; 029import org.apache.hadoop.hbase.master.MasterServices; 030import org.apache.hadoop.hbase.mob.MobUtils; 031import org.apache.hadoop.hbase.procedure.Procedure; 032import org.apache.hadoop.hbase.procedure.ProcedureCoordinator; 033import org.apache.hadoop.hbase.snapshot.HBaseSnapshotException; 034import org.apache.hadoop.hbase.util.Pair; 035import org.apache.yetus.audience.InterfaceAudience; 036import org.slf4j.Logger; 037import org.slf4j.LoggerFactory; 038import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 039import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 040 041/** 042 * Handle the master side of taking a snapshot of an online table, regardless of snapshot type. 043 * Uses a {@link Procedure} to run the snapshot across all the involved region servers. 044 * @see ProcedureCoordinator 045 */ 046@InterfaceAudience.Private 047public class EnabledTableSnapshotHandler extends TakeSnapshotHandler { 048 049 private static final Logger LOG = LoggerFactory.getLogger(EnabledTableSnapshotHandler.class); 050 private final ProcedureCoordinator coordinator; 051 052 public EnabledTableSnapshotHandler(SnapshotDescription snapshot, MasterServices master, 053 final SnapshotManager manager) { 054 super(snapshot, master, manager); 055 this.coordinator = manager.getCoordinator(); 056 } 057 058 @Override 059 public EnabledTableSnapshotHandler prepare() throws Exception { 060 return (EnabledTableSnapshotHandler) super.prepare(); 061 } 062 063 // TODO consider switching over to using regionnames, rather than server names. This would allow 064 // regions to migrate during a snapshot, and then be involved when they are ready. Still want to 065 // enforce a snapshot time constraints, but lets us be potentially a bit more robust. 066 067 /** 068 * This method kicks off a snapshot procedure. Other than that it hangs around for various 069 * phases to complete. 070 */ 071 @Override 072 protected void snapshotRegions(List<Pair<RegionInfo, ServerName>> regions) 073 throws HBaseSnapshotException, IOException { 074 Set<String> regionServers = new HashSet<>(regions.size()); 075 for (Pair<RegionInfo, ServerName> region : regions) { 076 if (region != null && region.getFirst() != null && region.getSecond() != null) { 077 RegionInfo hri = region.getFirst(); 078 if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) continue; 079 regionServers.add(region.getSecond().toString()); 080 } 081 } 082 083 // start the snapshot on the RS 084 Procedure proc = coordinator.startProcedure(this.monitor, this.snapshot.getName(), 085 this.snapshot.toByteArray(), Lists.newArrayList(regionServers)); 086 if (proc == null) { 087 String msg = "Failed to submit distributed procedure for snapshot '" 088 + snapshot.getName() + "'"; 089 LOG.error(msg); 090 throw new HBaseSnapshotException(msg); 091 } 092 093 try { 094 // wait for the snapshot to complete. A timer thread is kicked off that should cancel this 095 // if it takes too long. 096 proc.waitForCompleted(); 097 LOG.info("Done waiting - online snapshot for " + this.snapshot.getName()); 098 099 // Take the offline regions as disabled 100 for (Pair<RegionInfo, ServerName> region : regions) { 101 RegionInfo regionInfo = region.getFirst(); 102 if (regionInfo.isOffline() && (regionInfo.isSplit() || regionInfo.isSplitParent()) && 103 RegionReplicaUtil.isDefaultReplica(regionInfo)) { 104 LOG.info("Take disabled snapshot of offline region=" + regionInfo); 105 snapshotDisabledRegion(regionInfo); 106 } 107 } 108 // handle the mob files if any. 109 boolean mobEnabled = MobUtils.hasMobColumns(htd); 110 if (mobEnabled) { 111 LOG.info("Taking snapshot for mob files in table " + htd.getTableName()); 112 // snapshot the mob files as a offline region. 113 RegionInfo mobRegionInfo = MobUtils.getMobRegionInfo(htd.getTableName()); 114 snapshotMobRegion(mobRegionInfo); 115 } 116 } catch (InterruptedException e) { 117 ForeignException ee = 118 new ForeignException("Interrupted while waiting for snapshot to finish", e); 119 monitor.receive(ee); 120 Thread.currentThread().interrupt(); 121 } catch (ForeignException e) { 122 monitor.receive(e); 123 } 124 } 125 126 /** 127 * Takes a snapshot of the mob region 128 */ 129 private void snapshotMobRegion(final RegionInfo regionInfo) 130 throws IOException { 131 snapshotManifest.addMobRegion(regionInfo); 132 monitor.rethrowException(); 133 status.setStatus("Completed referencing HFiles for the mob region of table: " + snapshotTable); 134 } 135 136 @Override 137 protected boolean downgradeToSharedTableLock() { 138 // return true here to change from exclusive lock to shared lock, so we can still assign regions 139 // while taking snapshots. This is important, as region server crash can happen at any time, if 140 // we can not assign regions then the cluster will be in trouble as the regions can not online. 141 return true; 142 } 143}