View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.master.snapshot;
21  
22  import java.io.IOException;
23  import java.util.LinkedList;
24  import java.util.List;
25  import java.util.concurrent.CancellationException;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.fs.FileSystem;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.HTableDescriptor;
34  import org.apache.hadoop.hbase.TableName;
35  import org.apache.hadoop.hbase.MetaTableAccessor;
36  import org.apache.hadoop.hbase.client.Connection;
37  import org.apache.hadoop.hbase.errorhandling.ForeignException;
38  import org.apache.hadoop.hbase.errorhandling.ForeignExceptionDispatcher;
39  import org.apache.hadoop.hbase.executor.EventType;
40  import org.apache.hadoop.hbase.master.AssignmentManager;
41  import org.apache.hadoop.hbase.master.MasterFileSystem;
42  import org.apache.hadoop.hbase.master.MasterServices;
43  import org.apache.hadoop.hbase.master.MetricsSnapshot;
44  import org.apache.hadoop.hbase.master.RegionStates;
45  import org.apache.hadoop.hbase.master.SnapshotSentinel;
46  import org.apache.hadoop.hbase.master.handler.TableEventHandler;
47  import org.apache.hadoop.hbase.monitoring.MonitoredTask;
48  import org.apache.hadoop.hbase.monitoring.TaskMonitor;
49  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
50  import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
51  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotException;
52  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
53  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
54  import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
55  
56  /**
57   * Handler to Restore a snapshot.
58   *
59   * <p>Uses {@link RestoreSnapshotHelper} to replace the table content with the
60   * data available in the snapshot.
61   */
62  @InterfaceAudience.Private
63  public class RestoreSnapshotHandler extends TableEventHandler implements SnapshotSentinel {
64    private static final Log LOG = LogFactory.getLog(RestoreSnapshotHandler.class);
65  
66    private final HTableDescriptor hTableDescriptor;
67    private final SnapshotDescription snapshot;
68  
69    private final ForeignExceptionDispatcher monitor;
70    private final MetricsSnapshot metricsSnapshot = new MetricsSnapshot();
71    private final MonitoredTask status;
72  
73    private volatile boolean stopped = false;
74  
75    public RestoreSnapshotHandler(final MasterServices masterServices,
76        final SnapshotDescription snapshot, final HTableDescriptor htd) throws IOException {
77      super(EventType.C_M_RESTORE_SNAPSHOT, htd.getTableName(), masterServices, masterServices);
78  
79      // Snapshot information
80      this.snapshot = snapshot;
81  
82      // Monitor
83      this.monitor = new ForeignExceptionDispatcher();
84  
85      // Check table exists.
86      getTableDescriptor();
87  
88      // This is the new schema we are going to write out as this modification.
89      this.hTableDescriptor = htd;
90  
91      this.status = TaskMonitor.get().createStatus(
92        "Restoring  snapshot '" + snapshot.getName() + "' to table "
93            + hTableDescriptor.getTableName());
94    }
95  
96    @Override
97    public RestoreSnapshotHandler prepare() throws IOException {
98      return (RestoreSnapshotHandler) super.prepare();
99    }
100 
101   /**
102    * The restore table is executed in place.
103    *  - The on-disk data will be restored - reference files are put in place without moving data
104    *  -  [if something fail here: you need to delete the table and re-run the restore]
105    *  - hbase:meta will be updated
106    *  -  [if something fail here: you need to run hbck to fix hbase:meta entries]
107    * The passed in list gets changed in this method
108    */
109   @Override
110   protected void handleTableOperation(List<HRegionInfo> hris) throws IOException {
111     MasterFileSystem fileSystemManager = masterServices.getMasterFileSystem();
112     Connection conn = masterServices.getConnection();
113     FileSystem fs = fileSystemManager.getFileSystem();
114     Path rootDir = fileSystemManager.getRootDir();
115     TableName tableName = hTableDescriptor.getTableName();
116 
117     try {
118       // 1. Update descriptor
119       this.masterServices.getTableDescriptors().add(hTableDescriptor);
120 
121       // 2. Execute the on-disk Restore
122       LOG.debug("Starting restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot));
123       Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshot, rootDir);
124       SnapshotManifest manifest = SnapshotManifest.open(masterServices.getConfiguration(), fs,
125                                                         snapshotDir, snapshot);
126       RestoreSnapshotHelper restoreHelper = new RestoreSnapshotHelper(
127           masterServices.getConfiguration(), fs, manifest,
128           this.hTableDescriptor, rootDir, monitor, status);
129       RestoreSnapshotHelper.RestoreMetaChanges metaChanges = restoreHelper.restoreHdfsRegions();
130 
131       // 3. Forces all the RegionStates to be offline
132       //
133       // The AssignmentManager keeps all the region states around
134       // with no possibility to remove them, until the master is restarted.
135       // This means that a region marked as SPLIT before the restore will never be assigned again.
136       // To avoid having all states around all the regions are switched to the OFFLINE state,
137       // which is the same state that the regions will be after a delete table.
138       forceRegionsOffline(metaChanges);
139 
140       // 4. Applies changes to hbase:meta
141       status.setStatus("Preparing to restore each region");
142 
143       // 4.1 Removes the current set of regions from META
144       //
145       // By removing also the regions to restore (the ones present both in the snapshot
146       // and in the current state) we ensure that no extra fields are present in META
147       // e.g. with a simple add addRegionToMeta() the splitA and splitB attributes
148       // not overwritten/removed, so you end up with old informations
149       // that are not correct after the restore.
150       List<HRegionInfo> hrisToRemove = new LinkedList<HRegionInfo>();
151       if (metaChanges.hasRegionsToRemove()) hrisToRemove.addAll(metaChanges.getRegionsToRemove());
152       MetaTableAccessor.deleteRegions(conn, hrisToRemove);
153 
154       // 4.2 Add the new set of regions to META
155       //
156       // At this point the old regions are no longer present in META.
157       // and the set of regions present in the snapshot will be written to META.
158       // All the information in hbase:meta are coming from the .regioninfo of each region present
159       // in the snapshot folder.
160       hris.clear();
161       if (metaChanges.hasRegionsToAdd()) hris.addAll(metaChanges.getRegionsToAdd());
162       MetaTableAccessor.addRegionsToMeta(conn, hris, hTableDescriptor.getRegionReplication());
163       if (metaChanges.hasRegionsToRestore()) {
164         MetaTableAccessor.overwriteRegions(conn, metaChanges.getRegionsToRestore(),
165           hTableDescriptor.getRegionReplication());
166       }
167       metaChanges.updateMetaParentRegions(this.server.getConnection(), hris);
168 
169       // At this point the restore is complete. Next step is enabling the table.
170       LOG.info("Restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot) +
171         " on table=" + tableName + " completed!");
172     } catch (IOException e) {
173       String msg = "restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)
174           + " failed. Try re-running the restore command.";
175       LOG.error(msg, e);
176       monitor.receive(new ForeignException(masterServices.getServerName().toString(), e));
177       throw new RestoreSnapshotException(msg, e);
178     }
179   }
180 
181   private void forceRegionsOffline(final RestoreSnapshotHelper.RestoreMetaChanges metaChanges) {
182     forceRegionsOffline(metaChanges.getRegionsToAdd());
183     forceRegionsOffline(metaChanges.getRegionsToRestore());
184     forceRegionsOffline(metaChanges.getRegionsToRemove());
185   }
186 
187   private void forceRegionsOffline(final List<HRegionInfo> hris) {
188     AssignmentManager am = this.masterServices.getAssignmentManager();
189     RegionStates states = am.getRegionStates();
190     if (hris != null) {
191       for (HRegionInfo hri: hris) {
192         states.regionOffline(hri);
193       }
194     }
195   }
196 
197   @Override
198   protected void completed(final Throwable exception) {
199     this.stopped = true;
200     if (exception != null) {
201       status.abort("Restore snapshot '" + snapshot.getName() + "' failed because " +
202           exception.getMessage());
203     } else {
204       status.markComplete("Restore snapshot '"+ snapshot.getName() +"'!");
205     }
206     metricsSnapshot.addSnapshotRestore(status.getCompletionTimestamp() - status.getStartTime());
207     super.completed(exception);
208   }
209 
210   @Override
211   public boolean isFinished() {
212     return this.stopped;
213   }
214 
215   @Override
216   public long getCompletionTimestamp() {
217     return this.status.getCompletionTimestamp();
218   }
219 
220   @Override
221   public SnapshotDescription getSnapshot() {
222     return snapshot;
223   }
224 
225   @Override
226   public void cancel(String why) {
227     if (this.stopped) return;
228     this.stopped = true;
229     String msg = "Stopping restore snapshot=" + ClientSnapshotDescriptionUtils.toString(snapshot)
230         + " because: " + why;
231     LOG.info(msg);
232     CancellationException ce = new CancellationException(why);
233     this.monitor.receive(new ForeignException(masterServices.getServerName().toString(), ce));
234   }
235 
236   @Override
237   public ForeignException getExceptionIfFailed() {
238     return this.monitor.getException();
239   }
240 
241   @Override
242   public void rethrowExceptionIfFailed() throws ForeignException {
243     monitor.rethrowException();
244   }
245 }