View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import java.io.IOException;
21  import java.util.List;
22  import java.util.Set;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.classification.InterfaceAudience;
27  import org.apache.hadoop.classification.InterfaceStability;
28  import org.apache.hadoop.fs.FSDataInputStream;
29  import org.apache.hadoop.fs.FileStatus;
30  import org.apache.hadoop.fs.FileSystem;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.fs.PathFilter;
33  import org.apache.hadoop.hbase.HConstants;
34  import org.apache.hadoop.hbase.HRegionInfo;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.catalog.MetaReader;
37  import org.apache.hadoop.hbase.io.HFileLink;
38  import org.apache.hadoop.hbase.master.MasterServices;
39  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
40  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription.Type;
41  import org.apache.hadoop.hbase.regionserver.HRegion;
42  import org.apache.hadoop.hbase.regionserver.StoreFile;
43  import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
44  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
45  import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
46  import org.apache.hadoop.hbase.snapshot.TakeSnapshotUtils;
47  import org.apache.hadoop.hbase.util.Bytes;
48  import org.apache.hadoop.hbase.util.FSTableDescriptors;
49  import org.apache.hadoop.hbase.util.FSUtils;
50  import org.apache.hadoop.hbase.util.FSVisitor;
51  import org.apache.hadoop.hbase.util.HFileArchiveUtil;
52  
53  /**
54   * General snapshot verification on the master.
55   * <p>
56   * This is a light-weight verification mechanism for all the files in a snapshot. It doesn't
57   * attempt to verify that the files are exact copies (that would be paramount to taking the
58   * snapshot again!), but instead just attempts to ensure that the files match the expected
59   * files and are the same length.
60   * <p>
61   * Taking an online snapshots can race against other operations and this is an last line of
62   * defense.  For example, if meta changes between when snapshots are taken not all regions of a
63   * table may be present.  This can be caused by a region split (daughters present on this scan,
64   * but snapshot took parent), or move (snapshots only checks lists of region servers, a move could
65   * have caused a region to be skipped or done twice).
66   * <p>
67   * Current snapshot files checked:
68   * <ol>
69   * <li>SnapshotDescription is readable</li>
70   * <li>Table info is readable</li>
71   * <li>Regions</li>
72   * <ul>
73   * <li>Matching regions in the snapshot as currently in the table</li>
74   * <li>{@link HRegionInfo} matches the current and stored regions</li>
75   * <li>All referenced hfiles have valid names</li>
76   * <li>All the hfiles are present (either in .archive directory in the region)</li>
77   * <li>All recovered.edits files are present (by name) and have the correct file size</li>
78   * </ul>
79   * </ol>
80   */
81  @InterfaceAudience.Private
82  @InterfaceStability.Unstable
83  public final class MasterSnapshotVerifier {
84    private static final Log LOG = LogFactory.getLog(MasterSnapshotVerifier.class);
85  
86    private SnapshotDescription snapshot;
87    private FileSystem fs;
88    private Path rootDir;
89    private String tableName;
90    private MasterServices services;
91  
92    /**
93     * @param services services for the master
94     * @param snapshot snapshot to check
95     * @param rootDir root directory of the hbase installation.
96     */
97    public MasterSnapshotVerifier(MasterServices services, SnapshotDescription snapshot, Path rootDir) {
98      this.fs = services.getMasterFileSystem().getFileSystem();
99      this.services = services;
100     this.snapshot = snapshot;
101     this.rootDir = rootDir;
102     this.tableName = snapshot.getTable();
103   }
104 
105   /**
106    * Verify that the snapshot in the directory is a valid snapshot
107    * @param snapshotDir snapshot directory to check
108    * @param snapshotServers {@link ServerName} of the servers that are involved in the snapshot
109    * @throws CorruptedSnapshotException if the snapshot is invalid
110    * @throws IOException if there is an unexpected connection issue to the filesystem
111    */
112   public void verifySnapshot(Path snapshotDir, Set<String> snapshotServers)
113       throws CorruptedSnapshotException, IOException {
114     // verify snapshot info matches
115     verifySnapshotDescription(snapshotDir);
116 
117     // check that tableinfo is a valid table description
118     verifyTableInfo(snapshotDir);
119 
120     // check that each region is valid
121     verifyRegions(snapshotDir);
122   }
123 
124   /**
125    * Check that the snapshot description written in the filesystem matches the current snapshot
126    * @param snapshotDir snapshot directory to check
127    */
128   private void verifySnapshotDescription(Path snapshotDir) throws CorruptedSnapshotException {
129     SnapshotDescription found = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
130     if (!this.snapshot.equals(found)) {
131       throw new CorruptedSnapshotException("Snapshot read (" + found
132           + ") doesn't equal snapshot we ran (" + snapshot + ").", snapshot);
133     }
134   }
135 
136   /**
137    * Check that the table descriptor for the snapshot is a valid table descriptor
138    * @param snapshotDir snapshot directory to check
139    */
140   private void verifyTableInfo(Path snapshotDir) throws IOException {
141     FSTableDescriptors.getTableDescriptor(fs, snapshotDir);
142   }
143 
144   /**
145    * Check that all the regions in the snapshot are valid, and accounted for.
146    * @param snapshotDir snapshot directory to check
147    * @throws IOException if we can't reach .META. or read the files from the FS
148    */
149   private void verifyRegions(Path snapshotDir) throws IOException {
150     List<HRegionInfo> regions = MetaReader.getTableRegions(this.services.getCatalogTracker(),
151       Bytes.toBytes(tableName));
152 
153     Set<String> snapshotRegions = SnapshotReferenceUtil.getSnapshotRegionNames(fs, snapshotDir);
154     if (snapshotRegions == null) {
155       String msg = "Snapshot " + SnapshotDescriptionUtils.toString(snapshot) + " looks empty";
156       LOG.error(msg);
157       throw new CorruptedSnapshotException(msg);
158     }
159 
160     if (snapshotRegions.size() != regions.size()) {
161       String msg = "Regions moved during the snapshot '" + 
162                    SnapshotDescriptionUtils.toString(snapshot) + "'. expected=" +
163                    regions.size() + " snapshotted=" + snapshotRegions.size();
164       LOG.error(msg);
165       throw new CorruptedSnapshotException(msg);
166     }
167 
168     for (HRegionInfo region : regions) {
169       if (!snapshotRegions.contains(region.getEncodedName())) {
170         // could happen due to a move or split race.
171         String msg = "No region directory found for region:" + region;
172         LOG.error(msg);
173         throw new CorruptedSnapshotException(msg, snapshot);
174       }
175 
176       verifyRegion(fs, snapshotDir, region);
177     }
178   }
179 
180   /**
181    * Verify that the region (regioninfo, hfiles) are valid
182    * @param fs the FileSystem instance
183    * @param snapshotDir snapshot directory to check
184    * @param region the region to check
185    */
186   private void verifyRegion(final FileSystem fs, final Path snapshotDir, final HRegionInfo region)
187       throws IOException {
188     // make sure we have region in the snapshot
189     Path regionDir = new Path(snapshotDir, region.getEncodedName());
190 
191     // make sure we have the region info in the snapshot
192     Path regionInfo = new Path(regionDir, HRegion.REGIONINFO_FILE);
193     // make sure the file exists
194     if (!fs.exists(regionInfo)) {
195       throw new CorruptedSnapshotException("No region info found for region:" + region, snapshot);
196     }
197 
198     FSDataInputStream in = fs.open(regionInfo);
199     HRegionInfo found = new HRegionInfo();
200     try {
201       found.readFields(in);
202       if (!region.equals(found)) {
203         throw new CorruptedSnapshotException("Found region info (" + found
204            + ") doesn't match expected region:" + region, snapshot);
205       }
206     } finally {
207       in.close();
208     }
209 
210     // make sure we have the expected recovered edits files
211     TakeSnapshotUtils.verifyRecoveredEdits(fs, snapshotDir, found, snapshot);
212 
213     // make sure we have all the expected store files
214     SnapshotReferenceUtil.visitRegionStoreFiles(fs, regionDir, new FSVisitor.StoreFileVisitor() {
215       public void storeFile(final String regionNameSuffix, final String family,
216           final String hfileName) throws IOException {
217         verifyStoreFile(snapshotDir, region, family, hfileName);
218       }
219     });
220   }
221 
222   private void verifyStoreFile(final Path snapshotDir, final HRegionInfo regionInfo,
223       final String family, final String fileName) throws IOException {
224     Path refPath = null;
225     if (StoreFile.isReference(fileName)) {
226       // If is a reference file check if the parent file is present in the snapshot
227       Path snapshotHFilePath = new Path(new Path(
228           new Path(snapshotDir, regionInfo.getEncodedName()), family), fileName);
229       refPath = StoreFile.getReferredToFile(snapshotHFilePath);
230       if (!fs.exists(refPath)) {
231         throw new CorruptedSnapshotException("Missing parent hfile for: " + fileName, snapshot);
232       }
233     }
234 
235     Path linkPath;
236     if (refPath != null && HFileLink.isHFileLink(refPath)) {
237       linkPath = new Path(family, refPath.getName());
238     } else if (HFileLink.isHFileLink(fileName)) {
239       linkPath = new Path(family, fileName);
240     } else {
241       linkPath = new Path(family, HFileLink.createHFileLinkName(tableName,
242         regionInfo.getEncodedName(), fileName));
243     }
244 
245     // check if the linked file exists (in the archive, or in the table dir)
246     HFileLink link = new HFileLink(services.getConfiguration(), linkPath);
247     if (!link.exists(fs)) {
248       throw new CorruptedSnapshotException("Can't find hfile: " + fileName
249           + " in the real (" + link.getOriginPath() + ") or archive (" + link.getArchivePath()
250           + ") directory for the primary table.", snapshot);
251     }
252   }
253 
254   /**
255    * Check that the logs stored in the log directory for the snapshot are valid - it contains all
256    * the expected logs for all servers involved in the snapshot.
257    * @param snapshotDir snapshot directory to check
258    * @param snapshotServers list of the names of servers involved in the snapshot.
259    * @throws CorruptedSnapshotException if the hlogs in the snapshot are not correct
260    * @throws IOException if we can't reach the filesystem
261    */
262   private void verifyLogs(Path snapshotDir, Set<String> snapshotServers)
263       throws CorruptedSnapshotException, IOException {
264     Path snapshotLogDir = new Path(snapshotDir, HConstants.HREGION_LOGDIR_NAME);
265     Path logsDir = new Path(rootDir, HConstants.HREGION_LOGDIR_NAME);
266     TakeSnapshotUtils.verifyAllLogsGotReferenced(fs, logsDir, snapshotServers, snapshot,
267       snapshotLogDir);
268   }
269 }