View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.master.snapshot;
19  
20  import java.io.IOException;
21  import java.util.List;
22  import java.util.Set;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.classification.InterfaceAudience;
27  import org.apache.hadoop.classification.InterfaceStability;
28  import org.apache.hadoop.fs.FileStatus;
29  import org.apache.hadoop.fs.FileSystem;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.fs.PathFilter;
32  import org.apache.hadoop.hbase.TableName;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.ServerName;
35  import org.apache.hadoop.hbase.catalog.MetaReader;
36  import org.apache.hadoop.hbase.io.HFileLink;
37  import org.apache.hadoop.hbase.master.MasterServices;
38  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
39  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
40  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
41  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
42  import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
43  import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
44  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
45  import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
46  import org.apache.hadoop.hbase.snapshot.TakeSnapshotUtils;
47  import org.apache.hadoop.hbase.util.FSTableDescriptors;
48  import org.apache.hadoop.hbase.util.FSUtils;
49  import org.apache.hadoop.hbase.util.FSVisitor;
50  import org.apache.hadoop.hbase.util.HFileArchiveUtil;
51  
52  /**
53   * General snapshot verification on the master.
54   * <p>
55   * This is a light-weight verification mechanism for all the files in a snapshot. It doesn't
56   * attempt to verify that the files are exact copies (that would be paramount to taking the
57   * snapshot again!), but instead just attempts to ensure that the files match the expected
58   * files and are the same length.
59   * <p>
60   * Taking an online snapshots can race against other operations and this is an last line of
61   * defense.  For example, if meta changes between when snapshots are taken not all regions of a
62   * table may be present.  This can be caused by a region split (daughters present on this scan,
63   * but snapshot took parent), or move (snapshots only checks lists of region servers, a move could
64   * have caused a region to be skipped or done twice).
65   * <p>
66   * Current snapshot files checked:
67   * <ol>
68   * <li>SnapshotDescription is readable</li>
69   * <li>Table info is readable</li>
70   * <li>Regions</li>
71   * <ul>
72   * <li>Matching regions in the snapshot as currently in the table</li>
73   * <li>{@link HRegionInfo} matches the current and stored regions</li>
74   * <li>All referenced hfiles have valid names</li>
75   * <li>All the hfiles are present (either in .archive directory in the region)</li>
76   * <li>All recovered.edits files are present (by name) and have the correct file size</li>
77   * </ul>
78   * </ol>
79   */
80  @InterfaceAudience.Private
81  @InterfaceStability.Unstable
82  public final class MasterSnapshotVerifier {
83    private static final Log LOG = LogFactory.getLog(MasterSnapshotVerifier.class);
84  
85    private SnapshotDescription snapshot;
86    private FileSystem fs;
87    private Path rootDir;
88    private TableName tableName;
89    private MasterServices services;
90  
91    /**
92     * @param services services for the master
93     * @param snapshot snapshot to check
94     * @param rootDir root directory of the hbase installation.
95     */
96    public MasterSnapshotVerifier(MasterServices services, SnapshotDescription snapshot, Path rootDir) {
97      this.fs = services.getMasterFileSystem().getFileSystem();
98      this.services = services;
99      this.snapshot = snapshot;
100     this.rootDir = rootDir;
101     this.tableName = TableName.valueOf(snapshot.getTable());
102   }
103 
104   /**
105    * Verify that the snapshot in the directory is a valid snapshot
106    * @param snapshotDir snapshot directory to check
107    * @param snapshotServers {@link ServerName} of the servers that are involved in the snapshot
108    * @throws CorruptedSnapshotException if the snapshot is invalid
109    * @throws IOException if there is an unexpected connection issue to the filesystem
110    */
111   public void verifySnapshot(Path snapshotDir, Set<String> snapshotServers)
112       throws CorruptedSnapshotException, IOException {
113     // verify snapshot info matches
114     verifySnapshotDescription(snapshotDir);
115 
116     // check that tableinfo is a valid table description
117     verifyTableInfo(snapshotDir);
118 
119     // check that each region is valid
120     verifyRegions(snapshotDir);
121   }
122 
123   /**
124    * Check that the snapshot description written in the filesystem matches the current snapshot
125    * @param snapshotDir snapshot directory to check
126    */
127   private void verifySnapshotDescription(Path snapshotDir) throws CorruptedSnapshotException {
128     SnapshotDescription found = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
129     if (!this.snapshot.equals(found)) {
130       throw new CorruptedSnapshotException("Snapshot read (" + found
131           + ") doesn't equal snapshot we ran (" + snapshot + ").", snapshot);
132     }
133   }
134 
135   /**
136    * Check that the table descriptor for the snapshot is a valid table descriptor
137    * @param snapshotDir snapshot directory to check
138    */
139   private void verifyTableInfo(Path snapshotDir) throws IOException {
140     FSTableDescriptors.getTableDescriptorFromFs(fs, snapshotDir);
141   }
142 
143   /**
144    * Check that all the regions in the snapshot are valid, and accounted for.
145    * @param snapshotDir snapshot directory to check
146    * @throws IOException if we can't reach hbase:meta or read the files from the FS
147    */
148   private void verifyRegions(Path snapshotDir) throws IOException {
149     List<HRegionInfo> regions = MetaReader.getTableRegions(this.services.getCatalogTracker(),
150         tableName);
151 
152     Set<String> snapshotRegions = SnapshotReferenceUtil.getSnapshotRegionNames(fs, snapshotDir);
153     if (snapshotRegions == null) {
154       String msg = "Snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) + " looks empty";
155       LOG.error(msg);
156       throw new CorruptedSnapshotException(msg);
157     }
158 
159     if (snapshotRegions.size() != regions.size()) {
160       String msg = "Regions moved during the snapshot '" + 
161                    ClientSnapshotDescriptionUtils.toString(snapshot) + "'. expected=" +
162                    regions.size() + " snapshotted=" + snapshotRegions.size();
163       LOG.error(msg);
164       throw new CorruptedSnapshotException(msg);
165     }
166 
167     for (HRegionInfo region : regions) {
168       if (!snapshotRegions.contains(region.getEncodedName())) {
169         // could happen due to a move or split race.
170         String msg = "No region directory found for region:" + region;
171         LOG.error(msg);
172         throw new CorruptedSnapshotException(msg, snapshot);
173       }
174 
175       verifyRegion(fs, snapshotDir, region);
176     }
177   }
178 
179   /**
180    * Verify that the region (regioninfo, hfiles) are valid
181    * @param fs the FileSystem instance
182    * @param snapshotDir snapshot directory to check
183    * @param region the region to check
184    */
185   private void verifyRegion(final FileSystem fs, final Path snapshotDir, final HRegionInfo region)
186       throws IOException {
187     // make sure we have region in the snapshot
188     Path regionDir = new Path(snapshotDir, region.getEncodedName());
189 
190     // make sure we have the region info in the snapshot
191     Path regionInfo = new Path(regionDir, HRegionFileSystem.REGION_INFO_FILE);
192     // make sure the file exists
193     if (!fs.exists(regionInfo)) {
194       throw new CorruptedSnapshotException("No region info found for region:" + region, snapshot);
195     }
196 
197     HRegionInfo found = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
198     if (!region.equals(found)) {
199       throw new CorruptedSnapshotException("Found region info (" + found
200         + ") doesn't match expected region:" + region, snapshot);
201     }
202 
203     // make sure we have the expected recovered edits files
204     TakeSnapshotUtils.verifyRecoveredEdits(fs, snapshotDir, found, snapshot);
205 
206      // make sure we have all the expected store files
207     SnapshotReferenceUtil.visitRegionStoreFiles(fs, regionDir, new FSVisitor.StoreFileVisitor() {
208       public void storeFile(final String regionNameSuffix, final String family,
209           final String hfileName) throws IOException {
210         verifyStoreFile(snapshotDir, region, family, hfileName);
211       }
212     });
213   }
214 
215   private void verifyStoreFile(final Path snapshotDir, final HRegionInfo regionInfo,
216       final String family, final String fileName) throws IOException {
217     Path refPath = null;
218     if (StoreFileInfo.isReference(fileName)) {
219       // If is a reference file check if the parent file is present in the snapshot
220       Path snapshotHFilePath = new Path(new Path(
221           new Path(snapshotDir, regionInfo.getEncodedName()), family), fileName);
222       refPath = StoreFileInfo.getReferredToFile(snapshotHFilePath);
223       if (!fs.exists(refPath)) {
224         throw new CorruptedSnapshotException("Missing parent hfile for: " + fileName, snapshot);
225       }
226     }
227 
228     Path linkPath;
229     if (refPath != null && HFileLink.isHFileLink(refPath)) {
230       linkPath = new Path(family, refPath.getName());
231     } else if (HFileLink.isHFileLink(fileName)) {
232       linkPath = new Path(family, fileName);
233     } else {
234       linkPath = new Path(family, HFileLink.createHFileLinkName(tableName,
235         regionInfo.getEncodedName(), fileName));
236     }
237 
238     // check if the linked file exists (in the archive, or in the table dir)
239     HFileLink link = new HFileLink(services.getConfiguration(), linkPath);
240     if (!link.exists(fs)) {
241       throw new CorruptedSnapshotException("Can't find hfile: " + fileName
242           + " in the real (" + link.getOriginPath() + ") or archive (" + link.getArchivePath()
243           + ") directory for the primary table.", snapshot);
244     }
245   }
246 }