001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.snapshot;
019
020import java.io.IOException;
021import java.util.List;
022import java.util.Map;
023import java.util.Set;
024import org.apache.hadoop.fs.FileSystem;
025import org.apache.hadoop.fs.Path;
026import org.apache.hadoop.hbase.TableName;
027import org.apache.hadoop.hbase.client.RegionInfo;
028import org.apache.hadoop.hbase.client.RegionReplicaUtil;
029import org.apache.hadoop.hbase.client.TableDescriptor;
030import org.apache.hadoop.hbase.master.MasterServices;
031import org.apache.hadoop.hbase.mob.MobUtils;
032import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils;
033import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException;
034import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
035import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
036import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil;
037import org.apache.hadoop.hbase.util.CommonFSUtils;
038import org.apache.yetus.audience.InterfaceAudience;
039import org.apache.yetus.audience.InterfaceStability;
040import org.slf4j.Logger;
041import org.slf4j.LoggerFactory;
042
043import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
044import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
045import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
046
047/**
048 * General snapshot verification on the master.
049 * <p>
050 * This is a light-weight verification mechanism for all the files in a snapshot. It doesn't attempt
051 * to verify that the files are exact copies (that would be paramount to taking the snapshot
052 * again!), but instead just attempts to ensure that the files match the expected files and are the
053 * same length.
054 * <p>
055 * Taking an online snapshots can race against other operations and this is an last line of defense.
056 * For example, if meta changes between when snapshots are taken not all regions of a table may be
057 * present. This can be caused by a region split (daughters present on this scan, but snapshot took
058 * parent), or move (snapshots only checks lists of region servers, a move could have caused a
059 * region to be skipped or done twice).
060 * <p>
061 * Current snapshot files checked:
062 * <ol>
063 * <li>SnapshotDescription is readable</li>
064 * <li>Table info is readable</li>
065 * <li>Regions</li>
066 * </ol>
067 * <ul>
068 * <li>Matching regions in the snapshot as currently in the table</li>
069 * <li>{@link RegionInfo} matches the current and stored regions</li>
070 * <li>All referenced hfiles have valid names</li>
071 * <li>All the hfiles are present (either in .archive directory in the region)</li>
072 * <li>All recovered.edits files are present (by name) and have the correct file size</li>
073 * </ul>
074 */
075@InterfaceAudience.Private
076@InterfaceStability.Unstable
077public final class MasterSnapshotVerifier {
078  private static final Logger LOG = LoggerFactory.getLogger(MasterSnapshotVerifier.class);
079
080  private SnapshotDescription snapshot;
081  private FileSystem workingDirFs;
082  private TableName tableName;
083  private MasterServices services;
084
085  /**
086   * @param services     services for the master
087   * @param snapshot     snapshot to check
088   * @param workingDirFs the file system containing the temporary snapshot information
089   */
090  public MasterSnapshotVerifier(MasterServices services, SnapshotDescription snapshot,
091    FileSystem workingDirFs) {
092    this.workingDirFs = workingDirFs;
093    this.services = services;
094    this.snapshot = snapshot;
095    this.tableName = TableName.valueOf(snapshot.getTable());
096  }
097
098  /**
099   * Verify that the snapshot in the directory is a valid snapshot
100   * @param snapshotDir     snapshot directory to check
101   * @param snapshotServers {@link org.apache.hadoop.hbase.ServerName} of the servers that are
102   *                        involved in the snapshot
103   * @throws CorruptedSnapshotException if the snapshot is invalid
104   * @throws IOException                if there is an unexpected connection issue to the filesystem
105   */
106  public void verifySnapshot(Path snapshotDir, Set<String> snapshotServers)
107    throws CorruptedSnapshotException, IOException {
108    SnapshotManifest manifest =
109      SnapshotManifest.open(services.getConfiguration(), workingDirFs, snapshotDir, snapshot);
110    // verify snapshot info matches
111    verifySnapshotDescription(snapshotDir);
112
113    // check that tableinfo is a valid table description
114    verifyTableInfo(manifest);
115
116    // check that each region is valid
117    verifyRegions(manifest);
118  }
119
120  /**
121   * Check that the snapshot description written in the filesystem matches the current snapshot
122   * @param snapshotDir snapshot directory to check
123   */
124  private void verifySnapshotDescription(Path snapshotDir) throws CorruptedSnapshotException {
125    SnapshotDescription found =
126      SnapshotDescriptionUtils.readSnapshotInfo(workingDirFs, snapshotDir);
127    if (!this.snapshot.equals(found)) {
128      throw new CorruptedSnapshotException(
129        "Snapshot read (" + found + ") doesn't equal snapshot we ran (" + snapshot + ").",
130        ProtobufUtil.createSnapshotDesc(snapshot));
131    }
132  }
133
134  /**
135   * Check that the table descriptor for the snapshot is a valid table descriptor
136   * @param manifest snapshot manifest to inspect
137   */
138  private void verifyTableInfo(final SnapshotManifest manifest) throws IOException {
139    TableDescriptor htd = manifest.getTableDescriptor();
140    if (htd == null) {
141      throw new CorruptedSnapshotException("Missing Table Descriptor",
142        ProtobufUtil.createSnapshotDesc(snapshot));
143    }
144
145    if (!htd.getTableName().getNameAsString().equals(snapshot.getTable())) {
146      throw new CorruptedSnapshotException("Invalid Table Descriptor. Expected "
147        + snapshot.getTable() + " name, got " + htd.getTableName().getNameAsString(),
148        ProtobufUtil.createSnapshotDesc(snapshot));
149    }
150  }
151
152  /**
153   * Check that all the regions in the snapshot are valid, and accounted for.
154   * @param manifest snapshot manifest to inspect
155   * @throws IOException if we can't reach hbase:meta or read the files from the FS
156   */
157  private void verifyRegions(final SnapshotManifest manifest) throws IOException {
158    List<RegionInfo> regions = services.getAssignmentManager().getTableRegions(tableName, false);
159    // Remove the non-default regions
160    RegionReplicaUtil.removeNonDefaultRegions(regions);
161
162    Map<String, SnapshotRegionManifest> regionManifests = manifest.getRegionManifestsMap();
163    if (regionManifests == null) {
164      String msg = "Snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) + " looks empty";
165      LOG.error(msg);
166      throw new CorruptedSnapshotException(msg);
167    }
168
169    String errorMsg = "";
170    boolean hasMobStore = false;
171    // the mob region is a dummy region, it's not a real region in HBase.
172    // the mob region has a special name, it could be found by the region name.
173    if (regionManifests.get(MobUtils.getMobRegionInfo(tableName).getEncodedName()) != null) {
174      hasMobStore = true;
175    }
176    int realRegionCount = hasMobStore ? regionManifests.size() - 1 : regionManifests.size();
177    if (realRegionCount != regions.size()) {
178      errorMsg =
179        "Regions moved during the snapshot '" + ClientSnapshotDescriptionUtils.toString(snapshot)
180          + "'. expected=" + regions.size() + " snapshotted=" + realRegionCount + ".";
181      LOG.error(errorMsg);
182    }
183
184    // Verify RegionInfo
185    for (RegionInfo region : regions) {
186      SnapshotRegionManifest regionManifest = regionManifests.get(region.getEncodedName());
187      if (regionManifest == null) {
188        // could happen due to a move or split race.
189        String mesg = " No snapshot region directory found for region:" + region;
190        if (errorMsg.isEmpty()) errorMsg = mesg;
191        LOG.error(mesg);
192        continue;
193      }
194
195      verifyRegionInfo(region, regionManifest);
196    }
197
198    if (!errorMsg.isEmpty()) {
199      throw new CorruptedSnapshotException(errorMsg);
200    }
201
202    // Verify Snapshot HFiles
203    // Requires the root directory file system as HFiles are stored in the root directory
204    SnapshotReferenceUtil.verifySnapshot(services.getConfiguration(),
205      CommonFSUtils.getRootDirFileSystem(services.getConfiguration()), manifest);
206  }
207
208  /**
209   * Verify that the regionInfo is valid
210   * @param region   the region to check
211   * @param manifest snapshot manifest to inspect
212   */
213  private void verifyRegionInfo(final RegionInfo region, final SnapshotRegionManifest manifest)
214    throws IOException {
215    RegionInfo manifestRegionInfo = ProtobufUtil.toRegionInfo(manifest.getRegionInfo());
216    if (RegionInfo.COMPARATOR.compare(region, manifestRegionInfo) != 0) {
217      String msg =
218        "Manifest region info " + manifestRegionInfo + "doesn't match expected region:" + region;
219      throw new CorruptedSnapshotException(msg, ProtobufUtil.createSnapshotDesc(snapshot));
220    }
221  }
222}