001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.snapshot; 019 020import java.io.IOException; 021import java.util.List; 022import java.util.Map; 023import java.util.Set; 024import org.apache.hadoop.fs.FileSystem; 025import org.apache.hadoop.fs.Path; 026import org.apache.hadoop.hbase.TableName; 027import org.apache.hadoop.hbase.client.RegionInfo; 028import org.apache.hadoop.hbase.client.RegionReplicaUtil; 029import org.apache.hadoop.hbase.client.TableDescriptor; 030import org.apache.hadoop.hbase.master.MasterServices; 031import org.apache.hadoop.hbase.mob.MobUtils; 032import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; 033import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; 034import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 035import org.apache.hadoop.hbase.snapshot.SnapshotManifest; 036import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; 037import org.apache.hadoop.hbase.util.CommonFSUtils; 038import org.apache.yetus.audience.InterfaceAudience; 039import org.apache.yetus.audience.InterfaceStability; 040import org.slf4j.Logger; 041import org.slf4j.LoggerFactory; 042 043import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 044import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 045import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest; 046 047/** 048 * General snapshot verification on the master. 049 * <p> 050 * This is a light-weight verification mechanism for all the files in a snapshot. It doesn't attempt 051 * to verify that the files are exact copies (that would be paramount to taking the snapshot 052 * again!), but instead just attempts to ensure that the files match the expected files and are the 053 * same length. 054 * <p> 055 * Taking an online snapshots can race against other operations and this is an last line of defense. 056 * For example, if meta changes between when snapshots are taken not all regions of a table may be 057 * present. This can be caused by a region split (daughters present on this scan, but snapshot took 058 * parent), or move (snapshots only checks lists of region servers, a move could have caused a 059 * region to be skipped or done twice). 060 * <p> 061 * Current snapshot files checked: 062 * <ol> 063 * <li>SnapshotDescription is readable</li> 064 * <li>Table info is readable</li> 065 * <li>Regions</li> 066 * </ol> 067 * <ul> 068 * <li>Matching regions in the snapshot as currently in the table</li> 069 * <li>{@link RegionInfo} matches the current and stored regions</li> 070 * <li>All referenced hfiles have valid names</li> 071 * <li>All the hfiles are present (either in .archive directory in the region)</li> 072 * <li>All recovered.edits files are present (by name) and have the correct file size</li> 073 * </ul> 074 */ 075@InterfaceAudience.Private 076@InterfaceStability.Unstable 077public final class MasterSnapshotVerifier { 078 private static final Logger LOG = LoggerFactory.getLogger(MasterSnapshotVerifier.class); 079 080 private SnapshotDescription snapshot; 081 private FileSystem workingDirFs; 082 private TableName tableName; 083 private MasterServices services; 084 085 /** 086 * @param services services for the master 087 * @param snapshot snapshot to check 088 * @param workingDirFs the file system containing the temporary snapshot information 089 */ 090 public MasterSnapshotVerifier(MasterServices services, SnapshotDescription snapshot, 091 FileSystem workingDirFs) { 092 this.workingDirFs = workingDirFs; 093 this.services = services; 094 this.snapshot = snapshot; 095 this.tableName = TableName.valueOf(snapshot.getTable()); 096 } 097 098 /** 099 * Verify that the snapshot in the directory is a valid snapshot 100 * @param snapshotDir snapshot directory to check 101 * @param snapshotServers {@link org.apache.hadoop.hbase.ServerName} of the servers that are 102 * involved in the snapshot 103 * @throws CorruptedSnapshotException if the snapshot is invalid 104 * @throws IOException if there is an unexpected connection issue to the filesystem 105 */ 106 public void verifySnapshot(Path snapshotDir, Set<String> snapshotServers) 107 throws CorruptedSnapshotException, IOException { 108 SnapshotManifest manifest = 109 SnapshotManifest.open(services.getConfiguration(), workingDirFs, snapshotDir, snapshot); 110 // verify snapshot info matches 111 verifySnapshotDescription(snapshotDir); 112 113 // check that tableinfo is a valid table description 114 verifyTableInfo(manifest); 115 116 // check that each region is valid 117 verifyRegions(manifest); 118 } 119 120 /** 121 * Check that the snapshot description written in the filesystem matches the current snapshot 122 * @param snapshotDir snapshot directory to check 123 */ 124 private void verifySnapshotDescription(Path snapshotDir) throws CorruptedSnapshotException { 125 SnapshotDescription found = 126 SnapshotDescriptionUtils.readSnapshotInfo(workingDirFs, snapshotDir); 127 if (!this.snapshot.equals(found)) { 128 throw new CorruptedSnapshotException( 129 "Snapshot read (" + found + ") doesn't equal snapshot we ran (" + snapshot + ").", 130 ProtobufUtil.createSnapshotDesc(snapshot)); 131 } 132 } 133 134 /** 135 * Check that the table descriptor for the snapshot is a valid table descriptor 136 * @param manifest snapshot manifest to inspect 137 */ 138 private void verifyTableInfo(final SnapshotManifest manifest) throws IOException { 139 TableDescriptor htd = manifest.getTableDescriptor(); 140 if (htd == null) { 141 throw new CorruptedSnapshotException("Missing Table Descriptor", 142 ProtobufUtil.createSnapshotDesc(snapshot)); 143 } 144 145 if (!htd.getTableName().getNameAsString().equals(snapshot.getTable())) { 146 throw new CorruptedSnapshotException("Invalid Table Descriptor. Expected " 147 + snapshot.getTable() + " name, got " + htd.getTableName().getNameAsString(), 148 ProtobufUtil.createSnapshotDesc(snapshot)); 149 } 150 } 151 152 /** 153 * Check that all the regions in the snapshot are valid, and accounted for. 154 * @param manifest snapshot manifest to inspect 155 * @throws IOException if we can't reach hbase:meta or read the files from the FS 156 */ 157 private void verifyRegions(final SnapshotManifest manifest) throws IOException { 158 List<RegionInfo> regions = services.getAssignmentManager().getTableRegions(tableName, false); 159 // Remove the non-default regions 160 RegionReplicaUtil.removeNonDefaultRegions(regions); 161 162 Map<String, SnapshotRegionManifest> regionManifests = manifest.getRegionManifestsMap(); 163 if (regionManifests == null) { 164 String msg = "Snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) + " looks empty"; 165 LOG.error(msg); 166 throw new CorruptedSnapshotException(msg); 167 } 168 169 String errorMsg = ""; 170 boolean hasMobStore = false; 171 // the mob region is a dummy region, it's not a real region in HBase. 172 // the mob region has a special name, it could be found by the region name. 173 if (regionManifests.get(MobUtils.getMobRegionInfo(tableName).getEncodedName()) != null) { 174 hasMobStore = true; 175 } 176 int realRegionCount = hasMobStore ? regionManifests.size() - 1 : regionManifests.size(); 177 if (realRegionCount != regions.size()) { 178 errorMsg = 179 "Regions moved during the snapshot '" + ClientSnapshotDescriptionUtils.toString(snapshot) 180 + "'. expected=" + regions.size() + " snapshotted=" + realRegionCount + "."; 181 LOG.error(errorMsg); 182 } 183 184 // Verify RegionInfo 185 for (RegionInfo region : regions) { 186 SnapshotRegionManifest regionManifest = regionManifests.get(region.getEncodedName()); 187 if (regionManifest == null) { 188 // could happen due to a move or split race. 189 String mesg = " No snapshot region directory found for region:" + region; 190 if (errorMsg.isEmpty()) errorMsg = mesg; 191 LOG.error(mesg); 192 continue; 193 } 194 195 verifyRegionInfo(region, regionManifest); 196 } 197 198 if (!errorMsg.isEmpty()) { 199 throw new CorruptedSnapshotException(errorMsg); 200 } 201 202 // Verify Snapshot HFiles 203 // Requires the root directory file system as HFiles are stored in the root directory 204 SnapshotReferenceUtil.verifySnapshot(services.getConfiguration(), 205 CommonFSUtils.getRootDirFileSystem(services.getConfiguration()), manifest); 206 } 207 208 /** 209 * Verify that the regionInfo is valid 210 * @param region the region to check 211 * @param manifest snapshot manifest to inspect 212 */ 213 private void verifyRegionInfo(final RegionInfo region, final SnapshotRegionManifest manifest) 214 throws IOException { 215 RegionInfo manifestRegionInfo = ProtobufUtil.toRegionInfo(manifest.getRegionInfo()); 216 if (RegionInfo.COMPARATOR.compare(region, manifestRegionInfo) != 0) { 217 String msg = 218 "Manifest region info " + manifestRegionInfo + "doesn't match expected region:" + region; 219 throw new CorruptedSnapshotException(msg, ProtobufUtil.createSnapshotDesc(snapshot)); 220 } 221 } 222}