001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master.snapshot; 019 020import java.io.IOException; 021import java.util.List; 022import java.util.Map; 023import java.util.Set; 024 025import org.apache.hadoop.fs.FileSystem; 026import org.apache.hadoop.fs.Path; 027import org.apache.hadoop.hbase.MetaTableAccessor; 028import org.apache.hadoop.hbase.TableName; 029import org.apache.hadoop.hbase.client.RegionInfo; 030import org.apache.hadoop.hbase.client.RegionReplicaUtil; 031import org.apache.hadoop.hbase.client.TableDescriptor; 032import org.apache.hadoop.hbase.master.MasterServices; 033import org.apache.hadoop.hbase.mob.MobUtils; 034import org.apache.hadoop.hbase.snapshot.ClientSnapshotDescriptionUtils; 035import org.apache.hadoop.hbase.snapshot.CorruptedSnapshotException; 036import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils; 037import org.apache.hadoop.hbase.snapshot.SnapshotManifest; 038import org.apache.hadoop.hbase.snapshot.SnapshotReferenceUtil; 039import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 040import org.apache.yetus.audience.InterfaceAudience; 041import org.apache.yetus.audience.InterfaceStability; 042import org.slf4j.Logger; 043import org.slf4j.LoggerFactory; 044import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil; 045import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription; 046import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest; 047 048/** 049 * General snapshot verification on the master. 050 * <p> 051 * This is a light-weight verification mechanism for all the files in a snapshot. It doesn't 052 * attempt to verify that the files are exact copies (that would be paramount to taking the 053 * snapshot again!), but instead just attempts to ensure that the files match the expected 054 * files and are the same length. 055 * <p> 056 * Taking an online snapshots can race against other operations and this is an last line of 057 * defense. For example, if meta changes between when snapshots are taken not all regions of a 058 * table may be present. This can be caused by a region split (daughters present on this scan, 059 * but snapshot took parent), or move (snapshots only checks lists of region servers, a move could 060 * have caused a region to be skipped or done twice). 061 * <p> 062 * Current snapshot files checked: 063 * <ol> 064 * <li>SnapshotDescription is readable</li> 065 * <li>Table info is readable</li> 066 * <li>Regions</li> 067 * </ol> 068 * <ul> 069 * <li>Matching regions in the snapshot as currently in the table</li> 070 * <li>{@link RegionInfo} matches the current and stored regions</li> 071 * <li>All referenced hfiles have valid names</li> 072 * <li>All the hfiles are present (either in .archive directory in the region)</li> 073 * <li>All recovered.edits files are present (by name) and have the correct file size</li> 074 * </ul> 075 */ 076@InterfaceAudience.Private 077@InterfaceStability.Unstable 078public final class MasterSnapshotVerifier { 079 private static final Logger LOG = LoggerFactory.getLogger(MasterSnapshotVerifier.class); 080 081 private SnapshotDescription snapshot; 082 private FileSystem fs; 083 private Path rootDir; 084 private TableName tableName; 085 private MasterServices services; 086 087 /** 088 * @param services services for the master 089 * @param snapshot snapshot to check 090 * @param rootDir root directory of the hbase installation. 091 */ 092 public MasterSnapshotVerifier(MasterServices services, SnapshotDescription snapshot, Path rootDir) { 093 this.fs = services.getMasterFileSystem().getFileSystem(); 094 this.services = services; 095 this.snapshot = snapshot; 096 this.rootDir = rootDir; 097 this.tableName = TableName.valueOf(snapshot.getTable()); 098 } 099 100 /** 101 * Verify that the snapshot in the directory is a valid snapshot 102 * @param snapshotDir snapshot directory to check 103 * @param snapshotServers {@link org.apache.hadoop.hbase.ServerName} of the servers 104 * that are involved in the snapshot 105 * @throws CorruptedSnapshotException if the snapshot is invalid 106 * @throws IOException if there is an unexpected connection issue to the filesystem 107 */ 108 public void verifySnapshot(Path snapshotDir, Set<String> snapshotServers) 109 throws CorruptedSnapshotException, IOException { 110 SnapshotManifest manifest = SnapshotManifest.open(services.getConfiguration(), fs, 111 snapshotDir, snapshot); 112 // verify snapshot info matches 113 verifySnapshotDescription(snapshotDir); 114 115 // check that tableinfo is a valid table description 116 verifyTableInfo(manifest); 117 118 // check that each region is valid 119 verifyRegions(manifest); 120 } 121 122 /** 123 * Check that the snapshot description written in the filesystem matches the current snapshot 124 * @param snapshotDir snapshot directory to check 125 */ 126 private void verifySnapshotDescription(Path snapshotDir) throws CorruptedSnapshotException { 127 SnapshotDescription found = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir); 128 if (!this.snapshot.equals(found)) { 129 throw new CorruptedSnapshotException( 130 "Snapshot read (" + found + ") doesn't equal snapshot we ran (" + snapshot + ").", 131 ProtobufUtil.createSnapshotDesc(snapshot)); 132 } 133 } 134 135 /** 136 * Check that the table descriptor for the snapshot is a valid table descriptor 137 * @param manifest snapshot manifest to inspect 138 */ 139 private void verifyTableInfo(final SnapshotManifest manifest) throws IOException { 140 TableDescriptor htd = manifest.getTableDescriptor(); 141 if (htd == null) { 142 throw new CorruptedSnapshotException("Missing Table Descriptor", 143 ProtobufUtil.createSnapshotDesc(snapshot)); 144 } 145 146 if (!htd.getTableName().getNameAsString().equals(snapshot.getTable())) { 147 throw new CorruptedSnapshotException( 148 "Invalid Table Descriptor. Expected " + snapshot.getTable() + " name, got " 149 + htd.getTableName().getNameAsString(), ProtobufUtil.createSnapshotDesc(snapshot)); 150 } 151 } 152 153 /** 154 * Check that all the regions in the snapshot are valid, and accounted for. 155 * @param manifest snapshot manifest to inspect 156 * @throws IOException if we can't reach hbase:meta or read the files from the FS 157 */ 158 private void verifyRegions(final SnapshotManifest manifest) throws IOException { 159 List<RegionInfo> regions; 160 if (TableName.META_TABLE_NAME.equals(tableName)) { 161 regions = new MetaTableLocator().getMetaRegions(services.getZooKeeper()); 162 } else { 163 regions = MetaTableAccessor.getTableRegions(services.getConnection(), tableName); 164 } 165 // Remove the non-default regions 166 RegionReplicaUtil.removeNonDefaultRegions(regions); 167 168 Map<String, SnapshotRegionManifest> regionManifests = manifest.getRegionManifestsMap(); 169 if (regionManifests == null) { 170 String msg = "Snapshot " + ClientSnapshotDescriptionUtils.toString(snapshot) + " looks empty"; 171 LOG.error(msg); 172 throw new CorruptedSnapshotException(msg); 173 } 174 175 String errorMsg = ""; 176 boolean hasMobStore = false; 177 // the mob region is a dummy region, it's not a real region in HBase. 178 // the mob region has a special name, it could be found by the region name. 179 if (regionManifests.get(MobUtils.getMobRegionInfo(tableName).getEncodedName()) != null) { 180 hasMobStore = true; 181 } 182 int realRegionCount = hasMobStore ? regionManifests.size() - 1 : regionManifests.size(); 183 if (realRegionCount != regions.size()) { 184 errorMsg = "Regions moved during the snapshot '" + 185 ClientSnapshotDescriptionUtils.toString(snapshot) + "'. expected=" + 186 regions.size() + " snapshotted=" + realRegionCount + "."; 187 LOG.error(errorMsg); 188 } 189 190 // Verify RegionInfo 191 for (RegionInfo region : regions) { 192 SnapshotRegionManifest regionManifest = regionManifests.get(region.getEncodedName()); 193 if (regionManifest == null) { 194 // could happen due to a move or split race. 195 String mesg = " No snapshot region directory found for region:" + region; 196 if (errorMsg.isEmpty()) errorMsg = mesg; 197 LOG.error(mesg); 198 continue; 199 } 200 201 verifyRegionInfo(region, regionManifest); 202 } 203 204 if (!errorMsg.isEmpty()) { 205 throw new CorruptedSnapshotException(errorMsg); 206 } 207 208 // Verify Snapshot HFiles 209 SnapshotReferenceUtil.verifySnapshot(services.getConfiguration(), fs, manifest); 210 } 211 212 /** 213 * Verify that the regionInfo is valid 214 * @param region the region to check 215 * @param manifest snapshot manifest to inspect 216 */ 217 private void verifyRegionInfo(final RegionInfo region, 218 final SnapshotRegionManifest manifest) throws IOException { 219 RegionInfo manifestRegionInfo = ProtobufUtil.toRegionInfo(manifest.getRegionInfo()); 220 if (RegionInfo.COMPARATOR.compare(region, manifestRegionInfo) != 0) { 221 String msg = "Manifest region info " + manifestRegionInfo + 222 "doesn't match expected region:" + region; 223 throw new CorruptedSnapshotException(msg, ProtobufUtil.createSnapshotDesc(snapshot)); 224 } 225 } 226}