/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;

/**
 * A Scanner which performs a scan over snapshot files. Using this class requires copying the
 * snapshot to a temporary empty directory, into which only the snapshot reference files are
 * copied; the actual data files are not copied.
 * <p>
 * This also allows one to run the scan from an online or offline HBase cluster. The snapshot files
 * can be exported with the org.apache.hadoop.hbase.snapshot.ExportSnapshot tool to a pure-HDFS
 * cluster, and this scanner can then be used to run the scan directly over the snapshot files. The
 * snapshot should not be deleted while there are open scanners reading from its files.
 * <p>
 * An internal RegionScanner is used to execute the {@link Scan} obtained from the user for each
 * region in the snapshot.
 * <p>
 * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
 * snapshot files and data files. HBase also enforces security because all the requests are handled
 * by the server layer, and the user cannot read from the data files directly. To read from snapshot
 * files directly from the file system, the user who is running the MR job must have sufficient
 * permissions to access snapshot and reference files. This means that to run mapreduce over
 * snapshot files, the job has to be run as the HBase user or the user must have group or other
 * privileges in the filesystem (see HBASE-8369). Note that giving other users access to read the
 * snapshot/data files will completely circumvent the access control enforced by HBase. See
 * org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.
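 * <p>
 * A minimal usage sketch (the snapshot name and restore directory below are placeholders; the
 * restore directory must be writable by the current user and must not be under the HBase root
 * directory):
 *
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * Path restoreDir = new Path("/tmp/snapshot-restore");
 * Scan scan = new Scan();
 * try (TableSnapshotScanner scanner =
 *     new TableSnapshotScanner(conf, restoreDir, "mySnapshot", scan)) {
 *   for (Result result = scanner.next(); result != null; result = scanner.next()) {
 *     // process each Result
 *   }
 * }
 * }</pre>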
 */
@InterfaceAudience.Private
public class TableSnapshotScanner extends AbstractClientScanner {

  private static final Logger LOG = LoggerFactory.getLogger(TableSnapshotScanner.class);

  private Configuration conf;
  private String snapshotName;
  private FileSystem fs;
  private Path rootDir;
  private Path restoreDir;
  private Scan scan;
  private ArrayList<RegionInfo> regions;
  private TableDescriptor htd;
  private final boolean snapshotAlreadyRestored;

  private ClientSideRegionScanner currentRegionScanner = null;
  private int currentRegion = -1;

  private int numOfCompleteRows = 0;

  /**
   * Creates a TableSnapshotScanner.
   * @param conf the configuration
   * @param restoreDir a temporary directory to copy the snapshot files into. The current user
   *          should have write permissions to this directory, and it should not be a subdirectory
   *          of rootDir. The scanner deletes the contents of the directory once the scanner is
   *          closed.
   * @param snapshotName the name of the snapshot to read from
   * @param scan a Scan representing scan parameters
   * @throws IOException in case of error
   */
  public TableSnapshotScanner(Configuration conf, Path restoreDir, String snapshotName, Scan scan)
      throws IOException {
    this(conf, CommonFSUtils.getRootDir(conf), restoreDir, snapshotName, scan);
  }

  public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
      String snapshotName, Scan scan) throws IOException {
    this(conf, rootDir, restoreDir, snapshotName, scan, false);
  }

  /**
   * Creates a TableSnapshotScanner.
   * @param conf the configuration
   * @param rootDir root directory for HBase.
   * @param restoreDir a temporary directory to copy the snapshot files into. The current user
   *          should have write permissions to this directory, and it should not be a subdirectory
   *          of rootDir. The scanner deletes the contents of the directory once the scanner is
   *          closed.
   * @param snapshotName the name of the snapshot to read from
   * @param scan a Scan representing scan parameters
   * @param snapshotAlreadyRestored true to indicate that the snapshot has already been restored.
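   *          If true, the restore directory is expected to already contain the restored snapshot
   *          (for example, populated via RestoreSnapshotHelper.copySnapshotForScanner), and
   *          {@link #close()} will not delete it; cleaning up the restore directory is then the
   *          caller's responsibility.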
   * @throws IOException in case of error
   */
  public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
      String snapshotName, Scan scan, boolean snapshotAlreadyRestored) throws IOException {
    this.conf = conf;
    this.snapshotName = snapshotName;
    this.rootDir = rootDir;
    this.scan = scan;
    this.snapshotAlreadyRestored = snapshotAlreadyRestored;
    this.fs = rootDir.getFileSystem(conf);

    if (snapshotAlreadyRestored) {
      this.restoreDir = restoreDir;
      openWithoutRestoringSnapshot();
    } else {
      // restoreDir will be deleted in close(), use a unique sub directory
      this.restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
      openWithRestoringSnapshot();
    }

    initScanMetrics(scan);
  }

  private void openWithoutRestoringSnapshot() throws IOException {
    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
    SnapshotProtos.SnapshotDescription snapshotDesc =
        SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);

    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
    List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
    if (regionManifests == null) {
      throw new IllegalArgumentException("Snapshot seems empty, snapshotName: " + snapshotName);
    }

    regions = new ArrayList<>(regionManifests.size());
    regionManifests.stream().map(r -> HRegionInfo.convert(r.getRegionInfo()))
        .filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
    htd = manifest.getTableDescriptor();
  }

  private boolean isValidRegion(RegionInfo hri) {
    // An offline split parent region should be excluded.
    if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
      return false;
    }
    return PrivateCellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
        hri.getEndKey());
  }

  private void openWithRestoringSnapshot() throws IOException {
    final RestoreSnapshotHelper.RestoreMetaChanges meta =
        RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
    final List<RegionInfo> restoredRegions = meta.getRegionsToAdd();

    htd = meta.getTableDescriptor();
    regions = new ArrayList<>(restoredRegions.size());
    restoredRegions.stream().filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
  }

  @Override
  public Result next() throws IOException {
    Result result = null;
    while (true) {
      if (currentRegionScanner == null) {
        currentRegion++;
        if (currentRegion >= regions.size()) {
          return null;
        }

        RegionInfo hri = regions.get(currentRegion);
        currentRegionScanner = new ClientSideRegionScanner(conf, fs,
            restoreDir, htd, hri, scan, scanMetrics);
        if (this.scanMetrics != null) {
          this.scanMetrics.countOfRegions.incrementAndGet();
        }
      }

      try {
        result = currentRegionScanner.next();
        if (result != null) {
          if (scan.getLimit() > 0 && ++this.numOfCompleteRows > scan.getLimit()) {
            result = null;
          }
          return result;
        }
      } finally {
        if (result == null) {
          currentRegionScanner.close();
          currentRegionScanner = null;
        }
      }
    }
  }

  private void cleanup() {
    try {
      if (fs.exists(this.restoreDir)) {
        if (!fs.delete(this.restoreDir, true)) {
          LOG.warn(
restoreDir: " + this.restoreDir); 217 } 218 } 219 } catch (IOException ex) { 220 LOG.warn( 221 "Could not delete restore directory for the snapshot. restoreDir: " + this.restoreDir, ex); 222 } 223 } 224 225 @Override 226 public void close() { 227 if (currentRegionScanner != null) { 228 currentRegionScanner.close(); 229 } 230 // if snapshotAlreadyRestored is true, then we should invoke cleanup() method by hand. 231 if (!this.snapshotAlreadyRestored) { 232 cleanup(); 233 } 234 } 235 236 @Override 237 public boolean renewLease() { 238 throw new UnsupportedOperationException(); 239 } 240 241}