001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.client;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.UUID;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.fs.FileSystem;
026import org.apache.hadoop.fs.Path;
027import org.apache.hadoop.hbase.PrivateCellUtil;
028import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
029import org.apache.hadoop.hbase.regionserver.MemStoreLAB;
030import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
031import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
032import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
033import org.apache.hadoop.hbase.util.CommonFSUtils;
034import org.apache.yetus.audience.InterfaceAudience;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
039import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
040import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
041
042/**
043 * A Scanner which performs a scan over snapshot files. Using this class requires copying the
044 * snapshot to a temporary empty directory, which will copy the snapshot reference files into that
045 * directory. Actual data files are not copied.
046 * <p>
047 * This also allows one to run the scan from an online or offline hbase cluster. The snapshot files
048 * can be exported by using the org.apache.hadoop.hbase.snapshot.ExportSnapshot tool, to a pure-hdfs
049 * cluster, and this scanner can be used to run the scan directly over the snapshot files. The
050 * snapshot should not be deleted while there are open scanners reading from snapshot files.
051 * <p>
052 * An internal RegionScanner is used to execute the {@link Scan} obtained from the user for each
053 * region in the snapshot.
054 * <p>
055 * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
056 * snapshot files and data files. HBase also enforces security because all the requests are handled
057 * by the server layer, and the user cannot read from the data files directly. To read from snapshot
058 * files directly from the file system, the user who is running the MR job must have sufficient
059 * permissions to access snapshot and reference files. This means that to run mapreduce over
060 * snapshot files, the job has to be run as the HBase user or the user must have group or other
061 * priviledges in the filesystem (See HBASE-8369). Note that, given other users access to read from
062 * snapshot/data files will completely circumvent the access control enforced by HBase. See
063 * org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.
064 */
065@InterfaceAudience.Private
066public class TableSnapshotScanner extends AbstractClientScanner {
067
068  private static final Logger LOG = LoggerFactory.getLogger(TableSnapshotScanner.class);
069
070  private Configuration conf;
071  private String snapshotName;
072  private FileSystem fs;
073  private Path rootDir;
074  private Path restoreDir;
075  private Scan scan;
076  private ArrayList<RegionInfo> regions;
077  private TableDescriptor htd;
078  private final boolean snapshotAlreadyRestored;
079
080  private ClientSideRegionScanner currentRegionScanner = null;
081  private int currentRegion = -1;
082
083  private int numOfCompleteRows = 0;
084
085  /**
086   * Creates a TableSnapshotScanner.
087   * @param conf         the configuration
088   * @param restoreDir   a temporary directory to copy the snapshot files into. Current user should
089   *                     have write permissions to this directory, and this should not be a
090   *                     subdirectory of rootDir. The scanner deletes the contents of the directory
091   *                     once the scanner is closed.
092   * @param snapshotName the name of the snapshot to read from
093   * @param scan         a Scan representing scan parameters
094   * @throws IOException in case of error
095   */
096  public TableSnapshotScanner(Configuration conf, Path restoreDir, String snapshotName, Scan scan)
097    throws IOException {
098    this(conf, CommonFSUtils.getRootDir(conf), restoreDir, snapshotName, scan);
099  }
100
101  public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
102    String snapshotName, Scan scan) throws IOException {
103    this(conf, rootDir, restoreDir, snapshotName, scan, false);
104  }
105
106  /**
107   * Creates a TableSnapshotScanner.
108   * @param conf                    the configuration
109   * @param rootDir                 root directory for HBase.
110   * @param restoreDir              a temporary directory to copy the snapshot files into. Current
111   *                                user should have write permissions to this directory, and this
112   *                                should not be a subdirectory of rootdir. The scanner deletes the
113   *                                contents of the directory once the scanner is closed.
114   * @param snapshotName            the name of the snapshot to read from
115   * @param scan                    a Scan representing scan parameters
116   * @param snapshotAlreadyRestored true to indicate that snapshot has been restored.
117   * @throws IOException in case of error
118   */
119  public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
120    String snapshotName, Scan scan, boolean snapshotAlreadyRestored) throws IOException {
121    this.conf = conf;
122    this.snapshotName = snapshotName;
123    this.rootDir = rootDir;
124    this.scan = scan;
125    this.snapshotAlreadyRestored = snapshotAlreadyRestored;
126    this.fs = rootDir.getFileSystem(conf);
127    conf.setBoolean(MemStoreLAB.USEMSLAB_KEY, false);
128
129    if (snapshotAlreadyRestored) {
130      this.restoreDir = restoreDir;
131      openWithoutRestoringSnapshot();
132    } else {
133      // restoreDir will be deleted in close(), use a unique sub directory
134      this.restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
135      openWithRestoringSnapshot();
136    }
137
138    initScanMetrics(scan);
139  }
140
141  private void openWithoutRestoringSnapshot() throws IOException {
142    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
143    SnapshotProtos.SnapshotDescription snapshotDesc =
144      SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
145
146    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
147    List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
148    if (regionManifests == null) {
149      throw new IllegalArgumentException("Snapshot seems empty, snapshotName: " + snapshotName);
150    }
151
152    regions = new ArrayList<>(regionManifests.size());
153    regionManifests.stream().map(r -> ProtobufUtil.toRegionInfo(r.getRegionInfo()))
154      .filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
155    htd = manifest.getTableDescriptor();
156  }
157
158  private boolean isValidRegion(RegionInfo hri) {
159    // An offline split parent region should be excluded.
160    if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
161      return false;
162    }
163    return PrivateCellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
164      hri.getEndKey());
165  }
166
167  private void openWithRestoringSnapshot() throws IOException {
168    final RestoreSnapshotHelper.RestoreMetaChanges meta =
169      RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
170    final List<RegionInfo> restoredRegions = meta.getRegionsToAdd();
171
172    htd = meta.getTableDescriptor();
173    regions = new ArrayList<>(restoredRegions.size());
174    restoredRegions.stream().filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
175  }
176
177  @Override
178  public Result next() throws IOException {
179    Result result = null;
180    while (true) {
181      if (currentRegionScanner == null) {
182        currentRegion++;
183        if (currentRegion >= regions.size()) {
184          return null;
185        }
186
187        RegionInfo hri = regions.get(currentRegion);
188        currentRegionScanner =
189          new ClientSideRegionScanner(conf, fs, restoreDir, htd, hri, scan, scanMetrics);
190        if (this.scanMetrics != null) {
191          this.scanMetrics.addToCounter(ScanMetrics.REGIONS_SCANNED_METRIC_NAME, 1);
192        }
193      }
194
195      try {
196        result = currentRegionScanner.next();
197        if (result != null) {
198          if (scan.getLimit() > 0 && ++this.numOfCompleteRows > scan.getLimit()) {
199            result = null;
200          }
201          return result;
202        }
203      } finally {
204        if (result == null) {
205          currentRegionScanner.close();
206          currentRegionScanner = null;
207        }
208      }
209    }
210  }
211
212  private void cleanup() {
213    try {
214      if (fs.exists(this.restoreDir)) {
215        if (!fs.delete(this.restoreDir, true)) {
216          LOG.warn(
217            "Delete restore directory for the snapshot failed. restoreDir: " + this.restoreDir);
218        }
219      }
220    } catch (IOException ex) {
221      LOG.warn(
222        "Could not delete restore directory for the snapshot. restoreDir: " + this.restoreDir, ex);
223    }
224  }
225
226  @Override
227  public void close() {
228    if (currentRegionScanner != null) {
229      currentRegionScanner.close();
230    }
231    // if snapshotAlreadyRestored is true, then we should invoke cleanup() method by hand.
232    if (!this.snapshotAlreadyRestored) {
233      cleanup();
234    }
235  }
236
237  @Override
238  public boolean renewLease() {
239    throw new UnsupportedOperationException();
240  }
241
242}