001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.client;
020
021import java.io.IOException;
022import java.util.ArrayList;
023import java.util.List;
024import java.util.UUID;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.FileSystem;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.HRegionInfo;
029import org.apache.hadoop.hbase.PrivateCellUtil;
030import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
031import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
032import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
033import org.apache.hadoop.hbase.util.CommonFSUtils;
034import org.apache.yetus.audience.InterfaceAudience;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
039import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
040
041/**
042 * A Scanner which performs a scan over snapshot files. Using this class requires copying the
043 * snapshot to a temporary empty directory, which will copy the snapshot reference files into that
044 * directory. Actual data files are not copied.
045 *
046 * <p>
047 * This also allows one to run the scan from an
048 * online or offline hbase cluster. The snapshot files can be exported by using the
049 * org.apache.hadoop.hbase.snapshot.ExportSnapshot tool,
050 * to a pure-hdfs cluster, and this scanner can be used to
051 * run the scan directly over the snapshot files. The snapshot should not be deleted while there
052 * are open scanners reading from snapshot files.
053 *
054 * <p>
055 * An internal RegionScanner is used to execute the {@link Scan} obtained
056 * from the user for each region in the snapshot.
057 * <p>
058 * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
059 * snapshot files and data files. HBase also enforces security because all the requests are handled
060 * by the server layer, and the user cannot read from the data files directly. To read from snapshot
061 * files directly from the file system, the user who is running the MR job must have sufficient
062 * permissions to access snapshot and reference files. This means that to run mapreduce over
063 * snapshot files, the job has to be run as the HBase user or the user must have group or other
064 * privileges in the filesystem (See HBASE-8369). Note that, giving other users access to read from
065 * snapshot/data files will completely circumvent the access control enforced by HBase.
066 * See org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.
067 */
068@InterfaceAudience.Private
069public class TableSnapshotScanner extends AbstractClientScanner {
070
071  private static final Logger LOG = LoggerFactory.getLogger(TableSnapshotScanner.class);
072
073  private Configuration conf;
074  private String snapshotName;
075  private FileSystem fs;
076  private Path rootDir;
077  private Path restoreDir;
078  private Scan scan;
079  private ArrayList<RegionInfo> regions;
080  private TableDescriptor htd;
081  private final boolean snapshotAlreadyRestored;
082
083  private ClientSideRegionScanner currentRegionScanner  = null;
084  private int currentRegion = -1;
085
086  private int numOfCompleteRows = 0;
087  /**
088   * Creates a TableSnapshotScanner.
089   * @param conf the configuration
090   * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
091   *          have write permissions to this directory, and this should not be a subdirectory of
092   *          rootDir. The scanner deletes the contents of the directory once the scanner is closed.
093   * @param snapshotName the name of the snapshot to read from
094   * @param scan a Scan representing scan parameters
095   * @throws IOException in case of error
096   */
097  public TableSnapshotScanner(Configuration conf, Path restoreDir, String snapshotName, Scan scan)
098      throws IOException {
099    this(conf, CommonFSUtils.getRootDir(conf), restoreDir, snapshotName, scan);
100  }
101
102  public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
103      String snapshotName, Scan scan) throws IOException {
104    this(conf, rootDir, restoreDir, snapshotName, scan, false);
105  }
106
107  /**
108   * Creates a TableSnapshotScanner.
109   * @param conf the configuration
110   * @param rootDir root directory for HBase.
111   * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
112   *          have write permissions to this directory, and this should not be a subdirectory of
113   *          rootdir. The scanner deletes the contents of the directory once the scanner is closed.
114   * @param snapshotName the name of the snapshot to read from
115   * @param scan a Scan representing scan parameters
116   * @param snapshotAlreadyRestored true to indicate that snapshot has been restored.
117   * @throws IOException in case of error
118   */
119  public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
120      String snapshotName, Scan scan, boolean snapshotAlreadyRestored) throws IOException {
121    this.conf = conf;
122    this.snapshotName = snapshotName;
123    this.rootDir = rootDir;
124    this.scan = scan;
125    this.snapshotAlreadyRestored = snapshotAlreadyRestored;
126    this.fs = rootDir.getFileSystem(conf);
127
128    if (snapshotAlreadyRestored) {
129      this.restoreDir = restoreDir;
130      openWithoutRestoringSnapshot();
131    } else {
132      // restoreDir will be deleted in close(), use a unique sub directory
133      this.restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
134      openWithRestoringSnapshot();
135    }
136
137    initScanMetrics(scan);
138  }
139
140  private void openWithoutRestoringSnapshot() throws IOException {
141    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
142    SnapshotProtos.SnapshotDescription snapshotDesc =
143        SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
144
145    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
146    List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
147    if (regionManifests == null) {
148      throw new IllegalArgumentException("Snapshot seems empty, snapshotName: " + snapshotName);
149    }
150
151    regions = new ArrayList<>(regionManifests.size());
152    regionManifests.stream().map(r -> HRegionInfo.convert(r.getRegionInfo()))
153        .filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
154    htd = manifest.getTableDescriptor();
155  }
156
157  private boolean isValidRegion(RegionInfo hri) {
158    // An offline split parent region should be excluded.
159    if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
160      return false;
161    }
162    return PrivateCellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
163      hri.getEndKey());
164  }
165
166  private void openWithRestoringSnapshot() throws IOException {
167    final RestoreSnapshotHelper.RestoreMetaChanges meta =
168        RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
169    final List<RegionInfo> restoredRegions = meta.getRegionsToAdd();
170
171    htd = meta.getTableDescriptor();
172    regions = new ArrayList<>(restoredRegions.size());
173    restoredRegions.stream().filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
174  }
175
176  @Override
177  public Result next() throws IOException {
178    Result result = null;
179    while (true) {
180      if (currentRegionScanner == null) {
181        currentRegion++;
182        if (currentRegion >= regions.size()) {
183          return null;
184        }
185
186        RegionInfo hri = regions.get(currentRegion);
187        currentRegionScanner = new ClientSideRegionScanner(conf, fs,
188          restoreDir, htd, hri, scan, scanMetrics);
189        if (this.scanMetrics != null) {
190          this.scanMetrics.countOfRegions.incrementAndGet();
191        }
192      }
193
194      try {
195        result = currentRegionScanner.next();
196        if (result != null) {
197          if (scan.getLimit() > 0 && ++this.numOfCompleteRows > scan.getLimit()) {
198            result = null;
199          }
200          return result;
201        }
202      } finally {
203        if (result == null) {
204          currentRegionScanner.close();
205          currentRegionScanner = null;
206        }
207      }
208    }
209  }
210
211  private void cleanup() {
212    try {
213      if (fs.exists(this.restoreDir)) {
214        if (!fs.delete(this.restoreDir, true)) {
215          LOG.warn(
216            "Delete restore directory for the snapshot failed. restoreDir: " + this.restoreDir);
217        }
218      }
219    } catch (IOException ex) {
220      LOG.warn(
221        "Could not delete restore directory for the snapshot. restoreDir: " + this.restoreDir, ex);
222    }
223  }
224
225  @Override
226  public void close() {
227    if (currentRegionScanner != null) {
228      currentRegionScanner.close();
229    }
230    // if snapshotAlreadyRestored is true, then we should invoke cleanup() method by hand.
231    if (!this.snapshotAlreadyRestored) {
232      cleanup();
233    }
234  }
235
236  @Override
237  public boolean renewLease() {
238    throw new UnsupportedOperationException();
239  }
240
241}