001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.client;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.UUID;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.fs.FileSystem;
026import org.apache.hadoop.fs.Path;
027import org.apache.hadoop.hbase.HRegionInfo;
028import org.apache.hadoop.hbase.PrivateCellUtil;
029import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
030import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
031import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
032import org.apache.hadoop.hbase.util.CommonFSUtils;
033import org.apache.yetus.audience.InterfaceAudience;
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036
037import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos;
038import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
039
040/**
041 * A Scanner which performs a scan over snapshot files. Using this class requires copying the
042 * snapshot to a temporary empty directory, which will copy the snapshot reference files into that
043 * directory. Actual data files are not copied.
044 * <p>
045 * This also allows one to run the scan from an online or offline hbase cluster. The snapshot files
046 * can be exported by using the org.apache.hadoop.hbase.snapshot.ExportSnapshot tool, to a pure-hdfs
047 * cluster, and this scanner can be used to run the scan directly over the snapshot files. The
048 * snapshot should not be deleted while there are open scanners reading from snapshot files.
049 * <p>
050 * An internal RegionScanner is used to execute the {@link Scan} obtained from the user for each
051 * region in the snapshot.
052 * <p>
053 * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
054 * snapshot files and data files. HBase also enforces security because all the requests are handled
055 * by the server layer, and the user cannot read from the data files directly. To read from snapshot
056 * files directly from the file system, the user who is running the MR job must have sufficient
057 * permissions to access snapshot and reference files. This means that to run mapreduce over
058 * snapshot files, the job has to be run as the HBase user or the user must have group or other
059 * priviledges in the filesystem (See HBASE-8369). Note that, given other users access to read from
060 * snapshot/data files will completely circumvent the access control enforced by HBase. See
061 * org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat.
062 */
063@InterfaceAudience.Private
064public class TableSnapshotScanner extends AbstractClientScanner {
065
066  private static final Logger LOG = LoggerFactory.getLogger(TableSnapshotScanner.class);
067
068  private Configuration conf;
069  private String snapshotName;
070  private FileSystem fs;
071  private Path rootDir;
072  private Path restoreDir;
073  private Scan scan;
074  private ArrayList<RegionInfo> regions;
075  private TableDescriptor htd;
076  private final boolean snapshotAlreadyRestored;
077
078  private ClientSideRegionScanner currentRegionScanner = null;
079  private int currentRegion = -1;
080
081  private int numOfCompleteRows = 0;
082
083  /**
084   * Creates a TableSnapshotScanner.
085   * @param conf         the configuration
086   * @param restoreDir   a temporary directory to copy the snapshot files into. Current user should
087   *                     have write permissions to this directory, and this should not be a
088   *                     subdirectory of rootDir. The scanner deletes the contents of the directory
089   *                     once the scanner is closed.
090   * @param snapshotName the name of the snapshot to read from
091   * @param scan         a Scan representing scan parameters
092   * @throws IOException in case of error
093   */
094  public TableSnapshotScanner(Configuration conf, Path restoreDir, String snapshotName, Scan scan)
095    throws IOException {
096    this(conf, CommonFSUtils.getRootDir(conf), restoreDir, snapshotName, scan);
097  }
098
099  public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
100    String snapshotName, Scan scan) throws IOException {
101    this(conf, rootDir, restoreDir, snapshotName, scan, false);
102  }
103
104  /**
105   * Creates a TableSnapshotScanner.
106   * @param conf                    the configuration
107   * @param rootDir                 root directory for HBase.
108   * @param restoreDir              a temporary directory to copy the snapshot files into. Current
109   *                                user should have write permissions to this directory, and this
110   *                                should not be a subdirectory of rootdir. The scanner deletes the
111   *                                contents of the directory once the scanner is closed.
112   * @param snapshotName            the name of the snapshot to read from
113   * @param scan                    a Scan representing scan parameters
114   * @param snapshotAlreadyRestored true to indicate that snapshot has been restored.
115   * @throws IOException in case of error
116   */
117  public TableSnapshotScanner(Configuration conf, Path rootDir, Path restoreDir,
118    String snapshotName, Scan scan, boolean snapshotAlreadyRestored) throws IOException {
119    this.conf = conf;
120    this.snapshotName = snapshotName;
121    this.rootDir = rootDir;
122    this.scan = scan;
123    this.snapshotAlreadyRestored = snapshotAlreadyRestored;
124    this.fs = rootDir.getFileSystem(conf);
125
126    if (snapshotAlreadyRestored) {
127      this.restoreDir = restoreDir;
128      openWithoutRestoringSnapshot();
129    } else {
130      // restoreDir will be deleted in close(), use a unique sub directory
131      this.restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
132      openWithRestoringSnapshot();
133    }
134
135    initScanMetrics(scan);
136  }
137
138  private void openWithoutRestoringSnapshot() throws IOException {
139    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
140    SnapshotProtos.SnapshotDescription snapshotDesc =
141      SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
142
143    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
144    List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
145    if (regionManifests == null) {
146      throw new IllegalArgumentException("Snapshot seems empty, snapshotName: " + snapshotName);
147    }
148
149    regions = new ArrayList<>(regionManifests.size());
150    regionManifests.stream().map(r -> HRegionInfo.convert(r.getRegionInfo()))
151      .filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
152    htd = manifest.getTableDescriptor();
153  }
154
155  private boolean isValidRegion(RegionInfo hri) {
156    // An offline split parent region should be excluded.
157    if (hri.isOffline() && (hri.isSplit() || hri.isSplitParent())) {
158      return false;
159    }
160    return PrivateCellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(), hri.getStartKey(),
161      hri.getEndKey());
162  }
163
164  private void openWithRestoringSnapshot() throws IOException {
165    final RestoreSnapshotHelper.RestoreMetaChanges meta =
166      RestoreSnapshotHelper.copySnapshotForScanner(conf, fs, rootDir, restoreDir, snapshotName);
167    final List<RegionInfo> restoredRegions = meta.getRegionsToAdd();
168
169    htd = meta.getTableDescriptor();
170    regions = new ArrayList<>(restoredRegions.size());
171    restoredRegions.stream().filter(this::isValidRegion).sorted().forEach(r -> regions.add(r));
172  }
173
174  @Override
175  public Result next() throws IOException {
176    Result result = null;
177    while (true) {
178      if (currentRegionScanner == null) {
179        currentRegion++;
180        if (currentRegion >= regions.size()) {
181          return null;
182        }
183
184        RegionInfo hri = regions.get(currentRegion);
185        currentRegionScanner =
186          new ClientSideRegionScanner(conf, fs, restoreDir, htd, hri, scan, scanMetrics);
187        if (this.scanMetrics != null) {
188          this.scanMetrics.countOfRegions.incrementAndGet();
189        }
190      }
191
192      try {
193        result = currentRegionScanner.next();
194        if (result != null) {
195          if (scan.getLimit() > 0 && ++this.numOfCompleteRows > scan.getLimit()) {
196            result = null;
197          }
198          return result;
199        }
200      } finally {
201        if (result == null) {
202          currentRegionScanner.close();
203          currentRegionScanner = null;
204        }
205      }
206    }
207  }
208
209  private void cleanup() {
210    try {
211      if (fs.exists(this.restoreDir)) {
212        if (!fs.delete(this.restoreDir, true)) {
213          LOG.warn(
214            "Delete restore directory for the snapshot failed. restoreDir: " + this.restoreDir);
215        }
216      }
217    } catch (IOException ex) {
218      LOG.warn(
219        "Could not delete restore directory for the snapshot. restoreDir: " + this.restoreDir, ex);
220    }
221  }
222
223  @Override
224  public void close() {
225    if (currentRegionScanner != null) {
226      currentRegionScanner.close();
227    }
228    // if snapshotAlreadyRestored is true, then we should invoke cleanup() method by hand.
229    if (!this.snapshotAlreadyRestored) {
230      cleanup();
231    }
232  }
233
234  @Override
235  public boolean renewLease() {
236    throw new UnsupportedOperationException();
237  }
238
239}