View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.client;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Collections;
24  import java.util.List;
25  import java.util.UUID;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileSystem;
31  import org.apache.hadoop.fs.Path;
32  import org.apache.hadoop.hbase.CellUtil;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.HTableDescriptor;
35  import org.apache.hadoop.hbase.classification.InterfaceAudience;
36  import org.apache.hadoop.hbase.classification.InterfaceStability;
37  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription;
38  import org.apache.hadoop.hbase.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
39  import org.apache.hadoop.hbase.snapshot.RestoreSnapshotHelper;
40  import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
41  import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
42  import org.apache.hadoop.hbase.util.FSUtils;
43  
44  /**
45   * A Scanner which performs a scan over snapshot files. Using this class requires copying the
46   * snapshot to a temporary empty directory, which will copy the snapshot reference files into that
47   * directory. Actual data files are not copied.
48   *
49   * <p>
50   * This also allows one to run the scan from an
51   * online or offline hbase cluster. The snapshot files can be exported by using the
52   * {@link org.apache.hadoop.hbase.snapshot.ExportSnapshot} tool, to a pure-hdfs cluster, 
53   * and this scanner can be used to run the scan directly over the snapshot files. 
54   * The snapshot should not be deleted while there are open scanners reading from snapshot 
55   * files.
56   *
57   * <p>
58   * An internal RegionScanner is used to execute the {@link Scan} obtained
59   * from the user for each region in the snapshot.
60   * <p>
61   * HBase owns all the data and snapshot files on the filesystem. Only the HBase user can read from
62   * snapshot files and data files. HBase also enforces security because all the requests are handled
63   * by the server layer, and the user cannot read from the data files directly. To read from snapshot
64   * files directly from the file system, the user who is running the MR job must have sufficient
65   * permissions to access snapshot and reference files. This means that to run mapreduce over
66   * snapshot files, the job has to be run as the HBase user or the user must have group or other
67   * priviledges in the filesystem (See HBASE-8369). Note that, given other users access to read from
68   * snapshot/data files will completely circumvent the access control enforced by HBase.
69   * @see org.apache.hadoop.hbase.mapreduce.TableSnapshotInputFormat
70   */
71  @InterfaceAudience.Public
72  @InterfaceStability.Evolving
73  public class TableSnapshotScanner extends AbstractClientScanner {
74  
75    private static final Log LOG = LogFactory.getLog(TableSnapshotScanner.class);
76  
77    private Configuration conf;
78    private String snapshotName;
79    private FileSystem fs;
80    private Path rootDir;
81    private Path restoreDir;
82    private Scan scan;
83    private ArrayList<HRegionInfo> regions;
84    private HTableDescriptor htd;
85  
86    private ClientSideRegionScanner currentRegionScanner  = null;
87    private int currentRegion = -1;
88  
89    /**
90     * Creates a TableSnapshotScanner.
91     * @param conf the configuration
92     * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
93     * have write permissions to this directory, and this should not be a subdirectory of rootdir.
94     * The scanner deletes the contents of the directory once the scanner is closed.
95     * @param snapshotName the name of the snapshot to read from
96     * @param scan a Scan representing scan parameters
97     * @throws IOException in case of error
98     */
99    public TableSnapshotScanner(Configuration conf, Path restoreDir,
100       String snapshotName, Scan scan) throws IOException {
101     this(conf, FSUtils.getRootDir(conf), restoreDir, snapshotName, scan);
102   }
103 
104   /**
105    * Creates a TableSnapshotScanner.
106    * @param conf the configuration
107    * @param rootDir root directory for HBase.
108    * @param restoreDir a temporary directory to copy the snapshot files into. Current user should
109    * have write permissions to this directory, and this should not be a subdirectory of rootdir.
110    * The scanner deletes the contents of the directory once the scanner is closed.
111    * @param snapshotName the name of the snapshot to read from
112    * @param scan a Scan representing scan parameters
113    * @throws IOException in case of error
114    */
115   public TableSnapshotScanner(Configuration conf, Path rootDir,
116       Path restoreDir, String snapshotName, Scan scan) throws IOException {
117     this.conf = conf;
118     this.snapshotName = snapshotName;
119     this.rootDir = rootDir;
120     // restoreDir will be deleted in close(), use a unique sub directory
121     this.restoreDir = new Path(restoreDir, UUID.randomUUID().toString());
122     this.scan = scan;
123     this.fs = rootDir.getFileSystem(conf);
124     init();
125   }
126 
127   private void init() throws IOException {
128     Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
129     SnapshotDescription snapshotDesc = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
130     SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, snapshotDesc);
131 
132     // load table descriptor
133     htd = manifest.getTableDescriptor();
134 
135     List<SnapshotRegionManifest> regionManifests = manifest.getRegionManifests();
136     if (regionManifests == null) {
137       throw new IllegalArgumentException("Snapshot seems empty");
138     }
139 
140     regions = new ArrayList<HRegionInfo>(regionManifests.size());
141     for (SnapshotRegionManifest regionManifest : regionManifests) {
142       // load region descriptor
143       HRegionInfo hri = HRegionInfo.convert(regionManifest.getRegionInfo());
144 
145       if (CellUtil.overlappingKeys(scan.getStartRow(), scan.getStopRow(),
146           hri.getStartKey(), hri.getEndKey())) {
147         regions.add(hri);
148       }
149     }
150 
151     // sort for regions according to startKey.
152     Collections.sort(regions);
153 
154     initScanMetrics(scan);
155 
156     RestoreSnapshotHelper.copySnapshotForScanner(conf, fs,
157       rootDir, restoreDir, snapshotName);
158   }
159 
160   @Override
161   public Result next() throws IOException {
162     Result result = null;
163     while (true) {
164       if (currentRegionScanner == null) {
165         currentRegion++;
166         if (currentRegion >= regions.size()) {
167           return null;
168         }
169 
170         HRegionInfo hri = regions.get(currentRegion);
171         currentRegionScanner = new ClientSideRegionScanner(conf, fs,
172           restoreDir, htd, hri, scan, scanMetrics);
173         if (this.scanMetrics != null) {
174           this.scanMetrics.countOfRegions.incrementAndGet();
175         }
176       }
177 
178       try {
179         result = currentRegionScanner.next();
180         if (result != null) {
181           return result;
182         }
183       } finally {
184         if (result == null) {
185           currentRegionScanner.close();
186           currentRegionScanner = null;
187         }        
188       }
189     }
190   }
191 
192   @Override
193   public void close() {
194     if (currentRegionScanner != null) {
195       currentRegionScanner.close();
196     }
197     try {
198       fs.delete(this.restoreDir, true);
199     } catch (IOException ex) {
200       LOG.warn("Could not delete restore directory for the snapshot:" + ex);
201     }
202   }
203 
204   @Override
205   public boolean renewLease() {
206     throw new UnsupportedOperationException();
207   }
208 
209 }