View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.regex.Matcher;
24  import java.util.regex.Pattern;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.classification.InterfaceAudience;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileStatus;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
34  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
35  import org.apache.hadoop.hbase.io.HFileLink;
36  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
37  import org.apache.hadoop.hbase.io.Reference;
38  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
39  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40  import org.apache.hadoop.hbase.util.FSUtils;
41  
42  /**
43   * Describe a StoreFile (hfile, reference, link)
44   */
45  @InterfaceAudience.Private
46  public class StoreFileInfo {
47    public static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
48  
49    /**
50     * A non-capture group, for hfiles, so that this can be embedded.
51     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
52     */
53    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";
54  
55    /** Regex that will work for hfiles */
56    private static final Pattern HFILE_NAME_PATTERN =
57      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
58  
59    /**
60     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
61     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
62     * If reference, then the regex has more than just one group.
63     * Group 1, hfile/hfilelink pattern, is this file's id.
64     * Group 2 '(.+)' is the reference's parent region name.
65     */
66    private static final Pattern REF_NAME_PATTERN =
67      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
68        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
69  
70    // Configuration
71    private Configuration conf;
72  
73    // HDFS blocks distribution information
74    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
75  
76    // If this storefile references another, this is the reference instance.
77    private final Reference reference;
78  
79    // If this storefile is a link to another, this is the link instance.
80    private final HFileLink link;
81  
82    // FileSystem information for the file.
83    private final FileStatus fileStatus;
84  
85    private RegionCoprocessorHost coprocessorHost;
86  
87    /**
88     * Create a Store File Info
89     * @param conf the {@link Configuration} to use
90     * @param fs The current file system to use.
91     * @param path The {@link Path} of the file
92     */
93    public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path path)
94        throws IOException {
95      this(conf, fs, fs.getFileStatus(path));
96    }
97  
98    /**
99     * Create a Store File Info
100    * @param conf the {@link Configuration} to use
101    * @param fs The current file system to use.
102    * @param fileStatus The {@link FileStatus} of the file
103    */
104   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
105       throws IOException {
106     this.conf = conf;
107     this.fileStatus = fileStatus;
108     Path p = fileStatus.getPath();
109     if (HFileLink.isHFileLink(p)) {
110       // HFileLink
111       this.reference = null;
112       this.link = new HFileLink(conf, p);
113       if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
114     } else if (isReference(p)) {
115       this.reference = Reference.read(fs, p);
116       Path referencePath = getReferredToFile(p);
117       if (HFileLink.isHFileLink(referencePath)) {
118         // HFileLink Reference
119         this.link = new HFileLink(conf, referencePath);
120       } else {
121         // Reference
122         this.link = null;
123       }
124       if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
125         " reference to " + referencePath);
126     } else if (isHFile(p)) {
127       // HFile
128       this.reference = null;
129       this.link = null;
130     } else {
131       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
132     }
133   }
134 
135   /**
136    * Sets the region coprocessor env.
137    * @param coprocessorHost
138    */
139   public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
140     this.coprocessorHost = coprocessorHost;
141   }
142 
143   /*
144    * @return the Reference object associated to this StoreFileInfo.
145    *         null if the StoreFile is not a reference.
146    */
147   Reference getReference() {
148     return this.reference;
149   }
150 
151   /** @return True if the store file is a Reference */
152   public boolean isReference() {
153     return this.reference != null;
154   }
155 
156   /** @return True if the store file is a top Reference */
157   public boolean isTopReference() {
158     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
159   }
160 
161   /** @return True if the store file is a link */
162   public boolean isLink() {
163     return this.link != null && this.reference == null;
164   }
165 
166   /** @return the HDFS block distribution */
167   public HDFSBlocksDistribution getHDFSBlockDistribution() {
168     return this.hdfsBlocksDistribution;
169   }
170 
171   /**
172    * Open a Reader for the StoreFile
173    * @param fs The current file system to use.
174    * @param cacheConf The cache configuration and block cache reference.
175    * @return The StoreFile.Reader for the file
176    */
177   public StoreFile.Reader open(final FileSystem fs,
178       final CacheConfig cacheConf) throws IOException {
179     FSDataInputStreamWrapper in;
180     FileStatus status;
181 
182     if (this.link != null) {
183       // HFileLink
184       in = new FSDataInputStreamWrapper(fs, this.link);
185       status = this.link.getFileStatus(fs);
186     } else if (this.reference != null) {
187       // HFile Reference
188       Path referencePath = getReferredToFile(this.getPath());
189       in = new FSDataInputStreamWrapper(fs, referencePath);
190       status = fs.getFileStatus(referencePath);
191     } else {
192       in = new FSDataInputStreamWrapper(fs, this.getPath());
193       status = fileStatus;
194     }
195     long length = status.getLen();
196     if (this.reference != null) {
197       hdfsBlocksDistribution = computeRefFileHDFSBlockDistribution(fs, reference, status);
198     } else {
199       hdfsBlocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, length);
200     }
201     StoreFile.Reader reader = null;
202     if (this.coprocessorHost != null) {
203       reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
204         cacheConf, reference);
205     }
206     if (reader == null) {
207       if (this.reference != null) {
208         reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
209           conf);
210       } else {
211         reader = new StoreFile.Reader(fs, this.getPath(), in, length, cacheConf, conf);
212       }
213     }
214     if (this.coprocessorHost != null) {
215       reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
216         cacheConf, reference, reader);
217     }
218     return reader;
219   }
220 
221   /**
222    * Compute the HDFS Block Distribution for this StoreFile
223    */
224   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
225       throws IOException {
226     FileStatus status;
227     if (this.reference != null) {
228       if (this.link != null) {
229         // HFileLink Reference
230         status = link.getFileStatus(fs);
231       } else {
232         // HFile Reference
233         Path referencePath = getReferredToFile(this.getPath());
234         status = fs.getFileStatus(referencePath);
235       }
236       return computeRefFileHDFSBlockDistribution(fs, reference, status);
237     } else {
238       if (this.link != null) {
239         // HFileLink
240         status = link.getFileStatus(fs);
241       } else {
242         status = this.fileStatus;
243       }
244       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
245     }
246   }
247 
248   /** @return The {@link Path} of the file */
249   public Path getPath() {
250     return this.fileStatus.getPath();
251   }
252 
253   /** @return The {@link FileStatus} of the file */
254   public FileStatus getFileStatus() {
255     return this.fileStatus;
256   }
257 
258   /** @return Get the modification time of the file. */
259   public long getModificationTime() {
260     return this.fileStatus.getModificationTime();
261   }
262 
263   @Override
264   public String toString() {
265     return this.getPath() +
266       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
267   }
268 
269   /**
270    * @param path Path to check.
271    * @return True if the path has format of a HFile.
272    */
273   public static boolean isHFile(final Path path) {
274     return isHFile(path.getName());
275   }
276 
277   public static boolean isHFile(final String fileName) {
278     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
279     return m.matches() && m.groupCount() > 0;
280   }
281 
282   /**
283    * @param path Path to check.
284    * @return True if the path has format of a HStoreFile reference.
285    */
286   public static boolean isReference(final Path path) {
287     return isReference(path.getName());
288   }
289 
290   /**
291    * @param name file name to check.
292    * @return True if the path has format of a HStoreFile reference.
293    */
294   public static boolean isReference(final String name) {
295     Matcher m = REF_NAME_PATTERN.matcher(name);
296     return m.matches() && m.groupCount() > 1;
297   }
298 
299   /*
300    * Return path to the file referred to by a Reference.  Presumes a directory
301    * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
302    * @param p Path to a Reference file.
303    * @return Calculated path to parent region file.
304    * @throws IllegalArgumentException when path regex fails to match.
305    */
306   public static Path getReferredToFile(final Path p) {
307     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
308     if (m == null || !m.matches()) {
309       LOG.warn("Failed match of store file name " + p.toString());
310       throw new IllegalArgumentException("Failed match of store file name " +
311           p.toString());
312     }
313 
314     // Other region name is suffix on the passed Reference file name
315     String otherRegion = m.group(2);
316     // Tabledir is up two directories from where Reference was written.
317     Path tableDir = p.getParent().getParent().getParent();
318     String nameStrippedOfSuffix = m.group(1);
319     LOG.debug("reference '" + p + "' to region=" + otherRegion + " hfile=" + nameStrippedOfSuffix);
320 
321     // Build up new path with the referenced region in place of our current
322     // region in the reference path.  Also strip regionname suffix from name.
323     return new Path(new Path(new Path(tableDir, otherRegion),
324       p.getParent().getName()), nameStrippedOfSuffix);
325   }
326 
327   /**
328    * Validate the store file name.
329    * @param fileName name of the file to validate
330    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
331    */
332   public static boolean validateStoreFileName(final String fileName) {
333     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
334       return(true);
335     return !fileName.contains("-");
336   }
337 
338   /**
339    * Return if the specified file is a valid store file or not.
340    * @param fileStatus The {@link FileStatus} of the file
341    * @return <tt>true</tt> if the file is valid
342    */
343   public static boolean isValid(final FileStatus fileStatus)
344       throws IOException {
345     final Path p = fileStatus.getPath();
346 
347     if (fileStatus.isDir())
348       return false;
349 
350     // Check for empty hfile. Should never be the case but can happen
351     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
352     // NOTE: that the HFileLink is just a name, so it's an empty file.
353     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
354       LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
355       return false;
356     }
357 
358     return validateStoreFileName(p.getName());
359   }
360 
361   /**
362    * helper function to compute HDFS blocks distribution of a given reference
363    * file.For reference file, we don't compute the exact value. We use some
364    * estimate instead given it might be good enough. we assume bottom part
365    * takes the first half of reference file, top part takes the second half
366    * of the reference file. This is just estimate, given
367    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
368    * If this estimate isn't good enough, we can improve it later.
369    * @param fs  The FileSystem
370    * @param reference  The reference
371    * @param status  The reference FileStatus
372    * @return HDFS blocks distribution
373    */
374   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
375       final FileSystem fs, final Reference reference, final FileStatus status)
376       throws IOException {
377     if (status == null) {
378       return null;
379     }
380 
381     long start = 0;
382     long length = 0;
383 
384     if (Reference.isTopFileRegion(reference.getFileRegion())) {
385       start = status.getLen()/2;
386       length = status.getLen() - status.getLen()/2;
387     } else {
388       start = 0;
389       length = status.getLen()/2;
390     }
391     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
392   }
393 }