View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.IOException;
23  import java.util.regex.Matcher;
24  import java.util.regex.Pattern;
25  
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.classification.InterfaceAudience;
29  import org.apache.hadoop.conf.Configuration;
30  import org.apache.hadoop.fs.FileStatus;
31  import org.apache.hadoop.fs.FileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
34  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
35  import org.apache.hadoop.hbase.io.HFileLink;
36  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
37  import org.apache.hadoop.hbase.io.Reference;
38  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
39  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40  import org.apache.hadoop.hbase.util.FSUtils;
41  
42  /**
43   * Describe a StoreFile (hfile, reference, link)
44   */
45  @InterfaceAudience.Private
46  public class StoreFileInfo {
47    public static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
48  
49    /**
50     * A non-capture group, for hfiles, so that this can be embedded.
51     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
52     */
53    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";
54  
55    /** Regex that will work for hfiles */
56    private static final Pattern HFILE_NAME_PATTERN =
57      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
58  
59    /**
60     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
61     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
62     * If reference, then the regex has more than just one group.
63     * Group 1, hfile/hfilelink pattern, is this file's id.
64     * Group 2 '(.+)' is the reference's parent region name.
65     */
66    private static final Pattern REF_NAME_PATTERN =
67      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
68        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
69  
70    // Configuration
71    private Configuration conf;
72  
73    // HDFS blocks distribution information
74    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
75  
76    // If this storefile references another, this is the reference instance.
77    private final Reference reference;
78  
79    // If this storefile is a link to another, this is the link instance.
80    private final HFileLink link;
81  
82    // FileSystem information for the file.
83    private final FileStatus fileStatus;
84  
85    private RegionCoprocessorHost coprocessorHost;
86  
87    /**
88     * Create a Store File Info
89     * @param conf the {@link Configuration} to use
90     * @param fs The current file system to use.
91     * @param path The {@link Path} of the file
92     */
93    public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path path)
94        throws IOException {
95      this(conf, fs, fs.getFileStatus(path));
96    }
97  
98    /**
99     * Create a Store File Info
100    * @param conf the {@link Configuration} to use
101    * @param fs The current file system to use.
102    * @param fileStatus The {@link FileStatus} of the file
103    */
104   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
105       throws IOException {
106     this.conf = conf;
107     this.fileStatus = fileStatus;
108     Path p = fileStatus.getPath();
109     if (HFileLink.isHFileLink(p)) {
110       // HFileLink
111       this.reference = null;
112       this.link = new HFileLink(conf, p);
113       if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
114     } else if (isReference(p)) {
115       this.reference = Reference.read(fs, p);
116       Path referencePath = getReferredToFile(p);
117       if (HFileLink.isHFileLink(referencePath)) {
118         // HFileLink Reference
119         this.link = new HFileLink(conf, referencePath);
120       } else {
121         // Reference
122         this.link = null;
123       }
124       if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
125         " reference to " + referencePath);
126     } else if (isHFile(p)) {
127       // HFile
128       this.reference = null;
129       this.link = null;
130     } else {
131       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
132     }
133   }
134 
135   /**
136    * Sets the region coprocessor env.
137    * @param coprocessorHost
138    */
139   public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
140     this.coprocessorHost = coprocessorHost;
141   }
142 
143   /*
144    * @return the Reference object associated to this StoreFileInfo.
145    *         null if the StoreFile is not a reference.
146    */
147   public Reference getReference() {
148     return this.reference;
149   }
150 
151   /** @return True if the store file is a Reference */
152   public boolean isReference() {
153     return this.reference != null;
154   }
155 
156   /** @return True if the store file is a top Reference */
157   public boolean isTopReference() {
158     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
159   }
160 
161   /** @return True if the store file is a link */
162   public boolean isLink() {
163     return this.link != null && this.reference == null;
164   }
165 
166   /** @return the HDFS block distribution */
167   public HDFSBlocksDistribution getHDFSBlockDistribution() {
168     return this.hdfsBlocksDistribution;
169   }
170 
171   /**
172    * Open a Reader for the StoreFile
173    * @param fs The current file system to use.
174    * @param cacheConf The cache configuration and block cache reference.
175    * @return The StoreFile.Reader for the file
176    */
177   public StoreFile.Reader open(final FileSystem fs,
178       final CacheConfig cacheConf) throws IOException {
179     FSDataInputStreamWrapper in;
180     FileStatus status;
181 
182     if (this.link != null) {
183       // HFileLink
184       in = new FSDataInputStreamWrapper(fs, this.link);
185       status = this.link.getFileStatus(fs);
186     } else if (this.reference != null) {
187       // HFile Reference
188       Path referencePath = getReferredToFile(this.getPath());
189       in = new FSDataInputStreamWrapper(fs, referencePath);
190       status = fs.getFileStatus(referencePath);
191     } else {
192       in = new FSDataInputStreamWrapper(fs, this.getPath());
193       status = fileStatus;
194     }
195     long length = status.getLen();
196     if (this.reference != null) {
197       hdfsBlocksDistribution = computeRefFileHDFSBlockDistribution(fs, reference, status);
198     } else {
199       hdfsBlocksDistribution = FSUtils.computeHDFSBlocksDistribution(fs, status, 0, length);
200     }
201     StoreFile.Reader reader = null;
202     if (this.coprocessorHost != null) {
203       reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
204         cacheConf, reference);
205     }
206     if (reader == null) {
207       if (this.reference != null) {
208         reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
209           conf);
210       } else {
211         reader = new StoreFile.Reader(fs, this.getPath(), in, length, cacheConf, conf);
212       }
213     }
214     if (this.coprocessorHost != null) {
215       reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
216         cacheConf, reference, reader);
217     }
218     return reader;
219   }
220 
221   /**
222    * Compute the HDFS Block Distribution for this StoreFile
223    */
224   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
225       throws IOException {
226     FileStatus status = getReferencedFileStatus(fs);
227     if (this.reference != null) {
228       return computeRefFileHDFSBlockDistribution(fs, reference, status);
229     } else {
230       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
231     }
232   }
233 
234   /**
235    * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
236    * @param fs The current file system to use.
237    * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
238    */
239   public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
240     FileStatus status;
241     if (this.reference != null) {
242       if (this.link != null) {
243         // HFileLink Reference
244         status = link.getFileStatus(fs);
245       } else {
246         // HFile Reference
247         Path referencePath = getReferredToFile(this.getPath());
248         status = fs.getFileStatus(referencePath);
249       }
250     } else {
251       if (this.link != null) {
252         // HFileLink
253         status = link.getFileStatus(fs);
254       } else {
255         status = this.fileStatus;
256       }
257     }
258     return status;
259   }
260 
261   /** @return The {@link Path} of the file */
262   public Path getPath() {
263     return this.fileStatus.getPath();
264   }
265 
266   /** @return The {@link FileStatus} of the file */
267   public FileStatus getFileStatus() {
268     return this.fileStatus;
269   }
270 
271   /** @return Get the modification time of the file. */
272   public long getModificationTime() {
273     return this.fileStatus.getModificationTime();
274   }
275 
276   @Override
277   public String toString() {
278     return this.getPath() +
279       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
280   }
281 
282   /**
283    * @param path Path to check.
284    * @return True if the path has format of a HFile.
285    */
286   public static boolean isHFile(final Path path) {
287     return isHFile(path.getName());
288   }
289 
290   public static boolean isHFile(final String fileName) {
291     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
292     return m.matches() && m.groupCount() > 0;
293   }
294 
295   /**
296    * @param path Path to check.
297    * @return True if the path has format of a HStoreFile reference.
298    */
299   public static boolean isReference(final Path path) {
300     return isReference(path.getName());
301   }
302 
303   /**
304    * @param name file name to check.
305    * @return True if the path has format of a HStoreFile reference.
306    */
307   public static boolean isReference(final String name) {
308     Matcher m = REF_NAME_PATTERN.matcher(name);
309     return m.matches() && m.groupCount() > 1;
310   }
311 
312   /*
313    * Return path to the file referred to by a Reference.  Presumes a directory
314    * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
315    * @param p Path to a Reference file.
316    * @return Calculated path to parent region file.
317    * @throws IllegalArgumentException when path regex fails to match.
318    */
319   public static Path getReferredToFile(final Path p) {
320     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
321     if (m == null || !m.matches()) {
322       LOG.warn("Failed match of store file name " + p.toString());
323       throw new IllegalArgumentException("Failed match of store file name " +
324           p.toString());
325     }
326 
327     // Other region name is suffix on the passed Reference file name
328     String otherRegion = m.group(2);
329     // Tabledir is up two directories from where Reference was written.
330     Path tableDir = p.getParent().getParent().getParent();
331     String nameStrippedOfSuffix = m.group(1);
332     LOG.debug("reference '" + p + "' to region=" + otherRegion + " hfile=" + nameStrippedOfSuffix);
333 
334     // Build up new path with the referenced region in place of our current
335     // region in the reference path.  Also strip regionname suffix from name.
336     return new Path(new Path(new Path(tableDir, otherRegion),
337       p.getParent().getName()), nameStrippedOfSuffix);
338   }
339 
340   /**
341    * Validate the store file name.
342    * @param fileName name of the file to validate
343    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
344    */
345   public static boolean validateStoreFileName(final String fileName) {
346     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
347       return(true);
348     return !fileName.contains("-");
349   }
350 
351   /**
352    * Return if the specified file is a valid store file or not.
353    * @param fileStatus The {@link FileStatus} of the file
354    * @return <tt>true</tt> if the file is valid
355    */
356   public static boolean isValid(final FileStatus fileStatus)
357       throws IOException {
358     final Path p = fileStatus.getPath();
359 
360     if (fileStatus.isDirectory())
361       return false;
362 
363     // Check for empty hfile. Should never be the case but can happen
364     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
365     // NOTE: that the HFileLink is just a name, so it's an empty file.
366     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
367       LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
368       return false;
369     }
370 
371     return validateStoreFileName(p.getName());
372   }
373 
374   /**
375    * helper function to compute HDFS blocks distribution of a given reference
376    * file.For reference file, we don't compute the exact value. We use some
377    * estimate instead given it might be good enough. we assume bottom part
378    * takes the first half of reference file, top part takes the second half
379    * of the reference file. This is just estimate, given
380    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
381    * If this estimate isn't good enough, we can improve it later.
382    * @param fs  The FileSystem
383    * @param reference  The reference
384    * @param status  The reference FileStatus
385    * @return HDFS blocks distribution
386    */
387   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
388       final FileSystem fs, final Reference reference, final FileStatus status)
389       throws IOException {
390     if (status == null) {
391       return null;
392     }
393 
394     long start = 0;
395     long length = 0;
396 
397     if (Reference.isTopFileRegion(reference.getFileRegion())) {
398       start = status.getLen()/2;
399       length = status.getLen() - status.getLen()/2;
400     } else {
401       start = 0;
402       length = status.getLen()/2;
403     }
404     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
405   }
406 }