View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileStatus;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
35  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
36  import org.apache.hadoop.hbase.io.HFileLink;
37  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
38  import org.apache.hadoop.hbase.io.Reference;
39  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40  import org.apache.hadoop.hbase.util.FSUtils;
41  
42  /**
43   * Describe a StoreFile (hfile, reference, link)
44   */
45  @InterfaceAudience.Private
46  public class StoreFileInfo {
47    public static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
48  
49    /**
50     * A non-capture group, for hfiles, so that this can be embedded.
51     * HFiles are uuid ([0-9a-f]+). Bulk loaded hfiles have (_SeqId_[0-9]+_) as suffix.
52     */
53    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";
54  
55    /** Regex that will work for hfiles */
56    private static final Pattern HFILE_NAME_PATTERN =
57      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
58  
59    /**
60     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
61     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
62     * If reference, then the regex has more than just one group.
63     * Group 1, hfile/hfilelink pattern, is this file's id.
64     * Group 2 '(.+)' is the reference's parent region name.
65     */
66    private static final Pattern REF_NAME_PATTERN =
67      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
68        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
69  
70    // Configuration
71    private Configuration conf;
72  
73    // FileSystem handle
74    private final FileSystem fs;
75  
76    // HDFS blocks distribution information
77    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
78  
79    // If this storefile references another, this is the reference instance.
80    private final Reference reference;
81  
82    // If this storefile is a link to another, this is the link instance.
83    private final HFileLink link;
84  
85    private final Path initialPath;
86  
87    private RegionCoprocessorHost coprocessorHost;
88  
89    /**
90     * Create a Store File Info
91     * @param conf the {@link Configuration} to use
92     * @param fs The current file system to use.
93     * @param initialPath The {@link Path} of the file
94     */
95    public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath)
96        throws IOException {
97      assert fs != null;
98      assert initialPath != null;
99      assert conf != null;
100 
101     this.fs = fs;
102     this.conf = conf;
103     this.initialPath = initialPath;
104     Path p = initialPath;
105     if (HFileLink.isHFileLink(p)) {
106       // HFileLink
107       this.reference = null;
108       this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
109       if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
110     } else if (isReference(p)) {
111       this.reference = Reference.read(fs, p);
112       Path referencePath = getReferredToFile(p);
113       if (HFileLink.isHFileLink(referencePath)) {
114         // HFileLink Reference
115         this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
116       } else {
117         // Reference
118         this.link = null;
119       }
120       if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
121               " reference to " + referencePath);
122     } else if (isHFile(p)) {
123       // HFile
124       this.reference = null;
125       this.link = null;
126     } else {
127       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
128     }
129   }
130 
131   /**
132    * Create a Store File Info
133    * @param conf the {@link Configuration} to use
134    * @param fs The current file system to use.
135    * @param fileStatus The {@link FileStatus} of the file
136    */
137   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
138       throws IOException {
139     this(conf, fs, fileStatus.getPath());
140   }
141 
142   /**
143    * Create a Store File Info from an HFileLink
144    * @param conf the {@link Configuration} to use
145    * @param fs The current file system to use.
146    * @param fileStatus The {@link FileStatus} of the file
147    */
148   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
149       final HFileLink link)
150       throws IOException {
151     this.fs = fs;
152     this.conf = conf;
153     // initialPath can be null only if we get a link.
154     this.initialPath = (fileStatus == null) ? null : fileStatus.getPath();
155       // HFileLink
156     this.reference = null;
157     this.link = link;
158   }
159 
160   /**
161    * Create a Store File Info from an HFileLink
162    * @param conf
163    * @param fs
164    * @param fileStatus
165    * @param reference
166    * @throws IOException
167    */
168   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
169       final Reference reference)
170       throws IOException {
171     this.fs = fs;
172     this.conf = conf;
173     this.initialPath = fileStatus.getPath();
174     this.reference = reference;
175     this.link = null;
176   }
177 
178   /**
179    * Sets the region coprocessor env.
180    * @param coprocessorHost
181    */
182   public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
183     this.coprocessorHost = coprocessorHost;
184   }
185 
186   /*
187    * @return the Reference object associated to this StoreFileInfo.
188    *         null if the StoreFile is not a reference.
189    */
190   public Reference getReference() {
191     return this.reference;
192   }
193 
194   /** @return True if the store file is a Reference */
195   public boolean isReference() {
196     return this.reference != null;
197   }
198 
199   /** @return True if the store file is a top Reference */
200   public boolean isTopReference() {
201     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
202   }
203 
204   /** @return True if the store file is a link */
205   public boolean isLink() {
206     return this.link != null && this.reference == null;
207   }
208 
209   /** @return the HDFS block distribution */
210   public HDFSBlocksDistribution getHDFSBlockDistribution() {
211     return this.hdfsBlocksDistribution;
212   }
213 
214   /**
215    * Open a Reader for the StoreFile
216    * @param fs The current file system to use.
217    * @param cacheConf The cache configuration and block cache reference.
218    * @return The StoreFile.Reader for the file
219    */
220   public StoreFile.Reader open(final FileSystem fs,
221       final CacheConfig cacheConf) throws IOException {
222     FSDataInputStreamWrapper in;
223     FileStatus status;
224 
225     if (this.link != null) {
226       // HFileLink
227       in = new FSDataInputStreamWrapper(fs, this.link);
228       status = this.link.getFileStatus(fs);
229     } else if (this.reference != null) {
230       // HFile Reference
231       Path referencePath = getReferredToFile(this.getPath());
232       in = new FSDataInputStreamWrapper(fs, referencePath);
233       status = fs.getFileStatus(referencePath);
234     } else {
235       in = new FSDataInputStreamWrapper(fs, this.getPath());
236       status = fs.getFileStatus(initialPath);
237     }
238     long length = status.getLen();
239     hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
240 
241     StoreFile.Reader reader = null;
242     if (this.coprocessorHost != null) {
243       reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
244         cacheConf, reference);
245     }
246     if (reader == null) {
247       if (this.reference != null) {
248         reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
249           conf);
250       } else {
251         reader = new StoreFile.Reader(fs, status.getPath(), in, length, cacheConf, conf);
252       }
253     }
254     if (this.coprocessorHost != null) {
255       reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
256         cacheConf, reference, reader);
257     }
258     return reader;
259   }
260 
261   /**
262    * Compute the HDFS Block Distribution for this StoreFile
263    */
264   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
265       throws IOException {
266 
267     // guard against the case where we get the FileStatus from link, but by the time we
268     // call compute the file is moved again
269     if (this.link != null) {
270       FileNotFoundException exToThrow = null;
271       for (int i = 0; i < this.link.getLocations().length; i++) {
272         try {
273           return computeHDFSBlocksDistributionInternal(fs);
274         } catch (FileNotFoundException ex) {
275           // try the other location
276           exToThrow = ex;
277         }
278       }
279       throw exToThrow;
280     } else {
281       return computeHDFSBlocksDistributionInternal(fs);
282     }
283   }
284 
285   private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
286       throws IOException {
287     FileStatus status = getReferencedFileStatus(fs);
288     if (this.reference != null) {
289       return computeRefFileHDFSBlockDistribution(fs, reference, status);
290     } else {
291       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
292     }
293   }
294 
295   /**
296    * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
297    * @param fs The current file system to use.
298    * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
299    */
300   public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
301     FileStatus status;
302     if (this.reference != null) {
303       if (this.link != null) {
304         FileNotFoundException exToThrow = null;
305         for (int i = 0; i < this.link.getLocations().length; i++) {
306           // HFileLink Reference
307           try {
308             return link.getFileStatus(fs);
309           } catch (FileNotFoundException ex) {
310             // try the other location
311             exToThrow = ex;
312           }
313         }
314         throw exToThrow;
315       } else {
316         // HFile Reference
317         Path referencePath = getReferredToFile(this.getPath());
318         status = fs.getFileStatus(referencePath);
319       }
320     } else {
321       if (this.link != null) {
322         FileNotFoundException exToThrow = null;
323         for (int i = 0; i < this.link.getLocations().length; i++) {
324           // HFileLink
325           try {
326             return link.getFileStatus(fs);
327           } catch (FileNotFoundException ex) {
328             // try the other location
329             exToThrow = ex;
330           }
331         }
332         throw exToThrow;
333       } else {
334         status = fs.getFileStatus(initialPath);
335       }
336     }
337     return status;
338   }
339 
340   /** @return The {@link Path} of the file */
341   public Path getPath() {
342     return initialPath;
343   }
344 
345   /** @return The {@link FileStatus} of the file */
346   public FileStatus getFileStatus() throws IOException {
347     return getReferencedFileStatus(fs);
348   }
349 
350   /** @return Get the modification time of the file. */
351   public long getModificationTime() throws IOException {
352     return getFileStatus().getModificationTime();
353   }
354 
355   @Override
356   public String toString() {
357     return this.getPath() +
358       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
359   }
360 
361   /**
362    * @param path Path to check.
363    * @return True if the path has format of a HFile.
364    */
365   public static boolean isHFile(final Path path) {
366     return isHFile(path.getName());
367   }
368 
369   public static boolean isHFile(final String fileName) {
370     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
371     return m.matches() && m.groupCount() > 0;
372   }
373 
374   /**
375    * @param path Path to check.
376    * @return True if the path has format of a HStoreFile reference.
377    */
378   public static boolean isReference(final Path path) {
379     return isReference(path.getName());
380   }
381 
382   /**
383    * @param name file name to check.
384    * @return True if the path has format of a HStoreFile reference.
385    */
386   public static boolean isReference(final String name) {
387     Matcher m = REF_NAME_PATTERN.matcher(name);
388     return m.matches() && m.groupCount() > 1;
389   }
390 
391   /*
392    * Return path to the file referred to by a Reference.  Presumes a directory
393    * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
394    * @param p Path to a Reference file.
395    * @return Calculated path to parent region file.
396    * @throws IllegalArgumentException when path regex fails to match.
397    */
398   public static Path getReferredToFile(final Path p) {
399     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
400     if (m == null || !m.matches()) {
401       LOG.warn("Failed match of store file name " + p.toString());
402       throw new IllegalArgumentException("Failed match of store file name " +
403           p.toString());
404     }
405 
406     // Other region name is suffix on the passed Reference file name
407     String otherRegion = m.group(2);
408     // Tabledir is up two directories from where Reference was written.
409     Path tableDir = p.getParent().getParent().getParent();
410     String nameStrippedOfSuffix = m.group(1);
411     if (LOG.isDebugEnabled()) {
412       LOG.debug("reference '" + p + "' to region=" + otherRegion
413         + " hfile=" + nameStrippedOfSuffix);
414     }
415 
416     // Build up new path with the referenced region in place of our current
417     // region in the reference path.  Also strip regionname suffix from name.
418     return new Path(new Path(new Path(tableDir, otherRegion),
419       p.getParent().getName()), nameStrippedOfSuffix);
420   }
421 
422   /**
423    * Validate the store file name.
424    * @param fileName name of the file to validate
425    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
426    */
427   public static boolean validateStoreFileName(final String fileName) {
428     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
429       return(true);
430     return !fileName.contains("-");
431   }
432 
433   /**
434    * Return if the specified file is a valid store file or not.
435    * @param fileStatus The {@link FileStatus} of the file
436    * @return <tt>true</tt> if the file is valid
437    */
438   public static boolean isValid(final FileStatus fileStatus)
439       throws IOException {
440     final Path p = fileStatus.getPath();
441 
442     if (fileStatus.isDirectory())
443       return false;
444 
445     // Check for empty hfile. Should never be the case but can happen
446     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
447     // NOTE: that the HFileLink is just a name, so it's an empty file.
448     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
449       LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
450       return false;
451     }
452 
453     return validateStoreFileName(p.getName());
454   }
455 
456   /**
457    * helper function to compute HDFS blocks distribution of a given reference
458    * file.For reference file, we don't compute the exact value. We use some
459    * estimate instead given it might be good enough. we assume bottom part
460    * takes the first half of reference file, top part takes the second half
461    * of the reference file. This is just estimate, given
462    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
463    * If this estimate isn't good enough, we can improve it later.
464    * @param fs  The FileSystem
465    * @param reference  The reference
466    * @param status  The reference FileStatus
467    * @return HDFS blocks distribution
468    */
469   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
470       final FileSystem fs, final Reference reference, final FileStatus status)
471       throws IOException {
472     if (status == null) {
473       return null;
474     }
475 
476     long start = 0;
477     long length = 0;
478 
479     if (Reference.isTopFileRegion(reference.getFileRegion())) {
480       start = status.getLen()/2;
481       length = status.getLen() - status.getLen()/2;
482     } else {
483       start = 0;
484       length = status.getLen()/2;
485     }
486     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
487   }
488 
489   @Override
490   public boolean equals(Object that) {
491     if (this == that) return true;
492     if (that == null) return false;
493 
494     if (!(that instanceof StoreFileInfo)) return false;
495 
496     StoreFileInfo o = (StoreFileInfo)that;
497     if (initialPath != null && o.initialPath == null) return false;
498     if (initialPath == null && o.initialPath != null) return false;
499     if (initialPath != o.initialPath && initialPath != null
500             && !initialPath.equals(o.initialPath)) return false;
501 
502     if (reference != null && o.reference == null) return false;
503     if (reference == null && o.reference != null) return false;
504     if (reference != o.reference && reference != null
505             && !reference.equals(o.reference)) return false;
506 
507     if (link != null && o.link == null) return false;
508     if (link == null && o.link != null) return false;
509     if (link != o.link && link != null && !link.equals(o.link)) return false;
510 
511     return true;
512   };
513 
514 
515   @Override
516   public int hashCode() {
517     int hash = 17;
518     hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode());
519     hash = hash * 31 + ((initialPath ==  null) ? 0 : initialPath.hashCode());
520     hash = hash * 31 + ((link == null) ? 0 : link.hashCode());
521     return  hash;
522   }
523 }