View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileStatus;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
35  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
36  import org.apache.hadoop.hbase.io.HFileLink;
37  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
38  import org.apache.hadoop.hbase.io.Reference;
39  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40  import org.apache.hadoop.hbase.util.FSUtils;
41  
42  /**
43   * Describe a StoreFile (hfile, reference, link)
44   */
45  @InterfaceAudience.Private
46  public class StoreFileInfo {
47    private static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
48  
49    /**
50     * A non-capture group, for hfiles, so that this can be embedded.
51     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
52     */
53    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:_SeqId_[0-9]+_)?";
54  
55    /** Regex that will work for hfiles */
56    private static final Pattern HFILE_NAME_PATTERN =
57      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
58  
59    /**
60     * Regex that will work for straight reference names (<hfile>.<parentEncRegion>)
61     * and hfilelink reference names (<table>=<region>-<hfile>.<parentEncRegion>)
62     * If reference, then the regex has more than just one group.
63     * Group 1, hfile/hfilelink pattern, is this file's id.
64     * Group 2 '(.+)' is the reference's parent region name.
65     */
66    private static final Pattern REF_NAME_PATTERN =
67      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
68        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
69  
70    // Configuration
71    private Configuration conf;
72  
73    // FileSystem handle
74    private final FileSystem fs;
75  
76    // HDFS blocks distribution information
77    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
78  
79    // If this storefile references another, this is the reference instance.
80    private final Reference reference;
81  
82    // If this storefile is a link to another, this is the link instance.
83    private final HFileLink link;
84  
85    private final Path initialPath;
86  
87    private RegionCoprocessorHost coprocessorHost;
88  
89    /**
90     * Create a Store File Info
91     * @param conf the {@link Configuration} to use
92     * @param fs The current file system to use.
93     * @param initialPath The {@link Path} of the file
94     */
95    public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath)
96        throws IOException {
97      assert fs != null;
98      assert initialPath != null;
99      assert conf != null;
100 
101     this.fs = fs;
102     this.conf = conf;
103     this.initialPath = initialPath;
104     Path p = initialPath;
105     if (HFileLink.isHFileLink(p)) {
106       // HFileLink
107       this.reference = null;
108       this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
109       if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
110     } else if (isReference(p)) {
111       this.reference = Reference.read(fs, p);
112       Path referencePath = getReferredToFile(p);
113       if (HFileLink.isHFileLink(referencePath)) {
114         // HFileLink Reference
115         this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
116       } else {
117         // Reference
118         this.link = null;
119       }
120       if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
121               " reference to " + referencePath);
122     } else if (isHFile(p)) {
123       // HFile
124       this.reference = null;
125       this.link = null;
126     } else {
127       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
128     }
129   }
130 
131   /**
132    * Create a Store File Info
133    * @param conf the {@link Configuration} to use
134    * @param fs The current file system to use.
135    * @param fileStatus The {@link FileStatus} of the file
136    */
137   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
138       throws IOException {
139     this(conf, fs, fileStatus.getPath());
140   }
141 
142   /**
143    * Create a Store File Info from an HFileLink
144    * @param conf the {@link Configuration} to use
145    * @param fs The current file system to use.
146    * @param fileStatus The {@link FileStatus} of the file
147    */
148   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
149       final HFileLink link)
150       throws IOException {
151     this.fs = fs;
152     this.conf = conf;
153     // initialPath can be null only if we get a link.
154     this.initialPath = (fileStatus == null) ? null : fileStatus.getPath();
155       // HFileLink
156     this.reference = null;
157     this.link = link;
158   }
159 
160   /**
161    * Create a Store File Info from an HFileLink
162    * @param conf
163    * @param fs
164    * @param fileStatus
165    * @param reference
166    * @throws IOException
167    */
168   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
169       final Reference reference)
170       throws IOException {
171     this.fs = fs;
172     this.conf = conf;
173     this.initialPath = fileStatus.getPath();
174     this.reference = reference;
175     this.link = null;
176   }
177 
178   /**
179    * Sets the region coprocessor env.
180    * @param coprocessorHost
181    */
182   public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
183     this.coprocessorHost = coprocessorHost;
184   }
185 
186   /*
187    * @return the Reference object associated to this StoreFileInfo.
188    *         null if the StoreFile is not a reference.
189    */
190   public Reference getReference() {
191     return this.reference;
192   }
193 
194   /** @return True if the store file is a Reference */
195   public boolean isReference() {
196     return this.reference != null;
197   }
198 
199   /** @return True if the store file is a top Reference */
200   public boolean isTopReference() {
201     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
202   }
203 
204   /** @return True if the store file is a link */
205   public boolean isLink() {
206     return this.link != null && this.reference == null;
207   }
208 
209   /** @return the HDFS block distribution */
210   public HDFSBlocksDistribution getHDFSBlockDistribution() {
211     return this.hdfsBlocksDistribution;
212   }
213 
214   /**
215    * Open a Reader for the StoreFile
216    * @param fs The current file system to use.
217    * @param cacheConf The cache configuration and block cache reference.
218    * @return The StoreFile.Reader for the file
219    */
220   public StoreFile.Reader open(final FileSystem fs,
221       final CacheConfig cacheConf, final boolean canUseDropBehind) throws IOException {
222     FSDataInputStreamWrapper in;
223     FileStatus status;
224 
225     final boolean doDropBehind = canUseDropBehind && cacheConf.shouldDropBehindCompaction();
226     if (this.link != null) {
227       // HFileLink
228       in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind);
229       status = this.link.getFileStatus(fs);
230     } else if (this.reference != null) {
231       // HFile Reference
232       Path referencePath = getReferredToFile(this.getPath());
233       in = new FSDataInputStreamWrapper(fs, referencePath,
234           doDropBehind);
235       status = fs.getFileStatus(referencePath);
236     } else {
237       in = new FSDataInputStreamWrapper(fs, this.getPath(),
238           doDropBehind);
239       status = fs.getFileStatus(initialPath);
240     }
241     long length = status.getLen();
242     hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
243 
244     StoreFile.Reader reader = null;
245     if (this.coprocessorHost != null) {
246       reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
247         cacheConf, reference);
248     }
249     if (reader == null) {
250       if (this.reference != null) {
251         reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
252           conf);
253       } else {
254         reader = new StoreFile.Reader(fs, status.getPath(), in, length, cacheConf, conf);
255       }
256     }
257     if (this.coprocessorHost != null) {
258       reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
259         cacheConf, reference, reader);
260     }
261     return reader;
262   }
263 
264   /**
265    * Compute the HDFS Block Distribution for this StoreFile
266    */
267   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
268       throws IOException {
269 
270     // guard against the case where we get the FileStatus from link, but by the time we
271     // call compute the file is moved again
272     if (this.link != null) {
273       FileNotFoundException exToThrow = null;
274       for (int i = 0; i < this.link.getLocations().length; i++) {
275         try {
276           return computeHDFSBlocksDistributionInternal(fs);
277         } catch (FileNotFoundException ex) {
278           // try the other location
279           exToThrow = ex;
280         }
281       }
282       throw exToThrow;
283     } else {
284       return computeHDFSBlocksDistributionInternal(fs);
285     }
286   }
287 
288   private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
289       throws IOException {
290     FileStatus status = getReferencedFileStatus(fs);
291     if (this.reference != null) {
292       return computeRefFileHDFSBlockDistribution(fs, reference, status);
293     } else {
294       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
295     }
296   }
297 
298   /**
299    * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
300    * @param fs The current file system to use.
301    * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
302    */
303   public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
304     FileStatus status;
305     if (this.reference != null) {
306       if (this.link != null) {
307         FileNotFoundException exToThrow = null;
308         for (int i = 0; i < this.link.getLocations().length; i++) {
309           // HFileLink Reference
310           try {
311             return link.getFileStatus(fs);
312           } catch (FileNotFoundException ex) {
313             // try the other location
314             exToThrow = ex;
315           }
316         }
317         throw exToThrow;
318       } else {
319         // HFile Reference
320         Path referencePath = getReferredToFile(this.getPath());
321         status = fs.getFileStatus(referencePath);
322       }
323     } else {
324       if (this.link != null) {
325         FileNotFoundException exToThrow = null;
326         for (int i = 0; i < this.link.getLocations().length; i++) {
327           // HFileLink
328           try {
329             return link.getFileStatus(fs);
330           } catch (FileNotFoundException ex) {
331             // try the other location
332             exToThrow = ex;
333           }
334         }
335         throw exToThrow;
336       } else {
337         status = fs.getFileStatus(initialPath);
338       }
339     }
340     return status;
341   }
342 
343   /** @return The {@link Path} of the file */
344   public Path getPath() {
345     return initialPath;
346   }
347 
348   /** @return The {@link FileStatus} of the file */
349   public FileStatus getFileStatus() throws IOException {
350     return getReferencedFileStatus(fs);
351   }
352 
353   /** @return Get the modification time of the file. */
354   public long getModificationTime() throws IOException {
355     return getFileStatus().getModificationTime();
356   }
357 
358   @Override
359   public String toString() {
360     return this.getPath() +
361       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
362   }
363 
364   /**
365    * @param path Path to check.
366    * @return True if the path has format of a HFile.
367    */
368   public static boolean isHFile(final Path path) {
369     return isHFile(path.getName());
370   }
371 
372   public static boolean isHFile(final String fileName) {
373     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
374     return m.matches() && m.groupCount() > 0;
375   }
376 
377   /**
378    * @param path Path to check.
379    * @return True if the path has format of a HStoreFile reference.
380    */
381   public static boolean isReference(final Path path) {
382     return isReference(path.getName());
383   }
384 
385   /**
386    * @param name file name to check.
387    * @return True if the path has format of a HStoreFile reference.
388    */
389   public static boolean isReference(final String name) {
390     Matcher m = REF_NAME_PATTERN.matcher(name);
391     return m.matches() && m.groupCount() > 1;
392   }
393 
394   /*
395    * Return path to the file referred to by a Reference.  Presumes a directory
396    * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
397    * @param p Path to a Reference file.
398    * @return Calculated path to parent region file.
399    * @throws IllegalArgumentException when path regex fails to match.
400    */
401   public static Path getReferredToFile(final Path p) {
402     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
403     if (m == null || !m.matches()) {
404       LOG.warn("Failed match of store file name " + p.toString());
405       throw new IllegalArgumentException("Failed match of store file name " +
406           p.toString());
407     }
408 
409     // Other region name is suffix on the passed Reference file name
410     String otherRegion = m.group(2);
411     // Tabledir is up two directories from where Reference was written.
412     Path tableDir = p.getParent().getParent().getParent();
413     String nameStrippedOfSuffix = m.group(1);
414     if (LOG.isDebugEnabled()) {
415       LOG.debug("reference '" + p + "' to region=" + otherRegion
416         + " hfile=" + nameStrippedOfSuffix);
417     }
418 
419     // Build up new path with the referenced region in place of our current
420     // region in the reference path.  Also strip regionname suffix from name.
421     return new Path(new Path(new Path(tableDir, otherRegion),
422       p.getParent().getName()), nameStrippedOfSuffix);
423   }
424 
425   /**
426    * Validate the store file name.
427    * @param fileName name of the file to validate
428    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
429    */
430   public static boolean validateStoreFileName(final String fileName) {
431     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
432       return(true);
433     return !fileName.contains("-");
434   }
435 
436   /**
437    * Return if the specified file is a valid store file or not.
438    * @param fileStatus The {@link FileStatus} of the file
439    * @return <tt>true</tt> if the file is valid
440    */
441   public static boolean isValid(final FileStatus fileStatus)
442       throws IOException {
443     final Path p = fileStatus.getPath();
444 
445     if (fileStatus.isDirectory())
446       return false;
447 
448     // Check for empty hfile. Should never be the case but can happen
449     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
450     // NOTE: that the HFileLink is just a name, so it's an empty file.
451     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
452       LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
453       return false;
454     }
455 
456     return validateStoreFileName(p.getName());
457   }
458 
459   /**
460    * helper function to compute HDFS blocks distribution of a given reference
461    * file.For reference file, we don't compute the exact value. We use some
462    * estimate instead given it might be good enough. we assume bottom part
463    * takes the first half of reference file, top part takes the second half
464    * of the reference file. This is just estimate, given
465    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
466    * If this estimate isn't good enough, we can improve it later.
467    * @param fs  The FileSystem
468    * @param reference  The reference
469    * @param status  The reference FileStatus
470    * @return HDFS blocks distribution
471    */
472   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
473       final FileSystem fs, final Reference reference, final FileStatus status)
474       throws IOException {
475     if (status == null) {
476       return null;
477     }
478 
479     long start = 0;
480     long length = 0;
481 
482     if (Reference.isTopFileRegion(reference.getFileRegion())) {
483       start = status.getLen()/2;
484       length = status.getLen() - status.getLen()/2;
485     } else {
486       start = 0;
487       length = status.getLen()/2;
488     }
489     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
490   }
491 
492   @Override
493   public boolean equals(Object that) {
494     if (this == that) return true;
495     if (that == null) return false;
496 
497     if (!(that instanceof StoreFileInfo)) return false;
498 
499     StoreFileInfo o = (StoreFileInfo)that;
500     if (initialPath != null && o.initialPath == null) return false;
501     if (initialPath == null && o.initialPath != null) return false;
502     if (initialPath != o.initialPath && initialPath != null
503             && !initialPath.equals(o.initialPath)) return false;
504 
505     if (reference != null && o.reference == null) return false;
506     if (reference == null && o.reference != null) return false;
507     if (reference != o.reference && reference != null
508             && !reference.equals(o.reference)) return false;
509 
510     if (link != null && o.link == null) return false;
511     if (link == null && o.link != null) return false;
512     if (link != o.link && link != null && !link.equals(o.link)) return false;
513 
514     return true;
515   };
516 
517 
518   @Override
519   public int hashCode() {
520     int hash = 17;
521     hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode());
522     hash = hash * 31 + ((initialPath ==  null) ? 0 : initialPath.hashCode());
523     hash = hash * 31 + ((link == null) ? 0 : link.hashCode());
524     return  hash;
525   }
526 }