View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.conf.Configuration;
31  import org.apache.hadoop.fs.FileStatus;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
35  import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
36  import org.apache.hadoop.hbase.io.HFileLink;
37  import org.apache.hadoop.hbase.io.HalfStoreFileReader;
38  import org.apache.hadoop.hbase.io.Reference;
39  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
40  import org.apache.hadoop.hbase.util.FSUtils;
41  
42  /**
43   * Describe a StoreFile (hfile, reference, link)
44   */
45  @InterfaceAudience.Private
46  public class StoreFileInfo {
47    private static final Log LOG = LogFactory.getLog(StoreFileInfo.class);
48  
49    /**
50     * A non-capture group, for hfiles, so that this can be embedded.
51     * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
52     * The mob del file has (_del) as suffix.
53     */
54    public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?";
55  
56    /** Regex that will work for hfiles */
57    private static final Pattern HFILE_NAME_PATTERN =
58      Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
59  
60    /**
61     * A non-capture group, for del files, so that this can be embedded.
62     * A del file has (_del) as suffix.
63     */
64    public static final String DELFILE_NAME_REGEX = "[0-9a-f]+(?:_del)";
65  
66    /** Regex that will work for del files */
67    private static final Pattern DELFILE_NAME_PATTERN =
68      Pattern.compile("^(" + DELFILE_NAME_REGEX + ")");
69  
70    /**
71     * Regex that will work for straight reference names ({@code <hfile>.<parentEncRegion>})
72     * and hfilelink reference names ({@code <table>=<region>-<hfile>.<parentEncRegion>})
73     * If reference, then the regex has more than just one group.
74     * Group 1, hfile/hfilelink pattern, is this file's id.
75     * Group 2 '(.+)' is the reference's parent region name.
76     */
77    private static final Pattern REF_NAME_PATTERN =
78      Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
79        HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
80  
81    // Configuration
82    private Configuration conf;
83  
84    // FileSystem handle
85    private final FileSystem fs;
86  
87    // HDFS blocks distribution information
88    private HDFSBlocksDistribution hdfsBlocksDistribution = null;
89  
90    // If this storefile references another, this is the reference instance.
91    private final Reference reference;
92  
93    // If this storefile is a link to another, this is the link instance.
94    private final HFileLink link;
95  
96    private final Path initialPath;
97  
98    private RegionCoprocessorHost coprocessorHost;
99  
100   // timestamp on when the file was created, is 0 and ignored for reference or link files
101   private long createdTimestamp;
102 
103   /**
104    * Create a Store File Info
105    * @param conf the {@link Configuration} to use
106    * @param fs The current file system to use.
107    * @param initialPath The {@link Path} of the file
108    */
109   public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath)
110       throws IOException {
111     assert fs != null;
112     assert initialPath != null;
113     assert conf != null;
114 
115     this.fs = fs;
116     this.conf = conf;
117     this.initialPath = initialPath;
118     Path p = initialPath;
119     if (HFileLink.isHFileLink(p)) {
120       // HFileLink
121       this.reference = null;
122       this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
123       if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
124     } else if (isReference(p)) {
125       this.reference = Reference.read(fs, p);
126       Path referencePath = getReferredToFile(p);
127       if (HFileLink.isHFileLink(referencePath)) {
128         // HFileLink Reference
129         this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
130       } else {
131         // Reference
132         this.link = null;
133       }
134       if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
135               " reference to " + referencePath);
136     } else if (isHFile(p)) {
137       // HFile
138       this.createdTimestamp = fs.getFileStatus(initialPath).getModificationTime();
139       this.reference = null;
140       this.link = null;
141     } else {
142       throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
143     }
144   }
145 
146   /**
147    * Create a Store File Info
148    * @param conf the {@link Configuration} to use
149    * @param fs The current file system to use.
150    * @param fileStatus The {@link FileStatus} of the file
151    */
152   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
153       throws IOException {
154     this(conf, fs, fileStatus.getPath());
155   }
156 
157   /**
158    * Create a Store File Info from an HFileLink
159    * @param conf the {@link Configuration} to use
160    * @param fs The current file system to use.
161    * @param fileStatus The {@link FileStatus} of the file
162    */
163   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
164       final HFileLink link)
165       throws IOException {
166     this.fs = fs;
167     this.conf = conf;
168     // initialPath can be null only if we get a link.
169     this.initialPath = (fileStatus == null) ? null : fileStatus.getPath();
170       // HFileLink
171     this.reference = null;
172     this.link = link;
173   }
174 
175   /**
176    * Create a Store File Info from an HFileLink
177    * @param conf
178    * @param fs
179    * @param fileStatus
180    * @param reference
181    * @throws IOException
182    */
183   public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
184       final Reference reference)
185       throws IOException {
186     this.fs = fs;
187     this.conf = conf;
188     this.initialPath = fileStatus.getPath();
189     this.createdTimestamp = fileStatus.getModificationTime();
190     this.reference = reference;
191     this.link = null;
192   }
193 
194   /**
195    * Sets the region coprocessor env.
196    * @param coprocessorHost
197    */
198   public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
199     this.coprocessorHost = coprocessorHost;
200   }
201 
202   /*
203    * @return the Reference object associated to this StoreFileInfo.
204    *         null if the StoreFile is not a reference.
205    */
206   public Reference getReference() {
207     return this.reference;
208   }
209 
210   /** @return True if the store file is a Reference */
211   public boolean isReference() {
212     return this.reference != null;
213   }
214 
215   /** @return True if the store file is a top Reference */
216   public boolean isTopReference() {
217     return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
218   }
219 
220   /** @return True if the store file is a link */
221   public boolean isLink() {
222     return this.link != null && this.reference == null;
223   }
224 
225   /** @return the HDFS block distribution */
226   public HDFSBlocksDistribution getHDFSBlockDistribution() {
227     return this.hdfsBlocksDistribution;
228   }
229 
230   /**
231    * Open a Reader for the StoreFile
232    * @param fs The current file system to use.
233    * @param cacheConf The cache configuration and block cache reference.
234    * @return The StoreFile.Reader for the file
235    */
236   public StoreFileReader open(final FileSystem fs,
237       final CacheConfig cacheConf, final boolean canUseDropBehind) throws IOException {
238     FSDataInputStreamWrapper in;
239     FileStatus status;
240 
241     final boolean doDropBehind = canUseDropBehind && cacheConf.shouldDropBehindCompaction();
242     if (this.link != null) {
243       // HFileLink
244       in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind);
245       status = this.link.getFileStatus(fs);
246     } else if (this.reference != null) {
247       // HFile Reference
248       Path referencePath = getReferredToFile(this.getPath());
249       in = new FSDataInputStreamWrapper(fs, referencePath,
250           doDropBehind);
251       status = fs.getFileStatus(referencePath);
252     } else {
253       in = new FSDataInputStreamWrapper(fs, this.getPath(),
254           doDropBehind);
255       status = fs.getFileStatus(initialPath);
256     }
257     long length = status.getLen();
258     hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
259 
260     StoreFileReader reader = null;
261     if (this.coprocessorHost != null) {
262       reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
263         cacheConf, reference);
264     }
265     if (reader == null) {
266       if (this.reference != null) {
267         reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
268           conf);
269       } else {
270         reader = new StoreFileReader(fs, status.getPath(), in, length, cacheConf, conf);
271       }
272     }
273     if (this.coprocessorHost != null) {
274       reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
275         cacheConf, reference, reader);
276     }
277     return reader;
278   }
279 
280   /**
281    * Compute the HDFS Block Distribution for this StoreFile
282    */
283   public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
284       throws IOException {
285 
286     // guard against the case where we get the FileStatus from link, but by the time we
287     // call compute the file is moved again
288     if (this.link != null) {
289       FileNotFoundException exToThrow = null;
290       for (int i = 0; i < this.link.getLocations().length; i++) {
291         try {
292           return computeHDFSBlocksDistributionInternal(fs);
293         } catch (FileNotFoundException ex) {
294           // try the other location
295           exToThrow = ex;
296         }
297       }
298       throw exToThrow;
299     } else {
300       return computeHDFSBlocksDistributionInternal(fs);
301     }
302   }
303 
304   private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
305       throws IOException {
306     FileStatus status = getReferencedFileStatus(fs);
307     if (this.reference != null) {
308       return computeRefFileHDFSBlockDistribution(fs, reference, status);
309     } else {
310       return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
311     }
312   }
313 
314   /**
315    * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
316    * @param fs The current file system to use.
317    * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
318    */
319   public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
320     FileStatus status;
321     if (this.reference != null) {
322       if (this.link != null) {
323         FileNotFoundException exToThrow = null;
324         for (int i = 0; i < this.link.getLocations().length; i++) {
325           // HFileLink Reference
326           try {
327             return link.getFileStatus(fs);
328           } catch (FileNotFoundException ex) {
329             // try the other location
330             exToThrow = ex;
331           }
332         }
333         throw exToThrow;
334       } else {
335         // HFile Reference
336         Path referencePath = getReferredToFile(this.getPath());
337         status = fs.getFileStatus(referencePath);
338       }
339     } else {
340       if (this.link != null) {
341         FileNotFoundException exToThrow = null;
342         for (int i = 0; i < this.link.getLocations().length; i++) {
343           // HFileLink
344           try {
345             return link.getFileStatus(fs);
346           } catch (FileNotFoundException ex) {
347             // try the other location
348             exToThrow = ex;
349           }
350         }
351         throw exToThrow;
352       } else {
353         status = fs.getFileStatus(initialPath);
354       }
355     }
356     return status;
357   }
358 
359   /** @return The {@link Path} of the file */
360   public Path getPath() {
361     return initialPath;
362   }
363 
364   /** @return The {@link FileStatus} of the file */
365   public FileStatus getFileStatus() throws IOException {
366     return getReferencedFileStatus(fs);
367   }
368 
369   /** @return Get the modification time of the file. */
370   public long getModificationTime() throws IOException {
371     return getFileStatus().getModificationTime();
372   }
373 
374   @Override
375   public String toString() {
376     return this.getPath() +
377       (isReference() ? "-" + getReferredToFile(this.getPath()) + "-" + reference : "");
378   }
379 
380   /**
381    * @param path Path to check.
382    * @return True if the path has format of a HFile.
383    */
384   public static boolean isHFile(final Path path) {
385     return isHFile(path.getName());
386   }
387 
388   public static boolean isHFile(final String fileName) {
389     Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
390     return m.matches() && m.groupCount() > 0;
391   }
392 
393   /**
394    * @param path Path to check.
395    * @return True if the path has format of a del file.
396    */
397   public static boolean isDelFile(final Path path) {
398     return isDelFile(path.getName());
399   }
400 
401   /**
402    * @param fileName Sting version of path to validate.
403    * @return True if the file name has format of a del file.
404    */
405   public static boolean isDelFile(final String fileName) {
406     Matcher m = DELFILE_NAME_PATTERN.matcher(fileName);
407     return m.matches() && m.groupCount() > 0;
408   }
409 
410   /**
411    * @param path Path to check.
412    * @return True if the path has format of a HStoreFile reference.
413    */
414   public static boolean isReference(final Path path) {
415     return isReference(path.getName());
416   }
417 
418   /**
419    * @param name file name to check.
420    * @return True if the path has format of a HStoreFile reference.
421    */
422   public static boolean isReference(final String name) {
423     Matcher m = REF_NAME_PATTERN.matcher(name);
424     return m.matches() && m.groupCount() > 1;
425   }
426 
427   /**
428    * @return timestamp when this file was created (as returned by filesystem)
429    */
430   public long getCreatedTimestamp() {
431     return createdTimestamp;
432   }
433 
434   /*
435    * Return path to the file referred to by a Reference.  Presumes a directory
436    * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
437    * @param p Path to a Reference file.
438    * @return Calculated path to parent region file.
439    * @throws IllegalArgumentException when path regex fails to match.
440    */
441   public static Path getReferredToFile(final Path p) {
442     Matcher m = REF_NAME_PATTERN.matcher(p.getName());
443     if (m == null || !m.matches()) {
444       LOG.warn("Failed match of store file name " + p.toString());
445       throw new IllegalArgumentException("Failed match of store file name " +
446           p.toString());
447     }
448 
449     // Other region name is suffix on the passed Reference file name
450     String otherRegion = m.group(2);
451     // Tabledir is up two directories from where Reference was written.
452     Path tableDir = p.getParent().getParent().getParent();
453     String nameStrippedOfSuffix = m.group(1);
454     if (LOG.isDebugEnabled()) {
455       LOG.debug("reference '" + p + "' to region=" + otherRegion
456         + " hfile=" + nameStrippedOfSuffix);
457     }
458 
459     // Build up new path with the referenced region in place of our current
460     // region in the reference path.  Also strip regionname suffix from name.
461     return new Path(new Path(new Path(tableDir, otherRegion),
462       p.getParent().getName()), nameStrippedOfSuffix);
463   }
464 
465   /**
466    * Validate the store file name.
467    * @param fileName name of the file to validate
468    * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
469    */
470   public static boolean validateStoreFileName(final String fileName) {
471     if (HFileLink.isHFileLink(fileName) || isReference(fileName))
472       return(true);
473     return !fileName.contains("-");
474   }
475 
476   /**
477    * Return if the specified file is a valid store file or not.
478    * @param fileStatus The {@link FileStatus} of the file
479    * @return <tt>true</tt> if the file is valid
480    */
481   public static boolean isValid(final FileStatus fileStatus)
482       throws IOException {
483     final Path p = fileStatus.getPath();
484 
485     if (fileStatus.isDirectory())
486       return false;
487 
488     // Check for empty hfile. Should never be the case but can happen
489     // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
490     // NOTE: that the HFileLink is just a name, so it's an empty file.
491     if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
492       LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
493       return false;
494     }
495 
496     return validateStoreFileName(p.getName());
497   }
498 
499   /**
500    * helper function to compute HDFS blocks distribution of a given reference
501    * file.For reference file, we don't compute the exact value. We use some
502    * estimate instead given it might be good enough. we assume bottom part
503    * takes the first half of reference file, top part takes the second half
504    * of the reference file. This is just estimate, given
505    * midkey ofregion != midkey of HFile, also the number and size of keys vary.
506    * If this estimate isn't good enough, we can improve it later.
507    * @param fs  The FileSystem
508    * @param reference  The reference
509    * @param status  The reference FileStatus
510    * @return HDFS blocks distribution
511    */
512   private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
513       final FileSystem fs, final Reference reference, final FileStatus status)
514       throws IOException {
515     if (status == null) {
516       return null;
517     }
518 
519     long start = 0;
520     long length = 0;
521 
522     if (Reference.isTopFileRegion(reference.getFileRegion())) {
523       start = status.getLen()/2;
524       length = status.getLen() - status.getLen()/2;
525     } else {
526       start = 0;
527       length = status.getLen()/2;
528     }
529     return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
530   }
531 
532   @Override
533   public boolean equals(Object that) {
534     if (this == that) return true;
535     if (that == null) return false;
536 
537     if (!(that instanceof StoreFileInfo)) return false;
538 
539     StoreFileInfo o = (StoreFileInfo)that;
540     if (initialPath != null && o.initialPath == null) return false;
541     if (initialPath == null && o.initialPath != null) return false;
542     if (initialPath != o.initialPath && initialPath != null
543             && !initialPath.equals(o.initialPath)) return false;
544 
545     if (reference != null && o.reference == null) return false;
546     if (reference == null && o.reference != null) return false;
547     if (reference != o.reference && reference != null
548             && !reference.equals(o.reference)) return false;
549 
550     if (link != null && o.link == null) return false;
551     if (link == null && o.link != null) return false;
552     if (link != o.link && link != null && !link.equals(o.link)) return false;
553 
554     return true;
555   };
556 
557 
558   @Override
559   public int hashCode() {
560     int hash = 17;
561     hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode());
562     hash = hash * 31 + ((initialPath ==  null) ? 0 : initialPath.hashCode());
563     hash = hash * 31 + ((link == null) ? 0 : link.hashCode());
564     return  hash;
565   }
566 }