001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.regionserver;
021
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.util.concurrent.atomic.AtomicInteger;
025import java.util.regex.Matcher;
026import java.util.regex.Pattern;
027
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.fs.FileStatus;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.HDFSBlocksDistribution;
033import org.apache.yetus.audience.InterfaceAudience;
034import org.slf4j.Logger;
035import org.slf4j.LoggerFactory;
036import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
037import org.apache.hadoop.hbase.io.HFileLink;
038import org.apache.hadoop.hbase.io.HalfStoreFileReader;
039import org.apache.hadoop.hbase.io.Reference;
040import org.apache.hadoop.hbase.io.hfile.CacheConfig;
041import org.apache.hadoop.hbase.util.FSUtils;
042
043/**
044 * Describe a StoreFile (hfile, reference, link)
045 */
046@InterfaceAudience.Private
047public class StoreFileInfo {
048  private static final Logger LOG = LoggerFactory.getLogger(StoreFileInfo.class);
049
050  /**
051   * A non-capture group, for hfiles, so that this can be embedded.
052   * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
053   * The mob del file has (_del) as suffix.
054   */
055  public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?";
056
057  /** Regex that will work for hfiles */
058  private static final Pattern HFILE_NAME_PATTERN =
059    Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
060
061  /**
062   * A non-capture group, for del files, so that this can be embedded.
063   * A del file has (_del) as suffix.
064   */
065  public static final String DELFILE_NAME_REGEX = "[0-9a-f]+(?:_del)";
066
067  /** Regex that will work for del files */
068  private static final Pattern DELFILE_NAME_PATTERN =
069    Pattern.compile("^(" + DELFILE_NAME_REGEX + ")");
070
071  /**
072   * Regex that will work for straight reference names ({@code <hfile>.<parentEncRegion>})
073   * and hfilelink reference names ({@code <table>=<region>-<hfile>.<parentEncRegion>})
074   * If reference, then the regex has more than just one group.
075   * Group 1, hfile/hfilelink pattern, is this file's id.
076   * Group 2 '(.+)' is the reference's parent region name.
077   */
078  private static final Pattern REF_NAME_PATTERN =
079    Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
080      HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
081
082  // Configuration
083  private Configuration conf;
084
085  // FileSystem handle
086  private final FileSystem fs;
087
088  // HDFS blocks distribution information
089  private HDFSBlocksDistribution hdfsBlocksDistribution = null;
090
091  // If this storefile references another, this is the reference instance.
092  private final Reference reference;
093
094  // If this storefile is a link to another, this is the link instance.
095  private final HFileLink link;
096
097  private final Path initialPath;
098
099  private RegionCoprocessorHost coprocessorHost;
100
101  // timestamp on when the file was created, is 0 and ignored for reference or link files
102  private long createdTimestamp;
103
104  private long size;
105
106  /**
107   * Create a Store File Info
108   * @param conf the {@link Configuration} to use
109   * @param fs The current file system to use.
110   * @param initialPath The {@link Path} of the file
111   */
112  public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath)
113      throws IOException {
114    this(conf, fs, null, initialPath);
115  }
116
117  private StoreFileInfo(final Configuration conf, final FileSystem fs,
118      final FileStatus fileStatus, final Path initialPath) throws IOException {
119    assert fs != null;
120    assert initialPath != null;
121    assert conf != null;
122
123    this.fs = fs;
124    this.conf = conf;
125    this.initialPath = initialPath;
126    Path p = initialPath;
127    if (HFileLink.isHFileLink(p)) {
128      // HFileLink
129      this.reference = null;
130      this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
131      if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
132    } else if (isReference(p)) {
133      this.reference = Reference.read(fs, p);
134      Path referencePath = getReferredToFile(p);
135      if (HFileLink.isHFileLink(referencePath)) {
136        // HFileLink Reference
137        this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
138      } else {
139        // Reference
140        this.link = null;
141      }
142      if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
143              " reference to " + referencePath);
144    } else if (isHFile(p)) {
145      // HFile
146      if (fileStatus != null) {
147        this.createdTimestamp = fileStatus.getModificationTime();
148        this.size = fileStatus.getLen();
149      } else {
150        FileStatus fStatus = fs.getFileStatus(initialPath);
151        this.createdTimestamp = fStatus.getModificationTime();
152        this.size = fStatus.getLen();
153      }
154      this.reference = null;
155      this.link = null;
156    } else {
157      throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
158    }
159  }
160
161  /**
162   * Create a Store File Info
163   * @param conf the {@link Configuration} to use
164   * @param fs The current file system to use.
165   * @param fileStatus The {@link FileStatus} of the file
166   */
167  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
168      throws IOException {
169    this(conf, fs, fileStatus, fileStatus.getPath());
170  }
171
172  /**
173   * Create a Store File Info from an HFileLink
174   * @param conf The {@link Configuration} to use
175   * @param fs The current file system to use
176   * @param fileStatus The {@link FileStatus} of the file
177   */
178  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
179      final HFileLink link) {
180    this.fs = fs;
181    this.conf = conf;
182    // initialPath can be null only if we get a link.
183    this.initialPath = (fileStatus == null) ? null : fileStatus.getPath();
184      // HFileLink
185    this.reference = null;
186    this.link = link;
187  }
188
189  /**
190   * Create a Store File Info from an HFileLink
191   * @param conf The {@link Configuration} to use
192   * @param fs The current file system to use
193   * @param fileStatus The {@link FileStatus} of the file
194   * @param reference The reference instance
195   */
196  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
197      final Reference reference) {
198    this.fs = fs;
199    this.conf = conf;
200    this.initialPath = fileStatus.getPath();
201    this.createdTimestamp = fileStatus.getModificationTime();
202    this.reference = reference;
203    this.link = null;
204  }
205
206  /**
207   * Create a Store File Info from an HFileLink and a Reference
208   * @param conf The {@link Configuration} to use
209   * @param fs The current file system to use
210   * @param fileStatus The {@link FileStatus} of the file
211   * @param reference The reference instance
212   * @param link The link instance
213   */
214  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
215      final Reference reference, final HFileLink link) {
216    this.fs = fs;
217    this.conf = conf;
218    this.initialPath = fileStatus.getPath();
219    this.createdTimestamp = fileStatus.getModificationTime();
220    this.reference = reference;
221    this.link = link;
222  }
223
224  /**
225   * Size of the Hfile
226   * @return size
227   */
228  public long getSize() {
229    return size;
230  }
231
232  /**
233   * Sets the region coprocessor env.
234   * @param coprocessorHost
235   */
236  public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
237    this.coprocessorHost = coprocessorHost;
238  }
239
240  /*
241   * @return the Reference object associated to this StoreFileInfo.
242   *         null if the StoreFile is not a reference.
243   */
244  public Reference getReference() {
245    return this.reference;
246  }
247
248  /** @return True if the store file is a Reference */
249  public boolean isReference() {
250    return this.reference != null;
251  }
252
253  /** @return True if the store file is a top Reference */
254  public boolean isTopReference() {
255    return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
256  }
257
258  /** @return True if the store file is a link */
259  public boolean isLink() {
260    return this.link != null && this.reference == null;
261  }
262
263  /** @return the HDFS block distribution */
264  public HDFSBlocksDistribution getHDFSBlockDistribution() {
265    return this.hdfsBlocksDistribution;
266  }
267
268  /**
269   * Open a Reader for the StoreFile
270   * @param fs The current file system to use.
271   * @param cacheConf The cache configuration and block cache reference.
272   * @return The StoreFile.Reader for the file
273   */
274  public StoreFileReader open(FileSystem fs, CacheConfig cacheConf, boolean canUseDropBehind,
275      long readahead, boolean isPrimaryReplicaStoreFile, AtomicInteger refCount, boolean shared)
276      throws IOException {
277    FSDataInputStreamWrapper in;
278    FileStatus status;
279
280    final boolean doDropBehind = canUseDropBehind && cacheConf.shouldDropBehindCompaction();
281    if (this.link != null) {
282      // HFileLink
283      in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind, readahead);
284      status = this.link.getFileStatus(fs);
285    } else if (this.reference != null) {
286      // HFile Reference
287      Path referencePath = getReferredToFile(this.getPath());
288      try {
289        in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead);
290      } catch (FileNotFoundException fnfe) {
291        // Intercept the exception so can insert more info about the Reference; otherwise
292        // exception just complains about some random file -- operator doesn't realize it
293        // other end of a Reference
294        FileNotFoundException newFnfe = new FileNotFoundException(toString());
295        newFnfe.initCause(fnfe);
296        throw newFnfe;
297      }
298      status = fs.getFileStatus(referencePath);
299    } else {
300      in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead);
301      status = fs.getFileStatus(initialPath);
302    }
303    long length = status.getLen();
304    hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
305
306    StoreFileReader reader = null;
307    if (this.coprocessorHost != null) {
308      reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
309        cacheConf, reference);
310    }
311    if (reader == null) {
312      if (this.reference != null) {
313        reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
314            isPrimaryReplicaStoreFile, refCount, shared, conf);
315      } else {
316        reader = new StoreFileReader(fs, status.getPath(), in, length, cacheConf,
317            isPrimaryReplicaStoreFile, refCount, shared, conf);
318      }
319    }
320    if (this.coprocessorHost != null) {
321      reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
322        cacheConf, reference, reader);
323    }
324    return reader;
325  }
326
327  /**
328   * Compute the HDFS Block Distribution for this StoreFile
329   */
330  public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
331      throws IOException {
332    // guard against the case where we get the FileStatus from link, but by the time we
333    // call compute the file is moved again
334    if (this.link != null) {
335      FileNotFoundException exToThrow = null;
336      for (int i = 0; i < this.link.getLocations().length; i++) {
337        try {
338          return computeHDFSBlocksDistributionInternal(fs);
339        } catch (FileNotFoundException ex) {
340          // try the other location
341          exToThrow = ex;
342        }
343      }
344      throw exToThrow;
345    } else {
346      return computeHDFSBlocksDistributionInternal(fs);
347    }
348  }
349
350  private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
351      throws IOException {
352    FileStatus status = getReferencedFileStatus(fs);
353    if (this.reference != null) {
354      return computeRefFileHDFSBlockDistribution(fs, reference, status);
355    } else {
356      return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
357    }
358  }
359
360  /**
361   * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
362   * @param fs The current file system to use.
363   * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
364   */
365  public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
366    FileStatus status;
367    if (this.reference != null) {
368      if (this.link != null) {
369        FileNotFoundException exToThrow = null;
370        for (int i = 0; i < this.link.getLocations().length; i++) {
371          // HFileLink Reference
372          try {
373            return link.getFileStatus(fs);
374          } catch (FileNotFoundException ex) {
375            // try the other location
376            exToThrow = ex;
377          }
378        }
379        throw exToThrow;
380      } else {
381        // HFile Reference
382        Path referencePath = getReferredToFile(this.getPath());
383        status = fs.getFileStatus(referencePath);
384      }
385    } else {
386      if (this.link != null) {
387        FileNotFoundException exToThrow = null;
388        for (int i = 0; i < this.link.getLocations().length; i++) {
389          // HFileLink
390          try {
391            return link.getFileStatus(fs);
392          } catch (FileNotFoundException ex) {
393            // try the other location
394            exToThrow = ex;
395          }
396        }
397        throw exToThrow;
398      } else {
399        status = fs.getFileStatus(initialPath);
400      }
401    }
402    return status;
403  }
404
405  /** @return The {@link Path} of the file */
406  public Path getPath() {
407    return initialPath;
408  }
409
410  /** @return The {@link FileStatus} of the file */
411  public FileStatus getFileStatus() throws IOException {
412    return getReferencedFileStatus(fs);
413  }
414
415  /** @return Get the modification time of the file. */
416  public long getModificationTime() throws IOException {
417    return getFileStatus().getModificationTime();
418  }
419
420  @Override
421  public String toString() {
422    return this.getPath() +
423      (isReference() ? "->" + getReferredToFile(this.getPath()) + "-" + reference : "");
424  }
425
426  /**
427   * @param path Path to check.
428   * @return True if the path has format of a HFile.
429   */
430  public static boolean isHFile(final Path path) {
431    return isHFile(path.getName());
432  }
433
434  public static boolean isHFile(final String fileName) {
435    Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
436    return m.matches() && m.groupCount() > 0;
437  }
438
439  /**
440   * @param path Path to check.
441   * @return True if the path has format of a del file.
442   */
443  public static boolean isDelFile(final Path path) {
444    return isDelFile(path.getName());
445  }
446
447  /**
448   * @param fileName Sting version of path to validate.
449   * @return True if the file name has format of a del file.
450   */
451  public static boolean isDelFile(final String fileName) {
452    Matcher m = DELFILE_NAME_PATTERN.matcher(fileName);
453    return m.matches() && m.groupCount() > 0;
454  }
455
456  /**
457   * @param path Path to check.
458   * @return True if the path has format of a HStoreFile reference.
459   */
460  public static boolean isReference(final Path path) {
461    return isReference(path.getName());
462  }
463
464  /**
465   * @param name file name to check.
466   * @return True if the path has format of a HStoreFile reference.
467   */
468  public static boolean isReference(final String name) {
469    Matcher m = REF_NAME_PATTERN.matcher(name);
470    return m.matches() && m.groupCount() > 1;
471  }
472
473  /**
474   * @return timestamp when this file was created (as returned by filesystem)
475   */
476  public long getCreatedTimestamp() {
477    return createdTimestamp;
478  }
479
480  /*
481   * Return path to the file referred to by a Reference.  Presumes a directory
482   * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
483   * @param p Path to a Reference file.
484   * @return Calculated path to parent region file.
485   * @throws IllegalArgumentException when path regex fails to match.
486   */
487  public static Path getReferredToFile(final Path p) {
488    Matcher m = REF_NAME_PATTERN.matcher(p.getName());
489    if (m == null || !m.matches()) {
490      LOG.warn("Failed match of store file name " + p.toString());
491      throw new IllegalArgumentException("Failed match of store file name " +
492          p.toString());
493    }
494
495    // Other region name is suffix on the passed Reference file name
496    String otherRegion = m.group(2);
497    // Tabledir is up two directories from where Reference was written.
498    Path tableDir = p.getParent().getParent().getParent();
499    String nameStrippedOfSuffix = m.group(1);
500    if (LOG.isTraceEnabled()) {
501      LOG.trace("reference '" + p + "' to region=" + otherRegion
502        + " hfile=" + nameStrippedOfSuffix);
503    }
504
505    // Build up new path with the referenced region in place of our current
506    // region in the reference path.  Also strip regionname suffix from name.
507    return new Path(new Path(new Path(tableDir, otherRegion),
508      p.getParent().getName()), nameStrippedOfSuffix);
509  }
510
511  /**
512   * Validate the store file name.
513   * @param fileName name of the file to validate
514   * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
515   */
516  public static boolean validateStoreFileName(final String fileName) {
517    if (HFileLink.isHFileLink(fileName) || isReference(fileName))
518      return(true);
519    return !fileName.contains("-");
520  }
521
522  /**
523   * Return if the specified file is a valid store file or not.
524   * @param fileStatus The {@link FileStatus} of the file
525   * @return <tt>true</tt> if the file is valid
526   */
527  public static boolean isValid(final FileStatus fileStatus)
528      throws IOException {
529    final Path p = fileStatus.getPath();
530
531    if (fileStatus.isDirectory())
532      return false;
533
534    // Check for empty hfile. Should never be the case but can happen
535    // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
536    // NOTE: that the HFileLink is just a name, so it's an empty file.
537    if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
538      LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
539      return false;
540    }
541
542    return validateStoreFileName(p.getName());
543  }
544
545  /**
546   * helper function to compute HDFS blocks distribution of a given reference
547   * file.For reference file, we don't compute the exact value. We use some
548   * estimate instead given it might be good enough. we assume bottom part
549   * takes the first half of reference file, top part takes the second half
550   * of the reference file. This is just estimate, given
551   * midkey ofregion != midkey of HFile, also the number and size of keys vary.
552   * If this estimate isn't good enough, we can improve it later.
553   * @param fs  The FileSystem
554   * @param reference  The reference
555   * @param status  The reference FileStatus
556   * @return HDFS blocks distribution
557   */
558  private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
559      final FileSystem fs, final Reference reference, final FileStatus status)
560      throws IOException {
561    if (status == null) {
562      return null;
563    }
564
565    long start = 0;
566    long length = 0;
567
568    if (Reference.isTopFileRegion(reference.getFileRegion())) {
569      start = status.getLen()/2;
570      length = status.getLen() - status.getLen()/2;
571    } else {
572      start = 0;
573      length = status.getLen()/2;
574    }
575    return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
576  }
577
578  @Override
579  public boolean equals(Object that) {
580    if (this == that) return true;
581    if (that == null) return false;
582
583    if (!(that instanceof StoreFileInfo)) return false;
584
585    StoreFileInfo o = (StoreFileInfo)that;
586    if (initialPath != null && o.initialPath == null) return false;
587    if (initialPath == null && o.initialPath != null) return false;
588    if (initialPath != o.initialPath && initialPath != null
589            && !initialPath.equals(o.initialPath)) return false;
590
591    if (reference != null && o.reference == null) return false;
592    if (reference == null && o.reference != null) return false;
593    if (reference != o.reference && reference != null
594            && !reference.equals(o.reference)) return false;
595
596    if (link != null && o.link == null) return false;
597    if (link == null && o.link != null) return false;
598    if (link != o.link && link != null && !link.equals(o.link)) return false;
599
600    return true;
601  };
602
603
604  @Override
605  public int hashCode() {
606    int hash = 17;
607    hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode());
608    hash = hash * 31 + ((initialPath ==  null) ? 0 : initialPath.hashCode());
609    hash = hash * 31 + ((link == null) ? 0 : link.hashCode());
610    return  hash;
611  }
612
613  /**
614   * Return the active file name that contains the real data.
615   * <p>
616   * For referenced hfile, we will return the name of the reference file as it will be used to
617   * construct the StoreFileReader. And for linked hfile, we will return the name of the file being
618   * linked.
619   */
620  public String getActiveFileName() {
621    if (reference != null || link == null) {
622      return initialPath.getName();
623    } else {
624      return HFileLink.getReferencedHFileName(initialPath.getName());
625    }
626  }
627}