001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.regionserver;
021
022import java.io.FileNotFoundException;
023import java.io.IOException;
024import java.util.concurrent.atomic.AtomicInteger;
025import java.util.regex.Matcher;
026import java.util.regex.Pattern;
027
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.fs.FileStatus;
030import org.apache.hadoop.fs.FileSystem;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.HDFSBlocksDistribution;
033import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
034import org.apache.hadoop.hbase.io.HFileLink;
035import org.apache.hadoop.hbase.io.HalfStoreFileReader;
036import org.apache.hadoop.hbase.io.Reference;
037import org.apache.hadoop.hbase.io.hfile.CacheConfig;
038import org.apache.hadoop.hbase.io.hfile.HFileInfo;
039import org.apache.hadoop.hbase.io.hfile.ReaderContext;
040import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType;
041import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
042import org.apache.hadoop.hbase.util.FSUtils;
043import org.apache.yetus.audience.InterfaceAudience;
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047/**
048 * Describe a StoreFile (hfile, reference, link)
049 */
050@InterfaceAudience.Private
051public class StoreFileInfo {
052  private static final Logger LOG = LoggerFactory.getLogger(StoreFileInfo.class);
053
054  /**
055   * A non-capture group, for hfiles, so that this can be embedded.
056   * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
057   * The mob del file has (_del) as suffix.
058   */
059  public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?";
060
061  /** Regex that will work for hfiles */
062  private static final Pattern HFILE_NAME_PATTERN =
063    Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
064
065  /**
066   * A non-capture group, for del files, so that this can be embedded.
067   * A del file has (_del) as suffix.
068   */
069  public static final String DELFILE_NAME_REGEX = "[0-9a-f]+(?:_del)";
070
071  /** Regex that will work for del files */
072  private static final Pattern DELFILE_NAME_PATTERN =
073    Pattern.compile("^(" + DELFILE_NAME_REGEX + ")");
074
075  /**
076   * Regex that will work for straight reference names ({@code <hfile>.<parentEncRegion>})
077   * and hfilelink reference names ({@code <table>=<region>-<hfile>.<parentEncRegion>})
078   * If reference, then the regex has more than just one group.
079   * Group 1, hfile/hfilelink pattern, is this file's id.
080   * Group 2 '(.+)' is the reference's parent region name.
081   */
082  private static final Pattern REF_NAME_PATTERN =
083    Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
084      HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
085
086  public static final String STORE_FILE_READER_NO_READAHEAD = "hbase.store.reader.no-readahead";
087  public static final boolean DEFAULT_STORE_FILE_READER_NO_READAHEAD = false;
088
089  // Configuration
090  private final Configuration conf;
091
092  // FileSystem handle
093  private final FileSystem fs;
094
095  // HDFS blocks distribution information
096  private HDFSBlocksDistribution hdfsBlocksDistribution = null;
097
098  private HFileInfo hfileInfo;
099
100  // If this storefile references another, this is the reference instance.
101  private final Reference reference;
102
103  // If this storefile is a link to another, this is the link instance.
104  private final HFileLink link;
105
106  private final Path initialPath;
107
108  private RegionCoprocessorHost coprocessorHost;
109
110  // timestamp on when the file was created, is 0 and ignored for reference or link files
111  private long createdTimestamp;
112
113  private long size;
114
115  private final boolean primaryReplica;
116
117  private final boolean noReadahead;
118
119  // Counter that is incremented every time a scanner is created on the
120  // store file. It is decremented when the scan on the store file is
121  // done.
122  final AtomicInteger refCount = new AtomicInteger(0);
123
124  /**
125   * Create a Store File Info
126   * @param conf the {@link Configuration} to use
127   * @param fs The current file system to use.
128   * @param initialPath The {@link Path} of the file
129   * @param primaryReplica true if this is a store file for primary replica, otherwise false.
130   */
131  public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath,
132      final boolean primaryReplica) throws IOException {
133    this(conf, fs, null, initialPath, primaryReplica);
134  }
135
136  private StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
137      final Path initialPath, final boolean primaryReplica) throws IOException {
138    assert fs != null;
139    assert initialPath != null;
140    assert conf != null;
141
142    this.fs = fs;
143    this.conf = conf;
144    this.initialPath = initialPath;
145    this.primaryReplica = primaryReplica;
146    this.noReadahead = this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD,
147        DEFAULT_STORE_FILE_READER_NO_READAHEAD);
148    Path p = initialPath;
149    if (HFileLink.isHFileLink(p)) {
150      // HFileLink
151      this.reference = null;
152      this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
153      LOG.trace("{} is a link", p);
154    } else if (isReference(p)) {
155      this.reference = Reference.read(fs, p);
156      Path referencePath = getReferredToFile(p);
157      if (HFileLink.isHFileLink(referencePath)) {
158        // HFileLink Reference
159        this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
160      } else {
161        // Reference
162        this.link = null;
163      }
164      LOG.trace("{} is a {} reference to {}", p, reference.getFileRegion(), referencePath);
165    } else if (isHFile(p)) {
166      // HFile
167      if (fileStatus != null) {
168        this.createdTimestamp = fileStatus.getModificationTime();
169        this.size = fileStatus.getLen();
170      } else {
171        FileStatus fStatus = fs.getFileStatus(initialPath);
172        this.createdTimestamp = fStatus.getModificationTime();
173        this.size = fStatus.getLen();
174      }
175      this.reference = null;
176      this.link = null;
177    } else {
178      throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
179    }
180  }
181
182  /**
183   * Create a Store File Info
184   * @param conf the {@link Configuration} to use
185   * @param fs The current file system to use.
186   * @param fileStatus The {@link FileStatus} of the file
187   */
188  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
189      throws IOException {
190    this(conf, fs, fileStatus, fileStatus.getPath(), true);
191  }
192
193  /**
194   * Create a Store File Info from an HFileLink
195   * @param conf The {@link Configuration} to use
196   * @param fs The current file system to use
197   * @param fileStatus The {@link FileStatus} of the file
198   */
199  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
200      final HFileLink link) {
201    this(conf, fs, fileStatus, null, link);
202  }
203
204  /**
205   * Create a Store File Info from an HFileLink
206   * @param conf The {@link Configuration} to use
207   * @param fs The current file system to use
208   * @param fileStatus The {@link FileStatus} of the file
209   * @param reference The reference instance
210   */
211  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
212      final Reference reference) {
213    this(conf, fs, fileStatus, reference, null);
214  }
215
216  /**
217   * Create a Store File Info from an HFileLink and a Reference
218   * @param conf The {@link Configuration} to use
219   * @param fs The current file system to use
220   * @param fileStatus The {@link FileStatus} of the file
221   * @param reference The reference instance
222   * @param link The link instance
223   */
224  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
225      final Reference reference, final HFileLink link) {
226    this.fs = fs;
227    this.conf = conf;
228    this.primaryReplica = false;
229    this.initialPath = (fileStatus == null) ? null : fileStatus.getPath();
230    this.createdTimestamp = (fileStatus == null) ? 0 :fileStatus.getModificationTime();
231    this.reference = reference;
232    this.link = link;
233    this.noReadahead = this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD,
234        DEFAULT_STORE_FILE_READER_NO_READAHEAD);
235  }
236
237  /**
238   * Size of the Hfile
239   * @return size
240   */
241  public long getSize() {
242    return size;
243  }
244
245  /**
246   * Sets the region coprocessor env.
247   * @param coprocessorHost
248   */
249  public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
250    this.coprocessorHost = coprocessorHost;
251  }
252
253  /*
254   * @return the Reference object associated to this StoreFileInfo.
255   *         null if the StoreFile is not a reference.
256   */
257  public Reference getReference() {
258    return this.reference;
259  }
260
261  /** @return True if the store file is a Reference */
262  public boolean isReference() {
263    return this.reference != null;
264  }
265
266  /** @return True if the store file is a top Reference */
267  public boolean isTopReference() {
268    return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
269  }
270
271  /** @return True if the store file is a link */
272  public boolean isLink() {
273    return this.link != null && this.reference == null;
274  }
275
276  /** @return the HDFS block distribution */
277  public HDFSBlocksDistribution getHDFSBlockDistribution() {
278    return this.hdfsBlocksDistribution;
279  }
280
281  StoreFileReader createReader(ReaderContext context, CacheConfig cacheConf)
282      throws IOException {
283    StoreFileReader reader = null;
284    if (this.reference != null) {
285      reader = new HalfStoreFileReader(context, hfileInfo, cacheConf, reference, refCount, conf);
286    } else {
287      reader = new StoreFileReader(context, hfileInfo, cacheConf, refCount, conf);
288    }
289    return reader;
290  }
291
292  ReaderContext createReaderContext(boolean doDropBehind, long readahead, ReaderType type)
293      throws IOException {
294    FSDataInputStreamWrapper in;
295    FileStatus status;
296    if (this.link != null) {
297      // HFileLink
298      in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind, readahead);
299      status = this.link.getFileStatus(fs);
300    } else if (this.reference != null) {
301      // HFile Reference
302      Path referencePath = getReferredToFile(this.getPath());
303      try {
304        in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead);
305      } catch (FileNotFoundException fnfe) {
306        // Intercept the exception so can insert more info about the Reference; otherwise
307        // exception just complains about some random file -- operator doesn't realize it
308        // other end of a Reference
309        FileNotFoundException newFnfe = new FileNotFoundException(toString());
310        newFnfe.initCause(fnfe);
311        throw newFnfe;
312      }
313      status = fs.getFileStatus(referencePath);
314    } else {
315      in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead);
316      status = fs.getFileStatus(initialPath);
317    }
318    long length = status.getLen();
319    ReaderContextBuilder contextBuilder = new ReaderContextBuilder()
320        .withInputStreamWrapper(in)
321        .withFileSize(length)
322        .withPrimaryReplicaReader(this.primaryReplica)
323        .withReaderType(type)
324        .withFileSystem(fs);
325    if (this.reference != null) {
326      contextBuilder.withFilePath(this.getPath());
327    } else {
328      contextBuilder.withFilePath(status.getPath());
329    }
330    return contextBuilder.build();
331  }
332
333  /**
334   * Compute the HDFS Block Distribution for this StoreFile
335   */
336  public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
337      throws IOException {
338    // guard against the case where we get the FileStatus from link, but by the time we
339    // call compute the file is moved again
340    if (this.link != null) {
341      FileNotFoundException exToThrow = null;
342      for (int i = 0; i < this.link.getLocations().length; i++) {
343        try {
344          return computeHDFSBlocksDistributionInternal(fs);
345        } catch (FileNotFoundException ex) {
346          // try the other location
347          exToThrow = ex;
348        }
349      }
350      throw exToThrow;
351    } else {
352      return computeHDFSBlocksDistributionInternal(fs);
353    }
354  }
355
356  private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
357      throws IOException {
358    FileStatus status = getReferencedFileStatus(fs);
359    if (this.reference != null) {
360      return computeRefFileHDFSBlockDistribution(fs, reference, status);
361    } else {
362      return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
363    }
364  }
365
366  /**
367   * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
368   * @param fs The current file system to use.
369   * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
370   */
371  public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
372    FileStatus status;
373    if (this.reference != null) {
374      if (this.link != null) {
375        FileNotFoundException exToThrow = null;
376        for (int i = 0; i < this.link.getLocations().length; i++) {
377          // HFileLink Reference
378          try {
379            return link.getFileStatus(fs);
380          } catch (FileNotFoundException ex) {
381            // try the other location
382            exToThrow = ex;
383          }
384        }
385        throw exToThrow;
386      } else {
387        // HFile Reference
388        Path referencePath = getReferredToFile(this.getPath());
389        status = fs.getFileStatus(referencePath);
390      }
391    } else {
392      if (this.link != null) {
393        FileNotFoundException exToThrow = null;
394        for (int i = 0; i < this.link.getLocations().length; i++) {
395          // HFileLink
396          try {
397            return link.getFileStatus(fs);
398          } catch (FileNotFoundException ex) {
399            // try the other location
400            exToThrow = ex;
401          }
402        }
403        throw exToThrow;
404      } else {
405        status = fs.getFileStatus(initialPath);
406      }
407    }
408    return status;
409  }
410
411  /** @return The {@link Path} of the file */
412  public Path getPath() {
413    return initialPath;
414  }
415
416  /** @return The {@link FileStatus} of the file */
417  public FileStatus getFileStatus() throws IOException {
418    return getReferencedFileStatus(fs);
419  }
420
421  /** @return Get the modification time of the file. */
422  public long getModificationTime() throws IOException {
423    return getFileStatus().getModificationTime();
424  }
425
426  @Override
427  public String toString() {
428    return this.getPath() +
429      (isReference() ? "->" + getReferredToFile(this.getPath()) + "-" + reference : "");
430  }
431
432  /**
433   * @param path Path to check.
434   * @return True if the path has format of a HFile.
435   */
436  public static boolean isHFile(final Path path) {
437    return isHFile(path.getName());
438  }
439
440  public static boolean isHFile(final String fileName) {
441    Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
442    return m.matches() && m.groupCount() > 0;
443  }
444
445  /**
446   * @param path Path to check.
447   * @return True if the path has format of a del file.
448   */
449  public static boolean isDelFile(final Path path) {
450    return isDelFile(path.getName());
451  }
452
453  /**
454   * @param fileName Sting version of path to validate.
455   * @return True if the file name has format of a del file.
456   */
457  public static boolean isDelFile(final String fileName) {
458    Matcher m = DELFILE_NAME_PATTERN.matcher(fileName);
459    return m.matches() && m.groupCount() > 0;
460  }
461
462  /**
463   * @param path Path to check.
464   * @return True if the path has format of a HStoreFile reference.
465   */
466  public static boolean isReference(final Path path) {
467    return isReference(path.getName());
468  }
469
470  /**
471   * @param name file name to check.
472   * @return True if the path has format of a HStoreFile reference.
473   */
474  public static boolean isReference(final String name) {
475    Matcher m = REF_NAME_PATTERN.matcher(name);
476    return m.matches() && m.groupCount() > 1;
477  }
478
479  /**
480   * @return timestamp when this file was created (as returned by filesystem)
481   */
482  public long getCreatedTimestamp() {
483    return createdTimestamp;
484  }
485
486  /*
487   * Return path to the file referred to by a Reference.  Presumes a directory
488   * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
489   * @param p Path to a Reference file.
490   * @return Calculated path to parent region file.
491   * @throws IllegalArgumentException when path regex fails to match.
492   */
493  public static Path getReferredToFile(final Path p) {
494    Matcher m = REF_NAME_PATTERN.matcher(p.getName());
495    if (m == null || !m.matches()) {
496      LOG.warn("Failed match of store file name {}", p.toString());
497      throw new IllegalArgumentException("Failed match of store file name " +
498          p.toString());
499    }
500
501    // Other region name is suffix on the passed Reference file name
502    String otherRegion = m.group(2);
503    // Tabledir is up two directories from where Reference was written.
504    Path tableDir = p.getParent().getParent().getParent();
505    String nameStrippedOfSuffix = m.group(1);
506    LOG.trace("reference {} to region={} hfile={}", p, otherRegion, nameStrippedOfSuffix);
507
508    // Build up new path with the referenced region in place of our current
509    // region in the reference path.  Also strip regionname suffix from name.
510    return new Path(new Path(new Path(tableDir, otherRegion),
511      p.getParent().getName()), nameStrippedOfSuffix);
512  }
513
514  /**
515   * Validate the store file name.
516   * @param fileName name of the file to validate
517   * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
518   */
519  public static boolean validateStoreFileName(final String fileName) {
520    if (HFileLink.isHFileLink(fileName) || isReference(fileName))
521      return(true);
522    return !fileName.contains("-");
523  }
524
525  /**
526   * Return if the specified file is a valid store file or not.
527   * @param fileStatus The {@link FileStatus} of the file
528   * @return <tt>true</tt> if the file is valid
529   */
530  public static boolean isValid(final FileStatus fileStatus)
531      throws IOException {
532    final Path p = fileStatus.getPath();
533
534    if (fileStatus.isDirectory())
535      return false;
536
537    // Check for empty hfile. Should never be the case but can happen
538    // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
539    // NOTE: that the HFileLink is just a name, so it's an empty file.
540    if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
541      LOG.warn("Skipping {} because it is empty. HBASE-646 DATA LOSS?", p);
542      return false;
543    }
544
545    return validateStoreFileName(p.getName());
546  }
547
548  /**
549   * helper function to compute HDFS blocks distribution of a given reference
550   * file.For reference file, we don't compute the exact value. We use some
551   * estimate instead given it might be good enough. we assume bottom part
552   * takes the first half of reference file, top part takes the second half
553   * of the reference file. This is just estimate, given
554   * midkey ofregion != midkey of HFile, also the number and size of keys vary.
555   * If this estimate isn't good enough, we can improve it later.
556   * @param fs  The FileSystem
557   * @param reference  The reference
558   * @param status  The reference FileStatus
559   * @return HDFS blocks distribution
560   */
561  private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
562      final FileSystem fs, final Reference reference, final FileStatus status)
563      throws IOException {
564    if (status == null) {
565      return null;
566    }
567
568    long start = 0;
569    long length = 0;
570
571    if (Reference.isTopFileRegion(reference.getFileRegion())) {
572      start = status.getLen()/2;
573      length = status.getLen() - status.getLen()/2;
574    } else {
575      start = 0;
576      length = status.getLen()/2;
577    }
578    return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
579  }
580
581  @Override
582  public boolean equals(Object that) {
583    if (this == that) return true;
584    if (that == null) return false;
585
586    if (!(that instanceof StoreFileInfo)) return false;
587
588    StoreFileInfo o = (StoreFileInfo)that;
589    if (initialPath != null && o.initialPath == null) return false;
590    if (initialPath == null && o.initialPath != null) return false;
591    if (initialPath != o.initialPath && initialPath != null
592            && !initialPath.equals(o.initialPath)) return false;
593
594    if (reference != null && o.reference == null) return false;
595    if (reference == null && o.reference != null) return false;
596    if (reference != o.reference && reference != null
597            && !reference.equals(o.reference)) return false;
598
599    if (link != null && o.link == null) return false;
600    if (link == null && o.link != null) return false;
601    if (link != o.link && link != null && !link.equals(o.link)) return false;
602
603    return true;
604  };
605
606
607  @Override
608  public int hashCode() {
609    int hash = 17;
610    hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode());
611    hash = hash * 31 + ((initialPath ==  null) ? 0 : initialPath.hashCode());
612    hash = hash * 31 + ((link == null) ? 0 : link.hashCode());
613    return  hash;
614  }
615
616  /**
617   * Return the active file name that contains the real data.
618   * <p>
619   * For referenced hfile, we will return the name of the reference file as it will be used to
620   * construct the StoreFileReader. And for linked hfile, we will return the name of the file being
621   * linked.
622   */
623  public String getActiveFileName() {
624    if (reference != null || link == null) {
625      return initialPath.getName();
626    } else {
627      return HFileLink.getReferencedHFileName(initialPath.getName());
628    }
629  }
630
631  FileSystem getFileSystem() {
632    return this.fs;
633  }
634
635  Configuration getConf() {
636    return this.conf;
637  }
638
639  boolean isNoReadahead() {
640    return this.noReadahead;
641  }
642
643  HFileInfo getHFileInfo() {
644    return hfileInfo;
645  }
646
647  void initHDFSBlocksDistribution() throws IOException {
648    hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
649  }
650
651  StoreFileReader preStoreFileReaderOpen(ReaderContext context, CacheConfig cacheConf)
652      throws IOException {
653    StoreFileReader reader = null;
654    if (this.coprocessorHost != null) {
655      reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(),
656          context.getInputStreamWrapper(), context.getFileSize(),
657          cacheConf, reference);
658    }
659    return reader;
660  }
661
662  StoreFileReader postStoreFileReaderOpen(ReaderContext context, CacheConfig cacheConf,
663      StoreFileReader reader) throws IOException {
664    StoreFileReader res = reader;
665    if (this.coprocessorHost != null) {
666      res = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(),
667          context.getInputStreamWrapper(), context.getFileSize(),
668          cacheConf, reference, reader);
669    }
670    return res;
671  }
672
673  public void initHFileInfo(ReaderContext context) throws IOException {
674    this.hfileInfo = new HFileInfo(context, conf);
675  }
676}