001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.FileNotFoundException;
021import java.io.IOException;
022import java.util.OptionalLong;
023import java.util.concurrent.atomic.AtomicInteger;
024import java.util.regex.Matcher;
025import java.util.regex.Pattern;
026import org.apache.hadoop.conf.Configurable;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.FileStatus;
029import org.apache.hadoop.fs.FileSystem;
030import org.apache.hadoop.fs.Path;
031import org.apache.hadoop.hbase.HDFSBlocksDistribution;
032import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
033import org.apache.hadoop.hbase.io.HFileLink;
034import org.apache.hadoop.hbase.io.HalfStoreFileReader;
035import org.apache.hadoop.hbase.io.Reference;
036import org.apache.hadoop.hbase.io.hfile.CacheConfig;
037import org.apache.hadoop.hbase.io.hfile.HFileInfo;
038import org.apache.hadoop.hbase.io.hfile.InvalidHFileException;
039import org.apache.hadoop.hbase.io.hfile.ReaderContext;
040import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType;
041import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
042import org.apache.hadoop.hbase.keymeta.ManagedKeyDataCache;
043import org.apache.hadoop.hbase.keymeta.SystemKeyCache;
044import org.apache.hadoop.hbase.mob.MobUtils;
045import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
046import org.apache.hadoop.hbase.util.FSUtils;
047import org.apache.hadoop.hbase.util.Pair;
048import org.apache.yetus.audience.InterfaceAudience;
049import org.slf4j.Logger;
050import org.slf4j.LoggerFactory;
051
052/**
053 * Describe a StoreFile (hfile, reference, link)
054 */
055@InterfaceAudience.Private
056public class StoreFileInfo implements Configurable {
057  private static final Logger LOG = LoggerFactory.getLogger(StoreFileInfo.class);
058
059  /**
060   * A non-capture group, for hfiles, so that this can be embedded. HFiles are uuid ([0-9a-z]+).
061   * Bulk loaded hfiles have (_SeqId_[0-9]+_) as a suffix. The mob del file has (_del) as a suffix.
062   */
063  public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?";
064
065  /** Regex that will work for hfiles */
066  private static final Pattern HFILE_NAME_PATTERN = Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
067
068  /**
069   * Regex that will work for straight reference names ({@code <hfile>.<parentEncRegion>}) and
070   * hfilelink reference names ({@code
071   *
072  <table>
073   * =<region>-<hfile>.<parentEncRegion>}). If reference, then the regex has more than just one
074   * group. Group 1, hfile/hfilelink pattern, is this file's id. Group 2 '(.+)' is the reference's
075   * parent region name.
076   */
077  private static final Pattern REF_NAME_PATTERN =
078    Pattern.compile(String.format("^(%s|%s)\\.(.+)$", HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
079
080  public static final String STORE_FILE_READER_NO_READAHEAD = "hbase.store.reader.no-readahead";
081  public static final boolean DEFAULT_STORE_FILE_READER_NO_READAHEAD = true;
082
083  // Configuration
084  private Configuration conf;
085
086  // FileSystem handle
087  private final FileSystem fs;
088
089  // HDFS blocks distribution information
090  private HDFSBlocksDistribution hdfsBlocksDistribution = null;
091
092  private HFileInfo hfileInfo;
093
094  // If this storefile references another, this is the reference instance.
095  private final Reference reference;
096
097  // If this storefile is a link to another, this is the link instance.
098  private final HFileLink link;
099
100  private final Path initialPath;
101
102  private RegionCoprocessorHost coprocessorHost;
103
104  // timestamp on when the file was created, is 0 and ignored for reference or link files
105  private long createdTimestamp;
106
107  private long size;
108
109  private final boolean primaryReplica;
110
111  private final boolean noReadahead;
112
113  // Counter that is incremented every time a scanner is created on the
114  // store file. It is decremented when the scan on the store file is
115  // done.
116  private final AtomicInteger refCount = new AtomicInteger(0);
117
118  private StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
119    final Path initialPath, final boolean primaryReplica, final StoreFileTracker sft)
120    throws IOException {
121    assert fs != null;
122    assert initialPath != null;
123    assert conf != null;
124
125    this.fs = fs;
126    this.conf = conf;
127    this.initialPath = fs.makeQualified(initialPath);
128    this.primaryReplica = primaryReplica;
129    this.noReadahead =
130      this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD);
131    Path p = initialPath;
132    if (HFileLink.isHFileLink(p)) {
133      // HFileLink
134      this.reference = null;
135      this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
136      LOG.trace("{} is a link", p);
137    } else if (isReference(p)) {
138      this.reference = sft.readReference(p);
139      Path referencePath = getReferredToFile(p);
140      if (HFileLink.isHFileLink(referencePath)) {
141        // HFileLink Reference
142        this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
143      } else {
144        // Reference
145        this.link = null;
146      }
147      LOG.trace("{} is a {} reference to {}", p, reference.getFileRegion(), referencePath);
148    } else if (isHFile(p) || isMobFile(p) || isMobRefFile(p)) {
149      // HFile
150      if (fileStatus != null) {
151        this.createdTimestamp = fileStatus.getModificationTime();
152        this.size = fileStatus.getLen();
153      } else {
154        FileStatus fStatus = fs.getFileStatus(initialPath);
155        this.createdTimestamp = fStatus.getModificationTime();
156        this.size = fStatus.getLen();
157      }
158      this.reference = null;
159      this.link = null;
160    } else {
161      throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
162    }
163  }
164
165  /**
166   * Create a Store File Info from an HFileLink
167   * @param conf       The {@link Configuration} to use
168   * @param fs         The current file system to use
169   * @param fileStatus The {@link FileStatus} of the file
170   */
171  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
172    final HFileLink link) {
173    this(conf, fs, fileStatus, null, link);
174  }
175
176  /**
177   * Create a Store File Info from an HFileLink
178   * @param conf       The {@link Configuration} to use
179   * @param fs         The current file system to use
180   * @param fileStatus The {@link FileStatus} of the file
181   */
182  public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initiaPath,
183    final HFileLink link) {
184    this(conf, fs, initiaPath, null, link);
185  }
186
187  /**
188   * Create a Store File Info from an HFileLink
189   * @param conf       The {@link Configuration} to use
190   * @param fs         The current file system to use
191   * @param fileStatus The {@link FileStatus} of the file
192   * @param reference  The reference instance
193   */
194  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
195    final Reference reference) {
196    this(conf, fs, fileStatus, reference, null);
197  }
198
199  /**
200   * Create a Store File Info from an HFileLink
201   * @param conf       The {@link Configuration} to use
202   * @param fs         The current file system to use
203   * @param fileStatus The {@link FileStatus} of the file
204   * @param reference  The reference instance
205   */
206  public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath,
207    final Reference reference) {
208    this(conf, fs, initialPath, reference, null);
209  }
210
211  /**
212   * Create a Store File Info from an HFileLink and a Reference
213   * @param conf       The {@link Configuration} to use
214   * @param fs         The current file system to use
215   * @param fileStatus The {@link FileStatus} of the file
216   * @param reference  The reference instance
217   * @param link       The link instance
218   */
219  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
220    final Reference reference, final HFileLink link) {
221    this.fs = fs;
222    this.conf = conf;
223    this.primaryReplica = false;
224    this.initialPath = (fileStatus == null) ? null : fileStatus.getPath();
225    this.createdTimestamp = (fileStatus == null) ? 0 : fileStatus.getModificationTime();
226    this.reference = reference;
227    this.link = link;
228    this.noReadahead =
229      this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD);
230  }
231
232  /**
233   * Create a Store File Info from an HFileLink and a Reference
234   * @param conf       The {@link Configuration} to use
235   * @param fs         The current file system to use
236   * @param fileStatus The {@link FileStatus} of the file
237   * @param reference  The reference instance
238   * @param link       The link instance
239   */
240  public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path path,
241    final Reference reference, final HFileLink link) {
242    this.fs = fs;
243    this.conf = conf;
244    this.primaryReplica = false;
245    this.initialPath = path;
246    this.reference = reference;
247    this.link = link;
248    this.noReadahead =
249      this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD);
250  }
251
252  /**
253   * Create a Store File Info from an HFileLink and a Reference
254   * @param conf       The {@link Configuration} to use
255   * @param fs         The current file system to use
256   * @param fileStatus The {@link FileStatus} of the file
257   * @param reference  The reference instance
258   * @param link       The link instance
259   */
260  public StoreFileInfo(final Configuration conf, final FileSystem fs, final long createdTimestamp,
261    final Path initialPath, final long size, final Reference reference, final HFileLink link,
262    final boolean primaryReplica) {
263    this.fs = fs;
264    this.conf = conf;
265    this.primaryReplica = primaryReplica;
266    this.initialPath = initialPath;
267    this.createdTimestamp = createdTimestamp;
268    this.size = size;
269    this.reference = reference;
270    this.link = link;
271    this.noReadahead =
272      this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD);
273  }
274
275  public HFileLink getLink() {
276    return link;
277  }
278
279  @Override
280  public Configuration getConf() {
281    return conf;
282  }
283
284  @Override
285  public void setConf(Configuration conf) {
286    this.conf = conf;
287  }
288
289  /**
290   * Size of the Hfile
291   */
292  public long getSize() {
293    return size;
294  }
295
296  /**
297   * Sets the region coprocessor env.
298   */
299  public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
300    this.coprocessorHost = coprocessorHost;
301  }
302
303  /**
304   * @return the Reference object associated to this StoreFileInfo. null if the StoreFile is not a
305   *         reference.
306   */
307  public Reference getReference() {
308    return this.reference;
309  }
310
311  /** Returns True if the store file is a Reference */
312  public boolean isReference() {
313    return this.reference != null;
314  }
315
316  /** Returns True if the store file is a top Reference */
317  public boolean isTopReference() {
318    return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
319  }
320
321  /** Returns True if the store file is a link */
322  public boolean isLink() {
323    return this.link != null && this.reference == null;
324  }
325
326  /** Returns the HDFS block distribution */
327  public HDFSBlocksDistribution getHDFSBlockDistribution() {
328    return this.hdfsBlocksDistribution;
329  }
330
331  public StoreFileReader createReader(ReaderContext context, CacheConfig cacheConf)
332    throws IOException {
333    StoreFileReader reader = null;
334    if (this.reference != null) {
335      reader = new HalfStoreFileReader(context, hfileInfo, cacheConf, reference, this, conf);
336    } else {
337      reader = new StoreFileReader(context, hfileInfo, cacheConf, this, conf);
338    }
339    return reader;
340  }
341
342  ReaderContext createReaderContext(boolean doDropBehind, long readahead, ReaderType type,
343    String keyNamespace, SystemKeyCache systemKeyCache, ManagedKeyDataCache managedKeyDataCache)
344    throws IOException {
345    FSDataInputStreamWrapper in;
346    FileStatus status;
347    if (this.link != null) {
348      // HFileLink
349      in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind, readahead);
350      status = this.link.getFileStatus(fs);
351    } else if (this.reference != null) {
352      // HFile Reference
353      Path referencePath = getReferredToFile(this.getPath());
354      try {
355        in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead);
356      } catch (FileNotFoundException fnfe) {
357        // Intercept the exception so can insert more info about the Reference; otherwise
358        // exception just complains about some random file -- operator doesn't realize it
359        // other end of a Reference
360        FileNotFoundException newFnfe = new FileNotFoundException(toString());
361        newFnfe.initCause(fnfe);
362        throw newFnfe;
363      }
364      status = fs.getFileStatus(referencePath);
365    } else {
366      in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead);
367      status = fs.getFileStatus(initialPath);
368    }
369    long length = status.getLen();
370    ReaderContextBuilder contextBuilder =
371      new ReaderContextBuilder().withInputStreamWrapper(in).withFileSize(length)
372        .withPrimaryReplicaReader(this.primaryReplica).withReaderType(type).withFileSystem(fs)
373        .withSystemKeyCache(systemKeyCache).withManagedKeyDataCache(managedKeyDataCache);
374    if (this.reference != null) {
375      contextBuilder.withFilePath(this.getPath());
376    } else {
377      contextBuilder.withFilePath(status.getPath());
378    }
379    return contextBuilder.build();
380  }
381
382  /**
383   * Compute the HDFS Block Distribution for this StoreFile
384   */
385  public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
386    throws IOException {
387    // guard against the case where we get the FileStatus from link, but by the time we
388    // call compute the file is moved again
389    if (this.link != null) {
390      FileNotFoundException exToThrow = null;
391      for (int i = 0; i < this.link.getLocations().length; i++) {
392        try {
393          return computeHDFSBlocksDistributionInternal(fs);
394        } catch (FileNotFoundException ex) {
395          // try the other location
396          exToThrow = ex;
397        }
398      }
399      throw exToThrow;
400    } else {
401      return computeHDFSBlocksDistributionInternal(fs);
402    }
403  }
404
405  private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
406    throws IOException {
407    FileStatus status = getReferencedFileStatus(fs);
408    if (this.reference != null) {
409      return computeRefFileHDFSBlockDistribution(fs, reference, status);
410    } else {
411      return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
412    }
413  }
414
415  /**
416   * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
417   * @param fs The current file system to use.
418   * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
419   */
420  public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
421    FileStatus status;
422    if (this.reference != null) {
423      if (this.link != null) {
424        FileNotFoundException exToThrow = null;
425        for (int i = 0; i < this.link.getLocations().length; i++) {
426          // HFileLink Reference
427          try {
428            return link.getFileStatus(fs);
429          } catch (FileNotFoundException ex) {
430            // try the other location
431            exToThrow = ex;
432          }
433        }
434        throw exToThrow;
435      } else {
436        // HFile Reference
437        Path referencePath = getReferredToFile(this.getPath());
438        status = fs.getFileStatus(referencePath);
439      }
440    } else {
441      if (this.link != null) {
442        FileNotFoundException exToThrow = null;
443        for (int i = 0; i < this.link.getLocations().length; i++) {
444          // HFileLink
445          try {
446            return link.getFileStatus(fs);
447          } catch (FileNotFoundException ex) {
448            // try the other location
449            exToThrow = ex;
450          }
451        }
452        throw exToThrow;
453      } else {
454        status = fs.getFileStatus(initialPath);
455      }
456    }
457    return status;
458  }
459
460  /** Returns The {@link Path} of the file */
461  public Path getPath() {
462    return initialPath;
463  }
464
465  /** Returns The {@link FileStatus} of the file */
466  public FileStatus getFileStatus() throws IOException {
467    return getReferencedFileStatus(fs);
468  }
469
470  /** Returns Get the modification time of the file. */
471  public long getModificationTime() throws IOException {
472    return getFileStatus().getModificationTime();
473  }
474
475  @Override
476  public String toString() {
477    return this.getPath()
478      + (isReference() ? "->" + getReferredToFile(this.getPath()) + "-" + reference : "");
479  }
480
481  /**
482   * Cells in a bulkloaded file don't have a sequenceId since they don't go through memstore. When a
483   * bulkload file is committed, the current memstore ts is stamped onto the file name as the
484   * sequenceId of the file. At read time, the sequenceId is copied onto all of the cells returned
485   * so that they can be properly sorted relative to other cells in other files. Further, when
486   * opening multiple files for scan, the sequence id is used to ensusre that the bulkload file's
487   * scanner is porperly sorted amongst the other scanners. Non-bulkloaded files get their
488   * sequenceId from the MAX_MEMSTORE_TS_KEY since those go through the memstore and have true
489   * sequenceIds.
490   */
491  private static final String SEQ_ID_MARKER = "_SeqId_";
492  private static final int SEQ_ID_MARKER_LENGTH = SEQ_ID_MARKER.length();
493
494  /**
495   * @see #SEQ_ID_MARKER
496   * @return True if the file name looks like a bulkloaded file, based on the presence of the SeqId
497   *         marker added to those files.
498   */
499  public static boolean hasBulkloadSeqId(final Path path) {
500    String fileName = path.getName();
501    return fileName.contains(SEQ_ID_MARKER);
502  }
503
504  /**
505   * @see #SEQ_ID_MARKER
506   * @return If the path is a properly named bulkloaded file, returns the sequence id stamped at the
507   *         end of the file name.
508   */
509  public static OptionalLong getBulkloadSeqId(final Path path) {
510    String fileName = path.getName();
511    int startPos = fileName.indexOf(SEQ_ID_MARKER);
512    if (startPos != -1) {
513      String strVal = fileName.substring(startPos + SEQ_ID_MARKER_LENGTH,
514        fileName.indexOf('_', startPos + SEQ_ID_MARKER_LENGTH));
515      return OptionalLong.of(Long.parseLong(strVal));
516    }
517    return OptionalLong.empty();
518  }
519
520  /**
521   * @see #SEQ_ID_MARKER
522   * @return A string value for appending to the end of a bulkloaded file name, containing the
523   *         properly formatted SeqId marker.
524   */
525  public static String formatBulkloadSeqId(long seqId) {
526    return SEQ_ID_MARKER + seqId + "_";
527  }
528
529  /**
530   * @param path Path to check.
531   * @return True if the path has format of a HFile.
532   */
533  public static boolean isHFile(final Path path) {
534    return isHFile(path.getName());
535  }
536
537  public static boolean isHFile(final String fileName) {
538    Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
539    return m.matches() && m.groupCount() > 0;
540  }
541
542  /**
543   * Checks if the file is a MOB file
544   * @param path path to a file
545   * @return true, if - yes, false otherwise
546   */
547  public static boolean isMobFile(final Path path) {
548    String fileName = path.getName();
549    String[] parts = fileName.split(MobUtils.SEP);
550    if (parts.length != 2) {
551      return false;
552    }
553    Matcher m = HFILE_NAME_PATTERN.matcher(parts[0]);
554    Matcher mm = HFILE_NAME_PATTERN.matcher(parts[1]);
555    return m.matches() && mm.matches();
556  }
557
558  /**
559   * Checks if the file is a MOB reference file, created by snapshot
560   * @param path path to a file
561   * @return true, if - yes, false otherwise
562   */
563  public static boolean isMobRefFile(final Path path) {
564    String fileName = path.getName();
565    int lastIndex = fileName.lastIndexOf(MobUtils.SEP);
566    if (lastIndex < 0) {
567      return false;
568    }
569    String[] parts = new String[2];
570    parts[0] = fileName.substring(0, lastIndex);
571    parts[1] = fileName.substring(lastIndex + 1);
572    String name = parts[0] + "." + parts[1];
573    Matcher m = REF_NAME_PATTERN.matcher(name);
574    return m.matches() && m.groupCount() > 1;
575  }
576
577  /**
578   * @param path Path to check.
579   * @return True if the path has format of a HStoreFile reference.
580   */
581  public static boolean isReference(final Path path) {
582    return isReference(path.getName());
583  }
584
585  /**
586   * @param name file name to check.
587   * @return True if the path has format of a HStoreFile reference.
588   */
589  public static boolean isReference(final String name) {
590    // The REF_NAME_PATTERN regex is not computationally trivial, so see if we can fast-fail
591    // on a simple heuristic first. The regex contains a literal ".", so if that character
592    // isn't in the name, then the regex cannot match.
593    if (!name.contains(".")) {
594      return false;
595    }
596
597    Matcher m = REF_NAME_PATTERN.matcher(name);
598    return m.matches() && m.groupCount() > 1;
599  }
600
601  /** Returns timestamp when this file was created (as returned by filesystem) */
602  public long getCreatedTimestamp() {
603    return createdTimestamp;
604  }
605
606  /*
607   * Return path to the file referred to by a Reference. Presumes a directory hierarchy of
608   * <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
609   * @param p Path to a Reference file.
610   * @return Calculated path to parent region file.
611   * @throws IllegalArgumentException when path regex fails to match.
612   */
613  public static Path getReferredToFile(final Path p) {
614    Matcher m = REF_NAME_PATTERN.matcher(p.getName());
615    if (m == null || !m.matches()) {
616      LOG.warn("Failed match of store file name {}", p.toString());
617      throw new IllegalArgumentException("Failed match of store file name " + p.toString());
618    }
619
620    // Other region name is suffix on the passed Reference file name
621    String otherRegion = m.group(2);
622    // Tabledir is up two directories from where Reference was written.
623    Path tableDir = p.getParent().getParent().getParent();
624    String nameStrippedOfSuffix = m.group(1);
625    LOG.trace("reference {} to region={} hfile={}", p, otherRegion, nameStrippedOfSuffix);
626
627    // Build up new path with the referenced region in place of our current
628    // region in the reference path. Also strip regionname suffix from name.
629    return new Path(new Path(new Path(tableDir, otherRegion), p.getParent().getName()),
630      nameStrippedOfSuffix);
631  }
632
633  /*
634   * Return region and file name referred to by a Reference.
635   * @param referenceFile HFile name which is a Reference.
636   * @return Calculated referenced region and file name.
637   * @throws IllegalArgumentException when referenceFile regex fails to match.
638   */
639  public static Pair<String, String> getReferredToRegionAndFile(final String referenceFile) {
640    Matcher m = REF_NAME_PATTERN.matcher(referenceFile);
641    if (m == null || !m.matches()) {
642      LOG.warn("Failed match of store file name {}", referenceFile);
643      throw new IllegalArgumentException("Failed match of store file name " + referenceFile);
644    }
645    String referencedRegion = m.group(2);
646    String referencedFile = m.group(1);
647    LOG.trace("reference {} to region={} file={}", referenceFile, referencedRegion, referencedFile);
648    return new Pair<>(referencedRegion, referencedFile);
649  }
650
651  /**
652   * Validate the store file name.
653   * @param fileName name of the file to validate
654   * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
655   */
656  public static boolean validateStoreFileName(final String fileName) {
657    if (HFileLink.isHFileLink(fileName) || isReference(fileName) || isMobFileLink(fileName)) {
658      return true;
659    }
660    return !fileName.contains("-");
661  }
662
663  public static boolean isMobFileLink(String fileName) {
664    Matcher m = HFileLink.REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
665    return m.matches() && !isReference(fileName);
666  }
667
668  /**
669   * Return if the specified file is a valid store file or not.
670   * @param fileStatus The {@link FileStatus} of the file
671   * @return <tt>true</tt> if the file is valid
672   */
673  public static boolean isValid(final FileStatus fileStatus) throws IOException {
674    final Path p = fileStatus.getPath();
675
676    if (fileStatus.isDirectory()) {
677      return false;
678    }
679
680    // Check for empty hfile. Should never be the case but can happen
681    // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
682    // NOTE: that the HFileLink is just a name, so it's an empty file.
683    if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0 && !isMobFileLink(p.getName())) {
684      LOG.warn("Skipping {} because it is empty. HBASE-646 DATA LOSS?", p);
685      return false;
686    }
687
688    return validateStoreFileName(p.getName());
689  }
690
691  /**
692   * helper function to compute HDFS blocks distribution of a given reference file.For reference
693   * file, we don't compute the exact value. We use some estimate instead given it might be good
694   * enough. we assume bottom part takes the first half of reference file, top part takes the second
695   * half of the reference file. This is just estimate, given midkey ofregion != midkey of HFile,
696   * also the number and size of keys vary. If this estimate isn't good enough, we can improve it
697   * later.
698   * @param fs        The FileSystem
699   * @param reference The reference
700   * @param status    The reference FileStatus
701   * @return HDFS blocks distribution
702   */
703  private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(final FileSystem fs,
704    final Reference reference, final FileStatus status) throws IOException {
705    if (status == null) {
706      return null;
707    }
708
709    long start = 0;
710    long length = 0;
711
712    if (Reference.isTopFileRegion(reference.getFileRegion())) {
713      start = status.getLen() / 2;
714      length = status.getLen() - status.getLen() / 2;
715    } else {
716      start = 0;
717      length = status.getLen() / 2;
718    }
719    return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
720  }
721
722  @Override
723  public boolean equals(Object that) {
724    if (this == that) {
725      return true;
726    }
727    if (that == null) {
728      return false;
729    }
730
731    if (!(that instanceof StoreFileInfo)) {
732      return false;
733    }
734
735    StoreFileInfo o = (StoreFileInfo) that;
736    if (initialPath != null && o.initialPath == null) {
737      return false;
738    }
739    if (initialPath == null && o.initialPath != null) {
740      return false;
741    }
742    if (initialPath != o.initialPath && initialPath != null && !initialPath.equals(o.initialPath)) {
743      return false;
744    }
745    if (reference != null && o.reference == null) {
746      return false;
747    }
748    if (reference == null && o.reference != null) {
749      return false;
750    }
751    if (reference != o.reference && reference != null && !reference.equals(o.reference)) {
752      return false;
753    }
754
755    if (link != null && o.link == null) {
756      return false;
757    }
758    if (link == null && o.link != null) {
759      return false;
760    }
761    if (link != o.link && link != null && !link.equals(o.link)) {
762      return false;
763    }
764
765    return true;
766  }
767
768  @Override
769  public int hashCode() {
770    int hash = 17;
771    hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode());
772    hash = hash * 31 + ((initialPath == null) ? 0 : initialPath.hashCode());
773    hash = hash * 31 + ((link == null) ? 0 : link.hashCode());
774    return hash;
775  }
776
777  /**
778   * Return the active file name that contains the real data.
779   * <p>
780   * For referenced hfile, we will return the name of the reference file as it will be used to
781   * construct the StoreFileReader. And for linked hfile, we will return the name of the file being
782   * linked.
783   */
784  public String getActiveFileName() {
785    if (reference != null || link == null) {
786      return initialPath.getName();
787    } else {
788      return HFileLink.getReferencedHFileName(initialPath.getName());
789    }
790  }
791
792  public FileSystem getFileSystem() {
793    return this.fs;
794  }
795
796  boolean isNoReadahead() {
797    return this.noReadahead;
798  }
799
800  public HFileInfo getHFileInfo() {
801    return hfileInfo;
802  }
803
804  void initHDFSBlocksDistribution() throws IOException {
805    hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
806  }
807
808  StoreFileReader preStoreFileReaderOpen(ReaderContext context, CacheConfig cacheConf)
809    throws IOException {
810    StoreFileReader reader = null;
811    if (this.coprocessorHost != null) {
812      reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(),
813        context.getInputStreamWrapper(), context.getFileSize(), cacheConf, reference);
814    }
815    return reader;
816  }
817
818  StoreFileReader postStoreFileReaderOpen(ReaderContext context, CacheConfig cacheConf,
819    StoreFileReader reader) throws IOException {
820    StoreFileReader res = reader;
821    if (this.coprocessorHost != null) {
822      res = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(),
823        context.getInputStreamWrapper(), context.getFileSize(), cacheConf, reference, reader);
824    }
825    return res;
826  }
827
828  public void initHFileInfo(ReaderContext context) throws IOException {
829    this.hfileInfo = new HFileInfo(context, conf);
830  }
831
832  int getRefCount() {
833    return this.refCount.get();
834  }
835
836  int increaseRefCount() {
837    return this.refCount.incrementAndGet();
838  }
839
840  int decreaseRefCount() {
841    return this.refCount.decrementAndGet();
842  }
843
844  public static StoreFileInfo createStoreFileInfoForHFile(final Configuration conf,
845    final FileSystem fs, final Path initialPath, final boolean primaryReplica) throws IOException {
846    if (HFileLink.isHFileLink(initialPath) || isReference(initialPath)) {
847      throw new InvalidHFileException("Path " + initialPath + " is a Hfile link or a Regerence");
848    }
849    StoreFileInfo storeFileInfo =
850      new StoreFileInfo(conf, fs, null, initialPath, primaryReplica, null);
851    return storeFileInfo;
852  }
853
854}