001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.regionserver;
021
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.HalfStoreFileReader;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
042
043/**
044 * Describe a StoreFile (hfile, reference, link)
045 */
046@InterfaceAudience.Private
047public class StoreFileInfo {
048  private static final Logger LOG = LoggerFactory.getLogger(StoreFileInfo.class);
049
050  /**
051   * A non-capture group, for hfiles, so that this can be embedded.
052   * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix.
053   * The mob del file has (_del) as suffix.
054   */
055  public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?";
056
057  /** Regex that will work for hfiles */
058  private static final Pattern HFILE_NAME_PATTERN =
059    Pattern.compile("^(" + HFILE_NAME_REGEX + ")");
060
061  /**
062   * A non-capture group, for del files, so that this can be embedded.
063   * A del file has (_del) as suffix.
064   */
065  public static final String DELFILE_NAME_REGEX = "[0-9a-f]+(?:_del)";
066
067  /** Regex that will work for del files */
068  private static final Pattern DELFILE_NAME_PATTERN =
069    Pattern.compile("^(" + DELFILE_NAME_REGEX + ")");
070
071  /**
072   * Regex that will work for straight reference names ({@code <hfile>.<parentEncRegion>})
073   * and hfilelink reference names ({@code <table>=<region>-<hfile>.<parentEncRegion>})
074   * If reference, then the regex has more than just one group.
075   * Group 1, hfile/hfilelink pattern, is this file's id.
076   * Group 2 '(.+)' is the reference's parent region name.
077   */
078  private static final Pattern REF_NAME_PATTERN =
079    Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
080      HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));
081
082  // Configuration
083  private Configuration conf;
084
085  // FileSystem handle
086  private final FileSystem fs;
087
088  // HDFS blocks distribution information
089  private HDFSBlocksDistribution hdfsBlocksDistribution = null;
090
091  // If this storefile references another, this is the reference instance.
092  private final Reference reference;
093
094  // If this storefile is a link to another, this is the link instance.
095  private final HFileLink link;
096
097  private final Path initialPath;
098
099  private RegionCoprocessorHost coprocessorHost;
100
101  // timestamp on when the file was created, is 0 and ignored for reference or link files
102  private long createdTimestamp;
103
104  /**
105   * Create a Store File Info
106   * @param conf the {@link Configuration} to use
107   * @param fs The current file system to use.
108   * @param initialPath The {@link Path} of the file
109   */
110  public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath)
111      throws IOException {
112    assert fs != null;
113    assert initialPath != null;
114    assert conf != null;
115
116    this.fs = fs;
117    this.conf = conf;
118    this.initialPath = initialPath;
119    Path p = initialPath;
120    if (HFileLink.isHFileLink(p)) {
121      // HFileLink
122      this.reference = null;
123      this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
124      if (LOG.isTraceEnabled()) LOG.trace(p + " is a link");
125    } else if (isReference(p)) {
126      this.reference = Reference.read(fs, p);
127      Path referencePath = getReferredToFile(p);
128      if (HFileLink.isHFileLink(referencePath)) {
129        // HFileLink Reference
130        this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
131      } else {
132        // Reference
133        this.link = null;
134      }
135      if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() +
136              " reference to " + referencePath);
137    } else if (isHFile(p)) {
138      // HFile
139      this.createdTimestamp = fs.getFileStatus(initialPath).getModificationTime();
140      this.reference = null;
141      this.link = null;
142    } else {
143      throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
144    }
145  }
146
147  /**
148   * Create a Store File Info
149   * @param conf the {@link Configuration} to use
150   * @param fs The current file system to use.
151   * @param fileStatus The {@link FileStatus} of the file
152   */
153  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
154      throws IOException {
155    this(conf, fs, fileStatus.getPath());
156  }
157
158  /**
159   * Create a Store File Info from an HFileLink
160   * @param conf the {@link Configuration} to use
161   * @param fs The current file system to use.
162   * @param fileStatus The {@link FileStatus} of the file
163   */
164  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
165      final HFileLink link)
166      throws IOException {
167    this.fs = fs;
168    this.conf = conf;
169    // initialPath can be null only if we get a link.
170    this.initialPath = (fileStatus == null) ? null : fileStatus.getPath();
171      // HFileLink
172    this.reference = null;
173    this.link = link;
174  }
175
176  /**
177   * Create a Store File Info from an HFileLink
178   * @param conf
179   * @param fs
180   * @param fileStatus
181   * @param reference
182   * @throws IOException
183   */
184  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
185      final Reference reference)
186      throws IOException {
187    this.fs = fs;
188    this.conf = conf;
189    this.initialPath = fileStatus.getPath();
190    this.createdTimestamp = fileStatus.getModificationTime();
191    this.reference = reference;
192    this.link = null;
193  }
194
195  /**
196   * Sets the region coprocessor env.
197   * @param coprocessorHost
198   */
199  public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
200    this.coprocessorHost = coprocessorHost;
201  }
202
203  /*
204   * @return the Reference object associated to this StoreFileInfo.
205   *         null if the StoreFile is not a reference.
206   */
207  public Reference getReference() {
208    return this.reference;
209  }
210
211  /** @return True if the store file is a Reference */
212  public boolean isReference() {
213    return this.reference != null;
214  }
215
216  /** @return True if the store file is a top Reference */
217  public boolean isTopReference() {
218    return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
219  }
220
221  /** @return True if the store file is a link */
222  public boolean isLink() {
223    return this.link != null && this.reference == null;
224  }
225
226  /** @return the HDFS block distribution */
227  public HDFSBlocksDistribution getHDFSBlockDistribution() {
228    return this.hdfsBlocksDistribution;
229  }
230
231  /**
232   * Open a Reader for the StoreFile
233   * @param fs The current file system to use.
234   * @param cacheConf The cache configuration and block cache reference.
235   * @return The StoreFile.Reader for the file
236   */
237  public StoreFileReader open(FileSystem fs, CacheConfig cacheConf, boolean canUseDropBehind,
238      long readahead, boolean isPrimaryReplicaStoreFile, AtomicInteger refCount, boolean shared)
239      throws IOException {
240    FSDataInputStreamWrapper in;
241    FileStatus status;
242
243    final boolean doDropBehind = canUseDropBehind && cacheConf.shouldDropBehindCompaction();
244    if (this.link != null) {
245      // HFileLink
246      in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind, readahead);
247      status = this.link.getFileStatus(fs);
248    } else if (this.reference != null) {
249      // HFile Reference
250      Path referencePath = getReferredToFile(this.getPath());
251      in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead);
252      status = fs.getFileStatus(referencePath);
253    } else {
254      in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead);
255      status = fs.getFileStatus(initialPath);
256    }
257    long length = status.getLen();
258    hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
259
260    StoreFileReader reader = null;
261    if (this.coprocessorHost != null) {
262      reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length,
263        cacheConf, reference);
264    }
265    if (reader == null) {
266      if (this.reference != null) {
267        reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference,
268            isPrimaryReplicaStoreFile, refCount, shared, conf);
269      } else {
270        reader = new StoreFileReader(fs, status.getPath(), in, length, cacheConf,
271            isPrimaryReplicaStoreFile, refCount, shared, conf);
272      }
273    }
274    if (this.coprocessorHost != null) {
275      reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length,
276        cacheConf, reference, reader);
277    }
278    return reader;
279  }
280
281  /**
282   * Compute the HDFS Block Distribution for this StoreFile
283   */
284  public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
285      throws IOException {
286    // guard against the case where we get the FileStatus from link, but by the time we
287    // call compute the file is moved again
288    if (this.link != null) {
289      FileNotFoundException exToThrow = null;
290      for (int i = 0; i < this.link.getLocations().length; i++) {
291        try {
292          return computeHDFSBlocksDistributionInternal(fs);
293        } catch (FileNotFoundException ex) {
294          // try the other location
295          exToThrow = ex;
296        }
297      }
298      throw exToThrow;
299    } else {
300      return computeHDFSBlocksDistributionInternal(fs);
301    }
302  }
303
304  private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
305      throws IOException {
306    FileStatus status = getReferencedFileStatus(fs);
307    if (this.reference != null) {
308      return computeRefFileHDFSBlockDistribution(fs, reference, status);
309    } else {
310      return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
311    }
312  }
313
314  /**
315   * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
316   * @param fs The current file system to use.
317   * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
318   */
319  public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
320    FileStatus status;
321    if (this.reference != null) {
322      if (this.link != null) {
323        FileNotFoundException exToThrow = null;
324        for (int i = 0; i < this.link.getLocations().length; i++) {
325          // HFileLink Reference
326          try {
327            return link.getFileStatus(fs);
328          } catch (FileNotFoundException ex) {
329            // try the other location
330            exToThrow = ex;
331          }
332        }
333        throw exToThrow;
334      } else {
335        // HFile Reference
336        Path referencePath = getReferredToFile(this.getPath());
337        status = fs.getFileStatus(referencePath);
338      }
339    } else {
340      if (this.link != null) {
341        FileNotFoundException exToThrow = null;
342        for (int i = 0; i < this.link.getLocations().length; i++) {
343          // HFileLink
344          try {
345            return link.getFileStatus(fs);
346          } catch (FileNotFoundException ex) {
347            // try the other location
348            exToThrow = ex;
349          }
350        }
351        throw exToThrow;
352      } else {
353        status = fs.getFileStatus(initialPath);
354      }
355    }
356    return status;
357  }
358
359  /** @return The {@link Path} of the file */
360  public Path getPath() {
361    return initialPath;
362  }
363
364  /** @return The {@link FileStatus} of the file */
365  public FileStatus getFileStatus() throws IOException {
366    return getReferencedFileStatus(fs);
367  }
368
369  /** @return Get the modification time of the file. */
370  public long getModificationTime() throws IOException {
371    return getFileStatus().getModificationTime();
372  }
373
374  @Override
375  public String toString() {
376    return this.getPath() +
377      (isReference() ? "->" + getReferredToFile(this.getPath()) + "-" + reference : "");
378  }
379
380  /**
381   * @param path Path to check.
382   * @return True if the path has format of a HFile.
383   */
384  public static boolean isHFile(final Path path) {
385    return isHFile(path.getName());
386  }
387
388  public static boolean isHFile(final String fileName) {
389    Matcher m = HFILE_NAME_PATTERN.matcher(fileName);
390    return m.matches() && m.groupCount() > 0;
391  }
392
393  /**
394   * @param path Path to check.
395   * @return True if the path has format of a del file.
396   */
397  public static boolean isDelFile(final Path path) {
398    return isDelFile(path.getName());
399  }
400
401  /**
402   * @param fileName Sting version of path to validate.
403   * @return True if the file name has format of a del file.
404   */
405  public static boolean isDelFile(final String fileName) {
406    Matcher m = DELFILE_NAME_PATTERN.matcher(fileName);
407    return m.matches() && m.groupCount() > 0;
408  }
409
410  /**
411   * @param path Path to check.
412   * @return True if the path has format of a HStoreFile reference.
413   */
414  public static boolean isReference(final Path path) {
415    return isReference(path.getName());
416  }
417
418  /**
419   * @param name file name to check.
420   * @return True if the path has format of a HStoreFile reference.
421   */
422  public static boolean isReference(final String name) {
423    Matcher m = REF_NAME_PATTERN.matcher(name);
424    return m.matches() && m.groupCount() > 1;
425  }
426
427  /**
428   * @return timestamp when this file was created (as returned by filesystem)
429   */
430  public long getCreatedTimestamp() {
431    return createdTimestamp;
432  }
433
434  /*
435   * Return path to the file referred to by a Reference.  Presumes a directory
436   * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
437   * @param p Path to a Reference file.
438   * @return Calculated path to parent region file.
439   * @throws IllegalArgumentException when path regex fails to match.
440   */
441  public static Path getReferredToFile(final Path p) {
442    Matcher m = REF_NAME_PATTERN.matcher(p.getName());
443    if (m == null || !m.matches()) {
444      LOG.warn("Failed match of store file name " + p.toString());
445      throw new IllegalArgumentException("Failed match of store file name " +
446          p.toString());
447    }
448
449    // Other region name is suffix on the passed Reference file name
450    String otherRegion = m.group(2);
451    // Tabledir is up two directories from where Reference was written.
452    Path tableDir = p.getParent().getParent().getParent();
453    String nameStrippedOfSuffix = m.group(1);
454    if (LOG.isTraceEnabled()) {
455      LOG.trace("reference '" + p + "' to region=" + otherRegion
456        + " hfile=" + nameStrippedOfSuffix);
457    }
458
459    // Build up new path with the referenced region in place of our current
460    // region in the reference path.  Also strip regionname suffix from name.
461    return new Path(new Path(new Path(tableDir, otherRegion),
462      p.getParent().getName()), nameStrippedOfSuffix);
463  }
464
465  /**
466   * Validate the store file name.
467   * @param fileName name of the file to validate
468   * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
469   */
470  public static boolean validateStoreFileName(final String fileName) {
471    if (HFileLink.isHFileLink(fileName) || isReference(fileName))
472      return(true);
473    return !fileName.contains("-");
474  }
475
476  /**
477   * Return if the specified file is a valid store file or not.
478   * @param fileStatus The {@link FileStatus} of the file
479   * @return <tt>true</tt> if the file is valid
480   */
481  public static boolean isValid(final FileStatus fileStatus)
482      throws IOException {
483    final Path p = fileStatus.getPath();
484
485    if (fileStatus.isDirectory())
486      return false;
487
488    // Check for empty hfile. Should never be the case but can happen
489    // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
490    // NOTE: that the HFileLink is just a name, so it's an empty file.
491    if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
492      LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?");
493      return false;
494    }
495
496    return validateStoreFileName(p.getName());
497  }
498
499  /**
500   * helper function to compute HDFS blocks distribution of a given reference
501   * file.For reference file, we don't compute the exact value. We use some
502   * estimate instead given it might be good enough. we assume bottom part
503   * takes the first half of reference file, top part takes the second half
504   * of the reference file. This is just estimate, given
505   * midkey ofregion != midkey of HFile, also the number and size of keys vary.
506   * If this estimate isn't good enough, we can improve it later.
507   * @param fs  The FileSystem
508   * @param reference  The reference
509   * @param status  The reference FileStatus
510   * @return HDFS blocks distribution
511   */
512  private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
513      final FileSystem fs, final Reference reference, final FileStatus status)
514      throws IOException {
515    if (status == null) {
516      return null;
517    }
518
519    long start = 0;
520    long length = 0;
521
522    if (Reference.isTopFileRegion(reference.getFileRegion())) {
523      start = status.getLen()/2;
524      length = status.getLen() - status.getLen()/2;
525    } else {
526      start = 0;
527      length = status.getLen()/2;
528    }
529    return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
530  }
531
532  @Override
533  public boolean equals(Object that) {
534    if (this == that) return true;
535    if (that == null) return false;
536
537    if (!(that instanceof StoreFileInfo)) return false;
538
539    StoreFileInfo o = (StoreFileInfo)that;
540    if (initialPath != null && o.initialPath == null) return false;
541    if (initialPath == null && o.initialPath != null) return false;
542    if (initialPath != o.initialPath && initialPath != null
543            && !initialPath.equals(o.initialPath)) return false;
544
545    if (reference != null && o.reference == null) return false;
546    if (reference == null && o.reference != null) return false;
547    if (reference != o.reference && reference != null
548            && !reference.equals(o.reference)) return false;
549
550    if (link != null && o.link == null) return false;
551    if (link == null && o.link != null) return false;
552    if (link != o.link && link != null && !link.equals(o.link)) return false;
553
554    return true;
555  };
556
557
558  @Override
559  public int hashCode() {
560    int hash = 17;
561    hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode());
562    hash = hash * 31 + ((initialPath ==  null) ? 0 : initialPath.hashCode());
563    hash = hash * 31 + ((link == null) ? 0 : link.hashCode());
564    return  hash;
565  }
566
567  /**
568   * Return the active file name that contains the real data.
569   * <p>
570   * For referenced hfile, we will return the name of the reference file as it will be used to
571   * construct the StoreFileReader. And for linked hfile, we will return the name of the file being
572   * linked.
573   */
574  public String getActiveFileName() {
575    if (reference != null || link == null) {
576      return initialPath.getName();
577    } else {
578      return HFileLink.getReferencedHFileName(initialPath.getName());
579    }
580  }
581}