001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io;
019
020import java.io.IOException;
021import java.util.regex.Matcher;
022import java.util.regex.Pattern;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.fs.FileSystem;
025import org.apache.hadoop.fs.Path;
026import org.apache.hadoop.hbase.HConstants;
027import org.apache.hadoop.hbase.TableName;
028import org.apache.hadoop.hbase.client.RegionInfo;
029import org.apache.hadoop.hbase.client.RegionInfoBuilder;
030import org.apache.hadoop.hbase.mob.MobConstants;
031import org.apache.hadoop.hbase.regionserver.HRegion;
032import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
033import org.apache.hadoop.hbase.util.CommonFSUtils;
034import org.apache.hadoop.hbase.util.HFileArchiveUtil;
035import org.apache.hadoop.hbase.util.Pair;
036import org.apache.yetus.audience.InterfaceAudience;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
/**
 * HFileLink describes a link to an hfile. An hfile can be served from a region or from the hfile
 * archive directory (/hbase/.archive). HFileLink allows access to the referenced hfile regardless
 * of the location where it currently is.
 * <p>
 * Searches for hfiles in the following order and locations:
 * <ul>
 * <li>/hbase/table/region/cf/hfile</li>
 * <li>/hbase/.archive/table/region/cf/hfile</li>
 * </ul>
 * The link first checks the original path; if the hfile is not present there, it falls back to
 * the archived path.
 */
053@InterfaceAudience.Private
054@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "EQ_DOESNT_OVERRIDE_EQUALS",
055    justification = "To be fixed but warning suppressed for now")
056public class HFileLink extends FileLink {
  private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class);

  /**
   * A regular expression for HFileLink names that adds no capture group of its own, so that it can
   * be embedded in a larger expression. An HFileLink describes a link to an hfile in a different
   * table/region and its name has the form: table=region-hfile, optionally prefixed with
   * "namespace=" (i.e. namespace=table=region-hfile).
   * <p>
   * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid
   * character for the table name. Region name is ([a-f0-9]+), so '-' is an invalid character for
   * the region name. HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) and
   * the bulk loaded (_SeqId_[0-9]+_) hfiles.
   * <p>
   * Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is table name and
   * '4567' is region name and 'abcd' is filename.
   */
  public static final String LINK_NAME_REGEX = String.format("(?:(?:%s=)?)%s=%s-%s",
    TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
    RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);

  /**
   * Anchored parser for a complete HFileLink name in the form: table=region-hfile, with an
   * optional leading "namespace=". The parsing methods in this class read group 1 as the optional
   * namespace (null when absent), group 2 as the table qualifier, group 3 as the encoded region
   * name and group 4 as the hfile name.
   */
  public static final Pattern LINK_NAME_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));

  /**
   * The pattern should be used for hfile and reference links that can be found in
   * /hbase/table/region/family/
   * <p>
   * It uses the same group layout as {@link #LINK_NAME_PATTERN}, except that the last group
   * accepts any non-empty trailing text ({@code .+}) instead of a strict hfile name.
   */
  private static final Pattern REF_OR_HFILE_LINK_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$", TableName.VALID_NAMESPACE_REGEX,
      TableName.VALID_TABLE_QUALIFIER_REGEX, RegionInfoBuilder.ENCODED_REGION_NAME_REGEX));

  // The four candidate locations handed to setLocations(...) by the constructor.
  /** Path of the archived hfile. */
  private final Path archivePath;
  /** Origin path of the hfile. */
  private final Path originPath;
  /** Path of the hfile under the mob directory. */
  private final Path mobPath;
  /** Path of the hfile under the temp directory; no getter is exposed for it in this class. */
  private final Path tempPath;
093
094  /**
095   * Dead simple hfile link constructor
096   */
097  public HFileLink(final Path originPath, final Path tempPath, final Path mobPath,
098    final Path archivePath) {
099    this.tempPath = tempPath;
100    this.originPath = originPath;
101    this.mobPath = mobPath;
102    this.archivePath = archivePath;
103    setLocations(originPath, tempPath, mobPath, archivePath);
104  }
105
106  /**
107   * @param conf             {@link Configuration} from which to extract specific archive locations
108   * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
109   * @throws IOException on unexpected error.
110   */
111  public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
112    throws IOException {
113    return buildFromHFileLinkPattern(CommonFSUtils.getRootDir(conf),
114      HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
115  }
116
117  /**
118   * @param rootDir          Path to the root directory where hbase files are stored
119   * @param archiveDir       Path to the hbase archive directory
120   * @param hFileLinkPattern The path of the HFile Link.
121   */
122  public final static HFileLink buildFromHFileLinkPattern(final Path rootDir, final Path archiveDir,
123    final Path hFileLinkPattern) {
124    Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
125    Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
126    Path originPath = new Path(rootDir, hfilePath);
127    Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath);
128    Path archivePath = new Path(archiveDir, hfilePath);
129    return new HFileLink(originPath, tempPath, mobPath, archivePath);
130  }
131
132  /**
133   * Create an HFileLink relative path for the table/region/family/hfile location
134   * @param table  Table name
135   * @param region Region Name
136   * @param family Family Name
137   * @param hfile  HFile Name
138   * @return the relative Path to open the specified table/region/family/hfile link
139   */
140  public static Path createPath(final TableName table, final String region, final String family,
141    final String hfile) {
142    if (HFileLink.isHFileLink(hfile)) {
143      return new Path(family, hfile);
144    }
145    return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
146  }
147
148  /**
149   * Create an HFileLink instance from table/region/family/hfile location
150   * @param conf   {@link Configuration} from which to extract specific archive locations
151   * @param table  Table name
152   * @param region Region Name
153   * @param family Family Name
154   * @param hfile  HFile Name
155   * @return Link to the file with the specified table/region/family/hfile location
156   * @throws IOException on unexpected error.
157   */
158  public static HFileLink build(final Configuration conf, final TableName table,
159    final String region, final String family, final String hfile) throws IOException {
160    return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
161  }
162
163  /** Returns the origin path of the hfile. */
164  public Path getOriginPath() {
165    return this.originPath;
166  }
167
168  /** Returns the path of the archived hfile. */
169  public Path getArchivePath() {
170    return this.archivePath;
171  }
172
173  /** Returns the path of the mob hfiles. */
174  public Path getMobPath() {
175    return this.mobPath;
176  }
177
178  /**
179   * @param path Path to check.
180   * @return True if the path is a HFileLink.
181   */
182  public static boolean isHFileLink(final Path path) {
183    return isHFileLink(path.getName());
184  }
185
186  /**
187   * @param fileName File name to check.
188   * @return True if the path is a HFileLink.
189   */
190  public static boolean isHFileLink(String fileName) {
191    Matcher m = LINK_NAME_PATTERN.matcher(fileName);
192    if (!m.matches()) {
193      return false;
194    }
195    return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
196  }
197
198  /**
199   * Convert a HFileLink path to a table relative path. e.g. the link:
200   * /hbase/test/0123/cf/testtb=4567-abcd becomes: /hbase/testtb/4567/cf/abcd
201   * @param path HFileLink path
202   * @return Relative table path
203   * @throws IOException on unexpected error.
204   */
205  private static Path getHFileLinkPatternRelativePath(final Path path) {
206    // table=region-hfile
207    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
208    if (!m.matches()) {
209      throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
210    }
211
212    // Convert the HFileLink name into a real table/region/cf/hfile path.
213    TableName tableName = TableName.valueOf(m.group(1), m.group(2));
214    String regionName = m.group(3);
215    String hfileName = m.group(4);
216    String familyName = path.getParent().getName();
217    Path tableDir = CommonFSUtils.getTableDir(new Path("./"), tableName);
218    return new Path(tableDir, new Path(regionName, new Path(familyName, hfileName)));
219  }
220
221  /**
222   * Get the HFile name of the referenced link
223   * @param fileName HFileLink file name
224   * @return the name of the referenced HFile
225   */
226  public static String getReferencedHFileName(final String fileName) {
227    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
228    if (!m.matches()) {
229      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
230    }
231    return (m.group(4));
232  }
233
234  /**
235   * Get the Region name of the referenced link
236   * @param fileName HFileLink file name
237   * @return the name of the referenced Region
238   */
239  public static String getReferencedRegionName(final String fileName) {
240    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
241    if (!m.matches()) {
242      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
243    }
244    return (m.group(3));
245  }
246
247  /**
248   * Get the Table name of the referenced link
249   * @param fileName HFileLink file name
250   * @return the name of the referenced Table
251   */
252  public static TableName getReferencedTableName(final String fileName) {
253    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
254    if (!m.matches()) {
255      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
256    }
257    return (TableName.valueOf(m.group(1), m.group(2)));
258  }
259
260  /**
261   * Create a new HFileLink name
262   * @param hfileRegionInfo - Linked HFile Region Info
263   * @param hfileName       - Linked HFile name
264   * @return file name of the HFile Link
265   */
266  public static String createHFileLinkName(final RegionInfo hfileRegionInfo,
267    final String hfileName) {
268    return createHFileLinkName(hfileRegionInfo.getTable(), hfileRegionInfo.getEncodedName(),
269      hfileName);
270  }
271
272  /**
273   * Create a new HFileLink name
274   * @param tableName  - Linked HFile table name
275   * @param regionName - Linked HFile region name
276   * @param hfileName  - Linked HFile name
277   * @return file name of the HFile Link
278   */
279  public static String createHFileLinkName(final TableName tableName, final String regionName,
280    final String hfileName) {
281    String s = String.format("%s=%s-%s",
282      tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='), regionName, hfileName);
283    return s;
284  }
285
286  /**
287   * Create a new HFileLink
288   * <p>
289   * It also adds a back-reference to the hfile back-reference directory to simplify the
290   * reference-count and the cleaning process.
291   * @param conf            {@link Configuration} to read for the archive directory name
292   * @param fs              {@link FileSystem} on which to write the HFileLink
293   * @param dstFamilyPath   - Destination path (table/region/cf/)
294   * @param hfileRegionInfo - Linked HFile Region Info
295   * @param hfileName       - Linked HFile name
296   * @return the file link name.
297   * @throws IOException on file or parent directory creation failure.
298   */
299  public static String create(final Configuration conf, final FileSystem fs,
300    final Path dstFamilyPath, final RegionInfo hfileRegionInfo, final String hfileName)
301    throws IOException {
302    return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true);
303  }
304
305  /**
306   * Create a new HFileLink
307   * <p>
308   * It also adds a back-reference to the hfile back-reference directory to simplify the
309   * reference-count and the cleaning process.
310   * @param conf            {@link Configuration} to read for the archive directory name
311   * @param fs              {@link FileSystem} on which to write the HFileLink
312   * @param dstFamilyPath   - Destination path (table/region/cf/)
313   * @param hfileRegionInfo - Linked HFile Region Info
314   * @param hfileName       - Linked HFile name
315   * @param createBackRef   - Whether back reference should be created. Defaults to true.
316   * @return the file link name.
317   * @throws IOException on file or parent directory creation failure.
318   */
319  public static String create(final Configuration conf, final FileSystem fs,
320    final Path dstFamilyPath, final RegionInfo hfileRegionInfo, final String hfileName,
321    final boolean createBackRef) throws IOException {
322    TableName linkedTable = hfileRegionInfo.getTable();
323    String linkedRegion = hfileRegionInfo.getEncodedName();
324    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef);
325  }
326
327  /**
328   * Create a new HFileLink
329   * <p>
330   * It also adds a back-reference to the hfile back-reference directory to simplify the
331   * reference-count and the cleaning process.
332   * @param conf          {@link Configuration} to read for the archive directory name
333   * @param fs            {@link FileSystem} on which to write the HFileLink
334   * @param dstFamilyPath - Destination path (table/region/cf/)
335   * @param linkedTable   - Linked Table Name
336   * @param linkedRegion  - Linked Region Name
337   * @param hfileName     - Linked HFile name
338   * @return the file link name.
339   * @throws IOException on file or parent directory creation failure.
340   */
341  public static String create(final Configuration conf, final FileSystem fs,
342    final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
343    final String hfileName) throws IOException {
344    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true);
345  }
346
347  /**
348   * Create a new HFileLink. In the event of link creation failure, this method throws an
349   * IOException, so that the calling upper laying can decide on how to proceed with this.
350   * <p>
351   * It also adds a back-reference to the hfile back-reference directory to simplify the
352   * reference-count and the cleaning process.
353   * @param conf          {@link Configuration} to read for the archive directory name
354   * @param fs            {@link FileSystem} on which to write the HFileLink
355   * @param dstFamilyPath - Destination path (table/region/cf/)
356   * @param linkedTable   - Linked Table Name
357   * @param linkedRegion  - Linked Region Name
358   * @param hfileName     - Linked HFile name
359   * @param createBackRef - Whether back reference should be created. Defaults to true.
360   * @return the file link name.
361   * @throws IOException on file or parent directory creation failure.
362   */
363  public static String create(final Configuration conf, final FileSystem fs,
364    final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
365    final String hfileName, final boolean createBackRef) throws IOException {
366    String familyName = dstFamilyPath.getName();
367    String regionName = dstFamilyPath.getParent().getName();
368    String tableName =
369      CommonFSUtils.getTableName(dstFamilyPath.getParent().getParent()).getNameAsString();
370
371    return create(conf, fs, dstFamilyPath, familyName, tableName, regionName, linkedTable,
372      linkedRegion, hfileName, createBackRef);
373  }
374
375  /**
376   * Create a new HFileLink
377   * <p>
378   * It also adds a back-reference to the hfile back-reference directory to simplify the
379   * reference-count and the cleaning process.
380   * @param conf          {@link Configuration} to read for the archive directory name
381   * @param fs            {@link FileSystem} on which to write the HFileLink
382   * @param dstFamilyPath - Destination path (table/region/cf/)
383   * @param dstTableName  - Destination table name
384   * @param dstRegionName - Destination region name
385   * @param linkedTable   - Linked Table Name
386   * @param linkedRegion  - Linked Region Name
387   * @param hfileName     - Linked HFile name
388   * @param createBackRef - Whether back reference should be created. Defaults to true.
389   * @return the file link name.
390   * @throws IOException on file or parent directory creation failure
391   */
392  public static String create(final Configuration conf, final FileSystem fs,
393    final Path dstFamilyPath, final String familyName, final String dstTableName,
394    final String dstRegionName, final TableName linkedTable, final String linkedRegion,
395    final String hfileName, final boolean createBackRef) throws IOException {
396    String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
397    String refName = createBackReferenceName(dstTableName, dstRegionName);
398
399    // Make sure the destination directory exists
400    fs.mkdirs(dstFamilyPath);
401
402    // Make sure the FileLink reference directory exists
403    Path archiveStoreDir =
404      HFileArchiveUtil.getStoreArchivePath(conf, linkedTable, linkedRegion, familyName);
405    Path backRefPath = null;
406    if (createBackRef) {
407      Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
408      fs.mkdirs(backRefssDir);
409
410      // Create the reference for the link
411      backRefPath = new Path(backRefssDir, refName);
412      fs.createNewFile(backRefPath);
413    }
414    try {
415      // Create the link
416      if (fs.createNewFile(new Path(dstFamilyPath, name))) {
417        return name;
418      }
419    } catch (IOException e) {
420      LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
421      // Revert the reference if the link creation failed
422      if (createBackRef) {
423        fs.delete(backRefPath, false);
424      }
425      throw e;
426    }
427    throw new IOException(
428      "File link=" + name + " already exists under " + dstFamilyPath + " folder.");
429  }
430
431  /**
432   * Create a new HFileLink starting from a hfileLink name
433   * <p>
434   * It also adds a back-reference to the hfile back-reference directory to simplify the
435   * reference-count and the cleaning process.
436   * @param conf          {@link Configuration} to read for the archive directory name
437   * @param fs            {@link FileSystem} on which to write the HFileLink
438   * @param dstFamilyPath - Destination path (table/region/cf/)
439   * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
440   * @param createBackRef - Whether back reference should be created. Defaults to true.
441   * @return the file link name.
442   * @throws IOException on file or parent directory creation failure.
443   */
444  public static String createFromHFileLink(final Configuration conf, final FileSystem fs,
445    final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef)
446    throws IOException {
447    Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
448    if (!m.matches()) {
449      throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
450    }
451    return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)), m.group(3),
452      m.group(4), createBackRef);
453  }
454
455  /**
456   * Create the back reference name
457   */
458  // package-private for testing
459  static String createBackReferenceName(final String tableNameStr, final String regionName) {
460
461    return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
462  }
463
464  /**
465   * Get the full path of the HFile referenced by the back reference
466   * @param rootDir     root hbase directory
467   * @param linkRefPath Link Back Reference path
468   * @return full path of the referenced hfile
469   */
470  public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
471    Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
472    TableName linkTableName = p.getFirst();
473    String linkRegionName = p.getSecond();
474
475    String hfileName = getBackReferenceFileName(linkRefPath.getParent());
476    Path familyPath = linkRefPath.getParent().getParent();
477    Path regionPath = familyPath.getParent();
478    Path tablePath = regionPath.getParent();
479
480    String linkName =
481      createHFileLinkName(CommonFSUtils.getTableName(tablePath), regionPath.getName(), hfileName);
482    Path linkTableDir = CommonFSUtils.getTableDir(rootDir, linkTableName);
483    Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
484    return new Path(new Path(regionDir, familyPath.getName()), linkName);
485  }
486
487  public static Pair<TableName, String> parseBackReferenceName(String name) {
488    int separatorIndex = name.indexOf('.');
489    String linkRegionName = name.substring(0, separatorIndex);
490    String tableSubstr = name.substring(separatorIndex + 1).replace('=', TableName.NAMESPACE_DELIM);
491    TableName linkTableName = TableName.valueOf(tableSubstr);
492    return new Pair<>(linkTableName, linkRegionName);
493  }
494
495  /**
496   * Get the full path of the HFile referenced by the back reference
497   * @param conf        {@link Configuration} to read for the archive directory name
498   * @param linkRefPath Link Back Reference path
499   * @return full path of the referenced hfile
500   * @throws IOException on unexpected error.
501   */
502  public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
503    throws IOException {
504    return getHFileFromBackReference(CommonFSUtils.getRootDir(conf), linkRefPath);
505  }
506
507}