001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.io;
020
021import java.io.IOException;
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024import org.apache.hadoop.conf.Configuration;
025import org.apache.hadoop.fs.FileSystem;
026import org.apache.hadoop.fs.Path;
027import org.apache.hadoop.hbase.HConstants;
028import org.apache.hadoop.hbase.TableName;
029import org.apache.hadoop.hbase.client.RegionInfo;
030import org.apache.hadoop.hbase.client.RegionInfoBuilder;
031import org.apache.hadoop.hbase.mob.MobConstants;
032import org.apache.hadoop.hbase.regionserver.HRegion;
033import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
034import org.apache.hadoop.hbase.util.CommonFSUtils;
035import org.apache.hadoop.hbase.util.HFileArchiveUtil;
036import org.apache.hadoop.hbase.util.Pair;
037import org.apache.yetus.audience.InterfaceAudience;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041/**
042 * HFileLink describes a link to an hfile.
043 *
044 * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive)
045 * HFileLink allows to access the referenced hfile regardless of the location where it is.
046 *
047 * <p>Searches for hfiles in the following order and locations:
048 * <ul>
049 *  <li>/hbase/table/region/cf/hfile</li>
050 *  <li>/hbase/.archive/table/region/cf/hfile</li>
051 * </ul>
052 *
053 * The link checks first in the original path if it is not present
054 * it fallbacks to the archived path.
055 */
056@InterfaceAudience.Private
057@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_DOESNT_OVERRIDE_EQUALS",
058  justification="To be fixed but warning suppressed for now")
059public class HFileLink extends FileLink {
060  private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class);
061
062  /**
063   * A non-capture group, for HFileLink, so that this can be embedded.
064   * The HFileLink describe a link to an hfile in a different table/region
065   * and the name is in the form: table=region-hfile.
066   * <p>
067   * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid
068   * character for the table name.
069   * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
070   * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
071   * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
072   *
073   * <p>Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is table name
074   * and '4567' is region name and 'abcd' is filename.
075   */
076  public static final String LINK_NAME_REGEX =
077    String.format("(?:(?:%s=)?)%s=%s-%s",
078      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
079        RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);
080
081  /** Define the HFile Link name parser in the form of: table=region-hfile */
082  //made package private for testing
083  static final Pattern LINK_NAME_PATTERN =
084    Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
085      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
086      RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
087
088  /**
089   * The pattern should be used for hfile and reference links
090   * that can be found in /hbase/table/region/family/
091   */
092  private static final Pattern REF_OR_HFILE_LINK_PATTERN =
093    Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$",
094      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
095        RegionInfoBuilder.ENCODED_REGION_NAME_REGEX));
096
097  private final Path archivePath;
098  private final Path originPath;
099  private final Path mobPath;
100  private final Path tempPath;
101
102  /**
103   * Dead simple hfile link constructor
104   */
105  public HFileLink(final Path originPath, final Path tempPath, final Path mobPath,
106                   final Path archivePath) {
107    this.tempPath = tempPath;
108    this.originPath = originPath;
109    this.mobPath = mobPath;
110    this.archivePath = archivePath;
111    setLocations(originPath, tempPath, mobPath, archivePath);
112  }
113
114
115  /**
116   * @param conf {@link Configuration} from which to extract specific archive locations
117   * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
118   * @throws IOException on unexpected error.
119   */
120  public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
121          throws IOException {
122    return buildFromHFileLinkPattern(CommonFSUtils.getRootDir(conf),
123            HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
124  }
125
126
127
128  /**
129   * @param rootDir Path to the root directory where hbase files are stored
130   * @param archiveDir Path to the hbase archive directory
131   * @param hFileLinkPattern The path of the HFile Link.
132   */
133  public final static HFileLink buildFromHFileLinkPattern(final Path rootDir,
134                                                          final Path archiveDir,
135                                                          final Path hFileLinkPattern) {
136    Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
137    Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
138    Path originPath = new Path(rootDir, hfilePath);
139    Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath);
140    Path archivePath = new Path(archiveDir, hfilePath);
141    return new HFileLink(originPath, tempPath, mobPath, archivePath);
142  }
143
144  /**
145   * Create an HFileLink relative path for the table/region/family/hfile location
146   * @param table Table name
147   * @param region Region Name
148   * @param family Family Name
149   * @param hfile HFile Name
150   * @return the relative Path to open the specified table/region/family/hfile link
151   */
152  public static Path createPath(final TableName table, final String region,
153                                final String family, final String hfile) {
154    if (HFileLink.isHFileLink(hfile)) {
155      return new Path(family, hfile);
156    }
157    return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
158  }
159
160  /**
161   * Create an HFileLink instance from table/region/family/hfile location
162   * @param conf {@link Configuration} from which to extract specific archive locations
163   * @param table Table name
164   * @param region Region Name
165   * @param family Family Name
166   * @param hfile HFile Name
167   * @return Link to the file with the specified table/region/family/hfile location
168   * @throws IOException on unexpected error.
169   */
170  public static HFileLink build(final Configuration conf, final TableName table,
171                                 final String region, final String family, final String hfile)
172          throws IOException {
173    return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
174  }
175
176  /**
177   * @return the origin path of the hfile.
178   */
179  public Path getOriginPath() {
180    return this.originPath;
181  }
182
183  /**
184   * @return the path of the archived hfile.
185   */
186  public Path getArchivePath() {
187    return this.archivePath;
188  }
189
190  /**
191   * @return the path of the mob hfiles.
192   */
193  public Path getMobPath() {
194    return this.mobPath;
195  }
196
197    /**
198   * @param path Path to check.
199   * @return True if the path is a HFileLink.
200   */
201  public static boolean isHFileLink(final Path path) {
202    return isHFileLink(path.getName());
203  }
204
205
206  /**
207   * @param fileName File name to check.
208   * @return True if the path is a HFileLink.
209   */
210  public static boolean isHFileLink(String fileName) {
211    Matcher m = LINK_NAME_PATTERN.matcher(fileName);
212    if (!m.matches()) {
213      return false;
214    }
215    return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
216  }
217
218  /**
219   * Convert a HFileLink path to a table relative path.
220   * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
221   *      becomes: /hbase/testtb/4567/cf/abcd
222   *
223   * @param path HFileLink path
224   * @return Relative table path
225   * @throws IOException on unexpected error.
226   */
227  private static Path getHFileLinkPatternRelativePath(final Path path) {
228    // table=region-hfile
229    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
230    if (!m.matches()) {
231      throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
232    }
233
234    // Convert the HFileLink name into a real table/region/cf/hfile path.
235    TableName tableName = TableName.valueOf(m.group(1), m.group(2));
236    String regionName = m.group(3);
237    String hfileName = m.group(4);
238    String familyName = path.getParent().getName();
239    Path tableDir = CommonFSUtils.getTableDir(new Path("./"), tableName);
240    return new Path(tableDir, new Path(regionName, new Path(familyName,
241        hfileName)));
242  }
243
244  /**
245   * Get the HFile name of the referenced link
246   *
247   * @param fileName HFileLink file name
248   * @return the name of the referenced HFile
249   */
250  public static String getReferencedHFileName(final String fileName) {
251    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
252    if (!m.matches()) {
253      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
254    }
255    return(m.group(4));
256  }
257
258  /**
259   * Get the Region name of the referenced link
260   *
261   * @param fileName HFileLink file name
262   * @return the name of the referenced Region
263   */
264  public static String getReferencedRegionName(final String fileName) {
265    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
266    if (!m.matches()) {
267      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
268    }
269    return(m.group(3));
270  }
271
272  /**
273   * Get the Table name of the referenced link
274   *
275   * @param fileName HFileLink file name
276   * @return the name of the referenced Table
277   */
278  public static TableName getReferencedTableName(final String fileName) {
279    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
280    if (!m.matches()) {
281      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
282    }
283    return(TableName.valueOf(m.group(1), m.group(2)));
284  }
285
286  /**
287   * Create a new HFileLink name
288   *
289   * @param hfileRegionInfo - Linked HFile Region Info
290   * @param hfileName - Linked HFile name
291   * @return file name of the HFile Link
292   */
293  public static String createHFileLinkName(final RegionInfo hfileRegionInfo,
294      final String hfileName) {
295    return createHFileLinkName(hfileRegionInfo.getTable(),
296            hfileRegionInfo.getEncodedName(), hfileName);
297  }
298
299  /**
300   * Create a new HFileLink name
301   *
302   * @param tableName - Linked HFile table name
303   * @param regionName - Linked HFile region name
304   * @param hfileName - Linked HFile name
305   * @return file name of the HFile Link
306   */
307  public static String createHFileLinkName(final TableName tableName,
308      final String regionName, final String hfileName) {
309    String s = String.format("%s=%s-%s",
310        tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
311        regionName, hfileName);
312    return s;
313  }
314
315  /**
316   * Create a new HFileLink
317   *
318   * <p>It also adds a back-reference to the hfile back-reference directory
319   * to simplify the reference-count and the cleaning process.
320   *
321   * @param conf {@link Configuration} to read for the archive directory name
322   * @param fs {@link FileSystem} on which to write the HFileLink
323   * @param dstFamilyPath - Destination path (table/region/cf/)
324   * @param hfileRegionInfo - Linked HFile Region Info
325   * @param hfileName - Linked HFile name
326   * @return true if the file is created, otherwise the file exists.
327   * @throws IOException on file or parent directory creation failure
328   */
329  public static boolean create(final Configuration conf, final FileSystem fs,
330      final Path dstFamilyPath, final RegionInfo hfileRegionInfo,
331      final String hfileName) throws IOException {
332    return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true);
333  }
334
335  /**
336   * Create a new HFileLink
337   *
338   * <p>It also adds a back-reference to the hfile back-reference directory
339   * to simplify the reference-count and the cleaning process.
340   *
341   * @param conf {@link Configuration} to read for the archive directory name
342   * @param fs {@link FileSystem} on which to write the HFileLink
343   * @param dstFamilyPath - Destination path (table/region/cf/)
344   * @param hfileRegionInfo - Linked HFile Region Info
345   * @param hfileName - Linked HFile name
346   * @param createBackRef - Whether back reference should be created. Defaults to true.
347   * @return true if the file is created, otherwise the file exists.
348   * @throws IOException on file or parent directory creation failure
349   */
350  public static boolean create(final Configuration conf, final FileSystem fs,
351      final Path dstFamilyPath, final RegionInfo hfileRegionInfo,
352      final String hfileName, final boolean createBackRef) throws IOException {
353    TableName linkedTable = hfileRegionInfo.getTable();
354    String linkedRegion = hfileRegionInfo.getEncodedName();
355    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef);
356  }
357
358  /**
359   * Create a new HFileLink
360   *
361   * <p>It also adds a back-reference to the hfile back-reference directory
362   * to simplify the reference-count and the cleaning process.
363   *
364   * @param conf {@link Configuration} to read for the archive directory name
365   * @param fs {@link FileSystem} on which to write the HFileLink
366   * @param dstFamilyPath - Destination path (table/region/cf/)
367   * @param linkedTable - Linked Table Name
368   * @param linkedRegion - Linked Region Name
369   * @param hfileName - Linked HFile name
370   * @return true if the file is created, otherwise the file exists.
371   * @throws IOException on file or parent directory creation failure
372   */
373  public static boolean create(final Configuration conf, final FileSystem fs,
374      final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
375      final String hfileName) throws IOException {
376    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true);
377  }
378
379  /**
380   * Create a new HFileLink
381   *
382   * <p>It also adds a back-reference to the hfile back-reference directory
383   * to simplify the reference-count and the cleaning process.
384   *
385   * @param conf {@link Configuration} to read for the archive directory name
386   * @param fs {@link FileSystem} on which to write the HFileLink
387   * @param dstFamilyPath - Destination path (table/region/cf/)
388   * @param linkedTable - Linked Table Name
389   * @param linkedRegion - Linked Region Name
390   * @param hfileName - Linked HFile name
391   * @param createBackRef - Whether back reference should be created. Defaults to true.
392   * @return true if the file is created, otherwise the file exists.
393   * @throws IOException on file or parent directory creation failure
394   */
395  public static boolean create(final Configuration conf, final FileSystem fs,
396      final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
397      final String hfileName, final boolean createBackRef) throws IOException {
398    String familyName = dstFamilyPath.getName();
399    String regionName = dstFamilyPath.getParent().getName();
400    String tableName = CommonFSUtils.getTableName(dstFamilyPath.getParent().getParent())
401        .getNameAsString();
402
403    String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
404    String refName = createBackReferenceName(tableName, regionName);
405
406    // Make sure the destination directory exists
407    fs.mkdirs(dstFamilyPath);
408
409    // Make sure the FileLink reference directory exists
410    Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
411          linkedTable, linkedRegion, familyName);
412    Path backRefPath = null;
413    if (createBackRef) {
414      Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
415      fs.mkdirs(backRefssDir);
416
417      // Create the reference for the link
418      backRefPath = new Path(backRefssDir, refName);
419      fs.createNewFile(backRefPath);
420    }
421    try {
422      // Create the link
423      return fs.createNewFile(new Path(dstFamilyPath, name));
424    } catch (IOException e) {
425      LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
426      // Revert the reference if the link creation failed
427      if (createBackRef) {
428        fs.delete(backRefPath, false);
429      }
430      throw e;
431    }
432  }
433
434  /**
435   * Create a new HFileLink starting from a hfileLink name
436   *
437   * <p>It also adds a back-reference to the hfile back-reference directory
438   * to simplify the reference-count and the cleaning process.
439   *
440   * @param conf {@link Configuration} to read for the archive directory name
441   * @param fs {@link FileSystem} on which to write the HFileLink
442   * @param dstFamilyPath - Destination path (table/region/cf/)
443   * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
444   * @return true if the file is created, otherwise the file exists.
445   * @throws IOException on file or parent directory creation failure
446   */
447  public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
448      final Path dstFamilyPath, final String hfileLinkName)
449          throws IOException {
450    return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true);
451  }
452
453  /**
454   * Create a new HFileLink starting from a hfileLink name
455   *
456   * <p>It also adds a back-reference to the hfile back-reference directory
457   * to simplify the reference-count and the cleaning process.
458   *
459   * @param conf {@link Configuration} to read for the archive directory name
460   * @param fs {@link FileSystem} on which to write the HFileLink
461   * @param dstFamilyPath - Destination path (table/region/cf/)
462   * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
463   * @param createBackRef - Whether back reference should be created. Defaults to true.
464   * @return true if the file is created, otherwise the file exists.
465   * @throws IOException on file or parent directory creation failure
466   */
467  public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
468      final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef)
469          throws IOException {
470    Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
471    if (!m.matches()) {
472      throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
473    }
474    return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)),
475        m.group(3), m.group(4), createBackRef);
476  }
477
478  /**
479   * Create the back reference name
480   */
481  //package-private for testing
482  static String createBackReferenceName(final String tableNameStr,
483                                        final String regionName) {
484
485    return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
486  }
487
488  /**
489   * Get the full path of the HFile referenced by the back reference
490   *
491   * @param rootDir root hbase directory
492   * @param linkRefPath Link Back Reference path
493   * @return full path of the referenced hfile
494   */
495  public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
496    Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
497    TableName linkTableName = p.getFirst();
498    String linkRegionName = p.getSecond();
499
500    String hfileName = getBackReferenceFileName(linkRefPath.getParent());
501    Path familyPath = linkRefPath.getParent().getParent();
502    Path regionPath = familyPath.getParent();
503    Path tablePath = regionPath.getParent();
504
505    String linkName = createHFileLinkName(CommonFSUtils.getTableName(tablePath),
506            regionPath.getName(), hfileName);
507    Path linkTableDir = CommonFSUtils.getTableDir(rootDir, linkTableName);
508    Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
509    return new Path(new Path(regionDir, familyPath.getName()), linkName);
510  }
511
512  public static Pair<TableName, String> parseBackReferenceName(String name) {
513    int separatorIndex = name.indexOf('.');
514    String linkRegionName = name.substring(0, separatorIndex);
515    String tableSubstr = name.substring(separatorIndex + 1)
516        .replace('=', TableName.NAMESPACE_DELIM);
517    TableName linkTableName = TableName.valueOf(tableSubstr);
518    return new Pair<>(linkTableName, linkRegionName);
519  }
520
521  /**
522   * Get the full path of the HFile referenced by the back reference
523   *
524   * @param conf {@link Configuration} to read for the archive directory name
525   * @param linkRefPath Link Back Reference path
526   * @return full path of the referenced hfile
527   * @throws IOException on unexpected error.
528   */
529  public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
530      throws IOException {
531    return getHFileFromBackReference(CommonFSUtils.getRootDir(conf), linkRefPath);
532  }
533
534}