001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.io;
020
021import java.io.IOException;
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.FileSystem;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.HConstants;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.client.RegionInfo;
031import org.apache.hadoop.hbase.client.RegionInfoBuilder;
032import org.apache.hadoop.hbase.mob.MobConstants;
033import org.apache.hadoop.hbase.regionserver.HRegion;
034import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
035import org.apache.hadoop.hbase.util.FSUtils;
036import org.apache.hadoop.hbase.util.HFileArchiveUtil;
037import org.apache.hadoop.hbase.util.Pair;
038import org.apache.yetus.audience.InterfaceAudience;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042/**
043 * HFileLink describes a link to an hfile.
044 *
045 * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive)
046 * HFileLink allows to access the referenced hfile regardless of the location where it is.
047 *
048 * <p>Searches for hfiles in the following order and locations:
049 * <ul>
050 *  <li>/hbase/table/region/cf/hfile</li>
051 *  <li>/hbase/.archive/table/region/cf/hfile</li>
052 * </ul>
053 *
054 * The link checks first in the original path if it is not present
055 * it fallbacks to the archived path.
056 */
057@InterfaceAudience.Private
058@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_DOESNT_OVERRIDE_EQUALS",
059  justification="To be fixed but warning suppressed for now")
060public class HFileLink extends FileLink {
061  private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class);
062
063  /**
064   * A non-capture group, for HFileLink, so that this can be embedded.
065   * The HFileLink describe a link to an hfile in a different table/region
066   * and the name is in the form: table=region-hfile.
067   * <p>
068   * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid
069   * character for the table name.
070   * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
071   * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
072   * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
073   */
074  public static final String LINK_NAME_REGEX =
075    String.format("(?:(?:%s=)?)%s=%s-%s",
076      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
077        RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);
078
079  /** Define the HFile Link name parser in the form of: table=region-hfile */
080  //made package private for testing
081  static final Pattern LINK_NAME_PATTERN =
082    Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
083      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
084      RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
085
086  /**
087   * The pattern should be used for hfile and reference links
088   * that can be found in /hbase/table/region/family/
089   */
090  private static final Pattern REF_OR_HFILE_LINK_PATTERN =
091    Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$",
092      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
093        RegionInfoBuilder.ENCODED_REGION_NAME_REGEX));
094
095  private final Path archivePath;
096  private final Path originPath;
097  private final Path mobPath;
098  private final Path tempPath;
099
100  /**
101   * Dead simple hfile link constructor
102   */
103  public HFileLink(final Path originPath, final Path tempPath, final Path mobPath,
104                   final Path archivePath) {
105    this.tempPath = tempPath;
106    this.originPath = originPath;
107    this.mobPath = mobPath;
108    this.archivePath = archivePath;
109    setLocations(originPath, tempPath, mobPath, archivePath);
110  }
111
112
113  /**
114   * @param conf {@link Configuration} from which to extract specific archive locations
115   * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
116   * @throws IOException on unexpected error.
117   */
118  public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
119          throws IOException {
120    return buildFromHFileLinkPattern(FSUtils.getRootDir(conf),
121            HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
122  }
123
124
125
126  /**
127   * @param rootDir Path to the root directory where hbase files are stored
128   * @param archiveDir Path to the hbase archive directory
129   * @param hFileLinkPattern The path of the HFile Link.
130   */
131  public final static HFileLink buildFromHFileLinkPattern(final Path rootDir,
132                                                          final Path archiveDir,
133                                                          final Path hFileLinkPattern) {
134    Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
135    Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
136    Path originPath = new Path(rootDir, hfilePath);
137    Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath);
138    Path archivePath = new Path(archiveDir, hfilePath);
139    return new HFileLink(originPath, tempPath, mobPath, archivePath);
140  }
141
142  /**
143   * Create an HFileLink relative path for the table/region/family/hfile location
144   * @param table Table name
145   * @param region Region Name
146   * @param family Family Name
147   * @param hfile HFile Name
148   * @return the relative Path to open the specified table/region/family/hfile link
149   */
150  public static Path createPath(final TableName table, final String region,
151                                final String family, final String hfile) {
152    if (HFileLink.isHFileLink(hfile)) {
153      return new Path(family, hfile);
154    }
155    return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
156  }
157
158  /**
159   * Create an HFileLink instance from table/region/family/hfile location
160   * @param conf {@link Configuration} from which to extract specific archive locations
161   * @param table Table name
162   * @param region Region Name
163   * @param family Family Name
164   * @param hfile HFile Name
165   * @return Link to the file with the specified table/region/family/hfile location
166   * @throws IOException on unexpected error.
167   */
168  public static HFileLink build(final Configuration conf, final TableName table,
169                                 final String region, final String family, final String hfile)
170          throws IOException {
171    return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
172  }
173
174  /**
175   * @return the origin path of the hfile.
176   */
177  public Path getOriginPath() {
178    return this.originPath;
179  }
180
181  /**
182   * @return the path of the archived hfile.
183   */
184  public Path getArchivePath() {
185    return this.archivePath;
186  }
187
188  /**
189   * @return the path of the mob hfiles.
190   */
191  public Path getMobPath() {
192    return this.mobPath;
193  }
194
195    /**
196   * @param path Path to check.
197   * @return True if the path is a HFileLink.
198   */
199  public static boolean isHFileLink(final Path path) {
200    return isHFileLink(path.getName());
201  }
202
203
204  /**
205   * @param fileName File name to check.
206   * @return True if the path is a HFileLink.
207   */
208  public static boolean isHFileLink(String fileName) {
209    Matcher m = LINK_NAME_PATTERN.matcher(fileName);
210    if (!m.matches()) return false;
211    return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
212  }
213
214  /**
215   * Convert a HFileLink path to a table relative path.
216   * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
217   *      becomes: /hbase/testtb/4567/cf/abcd
218   *
219   * @param path HFileLink path
220   * @return Relative table path
221   * @throws IOException on unexpected error.
222   */
223  private static Path getHFileLinkPatternRelativePath(final Path path) {
224    // table=region-hfile
225    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
226    if (!m.matches()) {
227      throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
228    }
229
230    // Convert the HFileLink name into a real table/region/cf/hfile path.
231    TableName tableName = TableName.valueOf(m.group(1), m.group(2));
232    String regionName = m.group(3);
233    String hfileName = m.group(4);
234    String familyName = path.getParent().getName();
235    Path tableDir = FSUtils.getTableDir(new Path("./"), tableName);
236    return new Path(tableDir, new Path(regionName, new Path(familyName,
237        hfileName)));
238  }
239
240  /**
241   * Get the HFile name of the referenced link
242   *
243   * @param fileName HFileLink file name
244   * @return the name of the referenced HFile
245   */
246  public static String getReferencedHFileName(final String fileName) {
247    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
248    if (!m.matches()) {
249      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
250    }
251    return(m.group(4));
252  }
253
254  /**
255   * Get the Region name of the referenced link
256   *
257   * @param fileName HFileLink file name
258   * @return the name of the referenced Region
259   */
260  public static String getReferencedRegionName(final String fileName) {
261    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
262    if (!m.matches()) {
263      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
264    }
265    return(m.group(3));
266  }
267
268  /**
269   * Get the Table name of the referenced link
270   *
271   * @param fileName HFileLink file name
272   * @return the name of the referenced Table
273   */
274  public static TableName getReferencedTableName(final String fileName) {
275    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
276    if (!m.matches()) {
277      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
278    }
279    return(TableName.valueOf(m.group(1), m.group(2)));
280  }
281
282  /**
283   * Create a new HFileLink name
284   *
285   * @param hfileRegionInfo - Linked HFile Region Info
286   * @param hfileName - Linked HFile name
287   * @return file name of the HFile Link
288   */
289  public static String createHFileLinkName(final RegionInfo hfileRegionInfo,
290      final String hfileName) {
291    return createHFileLinkName(hfileRegionInfo.getTable(),
292            hfileRegionInfo.getEncodedName(), hfileName);
293  }
294
295  /**
296   * Create a new HFileLink name
297   *
298   * @param tableName - Linked HFile table name
299   * @param regionName - Linked HFile region name
300   * @param hfileName - Linked HFile name
301   * @return file name of the HFile Link
302   */
303  public static String createHFileLinkName(final TableName tableName,
304      final String regionName, final String hfileName) {
305    String s = String.format("%s=%s-%s",
306        tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
307        regionName, hfileName);
308    return s;
309  }
310
311  /**
312   * Create a new HFileLink
313   *
314   * <p>It also adds a back-reference to the hfile back-reference directory
315   * to simplify the reference-count and the cleaning process.
316   *
317   * @param conf {@link Configuration} to read for the archive directory name
318   * @param fs {@link FileSystem} on which to write the HFileLink
319   * @param dstFamilyPath - Destination path (table/region/cf/)
320   * @param hfileRegionInfo - Linked HFile Region Info
321   * @param hfileName - Linked HFile name
322   * @return true if the file is created, otherwise the file exists.
323   * @throws IOException on file or parent directory creation failure
324   */
325  public static boolean create(final Configuration conf, final FileSystem fs,
326      final Path dstFamilyPath, final RegionInfo hfileRegionInfo,
327      final String hfileName) throws IOException {
328    return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true);
329  }
330
331  /**
332   * Create a new HFileLink
333   *
334   * <p>It also adds a back-reference to the hfile back-reference directory
335   * to simplify the reference-count and the cleaning process.
336   *
337   * @param conf {@link Configuration} to read for the archive directory name
338   * @param fs {@link FileSystem} on which to write the HFileLink
339   * @param dstFamilyPath - Destination path (table/region/cf/)
340   * @param hfileRegionInfo - Linked HFile Region Info
341   * @param hfileName - Linked HFile name
342   * @param createBackRef - Whether back reference should be created. Defaults to true.
343   * @return true if the file is created, otherwise the file exists.
344   * @throws IOException on file or parent directory creation failure
345   */
346  public static boolean create(final Configuration conf, final FileSystem fs,
347      final Path dstFamilyPath, final RegionInfo hfileRegionInfo,
348      final String hfileName, final boolean createBackRef) throws IOException {
349    TableName linkedTable = hfileRegionInfo.getTable();
350    String linkedRegion = hfileRegionInfo.getEncodedName();
351    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef);
352  }
353
354  /**
355   * Create a new HFileLink
356   *
357   * <p>It also adds a back-reference to the hfile back-reference directory
358   * to simplify the reference-count and the cleaning process.
359   *
360   * @param conf {@link Configuration} to read for the archive directory name
361   * @param fs {@link FileSystem} on which to write the HFileLink
362   * @param dstFamilyPath - Destination path (table/region/cf/)
363   * @param linkedTable - Linked Table Name
364   * @param linkedRegion - Linked Region Name
365   * @param hfileName - Linked HFile name
366   * @return true if the file is created, otherwise the file exists.
367   * @throws IOException on file or parent directory creation failure
368   */
369  public static boolean create(final Configuration conf, final FileSystem fs,
370      final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
371      final String hfileName) throws IOException {
372    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true);
373  }
374
375  /**
376   * Create a new HFileLink
377   *
378   * <p>It also adds a back-reference to the hfile back-reference directory
379   * to simplify the reference-count and the cleaning process.
380   *
381   * @param conf {@link Configuration} to read for the archive directory name
382   * @param fs {@link FileSystem} on which to write the HFileLink
383   * @param dstFamilyPath - Destination path (table/region/cf/)
384   * @param linkedTable - Linked Table Name
385   * @param linkedRegion - Linked Region Name
386   * @param hfileName - Linked HFile name
387   * @param createBackRef - Whether back reference should be created. Defaults to true.
388   * @return true if the file is created, otherwise the file exists.
389   * @throws IOException on file or parent directory creation failure
390   */
391  public static boolean create(final Configuration conf, final FileSystem fs,
392      final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
393      final String hfileName, final boolean createBackRef) throws IOException {
394    String familyName = dstFamilyPath.getName();
395    String regionName = dstFamilyPath.getParent().getName();
396    String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent())
397        .getNameAsString();
398
399    String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
400    String refName = createBackReferenceName(tableName, regionName);
401
402    // Make sure the destination directory exists
403    fs.mkdirs(dstFamilyPath);
404
405    // Make sure the FileLink reference directory exists
406    Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
407          linkedTable, linkedRegion, familyName);
408    Path backRefPath = null;
409    if (createBackRef) {
410      Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
411      fs.mkdirs(backRefssDir);
412
413      // Create the reference for the link
414      backRefPath = new Path(backRefssDir, refName);
415      fs.createNewFile(backRefPath);
416    }
417    try {
418      // Create the link
419      return fs.createNewFile(new Path(dstFamilyPath, name));
420    } catch (IOException e) {
421      LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
422      // Revert the reference if the link creation failed
423      if (createBackRef) {
424        fs.delete(backRefPath, false);
425      }
426      throw e;
427    }
428  }
429
430  /**
431   * Create a new HFileLink starting from a hfileLink name
432   *
433   * <p>It also adds a back-reference to the hfile back-reference directory
434   * to simplify the reference-count and the cleaning process.
435   *
436   * @param conf {@link Configuration} to read for the archive directory name
437   * @param fs {@link FileSystem} on which to write the HFileLink
438   * @param dstFamilyPath - Destination path (table/region/cf/)
439   * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
440   * @return true if the file is created, otherwise the file exists.
441   * @throws IOException on file or parent directory creation failure
442   */
443  public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
444      final Path dstFamilyPath, final String hfileLinkName)
445          throws IOException {
446    return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true);
447  }
448
449  /**
450   * Create a new HFileLink starting from a hfileLink name
451   *
452   * <p>It also adds a back-reference to the hfile back-reference directory
453   * to simplify the reference-count and the cleaning process.
454   *
455   * @param conf {@link Configuration} to read for the archive directory name
456   * @param fs {@link FileSystem} on which to write the HFileLink
457   * @param dstFamilyPath - Destination path (table/region/cf/)
458   * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
459   * @param createBackRef - Whether back reference should be created. Defaults to true.
460   * @return true if the file is created, otherwise the file exists.
461   * @throws IOException on file or parent directory creation failure
462   */
463  public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
464      final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef)
465          throws IOException {
466    Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
467    if (!m.matches()) {
468      throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
469    }
470    return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)),
471        m.group(3), m.group(4), createBackRef);
472  }
473
474  /**
475   * Create the back reference name
476   */
477  //package-private for testing
478  static String createBackReferenceName(final String tableNameStr,
479                                        final String regionName) {
480
481    return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
482  }
483
484  /**
485   * Get the full path of the HFile referenced by the back reference
486   *
487   * @param rootDir root hbase directory
488   * @param linkRefPath Link Back Reference path
489   * @return full path of the referenced hfile
490   */
491  public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
492    Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
493    TableName linkTableName = p.getFirst();
494    String linkRegionName = p.getSecond();
495
496    String hfileName = getBackReferenceFileName(linkRefPath.getParent());
497    Path familyPath = linkRefPath.getParent().getParent();
498    Path regionPath = familyPath.getParent();
499    Path tablePath = regionPath.getParent();
500
501    String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
502            regionPath.getName(), hfileName);
503    Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
504    Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
505    return new Path(new Path(regionDir, familyPath.getName()), linkName);
506  }
507
508  static Pair<TableName, String> parseBackReferenceName(String name) {
509    int separatorIndex = name.indexOf('.');
510    String linkRegionName = name.substring(0, separatorIndex);
511    String tableSubstr = name.substring(separatorIndex + 1)
512        .replace('=', TableName.NAMESPACE_DELIM);
513    TableName linkTableName = TableName.valueOf(tableSubstr);
514    return new Pair<>(linkTableName, linkRegionName);
515  }
516
517  /**
518   * Get the full path of the HFile referenced by the back reference
519   *
520   * @param conf {@link Configuration} to read for the archive directory name
521   * @param linkRefPath Link Back Reference path
522   * @return full path of the referenced hfile
523   * @throws IOException on unexpected error.
524   */
525  public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
526      throws IOException {
527    return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
528  }
529
530}