001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.io;
020
021import java.io.IOException;
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.FileSystem;
027import org.apache.hadoop.fs.Path;
028import org.apache.hadoop.hbase.HConstants;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.client.RegionInfo;
031import org.apache.hadoop.hbase.client.RegionInfoBuilder;
032import org.apache.hadoop.hbase.mob.MobConstants;
033import org.apache.hadoop.hbase.regionserver.HRegion;
034import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
035import org.apache.hadoop.hbase.util.FSUtils;
036import org.apache.hadoop.hbase.util.HFileArchiveUtil;
037import org.apache.hadoop.hbase.util.Pair;
038import org.apache.yetus.audience.InterfaceAudience;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042/**
043 * HFileLink describes a link to an hfile.
044 *
 * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive).
 * HFileLink allows access to the referenced hfile regardless of where it is currently located.
047 *
048 * <p>Searches for hfiles in the following order and locations:
049 * <ul>
050 *  <li>/hbase/table/region/cf/hfile</li>
051 *  <li>/hbase/.archive/table/region/cf/hfile</li>
052 * </ul>
053 *
 * The link checks the original path first; if the file is not present there,
 * it falls back to the archived path.
056 */
057@InterfaceAudience.Private
058@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_DOESNT_OVERRIDE_EQUALS",
059  justification="To be fixed but warning suppressed for now")
060public class HFileLink extends FileLink {
061  private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class);
062
063  /**
064   * A non-capture group, for HFileLink, so that this can be embedded.
065   * The HFileLink describe a link to an hfile in a different table/region
066   * and the name is in the form: table=region-hfile.
067   * <p>
068   * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid
069   * character for the table name.
070   * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
071   * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
072   * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
073   *
074   * <p>Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is table name
075   * and '4567' is region name and 'abcd' is filename.
076   */
077  public static final String LINK_NAME_REGEX =
078    String.format("(?:(?:%s=)?)%s=%s-%s",
079      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
080        RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);
081
082  /** Define the HFile Link name parser in the form of: table=region-hfile */
083  //made package private for testing
084  static final Pattern LINK_NAME_PATTERN =
085    Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
086      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
087      RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
088
089  /**
090   * The pattern should be used for hfile and reference links
091   * that can be found in /hbase/table/region/family/
092   */
093  private static final Pattern REF_OR_HFILE_LINK_PATTERN =
094    Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$",
095      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
096        RegionInfoBuilder.ENCODED_REGION_NAME_REGEX));
097
098  private final Path archivePath;
099  private final Path originPath;
100  private final Path mobPath;
101  private final Path tempPath;
102
103  /**
104   * Dead simple hfile link constructor
105   */
106  public HFileLink(final Path originPath, final Path tempPath, final Path mobPath,
107                   final Path archivePath) {
108    this.tempPath = tempPath;
109    this.originPath = originPath;
110    this.mobPath = mobPath;
111    this.archivePath = archivePath;
112    setLocations(originPath, tempPath, mobPath, archivePath);
113  }
114
115
116  /**
117   * @param conf {@link Configuration} from which to extract specific archive locations
118   * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
119   * @throws IOException on unexpected error.
120   */
121  public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
122          throws IOException {
123    return buildFromHFileLinkPattern(FSUtils.getRootDir(conf),
124            HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
125  }
126
127
128
129  /**
130   * @param rootDir Path to the root directory where hbase files are stored
131   * @param archiveDir Path to the hbase archive directory
132   * @param hFileLinkPattern The path of the HFile Link.
133   */
134  public final static HFileLink buildFromHFileLinkPattern(final Path rootDir,
135                                                          final Path archiveDir,
136                                                          final Path hFileLinkPattern) {
137    Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
138    Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
139    Path originPath = new Path(rootDir, hfilePath);
140    Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath);
141    Path archivePath = new Path(archiveDir, hfilePath);
142    return new HFileLink(originPath, tempPath, mobPath, archivePath);
143  }
144
145  /**
146   * Create an HFileLink relative path for the table/region/family/hfile location
147   * @param table Table name
148   * @param region Region Name
149   * @param family Family Name
150   * @param hfile HFile Name
151   * @return the relative Path to open the specified table/region/family/hfile link
152   */
153  public static Path createPath(final TableName table, final String region,
154                                final String family, final String hfile) {
155    if (HFileLink.isHFileLink(hfile)) {
156      return new Path(family, hfile);
157    }
158    return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
159  }
160
161  /**
162   * Create an HFileLink instance from table/region/family/hfile location
163   * @param conf {@link Configuration} from which to extract specific archive locations
164   * @param table Table name
165   * @param region Region Name
166   * @param family Family Name
167   * @param hfile HFile Name
168   * @return Link to the file with the specified table/region/family/hfile location
169   * @throws IOException on unexpected error.
170   */
171  public static HFileLink build(final Configuration conf, final TableName table,
172                                 final String region, final String family, final String hfile)
173          throws IOException {
174    return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
175  }
176
177  /**
178   * @return the origin path of the hfile.
179   */
180  public Path getOriginPath() {
181    return this.originPath;
182  }
183
184  /**
185   * @return the path of the archived hfile.
186   */
187  public Path getArchivePath() {
188    return this.archivePath;
189  }
190
191  /**
192   * @return the path of the mob hfiles.
193   */
194  public Path getMobPath() {
195    return this.mobPath;
196  }
197
198    /**
199   * @param path Path to check.
200   * @return True if the path is a HFileLink.
201   */
202  public static boolean isHFileLink(final Path path) {
203    return isHFileLink(path.getName());
204  }
205
206
207  /**
208   * @param fileName File name to check.
209   * @return True if the path is a HFileLink.
210   */
211  public static boolean isHFileLink(String fileName) {
212    Matcher m = LINK_NAME_PATTERN.matcher(fileName);
213    if (!m.matches()) return false;
214    return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
215  }
216
217  /**
218   * Convert a HFileLink path to a table relative path.
219   * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
220   *      becomes: /hbase/testtb/4567/cf/abcd
221   *
222   * @param path HFileLink path
223   * @return Relative table path
224   * @throws IOException on unexpected error.
225   */
226  private static Path getHFileLinkPatternRelativePath(final Path path) {
227    // table=region-hfile
228    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
229    if (!m.matches()) {
230      throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
231    }
232
233    // Convert the HFileLink name into a real table/region/cf/hfile path.
234    TableName tableName = TableName.valueOf(m.group(1), m.group(2));
235    String regionName = m.group(3);
236    String hfileName = m.group(4);
237    String familyName = path.getParent().getName();
238    Path tableDir = FSUtils.getTableDir(new Path("./"), tableName);
239    return new Path(tableDir, new Path(regionName, new Path(familyName,
240        hfileName)));
241  }
242
243  /**
244   * Get the HFile name of the referenced link
245   *
246   * @param fileName HFileLink file name
247   * @return the name of the referenced HFile
248   */
249  public static String getReferencedHFileName(final String fileName) {
250    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
251    if (!m.matches()) {
252      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
253    }
254    return(m.group(4));
255  }
256
257  /**
258   * Get the Region name of the referenced link
259   *
260   * @param fileName HFileLink file name
261   * @return the name of the referenced Region
262   */
263  public static String getReferencedRegionName(final String fileName) {
264    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
265    if (!m.matches()) {
266      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
267    }
268    return(m.group(3));
269  }
270
271  /**
272   * Get the Table name of the referenced link
273   *
274   * @param fileName HFileLink file name
275   * @return the name of the referenced Table
276   */
277  public static TableName getReferencedTableName(final String fileName) {
278    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
279    if (!m.matches()) {
280      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
281    }
282    return(TableName.valueOf(m.group(1), m.group(2)));
283  }
284
285  /**
286   * Create a new HFileLink name
287   *
288   * @param hfileRegionInfo - Linked HFile Region Info
289   * @param hfileName - Linked HFile name
290   * @return file name of the HFile Link
291   */
292  public static String createHFileLinkName(final RegionInfo hfileRegionInfo,
293      final String hfileName) {
294    return createHFileLinkName(hfileRegionInfo.getTable(),
295            hfileRegionInfo.getEncodedName(), hfileName);
296  }
297
298  /**
299   * Create a new HFileLink name
300   *
301   * @param tableName - Linked HFile table name
302   * @param regionName - Linked HFile region name
303   * @param hfileName - Linked HFile name
304   * @return file name of the HFile Link
305   */
306  public static String createHFileLinkName(final TableName tableName,
307      final String regionName, final String hfileName) {
308    String s = String.format("%s=%s-%s",
309        tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
310        regionName, hfileName);
311    return s;
312  }
313
314  /**
315   * Create a new HFileLink
316   *
317   * <p>It also adds a back-reference to the hfile back-reference directory
318   * to simplify the reference-count and the cleaning process.
319   *
320   * @param conf {@link Configuration} to read for the archive directory name
321   * @param fs {@link FileSystem} on which to write the HFileLink
322   * @param dstFamilyPath - Destination path (table/region/cf/)
323   * @param hfileRegionInfo - Linked HFile Region Info
324   * @param hfileName - Linked HFile name
325   * @return true if the file is created, otherwise the file exists.
326   * @throws IOException on file or parent directory creation failure
327   */
328  public static boolean create(final Configuration conf, final FileSystem fs,
329      final Path dstFamilyPath, final RegionInfo hfileRegionInfo,
330      final String hfileName) throws IOException {
331    return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true);
332  }
333
334  /**
335   * Create a new HFileLink
336   *
337   * <p>It also adds a back-reference to the hfile back-reference directory
338   * to simplify the reference-count and the cleaning process.
339   *
340   * @param conf {@link Configuration} to read for the archive directory name
341   * @param fs {@link FileSystem} on which to write the HFileLink
342   * @param dstFamilyPath - Destination path (table/region/cf/)
343   * @param hfileRegionInfo - Linked HFile Region Info
344   * @param hfileName - Linked HFile name
345   * @param createBackRef - Whether back reference should be created. Defaults to true.
346   * @return true if the file is created, otherwise the file exists.
347   * @throws IOException on file or parent directory creation failure
348   */
349  public static boolean create(final Configuration conf, final FileSystem fs,
350      final Path dstFamilyPath, final RegionInfo hfileRegionInfo,
351      final String hfileName, final boolean createBackRef) throws IOException {
352    TableName linkedTable = hfileRegionInfo.getTable();
353    String linkedRegion = hfileRegionInfo.getEncodedName();
354    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef);
355  }
356
357  /**
358   * Create a new HFileLink
359   *
360   * <p>It also adds a back-reference to the hfile back-reference directory
361   * to simplify the reference-count and the cleaning process.
362   *
363   * @param conf {@link Configuration} to read for the archive directory name
364   * @param fs {@link FileSystem} on which to write the HFileLink
365   * @param dstFamilyPath - Destination path (table/region/cf/)
366   * @param linkedTable - Linked Table Name
367   * @param linkedRegion - Linked Region Name
368   * @param hfileName - Linked HFile name
369   * @return true if the file is created, otherwise the file exists.
370   * @throws IOException on file or parent directory creation failure
371   */
372  public static boolean create(final Configuration conf, final FileSystem fs,
373      final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
374      final String hfileName) throws IOException {
375    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true);
376  }
377
378  /**
379   * Create a new HFileLink
380   *
381   * <p>It also adds a back-reference to the hfile back-reference directory
382   * to simplify the reference-count and the cleaning process.
383   *
384   * @param conf {@link Configuration} to read for the archive directory name
385   * @param fs {@link FileSystem} on which to write the HFileLink
386   * @param dstFamilyPath - Destination path (table/region/cf/)
387   * @param linkedTable - Linked Table Name
388   * @param linkedRegion - Linked Region Name
389   * @param hfileName - Linked HFile name
390   * @param createBackRef - Whether back reference should be created. Defaults to true.
391   * @return true if the file is created, otherwise the file exists.
392   * @throws IOException on file or parent directory creation failure
393   */
394  public static boolean create(final Configuration conf, final FileSystem fs,
395      final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
396      final String hfileName, final boolean createBackRef) throws IOException {
397    String familyName = dstFamilyPath.getName();
398    String regionName = dstFamilyPath.getParent().getName();
399    String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent())
400        .getNameAsString();
401
402    String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
403    String refName = createBackReferenceName(tableName, regionName);
404
405    // Make sure the destination directory exists
406    fs.mkdirs(dstFamilyPath);
407
408    // Make sure the FileLink reference directory exists
409    Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
410          linkedTable, linkedRegion, familyName);
411    Path backRefPath = null;
412    if (createBackRef) {
413      Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
414      fs.mkdirs(backRefssDir);
415
416      // Create the reference for the link
417      backRefPath = new Path(backRefssDir, refName);
418      fs.createNewFile(backRefPath);
419    }
420    try {
421      // Create the link
422      return fs.createNewFile(new Path(dstFamilyPath, name));
423    } catch (IOException e) {
424      LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
425      // Revert the reference if the link creation failed
426      if (createBackRef) {
427        fs.delete(backRefPath, false);
428      }
429      throw e;
430    }
431  }
432
433  /**
434   * Create a new HFileLink starting from a hfileLink name
435   *
436   * <p>It also adds a back-reference to the hfile back-reference directory
437   * to simplify the reference-count and the cleaning process.
438   *
439   * @param conf {@link Configuration} to read for the archive directory name
440   * @param fs {@link FileSystem} on which to write the HFileLink
441   * @param dstFamilyPath - Destination path (table/region/cf/)
442   * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
443   * @return true if the file is created, otherwise the file exists.
444   * @throws IOException on file or parent directory creation failure
445   */
446  public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
447      final Path dstFamilyPath, final String hfileLinkName)
448          throws IOException {
449    return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true);
450  }
451
452  /**
453   * Create a new HFileLink starting from a hfileLink name
454   *
455   * <p>It also adds a back-reference to the hfile back-reference directory
456   * to simplify the reference-count and the cleaning process.
457   *
458   * @param conf {@link Configuration} to read for the archive directory name
459   * @param fs {@link FileSystem} on which to write the HFileLink
460   * @param dstFamilyPath - Destination path (table/region/cf/)
461   * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
462   * @param createBackRef - Whether back reference should be created. Defaults to true.
463   * @return true if the file is created, otherwise the file exists.
464   * @throws IOException on file or parent directory creation failure
465   */
466  public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
467      final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef)
468          throws IOException {
469    Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
470    if (!m.matches()) {
471      throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
472    }
473    return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)),
474        m.group(3), m.group(4), createBackRef);
475  }
476
477  /**
478   * Create the back reference name
479   */
480  //package-private for testing
481  static String createBackReferenceName(final String tableNameStr,
482                                        final String regionName) {
483
484    return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
485  }
486
487  /**
488   * Get the full path of the HFile referenced by the back reference
489   *
490   * @param rootDir root hbase directory
491   * @param linkRefPath Link Back Reference path
492   * @return full path of the referenced hfile
493   */
494  public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
495    Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
496    TableName linkTableName = p.getFirst();
497    String linkRegionName = p.getSecond();
498
499    String hfileName = getBackReferenceFileName(linkRefPath.getParent());
500    Path familyPath = linkRefPath.getParent().getParent();
501    Path regionPath = familyPath.getParent();
502    Path tablePath = regionPath.getParent();
503
504    String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
505            regionPath.getName(), hfileName);
506    Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
507    Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
508    return new Path(new Path(regionDir, familyPath.getName()), linkName);
509  }
510
511  public static Pair<TableName, String> parseBackReferenceName(String name) {
512    int separatorIndex = name.indexOf('.');
513    String linkRegionName = name.substring(0, separatorIndex);
514    String tableSubstr = name.substring(separatorIndex + 1)
515        .replace('=', TableName.NAMESPACE_DELIM);
516    TableName linkTableName = TableName.valueOf(tableSubstr);
517    return new Pair<>(linkTableName, linkRegionName);
518  }
519
520  /**
521   * Get the full path of the HFile referenced by the back reference
522   *
523   * @param conf {@link Configuration} to read for the archive directory name
524   * @param linkRefPath Link Back Reference path
525   * @return full path of the referenced hfile
526   * @throws IOException on unexpected error.
527   */
528  public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
529      throws IOException {
530    return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
531  }
532
533}