View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io;
20  
21  import java.io.IOException;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.FileSystem;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.TableName;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.mob.MobConstants;
35  import org.apache.hadoop.hbase.regionserver.HRegion;
36  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
37  import org.apache.hadoop.hbase.util.FSUtils;
38  import org.apache.hadoop.hbase.util.HFileArchiveUtil;
39  import org.apache.hadoop.hbase.util.Pair;
40  
41  /**
42   * HFileLink describes a link to an hfile.
43   *
44   * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive)
45   * HFileLink allows to access the referenced hfile regardless of the location where it is.
46   *
47   * <p>Searches for hfiles in the following order and locations:
48   * <ul>
49   *  <li>/hbase/table/region/cf/hfile</li>
50   *  <li>/hbase/.archive/table/region/cf/hfile</li>
51   * </ul>
52   *
 * The link first checks the original path; if the hfile is not present there,
 * it falls back to the archived path.
55   */
56  @InterfaceAudience.Private
57  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_DOESNT_OVERRIDE_EQUALS",
58    justification="To be fixed but warning suppressed for now")
59  public class HFileLink extends FileLink {
  /** Class-wide logger; used in this class to report a failed link creation. */
  private static final Log LOG = LogFactory.getLog(HFileLink.class);
61  
  /**
   * Regular expression source for an HFileLink name, meant to be embedded in larger
   * expressions: the format string below only introduces non-capturing groups.
   * An HFileLink describes a link to an hfile in a different table/region
   * and its name has the form: table=region-hfile, optionally prefixed by "namespace=".
   * <p>
   * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
   * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
   * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
   * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
   * <p>
   * NOTE(review): only the optional namespace prefix is wrapped in a group; the expression
   * as a whole is not enclosed in one. Whether that matters when embedding depends on the
   * component regexes, which are defined outside this file - confirm before reuse.
   */
  public static final String LINK_NAME_REGEX =
    String.format("(?:(?:%s=)?)%s=%s-%s",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);
76  
  /**
   * Parser for a complete HFile Link name in the form of: [namespace=]table=region-hfile.
   * The expression is anchored at both ends. The code in this class reads group 1 as the
   * optional namespace, group 2 as the table qualifier, group 3 as the encoded region name
   * and group 4 as the hfile name.
   * NOTE(review): that numbering presumes the component regexes (defined elsewhere)
   * contribute no capturing groups of their own - confirm.
   */
  //made package private for testing
  static final Pattern LINK_NAME_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
83  
  /**
   * The pattern should be used for hfile and reference links
   * that can be found in /hbase/table/region/family/
   * <p>
   * It has the same [namespace=]table=region- prefix as the link name pattern, but the
   * last group accepts any non-empty remainder ({@code .+}) instead of a strict hfile name.
   * The code in this class reads groups 1 to 4 as namespace (optional), table qualifier,
   * encoded region name and file name respectively.
   */
  private static final Pattern REF_OR_HFILE_LINK_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      HRegionInfo.ENCODED_REGION_NAME_REGEX));
92  
  // The four candidate locations of the linked hfile; each is assigned once, in the constructor.

  /** Candidate location under the archive directory. */
  private final Path archivePath;
  /** Candidate location under the original table/region/family directory. */
  private final Path originPath;
  /** Candidate location under the mob directory. */
  private final Path mobPath;
  /** Candidate location under the temp directory; no accessor for it is visible in this class. */
  private final Path tempPath;
97  
98    /**
99     * Dead simple hfile link constructor
100    */
101   public HFileLink(final Path originPath, final Path tempPath, final Path mobPath,
102                    final Path archivePath) {
103     this.tempPath = tempPath;
104     this.originPath = originPath;
105     this.mobPath = mobPath;
106     this.archivePath = archivePath;
107     setLocations(originPath, tempPath, mobPath, archivePath);
108   }
109 
110 
111   /**
112    * @param conf {@link Configuration} from which to extract specific archive locations
113    * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
114    * @throws IOException on unexpected error.
115    */
116   public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
117           throws IOException {
118     return buildFromHFileLinkPattern(FSUtils.getRootDir(conf),
119             HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
120   }
121 
122 
123 
124   /**
125    * @param rootDir Path to the root directory where hbase files are stored
126    * @param archiveDir Path to the hbase archive directory
127    * @param hFileLinkPattern The path of the HFile Link.
128    */
129   public final static HFileLink buildFromHFileLinkPattern(final Path rootDir,
130                                                           final Path archiveDir,
131                                                           final Path hFileLinkPattern) {
132     Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
133     Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
134     Path originPath = new Path(rootDir, hfilePath);
135     Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath);
136     Path archivePath = new Path(archiveDir, hfilePath);
137     return new HFileLink(originPath, tempPath, mobPath, archivePath);
138   }
139 
140   /**
141    * Create an HFileLink relative path for the table/region/family/hfile location
142    * @param table Table name
143    * @param region Region Name
144    * @param family Family Name
145    * @param hfile HFile Name
146    * @return the relative Path to open the specified table/region/family/hfile link
147    */
148   public static Path createPath(final TableName table, final String region,
149                                 final String family, final String hfile) {
150     if (HFileLink.isHFileLink(hfile)) {
151       return new Path(family, hfile);
152     }
153     return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
154   }
155 
156   /**
157    * Create an HFileLink instance from table/region/family/hfile location
158    * @param conf {@link Configuration} from which to extract specific archive locations
159    * @param table Table name
160    * @param region Region Name
161    * @param family Family Name
162    * @param hfile HFile Name
163    * @return Link to the file with the specified table/region/family/hfile location
164    * @throws IOException on unexpected error.
165    */
166   public static HFileLink build(final Configuration conf, final TableName table,
167                                  final String region, final String family, final String hfile)
168           throws IOException {
169     return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
170   }
171 
172   /**
173    * @return the origin path of the hfile.
174    */
175   public Path getOriginPath() {
176     return this.originPath;
177   }
178 
179   /**
180    * @return the path of the archived hfile.
181    */
182   public Path getArchivePath() {
183     return this.archivePath;
184   }
185 
186   /**
187    * @return the path of the mob hfiles.
188    */
189   public Path getMobPath() {
190     return this.mobPath;
191   }
192 
193     /**
194    * @param path Path to check.
195    * @return True if the path is a HFileLink.
196    */
197   public static boolean isHFileLink(final Path path) {
198     return isHFileLink(path.getName());
199   }
200 
201 
202   /**
203    * @param fileName File name to check.
204    * @return True if the path is a HFileLink.
205    */
206   public static boolean isHFileLink(String fileName) {
207     Matcher m = LINK_NAME_PATTERN.matcher(fileName);
208     if (!m.matches()) return false;
209     return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
210   }
211 
212   /**
213    * Convert a HFileLink path to a table relative path.
214    * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
215    *      becomes: /hbase/testtb/4567/cf/abcd
216    *
217    * @param path HFileLink path
218    * @return Relative table path
219    * @throws IOException on unexpected error.
220    */
221   private static Path getHFileLinkPatternRelativePath(final Path path) {
222     // table=region-hfile
223     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
224     if (!m.matches()) {
225       throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
226     }
227 
228     // Convert the HFileLink name into a real table/region/cf/hfile path.
229     TableName tableName = TableName.valueOf(m.group(1), m.group(2));
230     String regionName = m.group(3);
231     String hfileName = m.group(4);
232     String familyName = path.getParent().getName();
233     Path tableDir = FSUtils.getTableDir(new Path("./"), tableName);
234     return new Path(tableDir, new Path(regionName, new Path(familyName,
235         hfileName)));
236   }
237 
238   /**
239    * Get the HFile name of the referenced link
240    *
241    * @param fileName HFileLink file name
242    * @return the name of the referenced HFile
243    */
244   public static String getReferencedHFileName(final String fileName) {
245     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
246     if (!m.matches()) {
247       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
248     }
249     return(m.group(4));
250   }
251 
252   /**
253    * Get the Region name of the referenced link
254    *
255    * @param fileName HFileLink file name
256    * @return the name of the referenced Region
257    */
258   public static String getReferencedRegionName(final String fileName) {
259     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
260     if (!m.matches()) {
261       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
262     }
263     return(m.group(3));
264   }
265 
266   /**
267    * Get the Table name of the referenced link
268    *
269    * @param fileName HFileLink file name
270    * @return the name of the referenced Table
271    */
272   public static TableName getReferencedTableName(final String fileName) {
273     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
274     if (!m.matches()) {
275       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
276     }
277     return(TableName.valueOf(m.group(1), m.group(2)));
278   }
279 
280   /**
281    * Create a new HFileLink name
282    *
283    * @param hfileRegionInfo - Linked HFile Region Info
284    * @param hfileName - Linked HFile name
285    * @return file name of the HFile Link
286    */
287   public static String createHFileLinkName(final HRegionInfo hfileRegionInfo,
288       final String hfileName) {
289     return createHFileLinkName(hfileRegionInfo.getTable(),
290             hfileRegionInfo.getEncodedName(), hfileName);
291   }
292 
293   /**
294    * Create a new HFileLink name
295    *
296    * @param tableName - Linked HFile table name
297    * @param regionName - Linked HFile region name
298    * @param hfileName - Linked HFile name
299    * @return file name of the HFile Link
300    */
301   public static String createHFileLinkName(final TableName tableName,
302       final String regionName, final String hfileName) {
303     String s = String.format("%s=%s-%s",
304         tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
305         regionName, hfileName);
306     return s;
307   }
308 
309   /**
310    * Create a new HFileLink
311    *
312    * <p>It also adds a back-reference to the hfile back-reference directory
313    * to simplify the reference-count and the cleaning process.
314    *
315    * @param conf {@link Configuration} to read for the archive directory name
316    * @param fs {@link FileSystem} on which to write the HFileLink
317    * @param dstFamilyPath - Destination path (table/region/cf/)
318    * @param hfileRegionInfo - Linked HFile Region Info
319    * @param hfileName - Linked HFile name
320    * @return true if the file is created, otherwise the file exists.
321    * @throws IOException on file or parent directory creation failure
322    */
323   public static boolean create(final Configuration conf, final FileSystem fs,
324       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
325       final String hfileName) throws IOException {
326     return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true);
327   }
328 
329   /**
330    * Create a new HFileLink
331    *
332    * <p>It also adds a back-reference to the hfile back-reference directory
333    * to simplify the reference-count and the cleaning process.
334    *
335    * @param conf {@link Configuration} to read for the archive directory name
336    * @param fs {@link FileSystem} on which to write the HFileLink
337    * @param dstFamilyPath - Destination path (table/region/cf/)
338    * @param hfileRegionInfo - Linked HFile Region Info
339    * @param hfileName - Linked HFile name
340    * @param createBackRef - Whether back reference should be created. Defaults to true.
341    * @return true if the file is created, otherwise the file exists.
342    * @throws IOException on file or parent directory creation failure
343    */
344   public static boolean create(final Configuration conf, final FileSystem fs,
345       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
346       final String hfileName, final boolean createBackRef) throws IOException {
347     TableName linkedTable = hfileRegionInfo.getTable();
348     String linkedRegion = hfileRegionInfo.getEncodedName();
349     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef);
350   }
351 
352   /**
353    * Create a new HFileLink
354    *
355    * <p>It also adds a back-reference to the hfile back-reference directory
356    * to simplify the reference-count and the cleaning process.
357    *
358    * @param conf {@link Configuration} to read for the archive directory name
359    * @param fs {@link FileSystem} on which to write the HFileLink
360    * @param dstFamilyPath - Destination path (table/region/cf/)
361    * @param linkedTable - Linked Table Name
362    * @param linkedRegion - Linked Region Name
363    * @param hfileName - Linked HFile name
364    * @return true if the file is created, otherwise the file exists.
365    * @throws IOException on file or parent directory creation failure
366    */
367   public static boolean create(final Configuration conf, final FileSystem fs,
368       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
369       final String hfileName) throws IOException {
370     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true);
371   }
372 
373   /**
374    * Create a new HFileLink
375    *
376    * <p>It also adds a back-reference to the hfile back-reference directory
377    * to simplify the reference-count and the cleaning process.
378    *
379    * @param conf {@link Configuration} to read for the archive directory name
380    * @param fs {@link FileSystem} on which to write the HFileLink
381    * @param dstFamilyPath - Destination path (table/region/cf/)
382    * @param linkedTable - Linked Table Name
383    * @param linkedRegion - Linked Region Name
384    * @param hfileName - Linked HFile name
385    * @param createBackRef - Whether back reference should be created. Defaults to true.
386    * @return true if the file is created, otherwise the file exists.
387    * @throws IOException on file or parent directory creation failure
388    */
389   public static boolean create(final Configuration conf, final FileSystem fs,
390       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
391       final String hfileName, final boolean createBackRef) throws IOException {
392     String familyName = dstFamilyPath.getName();
393     String regionName = dstFamilyPath.getParent().getName();
394     String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent())
395         .getNameAsString();
396 
397     String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
398     String refName = createBackReferenceName(tableName, regionName);
399 
400     // Make sure the destination directory exists
401     fs.mkdirs(dstFamilyPath);
402 
403     // Make sure the FileLink reference directory exists
404     Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
405           linkedTable, linkedRegion, familyName);
406     Path backRefPath = null;
407     if (createBackRef) {
408       Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
409       fs.mkdirs(backRefssDir);
410 
411       // Create the reference for the link
412       backRefPath = new Path(backRefssDir, refName);
413       fs.createNewFile(backRefPath);
414     }
415     try {
416       // Create the link
417       return fs.createNewFile(new Path(dstFamilyPath, name));
418     } catch (IOException e) {
419       LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
420       // Revert the reference if the link creation failed
421       if (createBackRef) {
422         fs.delete(backRefPath, false);
423       }
424       throw e;
425     }
426   }
427 
428   /**
429    * Create a new HFileLink starting from a hfileLink name
430    *
431    * <p>It also adds a back-reference to the hfile back-reference directory
432    * to simplify the reference-count and the cleaning process.
433    *
434    * @param conf {@link Configuration} to read for the archive directory name
435    * @param fs {@link FileSystem} on which to write the HFileLink
436    * @param dstFamilyPath - Destination path (table/region/cf/)
437    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
438    * @return true if the file is created, otherwise the file exists.
439    * @throws IOException on file or parent directory creation failure
440    */
441   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
442       final Path dstFamilyPath, final String hfileLinkName)
443           throws IOException {
444     return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true);
445   }
446 
447   /**
448    * Create a new HFileLink starting from a hfileLink name
449    *
450    * <p>It also adds a back-reference to the hfile back-reference directory
451    * to simplify the reference-count and the cleaning process.
452    *
453    * @param conf {@link Configuration} to read for the archive directory name
454    * @param fs {@link FileSystem} on which to write the HFileLink
455    * @param dstFamilyPath - Destination path (table/region/cf/)
456    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
457    * @param createBackRef - Whether back reference should be created. Defaults to true.
458    * @return true if the file is created, otherwise the file exists.
459    * @throws IOException on file or parent directory creation failure
460    */
461   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
462       final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef)
463           throws IOException {
464     Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
465     if (!m.matches()) {
466       throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
467     }
468     return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)),
469         m.group(3), m.group(4), createBackRef);
470   }
471 
472   /**
473    * Create the back reference name
474    */
475   //package-private for testing
476   static String createBackReferenceName(final String tableNameStr,
477                                         final String regionName) {
478 
479     return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
480   }
481 
482   /**
483    * Get the full path of the HFile referenced by the back reference
484    *
485    * @param rootDir root hbase directory
486    * @param linkRefPath Link Back Reference path
487    * @return full path of the referenced hfile
488    */
489   public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
490     Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
491     TableName linkTableName = p.getFirst();
492     String linkRegionName = p.getSecond();
493 
494     String hfileName = getBackReferenceFileName(linkRefPath.getParent());
495     Path familyPath = linkRefPath.getParent().getParent();
496     Path regionPath = familyPath.getParent();
497     Path tablePath = regionPath.getParent();
498 
499     String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
500             regionPath.getName(), hfileName);
501     Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
502     Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
503     return new Path(new Path(regionDir, familyPath.getName()), linkName);
504   }
505 
506   static Pair<TableName, String> parseBackReferenceName(String name) {
507     int separatorIndex = name.indexOf('.');
508     String linkRegionName = name.substring(0, separatorIndex);
509     String tableSubstr = name.substring(separatorIndex + 1)
510         .replace('=', TableName.NAMESPACE_DELIM);
511     TableName linkTableName = TableName.valueOf(tableSubstr);
512     return new Pair<TableName, String>(linkTableName, linkRegionName);
513   }
514 
515   /**
516    * Get the full path of the HFile referenced by the back reference
517    *
518    * @param conf {@link Configuration} to read for the archive directory name
519    * @param linkRefPath Link Back Reference path
520    * @return full path of the referenced hfile
521    * @throws IOException on unexpected error.
522    */
523   public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
524       throws IOException {
525     return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
526   }
527 
528 }