View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io;
20  
21  import java.io.IOException;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.FileSystem;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.TableName;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.regionserver.HRegion;
35  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
36  import org.apache.hadoop.hbase.util.FSUtils;
37  import org.apache.hadoop.hbase.util.HFileArchiveUtil;
38  import org.apache.hadoop.hbase.util.Pair;
39  
40  /**
41   * HFileLink describes a link to an hfile.
42   *
 * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive).
 * HFileLink allows access to the referenced hfile regardless of where it is currently located.
45   *
46   * <p>Searches for hfiles in the following order and locations:
47   * <ul>
48   *  <li>/hbase/table/region/cf/hfile</li>
49   *  <li>/hbase/.archive/table/region/cf/hfile</li>
50   * </ul>
51   *
 * The link checks the original path first; if the file is not present there,
 * it falls back to the archived path.
54   */
55  @InterfaceAudience.Private
56  public class HFileLink extends FileLink {
  // Class-wide logger.
  private static final Log LOG = LogFactory.getLog(HFileLink.class);

  /**
   * A non-capture group, for HFileLink, so that this can be embedded.
   * An HFileLink describes a link to an hfile in a different table/region
   * and the name is in the form: table=region-hfile, optionally preceded by
   * a "namespace=" prefix.
   * <p>
   * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
   * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
   * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
   * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
   */
  public static final String LINK_NAME_REGEX =
    String.format("(?:(?:%s=)?)%s=%s-%s",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);

  /**
   * Define the HFile Link name parser in the form of: table=region-hfile
   * <p>
   * The pattern is anchored to the whole name. Capture groups, as used by the
   * methods of this class: 1 = namespace (optional), 2 = table qualifier,
   * 3 = encoded region name, 4 = hfile name.
   */
  //made package private for testing
  static final Pattern LINK_NAME_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));

  /**
   * The pattern should be used for hfile and reference links
   * that can be found in /hbase/table/region/family/
   * <p>
   * Same capture groups as {@link #LINK_NAME_PATTERN}, except that group 4
   * accepts any non-empty trailing text (.+) instead of a strict hfile name.
   */
  private static final Pattern REF_OR_HFILE_LINK_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      HRegionInfo.ENCODED_REGION_NAME_REGEX));

  // Location of the hfile in the archive directory; exposed by getArchivePath().
  private final Path archivePath;
  // Location of the hfile in its original table/region/family directory; exposed by getOriginPath().
  private final Path originPath;
  // Location of the hfile under the temp directory; set by the constructor.
  private final Path tempPath;
93  
94    /**
95     * Dead simple hfile link constructor
96     */
97    public HFileLink(final Path originPath, final Path tempPath,
98                     final Path archivePath) {
99      this.tempPath  = tempPath;
100     this.originPath = originPath;
101     this.archivePath = archivePath;
102 
103     setLocations(originPath, tempPath, archivePath);
104   }
105 
106   /**
107    * @param conf {@link Configuration} from which to extract specific archive locations
108    * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
109    * @throws IOException on unexpected error.
110    */
111   public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
112           throws IOException {
113     return buildFromHFileLinkPattern(FSUtils.getRootDir(conf),
114             HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
115   }
116 
117   /**
118    * @param rootDir Path to the root directory where hbase files are stored
119    * @param archiveDir Path to the hbase archive directory
120    * @param hFileLinkPattern The path of the HFile Link.
121    */
122   public final static HFileLink buildFromHFileLinkPattern(final Path rootDir,
123                                                           final Path archiveDir,
124                                                           final Path hFileLinkPattern) {
125     Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
126     Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
127     Path originPath = new Path(rootDir, hfilePath);
128     Path archivePath = new Path(archiveDir, hfilePath);
129     return new HFileLink(originPath, tempPath, archivePath);
130   }
131 
132   /**
133    * Create an HFileLink relative path for the table/region/family/hfile location
134    * @param table Table name
135    * @param region Region Name
136    * @param family Family Name
137    * @param hfile HFile Name
138    * @return the relative Path to open the specified table/region/family/hfile link
139    */
140   public static Path createPath(final TableName table, final String region,
141                                 final String family, final String hfile) {
142     if (HFileLink.isHFileLink(hfile)) {
143       return new Path(family, hfile);
144     }
145     return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
146   }
147 
148   /**
149    * Create an HFileLink instance from table/region/family/hfile location
150    * @param conf {@link Configuration} from which to extract specific archive locations
151    * @param table Table name
152    * @param region Region Name
153    * @param family Family Name
154    * @param hfile HFile Name
155    * @return Link to the file with the specified table/region/family/hfile location
156    * @throws IOException on unexpected error.
157    */
158   public static HFileLink build(final Configuration conf, final TableName table,
159                                  final String region, final String family, final String hfile)
160           throws IOException {
161     return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
162   }
163 
164   /**
165    * @return the origin path of the hfile.
166    */
167   public Path getOriginPath() {
168     return this.originPath;
169   }
170 
171   /**
172    * @return the path of the archived hfile.
173    */
174   public Path getArchivePath() {
175     return this.archivePath;
176   }
177 
178   /**
179    * @param path Path to check.
180    * @return True if the path is a HFileLink.
181    */
182   public static boolean isHFileLink(final Path path) {
183     return isHFileLink(path.getName());
184   }
185 
186 
187   /**
188    * @param fileName File name to check.
189    * @return True if the path is a HFileLink.
190    */
191   public static boolean isHFileLink(String fileName) {
192     Matcher m = LINK_NAME_PATTERN.matcher(fileName);
193     if (!m.matches()) return false;
194     return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
195   }
196 
197   /**
198    * Convert a HFileLink path to a table relative path.
199    * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
200    *      becomes: /hbase/testtb/4567/cf/abcd
201    *
202    * @param path HFileLink path
203    * @return Relative table path
204    * @throws IOException on unexpected error.
205    */
206   private static Path getHFileLinkPatternRelativePath(final Path path) {
207     // table=region-hfile
208     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
209     if (!m.matches()) {
210       throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
211     }
212 
213     // Convert the HFileLink name into a real table/region/cf/hfile path.
214     TableName tableName = TableName.valueOf(m.group(1), m.group(2));
215     String regionName = m.group(3);
216     String hfileName = m.group(4);
217     String familyName = path.getParent().getName();
218     Path tableDir = FSUtils.getTableDir(new Path("./"), tableName);
219     return new Path(tableDir, new Path(regionName, new Path(familyName,
220         hfileName)));
221   }
222 
223   /**
224    * Get the HFile name of the referenced link
225    *
226    * @param fileName HFileLink file name
227    * @return the name of the referenced HFile
228    */
229   public static String getReferencedHFileName(final String fileName) {
230     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
231     if (!m.matches()) {
232       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
233     }
234     return(m.group(4));
235   }
236 
237   /**
238    * Get the Region name of the referenced link
239    *
240    * @param fileName HFileLink file name
241    * @return the name of the referenced Region
242    */
243   public static String getReferencedRegionName(final String fileName) {
244     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
245     if (!m.matches()) {
246       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
247     }
248     return(m.group(3));
249   }
250 
251   /**
252    * Get the Table name of the referenced link
253    *
254    * @param fileName HFileLink file name
255    * @return the name of the referenced Table
256    */
257   public static TableName getReferencedTableName(final String fileName) {
258     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
259     if (!m.matches()) {
260       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
261     }
262     return(TableName.valueOf(m.group(1), m.group(2)));
263   }
264 
265   /**
266    * Create a new HFileLink name
267    *
268    * @param hfileRegionInfo - Linked HFile Region Info
269    * @param hfileName - Linked HFile name
270    * @return file name of the HFile Link
271    */
272   public static String createHFileLinkName(final HRegionInfo hfileRegionInfo,
273       final String hfileName) {
274     return createHFileLinkName(hfileRegionInfo.getTable(),
275             hfileRegionInfo.getEncodedName(), hfileName);
276   }
277 
278   /**
279    * Create a new HFileLink name
280    *
281    * @param tableName - Linked HFile table name
282    * @param regionName - Linked HFile region name
283    * @param hfileName - Linked HFile name
284    * @return file name of the HFile Link
285    */
286   public static String createHFileLinkName(final TableName tableName,
287       final String regionName, final String hfileName) {
288     String s = String.format("%s=%s-%s",
289         tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
290         regionName, hfileName);
291     return s;
292   }
293 
294   /**
295    * Create a new HFileLink
296    *
297    * <p>It also adds a back-reference to the hfile back-reference directory
298    * to simplify the reference-count and the cleaning process.
299    *
300    * @param conf {@link Configuration} to read for the archive directory name
301    * @param fs {@link FileSystem} on which to write the HFileLink
302    * @param dstFamilyPath - Destination path (table/region/cf/)
303    * @param hfileRegionInfo - Linked HFile Region Info
304    * @param hfileName - Linked HFile name
305    * @return true if the file is created, otherwise the file exists.
306    * @throws IOException on file or parent directory creation failure
307    */
308   public static boolean create(final Configuration conf, final FileSystem fs,
309       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
310       final String hfileName) throws IOException {
311     return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true);
312   }
313 
314   /**
315    * Create a new HFileLink
316    *
317    * <p>It also adds a back-reference to the hfile back-reference directory
318    * to simplify the reference-count and the cleaning process.
319    *
320    * @param conf {@link Configuration} to read for the archive directory name
321    * @param fs {@link FileSystem} on which to write the HFileLink
322    * @param dstFamilyPath - Destination path (table/region/cf/)
323    * @param hfileRegionInfo - Linked HFile Region Info
324    * @param hfileName - Linked HFile name
325    * @param createBackRef - Whether back reference should be created. Defaults to true.
326    * @return true if the file is created, otherwise the file exists.
327    * @throws IOException on file or parent directory creation failure
328    */
329   public static boolean create(final Configuration conf, final FileSystem fs,
330       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
331       final String hfileName, final boolean createBackRef) throws IOException {
332     TableName linkedTable = hfileRegionInfo.getTable();
333     String linkedRegion = hfileRegionInfo.getEncodedName();
334     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef);
335   }
336 
337   /**
338    * Create a new HFileLink
339    *
340    * <p>It also adds a back-reference to the hfile back-reference directory
341    * to simplify the reference-count and the cleaning process.
342    *
343    * @param conf {@link Configuration} to read for the archive directory name
344    * @param fs {@link FileSystem} on which to write the HFileLink
345    * @param dstFamilyPath - Destination path (table/region/cf/)
346    * @param linkedTable - Linked Table Name
347    * @param linkedRegion - Linked Region Name
348    * @param hfileName - Linked HFile name
349    * @return true if the file is created, otherwise the file exists.
350    * @throws IOException on file or parent directory creation failure
351    */
352   public static boolean create(final Configuration conf, final FileSystem fs,
353       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
354       final String hfileName) throws IOException {
355     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true);
356   }
357 
358   /**
359    * Create a new HFileLink
360    *
361    * <p>It also adds a back-reference to the hfile back-reference directory
362    * to simplify the reference-count and the cleaning process.
363    *
364    * @param conf {@link Configuration} to read for the archive directory name
365    * @param fs {@link FileSystem} on which to write the HFileLink
366    * @param dstFamilyPath - Destination path (table/region/cf/)
367    * @param linkedTable - Linked Table Name
368    * @param linkedRegion - Linked Region Name
369    * @param hfileName - Linked HFile name
370    * @param createBackRef - Whether back reference should be created. Defaults to true.
371    * @return true if the file is created, otherwise the file exists.
372    * @throws IOException on file or parent directory creation failure
373    */
374   public static boolean create(final Configuration conf, final FileSystem fs,
375       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
376       final String hfileName, final boolean createBackRef) throws IOException {
377     String familyName = dstFamilyPath.getName();
378     String regionName = dstFamilyPath.getParent().getName();
379     String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent())
380         .getNameAsString();
381 
382     String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
383     String refName = createBackReferenceName(tableName, regionName);
384 
385     // Make sure the destination directory exists
386     fs.mkdirs(dstFamilyPath);
387 
388     // Make sure the FileLink reference directory exists
389     Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
390           linkedTable, linkedRegion, familyName);
391     Path backRefPath = null;
392     if (createBackRef) {
393       Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
394       fs.mkdirs(backRefssDir);
395 
396       // Create the reference for the link
397       backRefPath = new Path(backRefssDir, refName);
398       fs.createNewFile(backRefPath);
399     }
400     try {
401       // Create the link
402       return fs.createNewFile(new Path(dstFamilyPath, name));
403     } catch (IOException e) {
404       LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
405       // Revert the reference if the link creation failed
406       if (createBackRef) {
407         fs.delete(backRefPath, false);
408       }
409       throw e;
410     }
411   }
412 
413   /**
414    * Create a new HFileLink starting from a hfileLink name
415    *
416    * <p>It also adds a back-reference to the hfile back-reference directory
417    * to simplify the reference-count and the cleaning process.
418    *
419    * @param conf {@link Configuration} to read for the archive directory name
420    * @param fs {@link FileSystem} on which to write the HFileLink
421    * @param dstFamilyPath - Destination path (table/region/cf/)
422    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
423    * @return true if the file is created, otherwise the file exists.
424    * @throws IOException on file or parent directory creation failure
425    */
426   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
427       final Path dstFamilyPath, final String hfileLinkName)
428           throws IOException {
429     return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true);
430   }
431 
432   /**
433    * Create a new HFileLink starting from a hfileLink name
434    *
435    * <p>It also adds a back-reference to the hfile back-reference directory
436    * to simplify the reference-count and the cleaning process.
437    *
438    * @param conf {@link Configuration} to read for the archive directory name
439    * @param fs {@link FileSystem} on which to write the HFileLink
440    * @param dstFamilyPath - Destination path (table/region/cf/)
441    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
442    * @param createBackRef - Whether back reference should be created. Defaults to true.
443    * @return true if the file is created, otherwise the file exists.
444    * @throws IOException on file or parent directory creation failure
445    */
446   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
447       final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef)
448           throws IOException {
449     Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
450     if (!m.matches()) {
451       throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
452     }
453     return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)),
454         m.group(3), m.group(4), createBackRef);
455   }
456 
457   /**
458    * Create the back reference name
459    */
460   //package-private for testing
461   static String createBackReferenceName(final String tableNameStr,
462                                         final String regionName) {
463 
464     return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
465   }
466 
467   /**
468    * Get the full path of the HFile referenced by the back reference
469    *
470    * @param rootDir root hbase directory
471    * @param linkRefPath Link Back Reference path
472    * @return full path of the referenced hfile
473    * @throws IOException on unexpected error.
474    */
475   public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
476     Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
477     TableName linkTableName = p.getFirst();
478     String linkRegionName = p.getSecond();
479 
480     String hfileName = getBackReferenceFileName(linkRefPath.getParent());
481     Path familyPath = linkRefPath.getParent().getParent();
482     Path regionPath = familyPath.getParent();
483     Path tablePath = regionPath.getParent();
484 
485     String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
486             regionPath.getName(), hfileName);
487     Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
488     Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
489     return new Path(new Path(regionDir, familyPath.getName()), linkName);
490   }
491 
492   static Pair<TableName, String> parseBackReferenceName(String name) {
493     int separatorIndex = name.indexOf('.');
494     String linkRegionName = name.substring(0, separatorIndex);
495     String tableSubstr = name.substring(separatorIndex + 1)
496         .replace('=', TableName.NAMESPACE_DELIM);
497     TableName linkTableName = TableName.valueOf(tableSubstr);
498     return new Pair<TableName, String>(linkTableName, linkRegionName);
499   }
500 
501   /**
502    * Get the full path of the HFile referenced by the back reference
503    *
504    * @param conf {@link Configuration} to read for the archive directory name
505    * @param linkRefPath Link Back Reference path
506    * @return full path of the referenced hfile
507    * @throws IOException on unexpected error.
508    */
509   public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
510       throws IOException {
511     return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
512   }
513 
514 }