View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io;
20  
21  import java.io.IOException;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.FileSystem;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.TableName;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.regionserver.HRegion;
35  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
36  import org.apache.hadoop.hbase.util.FSUtils;
37  import org.apache.hadoop.hbase.util.HFileArchiveUtil;
38  import org.apache.hadoop.hbase.util.Pair;
39  
40  /**
41   * HFileLink describes a link to an hfile.
42   *
43   * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive)
44   * HFileLink allows to access the referenced hfile regardless of the location where it is.
45   *
46   * <p>Searches for hfiles in the following order and locations:
47   * <ul>
48   *  <li>/hbase/table/region/cf/hfile</li>
49   *  <li>/hbase/.archive/table/region/cf/hfile</li>
50   * </ul>
51   *
52   * The link checks first in the original path if it is not present
53   * it fallbacks to the archived path.
54   */
55  @InterfaceAudience.Private
56  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_DOESNT_OVERRIDE_EQUALS",
57    justification="To be fixed but warning suppressed for now")
58  public class HFileLink extends FileLink {
59    private static final Log LOG = LogFactory.getLog(HFileLink.class);
60  
61    /**
62     * A non-capture group, for HFileLink, so that this can be embedded.
63     * The HFileLink describe a link to an hfile in a different table/region
64     * and the name is in the form: table=region-hfile.
65     * <p>
66     * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
67     * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
68     * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
69     * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
70     */
71    public static final String LINK_NAME_REGEX =
72      String.format("(?:(?:%s=)?)%s=%s-%s",
73        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
74        HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);
75  
76    /** Define the HFile Link name parser in the form of: table=region-hfile */
77    //made package private for testing
78    static final Pattern LINK_NAME_PATTERN =
79      Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
80        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
81        HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
82  
83    /**
84     * The pattern should be used for hfile and reference links
85     * that can be found in /hbase/table/region/family/
86     */
87    private static final Pattern REF_OR_HFILE_LINK_PATTERN =
88      Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$",
89        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
90        HRegionInfo.ENCODED_REGION_NAME_REGEX));
91  
92    private final Path archivePath;
93    private final Path originPath;
94    private final Path tempPath;
95  
96    /**
97     * Dead simple hfile link constructor
98     */
99    public HFileLink(final Path originPath, final Path tempPath,
100                    final Path archivePath) {
101     this.tempPath  = tempPath;
102     this.originPath = originPath;
103     this.archivePath = archivePath;
104 
105     setLocations(originPath, tempPath, archivePath);
106   }
107 
108   /**
109    * @param conf {@link Configuration} from which to extract specific archive locations
110    * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
111    * @throws IOException on unexpected error.
112    */
113   public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
114           throws IOException {
115     return buildFromHFileLinkPattern(FSUtils.getRootDir(conf),
116             HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
117   }
118 
119   /**
120    * @param rootDir Path to the root directory where hbase files are stored
121    * @param archiveDir Path to the hbase archive directory
122    * @param hFileLinkPattern The path of the HFile Link.
123    */
124   public final static HFileLink buildFromHFileLinkPattern(final Path rootDir,
125                                                           final Path archiveDir,
126                                                           final Path hFileLinkPattern) {
127     Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
128     Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
129     Path originPath = new Path(rootDir, hfilePath);
130     Path archivePath = new Path(archiveDir, hfilePath);
131     return new HFileLink(originPath, tempPath, archivePath);
132   }
133 
134   /**
135    * Create an HFileLink relative path for the table/region/family/hfile location
136    * @param table Table name
137    * @param region Region Name
138    * @param family Family Name
139    * @param hfile HFile Name
140    * @return the relative Path to open the specified table/region/family/hfile link
141    */
142   public static Path createPath(final TableName table, final String region,
143                                 final String family, final String hfile) {
144     if (HFileLink.isHFileLink(hfile)) {
145       return new Path(family, hfile);
146     }
147     return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
148   }
149 
150   /**
151    * Create an HFileLink instance from table/region/family/hfile location
152    * @param conf {@link Configuration} from which to extract specific archive locations
153    * @param table Table name
154    * @param region Region Name
155    * @param family Family Name
156    * @param hfile HFile Name
157    * @return Link to the file with the specified table/region/family/hfile location
158    * @throws IOException on unexpected error.
159    */
160   public static HFileLink build(final Configuration conf, final TableName table,
161                                  final String region, final String family, final String hfile)
162           throws IOException {
163     return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
164   }
165 
166   /**
167    * @return the origin path of the hfile.
168    */
169   public Path getOriginPath() {
170     return this.originPath;
171   }
172 
173   /**
174    * @return the path of the archived hfile.
175    */
176   public Path getArchivePath() {
177     return this.archivePath;
178   }
179 
180   /**
181    * @param path Path to check.
182    * @return True if the path is a HFileLink.
183    */
184   public static boolean isHFileLink(final Path path) {
185     return isHFileLink(path.getName());
186   }
187 
188 
189   /**
190    * @param fileName File name to check.
191    * @return True if the path is a HFileLink.
192    */
193   public static boolean isHFileLink(String fileName) {
194     Matcher m = LINK_NAME_PATTERN.matcher(fileName);
195     if (!m.matches()) return false;
196     return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
197   }
198 
199   /**
200    * Convert a HFileLink path to a table relative path.
201    * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
202    *      becomes: /hbase/testtb/4567/cf/abcd
203    *
204    * @param path HFileLink path
205    * @return Relative table path
206    * @throws IOException on unexpected error.
207    */
208   private static Path getHFileLinkPatternRelativePath(final Path path) {
209     // table=region-hfile
210     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
211     if (!m.matches()) {
212       throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
213     }
214 
215     // Convert the HFileLink name into a real table/region/cf/hfile path.
216     TableName tableName = TableName.valueOf(m.group(1), m.group(2));
217     String regionName = m.group(3);
218     String hfileName = m.group(4);
219     String familyName = path.getParent().getName();
220     Path tableDir = FSUtils.getTableDir(new Path("./"), tableName);
221     return new Path(tableDir, new Path(regionName, new Path(familyName,
222         hfileName)));
223   }
224 
225   /**
226    * Get the HFile name of the referenced link
227    *
228    * @param fileName HFileLink file name
229    * @return the name of the referenced HFile
230    */
231   public static String getReferencedHFileName(final String fileName) {
232     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
233     if (!m.matches()) {
234       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
235     }
236     return(m.group(4));
237   }
238 
239   /**
240    * Get the Region name of the referenced link
241    *
242    * @param fileName HFileLink file name
243    * @return the name of the referenced Region
244    */
245   public static String getReferencedRegionName(final String fileName) {
246     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
247     if (!m.matches()) {
248       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
249     }
250     return(m.group(3));
251   }
252 
253   /**
254    * Get the Table name of the referenced link
255    *
256    * @param fileName HFileLink file name
257    * @return the name of the referenced Table
258    */
259   public static TableName getReferencedTableName(final String fileName) {
260     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
261     if (!m.matches()) {
262       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
263     }
264     return(TableName.valueOf(m.group(1), m.group(2)));
265   }
266 
267   /**
268    * Create a new HFileLink name
269    *
270    * @param hfileRegionInfo - Linked HFile Region Info
271    * @param hfileName - Linked HFile name
272    * @return file name of the HFile Link
273    */
274   public static String createHFileLinkName(final HRegionInfo hfileRegionInfo,
275       final String hfileName) {
276     return createHFileLinkName(hfileRegionInfo.getTable(),
277             hfileRegionInfo.getEncodedName(), hfileName);
278   }
279 
280   /**
281    * Create a new HFileLink name
282    *
283    * @param tableName - Linked HFile table name
284    * @param regionName - Linked HFile region name
285    * @param hfileName - Linked HFile name
286    * @return file name of the HFile Link
287    */
288   public static String createHFileLinkName(final TableName tableName,
289       final String regionName, final String hfileName) {
290     String s = String.format("%s=%s-%s",
291         tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
292         regionName, hfileName);
293     return s;
294   }
295 
296   /**
297    * Create a new HFileLink
298    *
299    * <p>It also adds a back-reference to the hfile back-reference directory
300    * to simplify the reference-count and the cleaning process.
301    *
302    * @param conf {@link Configuration} to read for the archive directory name
303    * @param fs {@link FileSystem} on which to write the HFileLink
304    * @param dstFamilyPath - Destination path (table/region/cf/)
305    * @param hfileRegionInfo - Linked HFile Region Info
306    * @param hfileName - Linked HFile name
307    * @return true if the file is created, otherwise the file exists.
308    * @throws IOException on file or parent directory creation failure
309    */
310   public static boolean create(final Configuration conf, final FileSystem fs,
311       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
312       final String hfileName) throws IOException {
313     return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true);
314   }
315 
316   /**
317    * Create a new HFileLink
318    *
319    * <p>It also adds a back-reference to the hfile back-reference directory
320    * to simplify the reference-count and the cleaning process.
321    *
322    * @param conf {@link Configuration} to read for the archive directory name
323    * @param fs {@link FileSystem} on which to write the HFileLink
324    * @param dstFamilyPath - Destination path (table/region/cf/)
325    * @param hfileRegionInfo - Linked HFile Region Info
326    * @param hfileName - Linked HFile name
327    * @param createBackRef - Whether back reference should be created. Defaults to true.
328    * @return true if the file is created, otherwise the file exists.
329    * @throws IOException on file or parent directory creation failure
330    */
331   public static boolean create(final Configuration conf, final FileSystem fs,
332       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
333       final String hfileName, final boolean createBackRef) throws IOException {
334     TableName linkedTable = hfileRegionInfo.getTable();
335     String linkedRegion = hfileRegionInfo.getEncodedName();
336     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef);
337   }
338 
339   /**
340    * Create a new HFileLink
341    *
342    * <p>It also adds a back-reference to the hfile back-reference directory
343    * to simplify the reference-count and the cleaning process.
344    *
345    * @param conf {@link Configuration} to read for the archive directory name
346    * @param fs {@link FileSystem} on which to write the HFileLink
347    * @param dstFamilyPath - Destination path (table/region/cf/)
348    * @param linkedTable - Linked Table Name
349    * @param linkedRegion - Linked Region Name
350    * @param hfileName - Linked HFile name
351    * @return true if the file is created, otherwise the file exists.
352    * @throws IOException on file or parent directory creation failure
353    */
354   public static boolean create(final Configuration conf, final FileSystem fs,
355       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
356       final String hfileName) throws IOException {
357     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true);
358   }
359 
360   /**
361    * Create a new HFileLink
362    *
363    * <p>It also adds a back-reference to the hfile back-reference directory
364    * to simplify the reference-count and the cleaning process.
365    *
366    * @param conf {@link Configuration} to read for the archive directory name
367    * @param fs {@link FileSystem} on which to write the HFileLink
368    * @param dstFamilyPath - Destination path (table/region/cf/)
369    * @param linkedTable - Linked Table Name
370    * @param linkedRegion - Linked Region Name
371    * @param hfileName - Linked HFile name
372    * @param createBackRef - Whether back reference should be created. Defaults to true.
373    * @return true if the file is created, otherwise the file exists.
374    * @throws IOException on file or parent directory creation failure
375    */
376   public static boolean create(final Configuration conf, final FileSystem fs,
377       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
378       final String hfileName, final boolean createBackRef) throws IOException {
379     String familyName = dstFamilyPath.getName();
380     String regionName = dstFamilyPath.getParent().getName();
381     String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent())
382         .getNameAsString();
383 
384     String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
385     String refName = createBackReferenceName(tableName, regionName);
386 
387     // Make sure the destination directory exists
388     fs.mkdirs(dstFamilyPath);
389 
390     // Make sure the FileLink reference directory exists
391     Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
392           linkedTable, linkedRegion, familyName);
393     Path backRefPath = null;
394     if (createBackRef) {
395       Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
396       fs.mkdirs(backRefssDir);
397 
398       // Create the reference for the link
399       backRefPath = new Path(backRefssDir, refName);
400       fs.createNewFile(backRefPath);
401     }
402     try {
403       // Create the link
404       return fs.createNewFile(new Path(dstFamilyPath, name));
405     } catch (IOException e) {
406       LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
407       // Revert the reference if the link creation failed
408       if (createBackRef) {
409         fs.delete(backRefPath, false);
410       }
411       throw e;
412     }
413   }
414 
415   /**
416    * Create a new HFileLink starting from a hfileLink name
417    *
418    * <p>It also adds a back-reference to the hfile back-reference directory
419    * to simplify the reference-count and the cleaning process.
420    *
421    * @param conf {@link Configuration} to read for the archive directory name
422    * @param fs {@link FileSystem} on which to write the HFileLink
423    * @param dstFamilyPath - Destination path (table/region/cf/)
424    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
425    * @return true if the file is created, otherwise the file exists.
426    * @throws IOException on file or parent directory creation failure
427    */
428   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
429       final Path dstFamilyPath, final String hfileLinkName)
430           throws IOException {
431     return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true);
432   }
433 
434   /**
435    * Create a new HFileLink starting from a hfileLink name
436    *
437    * <p>It also adds a back-reference to the hfile back-reference directory
438    * to simplify the reference-count and the cleaning process.
439    *
440    * @param conf {@link Configuration} to read for the archive directory name
441    * @param fs {@link FileSystem} on which to write the HFileLink
442    * @param dstFamilyPath - Destination path (table/region/cf/)
443    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
444    * @param createBackRef - Whether back reference should be created. Defaults to true.
445    * @return true if the file is created, otherwise the file exists.
446    * @throws IOException on file or parent directory creation failure
447    */
448   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
449       final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef)
450           throws IOException {
451     Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
452     if (!m.matches()) {
453       throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
454     }
455     return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)),
456         m.group(3), m.group(4), createBackRef);
457   }
458 
459   /**
460    * Create the back reference name
461    */
462   //package-private for testing
463   static String createBackReferenceName(final String tableNameStr,
464                                         final String regionName) {
465 
466     return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
467   }
468 
469   /**
470    * Get the full path of the HFile referenced by the back reference
471    *
472    * @param rootDir root hbase directory
473    * @param linkRefPath Link Back Reference path
474    * @return full path of the referenced hfile
475    */
476   public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
477     Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
478     TableName linkTableName = p.getFirst();
479     String linkRegionName = p.getSecond();
480 
481     String hfileName = getBackReferenceFileName(linkRefPath.getParent());
482     Path familyPath = linkRefPath.getParent().getParent();
483     Path regionPath = familyPath.getParent();
484     Path tablePath = regionPath.getParent();
485 
486     String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
487             regionPath.getName(), hfileName);
488     Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
489     Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
490     return new Path(new Path(regionDir, familyPath.getName()), linkName);
491   }
492 
493   static Pair<TableName, String> parseBackReferenceName(String name) {
494     int separatorIndex = name.indexOf('.');
495     String linkRegionName = name.substring(0, separatorIndex);
496     String tableSubstr = name.substring(separatorIndex + 1)
497         .replace('=', TableName.NAMESPACE_DELIM);
498     TableName linkTableName = TableName.valueOf(tableSubstr);
499     return new Pair<TableName, String>(linkTableName, linkRegionName);
500   }
501 
502   /**
503    * Get the full path of the HFile referenced by the back reference
504    *
505    * @param conf {@link Configuration} to read for the archive directory name
506    * @param linkRefPath Link Back Reference path
507    * @return full path of the referenced hfile
508    * @throws IOException on unexpected error.
509    */
510   public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
511       throws IOException {
512     return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
513   }
514 
515 }