View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io;
20  
21  import java.io.IOException;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.FileSystem;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.TableName;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.regionserver.HRegion;
35  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
36  import org.apache.hadoop.hbase.util.FSUtils;
37  import org.apache.hadoop.hbase.util.HFileArchiveUtil;
38  import org.apache.hadoop.hbase.util.Pair;
39  
/**
 * HFileLink describes a link to an hfile.
 *
 * <p>An hfile can be served from a region or from the hfile archive directory (/hbase/.archive).
 * HFileLink allows access to the referenced hfile regardless of where it is currently located.
 *
 * <p>Searches for hfiles in the following order and locations:
 * <ul>
 *  <li>/hbase/table/region/cf/hfile</li>
 *  <li>/hbase/.archive/table/region/cf/hfile</li>
 * </ul>
 *
 * <p>The link first checks the original path; if the hfile is not present there,
 * it falls back to the archived path.
 */
55  @InterfaceAudience.Private
56  public class HFileLink extends FileLink {
57    private static final Log LOG = LogFactory.getLog(HFileLink.class);
58  
59    /**
60     * A non-capture group, for HFileLink, so that this can be embedded.
61     * The HFileLink describe a link to an hfile in a different table/region
62     * and the name is in the form: table=region-hfile.
63     * <p>
64     * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
65     * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
66     * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
67     * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
68     */
69    public static final String LINK_NAME_REGEX =
70      String.format("(?:(?:%s=)?)%s=%s-%s",
71        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
72        HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);
73  
74    /** Define the HFile Link name parser in the form of: table=region-hfile */
75    //made package private for testing
76    static final Pattern LINK_NAME_PATTERN =
77      Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
78        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
79        HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
80  
81    /**
82     * The pattern should be used for hfile and reference links
83     * that can be found in /hbase/table/region/family/
84     */
85    private static final Pattern REF_OR_HFILE_LINK_PATTERN =
86      Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$",
87        TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
88        HRegionInfo.ENCODED_REGION_NAME_REGEX));
89  
90    private final Path archivePath;
91    private final Path originPath;
92    private final Path tempPath;
93  
94    /**
95     * Dead simple hfile link constructor
96     */
97    public HFileLink(final Path originPath, final Path tempPath,
98                     final Path archivePath) {
99      this.tempPath  = tempPath;
100     this.originPath = originPath;
101     this.archivePath = archivePath;
102 
103     setLocations(originPath, tempPath, archivePath);
104   }
105 
106   /**
107    * @param conf {@link Configuration} from which to extract specific archive locations
108    * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
109    * @throws IOException on unexpected error.
110    */
111   public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
112           throws IOException {
113     return buildFromHFileLinkPattern(FSUtils.getRootDir(conf),
114             HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
115   }
116 
117   /**
118    * @param rootDir Path to the root directory where hbase files are stored
119    * @param archiveDir Path to the hbase archive directory
120    * @param hFileLinkPattern The path of the HFile Link.
121    */
122   public final static HFileLink buildFromHFileLinkPattern(final Path rootDir,
123                                                           final Path archiveDir,
124                                                           final Path hFileLinkPattern) {
125     Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
126     Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
127     Path originPath = new Path(rootDir, hfilePath);
128     Path archivePath = new Path(archiveDir, hfilePath);
129     return new HFileLink(originPath, tempPath, archivePath);
130   }
131 
132   /**
133    * Create an HFileLink relative path for the table/region/family/hfile location
134    * @param table Table name
135    * @param region Region Name
136    * @param family Family Name
137    * @param hfile HFile Name
138    * @return the relative Path to open the specified table/region/family/hfile link
139    */
140   public static Path createPath(final TableName table, final String region,
141                                 final String family, final String hfile) {
142     if (HFileLink.isHFileLink(hfile)) {
143       return new Path(family, hfile);
144     }
145     return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
146   }
147 
148   /**
149    * Create an HFileLink instance from table/region/family/hfile location
150    * @param conf {@link Configuration} from which to extract specific archive locations
151    * @param table Table name
152    * @param region Region Name
153    * @param family Family Name
154    * @param hfile HFile Name
155    * @return Link to the file with the specified table/region/family/hfile location
156    * @throws IOException on unexpected error.
157    */
158   public static HFileLink build(final Configuration conf, final TableName table,
159                                  final String region, final String family, final String hfile)
160           throws IOException {
161     return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
162   }
163 
164   /**
165    * @return the origin path of the hfile.
166    */
167   public Path getOriginPath() {
168     return this.originPath;
169   }
170 
171   /**
172    * @return the path of the archived hfile.
173    */
174   public Path getArchivePath() {
175     return this.archivePath;
176   }
177 
178   /**
179    * @param path Path to check.
180    * @return True if the path is a HFileLink.
181    */
182   public static boolean isHFileLink(final Path path) {
183     return isHFileLink(path.getName());
184   }
185 
186 
187   /**
188    * @param fileName File name to check.
189    * @return True if the path is a HFileLink.
190    */
191   public static boolean isHFileLink(String fileName) {
192     Matcher m = LINK_NAME_PATTERN.matcher(fileName);
193     if (!m.matches()) return false;
194     return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
195   }
196 
197   /**
198    * Convert a HFileLink path to a table relative path.
199    * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
200    *      becomes: /hbase/testtb/4567/cf/abcd
201    *
202    * @param path HFileLink path
203    * @return Relative table path
204    * @throws IOException on unexpected error.
205    */
206   private static Path getHFileLinkPatternRelativePath(final Path path) {
207     // table=region-hfile
208     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
209     if (!m.matches()) {
210       throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
211     }
212 
213     // Convert the HFileLink name into a real table/region/cf/hfile path.
214     TableName tableName = TableName.valueOf(m.group(1), m.group(2));
215     String regionName = m.group(3);
216     String hfileName = m.group(4);
217     String familyName = path.getParent().getName();
218     Path tableDir = FSUtils.getTableDir(new Path("./"), tableName);
219     return new Path(tableDir, new Path(regionName, new Path(familyName,
220         hfileName)));
221   }
222 
223   /**
224    * Get the HFile name of the referenced link
225    *
226    * @param fileName HFileLink file name
227    * @return the name of the referenced HFile
228    */
229   public static String getReferencedHFileName(final String fileName) {
230     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
231     if (!m.matches()) {
232       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
233     }
234     return(m.group(4));
235   }
236 
237   /**
238    * Get the Region name of the referenced link
239    *
240    * @param fileName HFileLink file name
241    * @return the name of the referenced Region
242    */
243   public static String getReferencedRegionName(final String fileName) {
244     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
245     if (!m.matches()) {
246       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
247     }
248     return(m.group(3));
249   }
250 
251   /**
252    * Get the Table name of the referenced link
253    *
254    * @param fileName HFileLink file name
255    * @return the name of the referenced Table
256    */
257   public static TableName getReferencedTableName(final String fileName) {
258     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
259     if (!m.matches()) {
260       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
261     }
262     return(TableName.valueOf(m.group(1), m.group(2)));
263   }
264 
265   /**
266    * Create a new HFileLink name
267    *
268    * @param hfileRegionInfo - Linked HFile Region Info
269    * @param hfileName - Linked HFile name
270    * @return file name of the HFile Link
271    */
272   public static String createHFileLinkName(final HRegionInfo hfileRegionInfo,
273       final String hfileName) {
274     return createHFileLinkName(hfileRegionInfo.getTable(),
275             hfileRegionInfo.getEncodedName(), hfileName);
276   }
277 
278   /**
279    * Create a new HFileLink name
280    *
281    * @param tableName - Linked HFile table name
282    * @param regionName - Linked HFile region name
283    * @param hfileName - Linked HFile name
284    * @return file name of the HFile Link
285    */
286   public static String createHFileLinkName(final TableName tableName,
287       final String regionName, final String hfileName) {
288     String s = String.format("%s=%s-%s",
289         tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
290         regionName, hfileName);
291     return s;
292   }
293 
294   /**
295    * Create a new HFileLink
296    *
297    * <p>It also adds a back-reference to the hfile back-reference directory
298    * to simplify the reference-count and the cleaning process.
299    *
300    * @param conf {@link Configuration} to read for the archive directory name
301    * @param fs {@link FileSystem} on which to write the HFileLink
302    * @param dstFamilyPath - Destination path (table/region/cf/)
303    * @param hfileRegionInfo - Linked HFile Region Info
304    * @param hfileName - Linked HFile name
305    * @return true if the file is created, otherwise the file exists.
306    * @throws IOException on file or parent directory creation failure
307    */
308   public static boolean create(final Configuration conf, final FileSystem fs,
309       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
310       final String hfileName) throws IOException {
311     TableName linkedTable = hfileRegionInfo.getTable();
312     String linkedRegion = hfileRegionInfo.getEncodedName();
313     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName);
314   }
315 
316   /**
317    * Create a new HFileLink
318    *
319    * <p>It also adds a back-reference to the hfile back-reference directory
320    * to simplify the reference-count and the cleaning process.
321    *
322    * @param conf {@link Configuration} to read for the archive directory name
323    * @param fs {@link FileSystem} on which to write the HFileLink
324    * @param dstFamilyPath - Destination path (table/region/cf/)
325    * @param linkedTable - Linked Table Name
326    * @param linkedRegion - Linked Region Name
327    * @param hfileName - Linked HFile name
328    * @return true if the file is created, otherwise the file exists.
329    * @throws IOException on file or parent directory creation failure
330    */
331   public static boolean create(final Configuration conf, final FileSystem fs,
332       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
333       final String hfileName) throws IOException {
334     String familyName = dstFamilyPath.getName();
335     String regionName = dstFamilyPath.getParent().getName();
336     String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent())
337         .getNameAsString();
338 
339     String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
340     String refName = createBackReferenceName(tableName, regionName);
341 
342     // Make sure the destination directory exists
343     fs.mkdirs(dstFamilyPath);
344 
345     // Make sure the FileLink reference directory exists
346     Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
347           linkedTable, linkedRegion, familyName);
348     Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
349     fs.mkdirs(backRefssDir);
350 
351     // Create the reference for the link
352     Path backRefPath = new Path(backRefssDir, refName);
353     fs.createNewFile(backRefPath);
354     try {
355       // Create the link
356       return fs.createNewFile(new Path(dstFamilyPath, name));
357     } catch (IOException e) {
358       LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
359       // Revert the reference if the link creation failed
360       fs.delete(backRefPath, false);
361       throw e;
362     }
363   }
364 
365   /**
366    * Create a new HFileLink starting from a hfileLink name
367    *
368    * <p>It also adds a back-reference to the hfile back-reference directory
369    * to simplify the reference-count and the cleaning process.
370    *
371    * @param conf {@link Configuration} to read for the archive directory name
372    * @param fs {@link FileSystem} on which to write the HFileLink
373    * @param dstFamilyPath - Destination path (table/region/cf/)
374    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
375    * @return true if the file is created, otherwise the file exists.
376    * @throws IOException on file or parent directory creation failure
377    */
378   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
379       final Path dstFamilyPath, final String hfileLinkName) throws IOException {
380     Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
381     if (!m.matches()) {
382       throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
383     }
384     return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)),
385         m.group(3), m.group(4));
386   }
387 
388   /**
389    * Create the back reference name
390    */
391   //package-private for testing
392   static String createBackReferenceName(final String tableNameStr,
393                                         final String regionName) {
394 
395     return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
396   }
397 
398   /**
399    * Get the full path of the HFile referenced by the back reference
400    *
401    * @param rootDir root hbase directory
402    * @param linkRefPath Link Back Reference path
403    * @return full path of the referenced hfile
404    * @throws IOException on unexpected error.
405    */
406   public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
407     Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
408     TableName linkTableName = p.getFirst();
409     String linkRegionName = p.getSecond();
410 
411     String hfileName = getBackReferenceFileName(linkRefPath.getParent());
412     Path familyPath = linkRefPath.getParent().getParent();
413     Path regionPath = familyPath.getParent();
414     Path tablePath = regionPath.getParent();
415 
416     String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
417             regionPath.getName(), hfileName);
418     Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
419     Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
420     return new Path(new Path(regionDir, familyPath.getName()), linkName);
421   }
422 
423   static Pair<TableName, String> parseBackReferenceName(String name) {
424     int separatorIndex = name.indexOf('.');
425     String linkRegionName = name.substring(0, separatorIndex);
426     String tableSubstr = name.substring(separatorIndex + 1)
427         .replace('=', TableName.NAMESPACE_DELIM);
428     TableName linkTableName = TableName.valueOf(tableSubstr);
429     return new Pair<TableName, String>(linkTableName, linkRegionName);
430   }
431 
432   /**
433    * Get the full path of the HFile referenced by the back reference
434    *
435    * @param conf {@link Configuration} to read for the archive directory name
436    * @param linkRefPath Link Back Reference path
437    * @return full path of the referenced hfile
438    * @throws IOException on unexpected error.
439    */
440   public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
441       throws IOException {
442     return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
443   }
444 
445 }