View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io;
20  
21  import java.io.IOException;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.FileSystem;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.TableName;
32  import org.apache.hadoop.hbase.HConstants;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.regionserver.HRegion;
35  import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
36  import org.apache.hadoop.hbase.util.FSUtils;
37  import org.apache.hadoop.hbase.util.HFileArchiveUtil;
38  import org.apache.hadoop.hbase.util.Pair;
39  
40  /**
41   * HFileLink describes a link to an hfile.
42   *
43   * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive)
44   * HFileLink allows to access the referenced hfile regardless of the location where it is.
45   *
46   * <p>Searches for hfiles in the following order and locations:
47   * <ul>
48   *  <li>/hbase/table/region/cf/hfile</li>
49   *  <li>/hbase/.archive/table/region/cf/hfile</li>
50   * </ul>
51   *
52   * The link checks the original path first; if the hfile is not present there,
53   * it falls back to the archived path.
54   */
55  @InterfaceAudience.Private
56  public class HFileLink extends FileLink {
  /** Logger used by the static helpers of this class. */
  private static final Log LOG = LogFactory.getLog(HFileLink.class);

  /**
   * Regular expression for an HFileLink name, meant to be embedded in larger expressions:
   * the format string used here only adds non-capturing groups.
   * The HFileLink describe a link to an hfile in a different table/region
   * and the name is in the form: [namespace=]table=region-hfile
   * (the "namespace=" prefix is optional).
   * <p>
   * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
   * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
   * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
   * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
   */
  public static final String LINK_NAME_REGEX =
    String.format("(?:(?:%s=)?)%s=%s-%s",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);

  /**
   * Anchored HFile Link name parser for names in the form of: [namespace=]table=region-hfile.
   * The methods of this class read the capturing groups as:
   * 1 = namespace (absent when the prefix is missing), 2 = table, 3 = region, 4 = hfile.
   */
  // Package-private (instead of private) so that tests can reach it.
  static final Pattern LINK_NAME_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));
80  
  /**
   * The pattern should be used for hfile and reference links
   * that can be found in /hbase/table/region/family/
   * <p>
   * It starts with the same [namespace=]table=region- prefix as an HFileLink name, but its
   * last group accepts any non-empty remainder instead of a strict hfile name.
   */
  private static final Pattern REF_OR_HFILE_LINK_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      HRegionInfo.ENCODED_REGION_NAME_REGEX));
89  
  /** Location of the referenced hfile under the archive directory. */
  private final Path archivePath;
  /** Location of the referenced hfile under the hbase root directory. */
  private final Path originPath;
  /** Location of the referenced hfile under the temp directory of the hbase root directory. */
  private final Path tempPath;
93  
94    /**
95     * @param conf {@link Configuration} from which to extract specific archive locations
96     * @param path The path of the HFile Link.
97     * @throws IOException on unexpected error.
98     */
99    public HFileLink(Configuration conf, Path path) throws IOException {
100     this(FSUtils.getRootDir(conf), HFileArchiveUtil.getArchivePath(conf), path);
101   }
102 
103   /**
104    * @param rootDir Path to the root directory where hbase files are stored
105    * @param archiveDir Path to the hbase archive directory
106    * @param path The path of the HFile Link.
107    */
108   public HFileLink(final Path rootDir, final Path archiveDir, final Path path) {
109     Path hfilePath = getRelativeTablePath(path);
110     this.tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
111     this.originPath = new Path(rootDir, hfilePath);
112     this.archivePath = new Path(archiveDir, hfilePath);
113     setLocations(originPath, tempPath, archivePath);
114   }
115 
116   /**
117    * Create an HFileLink relative path for the table/region/family/hfile location
118    * @param table Table name
119    * @param region Region Name
120    * @param family Family Name
121    * @param hfile HFile Name
122    * @return the relative Path to open the specified table/region/family/hfile link
123    */
124   public static Path createPath(final TableName table, final String region,
125       final String family, final String hfile) {
126     if (HFileLink.isHFileLink(hfile)) {
127       return new Path(family, hfile);
128     }
129     return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
130   }
131 
132   /**
133    * Create an HFileLink instance from table/region/family/hfile location
134    * @param conf {@link Configuration} from which to extract specific archive locations
135    * @param table Table name
136    * @param region Region Name
137    * @param family Family Name
138    * @param hfile HFile Name
139    * @return Link to the file with the specified table/region/family/hfile location
140    * @throws IOException on unexpected error.
141    */
142   public static HFileLink create(final Configuration conf, final TableName table,
143       final String region, final String family, final String hfile) throws IOException {
144     return new HFileLink(conf, createPath(table, region, family, hfile));
145   }
146 
147   /**
148    * @return the origin path of the hfile.
149    */
150   public Path getOriginPath() {
151     return this.originPath;
152   }
153 
154   /**
155    * @return the path of the archived hfile.
156    */
157   public Path getArchivePath() {
158     return this.archivePath;
159   }
160 
161   /**
162    * @param path Path to check.
163    * @return True if the path is a HFileLink.
164    */
165   public static boolean isHFileLink(final Path path) {
166     return isHFileLink(path.getName());
167   }
168 
169 
170   /**
171    * @param fileName File name to check.
172    * @return True if the path is a HFileLink.
173    */
174   public static boolean isHFileLink(String fileName) {
175     Matcher m = LINK_NAME_PATTERN.matcher(fileName);
176     if (!m.matches()) return false;
177     return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
178   }
179 
180   /**
181    * Convert a HFileLink path to a table relative path.
182    * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
183    *      becomes: /hbase/testtb/4567/cf/abcd
184    *
185    * @param path HFileLink path
186    * @return Relative table path
187    * @throws IOException on unexpected error.
188    */
189   private static Path getRelativeTablePath(final Path path) {
190     // table=region-hfile
191     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
192     if (!m.matches()) {
193       throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink name!");
194     }
195 
196     // Convert the HFileLink name into a real table/region/cf/hfile path.
197     TableName tableName = TableName.valueOf(m.group(1), m.group(2));
198     String regionName = m.group(3);
199     String hfileName = m.group(4);
200     String familyName = path.getParent().getName();
201     Path tableDir = FSUtils.getTableDir(new Path("./"), tableName);
202     return new Path(tableDir, new Path(regionName, new Path(familyName,
203         hfileName)));
204   }
205 
206   /**
207    * Get the HFile name of the referenced link
208    *
209    * @param fileName HFileLink file name
210    * @return the name of the referenced HFile
211    */
212   public static String getReferencedHFileName(final String fileName) {
213     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
214     if (!m.matches()) {
215       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
216     }
217     return(m.group(4));
218   }
219 
220   /**
221    * Get the Region name of the referenced link
222    *
223    * @param fileName HFileLink file name
224    * @return the name of the referenced Region
225    */
226   public static String getReferencedRegionName(final String fileName) {
227     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
228     if (!m.matches()) {
229       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
230     }
231     return(m.group(3));
232   }
233 
234   /**
235    * Get the Table name of the referenced link
236    *
237    * @param fileName HFileLink file name
238    * @return the name of the referenced Table
239    */
240   public static TableName getReferencedTableName(final String fileName) {
241     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
242     if (!m.matches()) {
243       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
244     }
245     return(TableName.valueOf(m.group(1), m.group(2)));
246   }
247 
248   /**
249    * Returns true if the HFileLink exists
250    */
251   public boolean exists(final FileSystem fs) throws IOException {
252     return fs.exists(this.originPath) ||
253            fs.exists(this.tempPath) ||
254            fs.exists(this.archivePath);
255   }
256 
257   /**
258    * Create a new HFileLink name
259    *
260    * @param hfileRegionInfo - Linked HFile Region Info
261    * @param hfileName - Linked HFile name
262    * @return file name of the HFile Link
263    */
264   public static String createHFileLinkName(final HRegionInfo hfileRegionInfo,
265       final String hfileName) {
266     return createHFileLinkName(hfileRegionInfo.getTable(),
267                       hfileRegionInfo.getEncodedName(), hfileName);
268   }
269 
270   /**
271    * Create a new HFileLink name
272    *
273    * @param tableName - Linked HFile table name
274    * @param regionName - Linked HFile region name
275    * @param hfileName - Linked HFile name
276    * @return file name of the HFile Link
277    */
278   public static String createHFileLinkName(final TableName tableName,
279       final String regionName, final String hfileName) {
280     String s = String.format("%s=%s-%s",
281         tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
282         regionName, hfileName);
283     return s;
284   }
285 
286   /**
287    * Create a new HFileLink
288    *
289    * <p>It also adds a back-reference to the hfile back-reference directory
290    * to simplify the reference-count and the cleaning process.
291    *
292    * @param conf {@link Configuration} to read for the archive directory name
293    * @param fs {@link FileSystem} on which to write the HFileLink
294    * @param dstFamilyPath - Destination path (table/region/cf/)
295    * @param hfileRegionInfo - Linked HFile Region Info
296    * @param hfileName - Linked HFile name
297    * @return true if the file is created, otherwise the file exists.
298    * @throws IOException on file or parent directory creation failure
299    */
300   public static boolean create(final Configuration conf, final FileSystem fs,
301       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
302       final String hfileName) throws IOException {
303     TableName linkedTable = hfileRegionInfo.getTable();
304     String linkedRegion = hfileRegionInfo.getEncodedName();
305     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName);
306   }
307 
308   /**
309    * Create a new HFileLink
310    *
311    * <p>It also adds a back-reference to the hfile back-reference directory
312    * to simplify the reference-count and the cleaning process.
313    *
314    * @param conf {@link Configuration} to read for the archive directory name
315    * @param fs {@link FileSystem} on which to write the HFileLink
316    * @param dstFamilyPath - Destination path (table/region/cf/)
317    * @param linkedTable - Linked Table Name
318    * @param linkedRegion - Linked Region Name
319    * @param hfileName - Linked HFile name
320    * @return true if the file is created, otherwise the file exists.
321    * @throws IOException on file or parent directory creation failure
322    */
323   public static boolean create(final Configuration conf, final FileSystem fs,
324       final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
325       final String hfileName) throws IOException {
326     String familyName = dstFamilyPath.getName();
327     String regionName = dstFamilyPath.getParent().getName();
328     String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent())
329         .getNameAsString();
330 
331     String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
332     String refName = createBackReferenceName(tableName, regionName);
333 
334     // Make sure the destination directory exists
335     fs.mkdirs(dstFamilyPath);
336 
337     // Make sure the FileLink reference directory exists
338     Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
339           linkedTable, linkedRegion, familyName);
340     Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
341     fs.mkdirs(backRefssDir);
342 
343     // Create the reference for the link
344     Path backRefPath = new Path(backRefssDir, refName);
345     fs.createNewFile(backRefPath);
346     try {
347       // Create the link
348       return fs.createNewFile(new Path(dstFamilyPath, name));
349     } catch (IOException e) {
350       LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
351       // Revert the reference if the link creation failed
352       fs.delete(backRefPath, false);
353       throw e;
354     }
355   }
356 
357   /**
358    * Create a new HFileLink starting from a hfileLink name
359    *
360    * <p>It also adds a back-reference to the hfile back-reference directory
361    * to simplify the reference-count and the cleaning process.
362    *
363    * @param conf {@link Configuration} to read for the archive directory name
364    * @param fs {@link FileSystem} on which to write the HFileLink
365    * @param dstFamilyPath - Destination path (table/region/cf/)
366    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
367    * @return true if the file is created, otherwise the file exists.
368    * @throws IOException on file or parent directory creation failure
369    */
370   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
371       final Path dstFamilyPath, final String hfileLinkName) throws IOException {
372     Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
373     if (!m.matches()) {
374       throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
375     }
376     return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)),
377         m.group(3), m.group(4));
378   }
379 
380   /**
381    * Create the back reference name
382    */
383   //package-private for testing
384   static String createBackReferenceName(final String tableNameStr,
385                                         final String regionName) {
386 
387     return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
388   }
389 
390   /**
391    * Get the full path of the HFile referenced by the back reference
392    *
393    * @param rootDir root hbase directory
394    * @param linkRefPath Link Back Reference path
395    * @return full path of the referenced hfile
396    * @throws IOException on unexpected error.
397    */
398   public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
399     Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
400     TableName linkTableName = p.getFirst();
401     String linkRegionName = p.getSecond();
402 
403     String hfileName = getBackReferenceFileName(linkRefPath.getParent());
404     Path familyPath = linkRefPath.getParent().getParent();
405     Path regionPath = familyPath.getParent();
406     Path tablePath = regionPath.getParent();
407 
408     String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
409         regionPath.getName(), hfileName);
410     Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
411     Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
412     return new Path(new Path(regionDir, familyPath.getName()), linkName);
413   }
414 
415   static Pair<TableName, String> parseBackReferenceName(String name) {
416     int separatorIndex = name.indexOf('.');
417     String linkRegionName = name.substring(0, separatorIndex);
418     String tableSubstr = name.substring(separatorIndex + 1)
419         .replace('=', TableName.NAMESPACE_DELIM);
420     TableName linkTableName = TableName.valueOf(tableSubstr);
421     return new Pair<TableName, String>(linkTableName, linkRegionName);
422   }
423 
424   /**
425    * Get the full path of the HFile referenced by the back reference
426    *
427    * @param conf {@link Configuration} to read for the archive directory name
428    * @param linkRefPath Link Back Reference path
429    * @return full path of the referenced hfile
430    * @throws IOException on unexpected error.
431    */
432   public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
433       throws IOException {
434     return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
435   }
436 
437 }