View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io;
20  
21  import java.io.IOException;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.apache.hadoop.classification.InterfaceAudience;
28  import org.apache.hadoop.conf.Configuration;
29  import org.apache.hadoop.fs.FileSystem;
30  import org.apache.hadoop.fs.Path;
31  import org.apache.hadoop.hbase.HConstants;
32  import org.apache.hadoop.hbase.HRegionInfo;
33  import org.apache.hadoop.hbase.HTableDescriptor;
34  import org.apache.hadoop.hbase.regionserver.HRegion;
35  import org.apache.hadoop.hbase.regionserver.StoreFile;
36  import org.apache.hadoop.hbase.util.FSUtils;
37  import org.apache.hadoop.hbase.util.HFileArchiveUtil;
38  
/**
 * HFileLink describes a link to an hfile.
 *
 * <p>An hfile can be served from a region or from the hfile archive directory
 * (/hbase/.archive). HFileLink allows the referenced hfile to be accessed
 * regardless of where it currently is.
 *
 * <p>The following locations are registered for the link, in this order:
 * <ul>
 *  <li>/hbase/table/region/cf/hfile</li>
 *  <li>the same table/region/cf/hfile path under the hbase temporary directory
 *      ({@link HConstants#HBASE_TEMP_DIRECTORY})</li>
 *  <li>/hbase/.archive/table/region/cf/hfile</li>
 * </ul>
 *
 * <p>The link first checks the original path; if the hfile is not present there,
 * it falls back to the other locations, ending with the archived path.
 */
54  @InterfaceAudience.Private
55  public class HFileLink extends FileLink {
56    private static final Log LOG = LogFactory.getLog(HFileLink.class);
57  
58    /**
59     * A non-capture group, for HFileLink, so that this can be embedded.
60     * The HFileLink describe a link to an hfile in a different table/region
61     * and the name is in the form: table=region-hfile.
62     * <p>
63     * Table name is ([a-zA-Z_0-9][a-zA-Z_0-9.-]*), so '=' is an invalid character for the table name.
64     * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
65     * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
66     * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
67     */
68    public static final String LINK_NAME_REGEX =
69      String.format("%s=%s-%s", HTableDescriptor.VALID_USER_TABLE_REGEX,
70        HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFile.HFILE_NAME_REGEX);
71  
72    /** Define the HFile Link name parser in the form of: table=region-hfile */
73    private static final Pattern LINK_NAME_PATTERN =
74      Pattern.compile(String.format("^(%s)=(%s)-(%s)$", HTableDescriptor.VALID_USER_TABLE_REGEX,
75        HRegionInfo.ENCODED_REGION_NAME_REGEX, StoreFile.HFILE_NAME_REGEX));
76  
77    /**
78     * The pattern should be used for hfile and reference links
79     * that can be found in /hbase/table/region/family/
80     */
81    private static final Pattern REF_OR_HFILE_LINK_PATTERN =
82      Pattern.compile(String.format("^(%s)=(%s)-(.+)$", HTableDescriptor.VALID_USER_TABLE_REGEX,
83        HRegionInfo.ENCODED_REGION_NAME_REGEX));
84  
85    private final Path archivePath;
86    private final Path originPath;
87    private final Path tempPath;
88  
89    /**
90     * @param conf {@link Configuration} from which to extract specific archive locations
91     * @param path The path of the HFile Link.
92     * @throws IOException on unexpected error.
93     */
94    public HFileLink(Configuration conf, Path path) throws IOException {
95      this(FSUtils.getRootDir(conf), HFileArchiveUtil.getArchivePath(conf), path);
96    }
97  
98    /**
99     * @param rootDir Path to the root directory where hbase files are stored
100    * @param archiveDir Path to the hbase archive directory
101    * @param path The path of the HFile Link.
102    */
103   public HFileLink(final Path rootDir, final Path archiveDir, final Path path) {
104     Path hfilePath = getRelativeTablePath(path);
105     this.tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
106     this.originPath = new Path(rootDir, hfilePath);
107     this.archivePath = new Path(archiveDir, hfilePath);
108     setLocations(originPath, tempPath, archivePath);
109   }
110 
111   /**
112    * Create an HFileLink relative path for the table/region/family/hfile location
113    * @param table Table name
114    * @param region Region Name
115    * @param family Family Name
116    * @param hfile HFile Name
117    * @return the relative Path to open the specified table/region/family/hfile link
118    */
119   public static Path createPath(final String table, final String region,
120       final String family, final String hfile) {
121     if (HFileLink.isHFileLink(hfile)) {
122       return new Path(family, hfile);
123     }
124     return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
125   }
126 
127   /**
128    * Create an HFileLink instance from table/region/family/hfile location
129    * @param conf {@link Configuration} from which to extract specific archive locations
130    * @param table Table name
131    * @param region Region Name
132    * @param family Family Name
133    * @param hfile HFile Name
134    * @return Link to the file with the specified table/region/family/hfile location
135    * @throws IOException on unexpected error.
136    */
137   public static HFileLink create(final Configuration conf, final String table,
138       final String region, final String family, final String hfile) throws IOException {
139     return new HFileLink(conf, createPath(table, region, family, hfile));
140   }
141 
142   /**
143    * @return the origin path of the hfile.
144    */
145   public Path getOriginPath() {
146     return this.originPath;
147   }
148 
149   /**
150    * @return the path of the archived hfile.
151    */
152   public Path getArchivePath() {
153     return this.archivePath;
154   }
155 
156   /**
157    * @param path Path to check.
158    * @return True if the path is a HFileLink.
159    */
160   public static boolean isHFileLink(final Path path) {
161     return isHFileLink(path.getName());
162   }
163 
164   /**
165    * @param fileName File name to check.
166    * @return True if the path is a HFileLink.
167    */
168   public static boolean isHFileLink(String fileName) {
169     Matcher m = LINK_NAME_PATTERN.matcher(fileName);
170     if (!m.matches()) return false;
171 
172     return m.groupCount() > 2 && m.group(3) != null && m.group(2) != null && m.group(1) != null;
173   }
174 
175   /**
176    * Convert a HFileLink path to a table relative path.
177    * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
178    *      becomes: /hbase/testtb/4567/cf/abcd
179    *
180    * @param path HFileLink path
181    * @return Relative table path
182    * @throws IOException on unexpected error.
183    */
184   private static Path getRelativeTablePath(final Path path) {
185     // table=region-hfile
186     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
187     if (!m.matches()) {
188       throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink name!");
189     }
190 
191     // Convert the HFileLink name into a real table/region/cf/hfile path.
192     String tableName = m.group(1);
193     String regionName = m.group(2);
194     String hfileName = m.group(3);
195     String familyName = path.getParent().getName();
196     return new Path(new Path(tableName, regionName), new Path(familyName, hfileName));
197   }
198 
199   /**
200    * Get the HFile name of the referenced link
201    *
202    * @param fileName HFileLink file name
203    * @return the name of the referenced HFile
204    */
205   public static String getReferencedHFileName(final String fileName) {
206     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
207     if (!m.matches()) {
208       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
209     }
210     return(m.group(3));
211   }
212 
213   /**
214    * Get the Region name of the referenced link
215    *
216    * @param fileName HFileLink file name
217    * @return the name of the referenced Region
218    */
219   public static String getReferencedRegionName(final String fileName) {
220     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
221     if (!m.matches()) {
222       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
223     }
224     return(m.group(2));
225   }
226 
227   /**
228    * Get the Table name of the referenced link
229    *
230    * @param fileName HFileLink file name
231    * @return the name of the referenced Table
232    */
233   public static String getReferencedTableName(final String fileName) {
234     Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
235     if (!m.matches()) {
236       throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
237     }
238     return(m.group(1));
239   }
240 
241   /**
242    * Returns true if the HFileLink exists
243    */
244   public boolean exists(final FileSystem fs) throws IOException {
245     return fs.exists(this.originPath) ||
246            fs.exists(this.tempPath) ||
247            fs.exists(this.archivePath);
248   }
249 
250   /**
251    * Create a new HFileLink name
252    *
253    * @param hfileRegionInfo - Linked HFile Region Info
254    * @param hfileName - Linked HFile name
255    * @return file name of the HFile Link
256    */
257   public static String createHFileLinkName(final HRegionInfo hfileRegionInfo,
258       final String hfileName) {
259     return createHFileLinkName(hfileRegionInfo.getTableNameAsString(),
260                       hfileRegionInfo.getEncodedName(), hfileName);
261   }
262 
263   /**
264    * Create a new HFileLink name
265    *
266    * @param tableName - Linked HFile table name
267    * @param regionName - Linked HFile region name
268    * @param hfileName - Linked HFile name
269    * @return file name of the HFile Link
270    */
271   public static String createHFileLinkName(final String tableName,
272       final String regionName, final String hfileName) {
273     return String.format("%s=%s-%s", tableName, regionName, hfileName);
274   }
275 
276   /**
277    * Create a new HFileLink
278    *
279    * <p>It also adds a back-reference to the hfile back-reference directory
280    * to simplify the reference-count and the cleaning process.
281    *
282    * @param conf {@link Configuration} to read for the archive directory name
283    * @param fs {@link FileSystem} on which to write the HFileLink
284    * @param dstFamilyPath - Destination path (table/region/cf/)
285    * @param hfileRegionInfo - Linked HFile Region Info
286    * @param hfileName - Linked HFile name
287    * @return true if the file is created, otherwise the file exists.
288    * @throws IOException on file or parent directory creation failure
289    */
290   public static boolean create(final Configuration conf, final FileSystem fs,
291       final Path dstFamilyPath, final HRegionInfo hfileRegionInfo,
292       final String hfileName) throws IOException {
293     String linkedTable = hfileRegionInfo.getTableNameAsString();
294     String linkedRegion = hfileRegionInfo.getEncodedName();
295     return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName);
296   }
297 
298   /**
299    * Create a new HFileLink
300    *
301    * <p>It also adds a back-reference to the hfile back-reference directory
302    * to simplify the reference-count and the cleaning process.
303    *
304    * @param conf {@link Configuration} to read for the archive directory name
305    * @param fs {@link FileSystem} on which to write the HFileLink
306    * @param dstFamilyPath - Destination path (table/region/cf/)
307    * @param linkedTable - Linked Table Name
308    * @param linkedRegion - Linked Region Name
309    * @param hfileName - Linked HFile name
310    * @return true if the file is created, otherwise the file exists.
311    * @throws IOException on file or parent directory creation failure
312    */
313   public static boolean create(final Configuration conf, final FileSystem fs,
314       final Path dstFamilyPath, final String linkedTable, final String linkedRegion,
315       final String hfileName) throws IOException {
316     String familyName = dstFamilyPath.getName();
317     String regionName = dstFamilyPath.getParent().getName();
318     String tableName = dstFamilyPath.getParent().getParent().getName();
319 
320     String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
321     String refName = createBackReferenceName(tableName, regionName);
322 
323     // Make sure the destination directory exists
324     fs.mkdirs(dstFamilyPath);
325 
326     // Make sure the FileLink reference directory exists
327     Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
328           linkedTable, linkedRegion, familyName);
329     Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName);
330     fs.mkdirs(backRefssDir);
331 
332     // Create the reference for the link
333     Path backRefPath = new Path(backRefssDir, refName);
334     fs.createNewFile(backRefPath);
335     try {
336       // Create the link
337       return fs.createNewFile(new Path(dstFamilyPath, name));
338     } catch (IOException e) {
339       LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e);
340       // Revert the reference if the link creation failed
341       fs.delete(backRefPath, false);
342       throw e;
343     }
344   }
345 
346   /**
347    * Create a new HFileLink starting from a hfileLink name
348    *
349    * <p>It also adds a back-reference to the hfile back-reference directory
350    * to simplify the reference-count and the cleaning process.
351    *
352    * @param conf {@link Configuration} to read for the archive directory name
353    * @param fs {@link FileSystem} on which to write the HFileLink
354    * @param dstFamilyPath - Destination path (table/region/cf/)
355    * @param hfileLinkName - HFileLink name (it contains hfile-region-table)
356    * @return true if the file is created, otherwise the file exists.
357    * @throws IOException on file or parent directory creation failure
358    */
359   public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
360       final Path dstFamilyPath, final String hfileLinkName) throws IOException {
361     Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
362     if (!m.matches()) {
363       throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
364     }
365     return create(conf, fs, dstFamilyPath, m.group(1), m.group(2), m.group(3));
366   }
367 
368   /**
369    * Create the back reference name
370    */
371   private static String createBackReferenceName(final String tableName, final String regionName) {
372     return regionName + "." + tableName;
373   }
374 
375   /**
376    * Get the full path of the HFile referenced by the back reference
377    *
378    * @param rootDir root hbase directory
379    * @param linkRefPath Link Back Reference path
380    * @return full path of the referenced hfile
381    * @throws IOException on unexpected error.
382    */
383   public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
384     int separatorIndex = linkRefPath.getName().indexOf('.');
385     String linkRegionName = linkRefPath.getName().substring(0, separatorIndex);
386     String linkTableName = linkRefPath.getName().substring(separatorIndex + 1);
387     String hfileName = getBackReferenceFileName(linkRefPath.getParent());
388     Path familyPath = linkRefPath.getParent().getParent();
389     Path regionPath = familyPath.getParent();
390     Path tablePath = regionPath.getParent();
391 
392     String linkName = createHFileLinkName(tablePath.getName(), regionPath.getName(), hfileName);
393     Path linkTableDir = FSUtils.getTablePath(rootDir, linkTableName);
394     Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
395     return new Path(new Path(regionDir, familyPath.getName()), linkName);
396   }
397 
398   /**
399    * Get the full path of the HFile referenced by the back reference
400    *
401    * @param conf {@link Configuration} to read for the archive directory name
402    * @param linkRefPath Link Back Reference path
403    * @return full path of the referenced hfile
404    * @throws IOException on unexpected error.
405    */
406   public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
407       throws IOException {
408     return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
409   }
410 }