001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io; 019 020import java.io.IOException; 021import java.util.regex.Matcher; 022import java.util.regex.Pattern; 023import org.apache.hadoop.conf.Configuration; 024import org.apache.hadoop.fs.FileSystem; 025import org.apache.hadoop.fs.Path; 026import org.apache.hadoop.hbase.HConstants; 027import org.apache.hadoop.hbase.TableName; 028import org.apache.hadoop.hbase.client.RegionInfo; 029import org.apache.hadoop.hbase.client.RegionInfoBuilder; 030import org.apache.hadoop.hbase.mob.MobConstants; 031import org.apache.hadoop.hbase.regionserver.HRegion; 032import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 033import org.apache.hadoop.hbase.util.CommonFSUtils; 034import org.apache.hadoop.hbase.util.HFileArchiveUtil; 035import org.apache.hadoop.hbase.util.Pair; 036import org.apache.yetus.audience.InterfaceAudience; 037import org.slf4j.Logger; 038import org.slf4j.LoggerFactory; 039 040/** 041 * HFileLink describes a link to an hfile. An hfile can be served from a region or from the hfile 042 * archive directory (/hbase/.archive) HFileLink allows to access the referenced hfile regardless of 043 * the location where it is. 044 * <p> 045 * Searches for hfiles in the following order and locations: 046 * <ul> 047 * <li>/hbase/table/region/cf/hfile</li> 048 * <li>/hbase/.archive/table/region/cf/hfile</li> 049 * </ul> 050 * The link checks first in the original path if it is not present it fallbacks to the archived 051 * path. 052 */ 053@InterfaceAudience.Private 054@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "EQ_DOESNT_OVERRIDE_EQUALS", 055 justification = "To be fixed but warning suppressed for now") 056public class HFileLink extends FileLink { 057 private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class); 058 059 /** 060 * A non-capture group, for HFileLink, so that this can be embedded. The HFileLink describe a link 061 * to an hfile in a different table/region and the name is in the form: table=region-hfile. 062 * <p> 063 * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid 064 * character for the table name. Region name is ([a-f0-9]+), so '-' is an invalid character for 065 * the region name. HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) and 066 * the bulk loaded (_SeqId_[0-9]+_) hfiles. 067 * <p> 068 * Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is table name and 069 * '4567' is region name and 'abcd' is filename. 070 */ 071 public static final String LINK_NAME_REGEX = String.format("(?:(?:%s=)?)%s=%s-%s", 072 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 073 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX); 074 075 /** Define the HFile Link name parser in the form of: table=region-hfile */ 076 public static final Pattern LINK_NAME_PATTERN = 077 Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$", 078 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 079 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX)); 080 081 /** 082 * The pattern should be used for hfile and reference links that can be found in 083 * /hbase/table/region/family/ 084 */ 085 private static final Pattern REF_OR_HFILE_LINK_PATTERN = 086 Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$", TableName.VALID_NAMESPACE_REGEX, 087 TableName.VALID_TABLE_QUALIFIER_REGEX, RegionInfoBuilder.ENCODED_REGION_NAME_REGEX)); 088 089 private final Path archivePath; 090 private final Path originPath; 091 private final Path mobPath; 092 private final Path tempPath; 093 094 /** 095 * Dead simple hfile link constructor 096 */ 097 public HFileLink(final Path originPath, final Path tempPath, final Path mobPath, 098 final Path archivePath) { 099 this.tempPath = tempPath; 100 this.originPath = originPath; 101 this.mobPath = mobPath; 102 this.archivePath = archivePath; 103 setLocations(originPath, tempPath, mobPath, archivePath); 104 } 105 106 /** 107 * @param conf {@link Configuration} from which to extract specific archive locations 108 * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile) 109 * @throws IOException on unexpected error. 110 */ 111 public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern) 112 throws IOException { 113 return buildFromHFileLinkPattern(CommonFSUtils.getRootDir(conf), 114 HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern); 115 } 116 117 /** 118 * @param rootDir Path to the root directory where hbase files are stored 119 * @param archiveDir Path to the hbase archive directory 120 * @param hFileLinkPattern The path of the HFile Link. 121 */ 122 public final static HFileLink buildFromHFileLinkPattern(final Path rootDir, final Path archiveDir, 123 final Path hFileLinkPattern) { 124 Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern); 125 Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath); 126 Path originPath = new Path(rootDir, hfilePath); 127 Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath); 128 Path archivePath = new Path(archiveDir, hfilePath); 129 return new HFileLink(originPath, tempPath, mobPath, archivePath); 130 } 131 132 /** 133 * Create an HFileLink relative path for the table/region/family/hfile location 134 * @param table Table name 135 * @param region Region Name 136 * @param family Family Name 137 * @param hfile HFile Name 138 * @return the relative Path to open the specified table/region/family/hfile link 139 */ 140 public static Path createPath(final TableName table, final String region, final String family, 141 final String hfile) { 142 if (HFileLink.isHFileLink(hfile)) { 143 return new Path(family, hfile); 144 } 145 return new Path(family, HFileLink.createHFileLinkName(table, region, hfile)); 146 } 147 148 /** 149 * Create an HFileLink instance from table/region/family/hfile location 150 * @param conf {@link Configuration} from which to extract specific archive locations 151 * @param table Table name 152 * @param region Region Name 153 * @param family Family Name 154 * @param hfile HFile Name 155 * @return Link to the file with the specified table/region/family/hfile location 156 * @throws IOException on unexpected error. 157 */ 158 public static HFileLink build(final Configuration conf, final TableName table, 159 final String region, final String family, final String hfile) throws IOException { 160 return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile)); 161 } 162 163 /** Returns the origin path of the hfile. */ 164 public Path getOriginPath() { 165 return this.originPath; 166 } 167 168 /** Returns the path of the archived hfile. */ 169 public Path getArchivePath() { 170 return this.archivePath; 171 } 172 173 /** Returns the path of the mob hfiles. */ 174 public Path getMobPath() { 175 return this.mobPath; 176 } 177 178 /** 179 * @param path Path to check. 180 * @return True if the path is a HFileLink. 181 */ 182 public static boolean isHFileLink(final Path path) { 183 return isHFileLink(path.getName()); 184 } 185 186 /** 187 * @param fileName File name to check. 188 * @return True if the path is a HFileLink. 189 */ 190 public static boolean isHFileLink(String fileName) { 191 Matcher m = LINK_NAME_PATTERN.matcher(fileName); 192 if (!m.matches()) { 193 return false; 194 } 195 return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null; 196 } 197 198 /** 199 * Convert a HFileLink path to a table relative path. e.g. the link: 200 * /hbase/test/0123/cf/testtb=4567-abcd becomes: /hbase/testtb/4567/cf/abcd 201 * @param path HFileLink path 202 * @return Relative table path 203 * @throws IOException on unexpected error. 204 */ 205 private static Path getHFileLinkPatternRelativePath(final Path path) { 206 // table=region-hfile 207 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName()); 208 if (!m.matches()) { 209 throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!"); 210 } 211 212 // Convert the HFileLink name into a real table/region/cf/hfile path. 213 TableName tableName = TableName.valueOf(m.group(1), m.group(2)); 214 String regionName = m.group(3); 215 String hfileName = m.group(4); 216 String familyName = path.getParent().getName(); 217 Path tableDir = CommonFSUtils.getTableDir(new Path("./"), tableName); 218 return new Path(tableDir, new Path(regionName, new Path(familyName, hfileName))); 219 } 220 221 /** 222 * Get the HFile name of the referenced link 223 * @param fileName HFileLink file name 224 * @return the name of the referenced HFile 225 */ 226 public static String getReferencedHFileName(final String fileName) { 227 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 228 if (!m.matches()) { 229 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 230 } 231 return (m.group(4)); 232 } 233 234 /** 235 * Get the Region name of the referenced link 236 * @param fileName HFileLink file name 237 * @return the name of the referenced Region 238 */ 239 public static String getReferencedRegionName(final String fileName) { 240 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 241 if (!m.matches()) { 242 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 243 } 244 return (m.group(3)); 245 } 246 247 /** 248 * Get the Table name of the referenced link 249 * @param fileName HFileLink file name 250 * @return the name of the referenced Table 251 */ 252 public static TableName getReferencedTableName(final String fileName) { 253 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 254 if (!m.matches()) { 255 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 256 } 257 return (TableName.valueOf(m.group(1), m.group(2))); 258 } 259 260 /** 261 * Create a new HFileLink name 262 * @param hfileRegionInfo - Linked HFile Region Info 263 * @param hfileName - Linked HFile name 264 * @return file name of the HFile Link 265 */ 266 public static String createHFileLinkName(final RegionInfo hfileRegionInfo, 267 final String hfileName) { 268 return createHFileLinkName(hfileRegionInfo.getTable(), hfileRegionInfo.getEncodedName(), 269 hfileName); 270 } 271 272 /** 273 * Create a new HFileLink name 274 * @param tableName - Linked HFile table name 275 * @param regionName - Linked HFile region name 276 * @param hfileName - Linked HFile name 277 * @return file name of the HFile Link 278 */ 279 public static String createHFileLinkName(final TableName tableName, final String regionName, 280 final String hfileName) { 281 String s = String.format("%s=%s-%s", 282 tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='), regionName, hfileName); 283 return s; 284 } 285 286 /** 287 * Create a new HFileLink 288 * <p> 289 * It also adds a back-reference to the hfile back-reference directory to simplify the 290 * reference-count and the cleaning process. 291 * @param conf {@link Configuration} to read for the archive directory name 292 * @param fs {@link FileSystem} on which to write the HFileLink 293 * @param dstFamilyPath - Destination path (table/region/cf/) 294 * @param hfileRegionInfo - Linked HFile Region Info 295 * @param hfileName - Linked HFile name 296 * @return the file link name. 297 * @throws IOException on file or parent directory creation failure. 298 */ 299 public static String create(final Configuration conf, final FileSystem fs, 300 final Path dstFamilyPath, final RegionInfo hfileRegionInfo, final String hfileName) 301 throws IOException { 302 return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true); 303 } 304 305 /** 306 * Create a new HFileLink 307 * <p> 308 * It also adds a back-reference to the hfile back-reference directory to simplify the 309 * reference-count and the cleaning process. 310 * @param conf {@link Configuration} to read for the archive directory name 311 * @param fs {@link FileSystem} on which to write the HFileLink 312 * @param dstFamilyPath - Destination path (table/region/cf/) 313 * @param hfileRegionInfo - Linked HFile Region Info 314 * @param hfileName - Linked HFile name 315 * @param createBackRef - Whether back reference should be created. Defaults to true. 316 * @return the file link name. 317 * @throws IOException on file or parent directory creation failure. 318 */ 319 public static String create(final Configuration conf, final FileSystem fs, 320 final Path dstFamilyPath, final RegionInfo hfileRegionInfo, final String hfileName, 321 final boolean createBackRef) throws IOException { 322 TableName linkedTable = hfileRegionInfo.getTable(); 323 String linkedRegion = hfileRegionInfo.getEncodedName(); 324 return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef); 325 } 326 327 /** 328 * Create a new HFileLink 329 * <p> 330 * It also adds a back-reference to the hfile back-reference directory to simplify the 331 * reference-count and the cleaning process. 332 * @param conf {@link Configuration} to read for the archive directory name 333 * @param fs {@link FileSystem} on which to write the HFileLink 334 * @param dstFamilyPath - Destination path (table/region/cf/) 335 * @param linkedTable - Linked Table Name 336 * @param linkedRegion - Linked Region Name 337 * @param hfileName - Linked HFile name 338 * @return the file link name. 339 * @throws IOException on file or parent directory creation failure. 340 */ 341 public static String create(final Configuration conf, final FileSystem fs, 342 final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion, 343 final String hfileName) throws IOException { 344 return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true); 345 } 346 347 /** 348 * Create a new HFileLink. In the event of link creation failure, this method throws an 349 * IOException, so that the calling upper laying can decide on how to proceed with this. 350 * <p> 351 * It also adds a back-reference to the hfile back-reference directory to simplify the 352 * reference-count and the cleaning process. 353 * @param conf {@link Configuration} to read for the archive directory name 354 * @param fs {@link FileSystem} on which to write the HFileLink 355 * @param dstFamilyPath - Destination path (table/region/cf/) 356 * @param linkedTable - Linked Table Name 357 * @param linkedRegion - Linked Region Name 358 * @param hfileName - Linked HFile name 359 * @param createBackRef - Whether back reference should be created. Defaults to true. 360 * @return the file link name. 361 * @throws IOException on file or parent directory creation failure. 362 */ 363 public static String create(final Configuration conf, final FileSystem fs, 364 final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion, 365 final String hfileName, final boolean createBackRef) throws IOException { 366 String familyName = dstFamilyPath.getName(); 367 String regionName = dstFamilyPath.getParent().getName(); 368 String tableName = 369 CommonFSUtils.getTableName(dstFamilyPath.getParent().getParent()).getNameAsString(); 370 371 return create(conf, fs, dstFamilyPath, familyName, tableName, regionName, linkedTable, 372 linkedRegion, hfileName, createBackRef); 373 } 374 375 /** 376 * Create a new HFileLink 377 * <p> 378 * It also adds a back-reference to the hfile back-reference directory to simplify the 379 * reference-count and the cleaning process. 380 * @param conf {@link Configuration} to read for the archive directory name 381 * @param fs {@link FileSystem} on which to write the HFileLink 382 * @param dstFamilyPath - Destination path (table/region/cf/) 383 * @param dstTableName - Destination table name 384 * @param dstRegionName - Destination region name 385 * @param linkedTable - Linked Table Name 386 * @param linkedRegion - Linked Region Name 387 * @param hfileName - Linked HFile name 388 * @param createBackRef - Whether back reference should be created. Defaults to true. 389 * @return the file link name. 390 * @throws IOException on file or parent directory creation failure 391 */ 392 public static String create(final Configuration conf, final FileSystem fs, 393 final Path dstFamilyPath, final String familyName, final String dstTableName, 394 final String dstRegionName, final TableName linkedTable, final String linkedRegion, 395 final String hfileName, final boolean createBackRef) throws IOException { 396 String name = createHFileLinkName(linkedTable, linkedRegion, hfileName); 397 String refName = createBackReferenceName(dstTableName, dstRegionName); 398 399 // Make sure the destination directory exists 400 fs.mkdirs(dstFamilyPath); 401 402 // Make sure the FileLink reference directory exists 403 Path archiveStoreDir = 404 HFileArchiveUtil.getStoreArchivePath(conf, linkedTable, linkedRegion, familyName); 405 Path backRefPath = null; 406 if (createBackRef) { 407 Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName); 408 fs.mkdirs(backRefssDir); 409 410 // Create the reference for the link 411 backRefPath = new Path(backRefssDir, refName); 412 fs.createNewFile(backRefPath); 413 } 414 try { 415 // Create the link 416 if (fs.createNewFile(new Path(dstFamilyPath, name))) { 417 return name; 418 } 419 } catch (IOException e) { 420 LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e); 421 // Revert the reference if the link creation failed 422 if (createBackRef) { 423 fs.delete(backRefPath, false); 424 } 425 throw e; 426 } 427 throw new IOException( 428 "File link=" + name + " already exists under " + dstFamilyPath + " folder."); 429 } 430 431 /** 432 * Create a new HFileLink starting from a hfileLink name 433 * <p> 434 * It also adds a back-reference to the hfile back-reference directory to simplify the 435 * reference-count and the cleaning process. 436 * @param conf {@link Configuration} to read for the archive directory name 437 * @param fs {@link FileSystem} on which to write the HFileLink 438 * @param dstFamilyPath - Destination path (table/region/cf/) 439 * @param hfileLinkName - HFileLink name (it contains hfile-region-table) 440 * @param createBackRef - Whether back reference should be created. Defaults to true. 441 * @return the file link name. 442 * @throws IOException on file or parent directory creation failure. 443 */ 444 public static String createFromHFileLink(final Configuration conf, final FileSystem fs, 445 final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef) 446 throws IOException { 447 Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName); 448 if (!m.matches()) { 449 throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!"); 450 } 451 return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)), m.group(3), 452 m.group(4), createBackRef); 453 } 454 455 /** 456 * Create the back reference name 457 */ 458 // package-private for testing 459 static String createBackReferenceName(final String tableNameStr, final String regionName) { 460 461 return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '='); 462 } 463 464 /** 465 * Get the full path of the HFile referenced by the back reference 466 * @param rootDir root hbase directory 467 * @param linkRefPath Link Back Reference path 468 * @return full path of the referenced hfile 469 */ 470 public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) { 471 Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName()); 472 TableName linkTableName = p.getFirst(); 473 String linkRegionName = p.getSecond(); 474 475 String hfileName = getBackReferenceFileName(linkRefPath.getParent()); 476 Path familyPath = linkRefPath.getParent().getParent(); 477 Path regionPath = familyPath.getParent(); 478 Path tablePath = regionPath.getParent(); 479 480 String linkName = 481 createHFileLinkName(CommonFSUtils.getTableName(tablePath), regionPath.getName(), hfileName); 482 Path linkTableDir = CommonFSUtils.getTableDir(rootDir, linkTableName); 483 Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName); 484 return new Path(new Path(regionDir, familyPath.getName()), linkName); 485 } 486 487 public static Pair<TableName, String> parseBackReferenceName(String name) { 488 int separatorIndex = name.indexOf('.'); 489 String linkRegionName = name.substring(0, separatorIndex); 490 String tableSubstr = name.substring(separatorIndex + 1).replace('=', TableName.NAMESPACE_DELIM); 491 TableName linkTableName = TableName.valueOf(tableSubstr); 492 return new Pair<>(linkTableName, linkRegionName); 493 } 494 495 /** 496 * Get the full path of the HFile referenced by the back reference 497 * @param conf {@link Configuration} to read for the archive directory name 498 * @param linkRefPath Link Back Reference path 499 * @return full path of the referenced hfile 500 * @throws IOException on unexpected error. 501 */ 502 public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath) 503 throws IOException { 504 return getHFileFromBackReference(CommonFSUtils.getRootDir(conf), linkRefPath); 505 } 506 507}