001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hbase.io; 020 021import java.io.IOException; 022import java.util.regex.Matcher; 023import java.util.regex.Pattern; 024import org.apache.hadoop.conf.Configuration; 025import org.apache.hadoop.fs.FileSystem; 026import org.apache.hadoop.fs.Path; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.TableName; 029import org.apache.hadoop.hbase.client.RegionInfo; 030import org.apache.hadoop.hbase.client.RegionInfoBuilder; 031import org.apache.hadoop.hbase.mob.MobConstants; 032import org.apache.hadoop.hbase.regionserver.HRegion; 033import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 034import org.apache.hadoop.hbase.util.CommonFSUtils; 035import org.apache.hadoop.hbase.util.HFileArchiveUtil; 036import org.apache.hadoop.hbase.util.Pair; 037import org.apache.yetus.audience.InterfaceAudience; 038import org.slf4j.Logger; 039import org.slf4j.LoggerFactory; 040 041/** 042 * HFileLink describes a link to an hfile. 043 * 044 * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive) 045 * HFileLink allows to access the referenced hfile regardless of the location where it is. 046 * 047 * <p>Searches for hfiles in the following order and locations: 048 * <ul> 049 * <li>/hbase/table/region/cf/hfile</li> 050 * <li>/hbase/.archive/table/region/cf/hfile</li> 051 * </ul> 052 * 053 * The link checks first in the original path if it is not present 054 * it fallbacks to the archived path. 055 */ 056@InterfaceAudience.Private 057@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_DOESNT_OVERRIDE_EQUALS", 058 justification="To be fixed but warning suppressed for now") 059public class HFileLink extends FileLink { 060 private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class); 061 062 /** 063 * A non-capture group, for HFileLink, so that this can be embedded. 064 * The HFileLink describe a link to an hfile in a different table/region 065 * and the name is in the form: table=region-hfile. 066 * <p> 067 * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid 068 * character for the table name. 069 * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name. 070 * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) 071 * and the bulk loaded (_SeqId_[0-9]+_) hfiles. 072 * 073 * <p>Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is table name 074 * and '4567' is region name and 'abcd' is filename. 075 */ 076 public static final String LINK_NAME_REGEX = 077 String.format("(?:(?:%s=)?)%s=%s-%s", 078 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 079 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX); 080 081 /** Define the HFile Link name parser in the form of: table=region-hfile */ 082 //made package private for testing 083 static final Pattern LINK_NAME_PATTERN = 084 Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$", 085 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 086 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX)); 087 088 /** 089 * The pattern should be used for hfile and reference links 090 * that can be found in /hbase/table/region/family/ 091 */ 092 private static final Pattern REF_OR_HFILE_LINK_PATTERN = 093 Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$", 094 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 095 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX)); 096 097 private final Path archivePath; 098 private final Path originPath; 099 private final Path mobPath; 100 private final Path tempPath; 101 102 /** 103 * Dead simple hfile link constructor 104 */ 105 public HFileLink(final Path originPath, final Path tempPath, final Path mobPath, 106 final Path archivePath) { 107 this.tempPath = tempPath; 108 this.originPath = originPath; 109 this.mobPath = mobPath; 110 this.archivePath = archivePath; 111 setLocations(originPath, tempPath, mobPath, archivePath); 112 } 113 114 115 /** 116 * @param conf {@link Configuration} from which to extract specific archive locations 117 * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile) 118 * @throws IOException on unexpected error. 119 */ 120 public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern) 121 throws IOException { 122 return buildFromHFileLinkPattern(CommonFSUtils.getRootDir(conf), 123 HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern); 124 } 125 126 127 128 /** 129 * @param rootDir Path to the root directory where hbase files are stored 130 * @param archiveDir Path to the hbase archive directory 131 * @param hFileLinkPattern The path of the HFile Link. 132 */ 133 public final static HFileLink buildFromHFileLinkPattern(final Path rootDir, 134 final Path archiveDir, 135 final Path hFileLinkPattern) { 136 Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern); 137 Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath); 138 Path originPath = new Path(rootDir, hfilePath); 139 Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath); 140 Path archivePath = new Path(archiveDir, hfilePath); 141 return new HFileLink(originPath, tempPath, mobPath, archivePath); 142 } 143 144 /** 145 * Create an HFileLink relative path for the table/region/family/hfile location 146 * @param table Table name 147 * @param region Region Name 148 * @param family Family Name 149 * @param hfile HFile Name 150 * @return the relative Path to open the specified table/region/family/hfile link 151 */ 152 public static Path createPath(final TableName table, final String region, 153 final String family, final String hfile) { 154 if (HFileLink.isHFileLink(hfile)) { 155 return new Path(family, hfile); 156 } 157 return new Path(family, HFileLink.createHFileLinkName(table, region, hfile)); 158 } 159 160 /** 161 * Create an HFileLink instance from table/region/family/hfile location 162 * @param conf {@link Configuration} from which to extract specific archive locations 163 * @param table Table name 164 * @param region Region Name 165 * @param family Family Name 166 * @param hfile HFile Name 167 * @return Link to the file with the specified table/region/family/hfile location 168 * @throws IOException on unexpected error. 169 */ 170 public static HFileLink build(final Configuration conf, final TableName table, 171 final String region, final String family, final String hfile) 172 throws IOException { 173 return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile)); 174 } 175 176 /** 177 * @return the origin path of the hfile. 178 */ 179 public Path getOriginPath() { 180 return this.originPath; 181 } 182 183 /** 184 * @return the path of the archived hfile. 185 */ 186 public Path getArchivePath() { 187 return this.archivePath; 188 } 189 190 /** 191 * @return the path of the mob hfiles. 192 */ 193 public Path getMobPath() { 194 return this.mobPath; 195 } 196 197 /** 198 * @param path Path to check. 199 * @return True if the path is a HFileLink. 200 */ 201 public static boolean isHFileLink(final Path path) { 202 return isHFileLink(path.getName()); 203 } 204 205 206 /** 207 * @param fileName File name to check. 208 * @return True if the path is a HFileLink. 209 */ 210 public static boolean isHFileLink(String fileName) { 211 Matcher m = LINK_NAME_PATTERN.matcher(fileName); 212 if (!m.matches()) { 213 return false; 214 } 215 return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null; 216 } 217 218 /** 219 * Convert a HFileLink path to a table relative path. 220 * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd 221 * becomes: /hbase/testtb/4567/cf/abcd 222 * 223 * @param path HFileLink path 224 * @return Relative table path 225 * @throws IOException on unexpected error. 226 */ 227 private static Path getHFileLinkPatternRelativePath(final Path path) { 228 // table=region-hfile 229 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName()); 230 if (!m.matches()) { 231 throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!"); 232 } 233 234 // Convert the HFileLink name into a real table/region/cf/hfile path. 235 TableName tableName = TableName.valueOf(m.group(1), m.group(2)); 236 String regionName = m.group(3); 237 String hfileName = m.group(4); 238 String familyName = path.getParent().getName(); 239 Path tableDir = CommonFSUtils.getTableDir(new Path("./"), tableName); 240 return new Path(tableDir, new Path(regionName, new Path(familyName, 241 hfileName))); 242 } 243 244 /** 245 * Get the HFile name of the referenced link 246 * 247 * @param fileName HFileLink file name 248 * @return the name of the referenced HFile 249 */ 250 public static String getReferencedHFileName(final String fileName) { 251 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 252 if (!m.matches()) { 253 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 254 } 255 return(m.group(4)); 256 } 257 258 /** 259 * Get the Region name of the referenced link 260 * 261 * @param fileName HFileLink file name 262 * @return the name of the referenced Region 263 */ 264 public static String getReferencedRegionName(final String fileName) { 265 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 266 if (!m.matches()) { 267 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 268 } 269 return(m.group(3)); 270 } 271 272 /** 273 * Get the Table name of the referenced link 274 * 275 * @param fileName HFileLink file name 276 * @return the name of the referenced Table 277 */ 278 public static TableName getReferencedTableName(final String fileName) { 279 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 280 if (!m.matches()) { 281 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 282 } 283 return(TableName.valueOf(m.group(1), m.group(2))); 284 } 285 286 /** 287 * Create a new HFileLink name 288 * 289 * @param hfileRegionInfo - Linked HFile Region Info 290 * @param hfileName - Linked HFile name 291 * @return file name of the HFile Link 292 */ 293 public static String createHFileLinkName(final RegionInfo hfileRegionInfo, 294 final String hfileName) { 295 return createHFileLinkName(hfileRegionInfo.getTable(), 296 hfileRegionInfo.getEncodedName(), hfileName); 297 } 298 299 /** 300 * Create a new HFileLink name 301 * 302 * @param tableName - Linked HFile table name 303 * @param regionName - Linked HFile region name 304 * @param hfileName - Linked HFile name 305 * @return file name of the HFile Link 306 */ 307 public static String createHFileLinkName(final TableName tableName, 308 final String regionName, final String hfileName) { 309 String s = String.format("%s=%s-%s", 310 tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='), 311 regionName, hfileName); 312 return s; 313 } 314 315 /** 316 * Create a new HFileLink 317 * 318 * <p>It also adds a back-reference to the hfile back-reference directory 319 * to simplify the reference-count and the cleaning process. 320 * 321 * @param conf {@link Configuration} to read for the archive directory name 322 * @param fs {@link FileSystem} on which to write the HFileLink 323 * @param dstFamilyPath - Destination path (table/region/cf/) 324 * @param hfileRegionInfo - Linked HFile Region Info 325 * @param hfileName - Linked HFile name 326 * @return true if the file is created, otherwise the file exists. 327 * @throws IOException on file or parent directory creation failure 328 */ 329 public static boolean create(final Configuration conf, final FileSystem fs, 330 final Path dstFamilyPath, final RegionInfo hfileRegionInfo, 331 final String hfileName) throws IOException { 332 return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true); 333 } 334 335 /** 336 * Create a new HFileLink 337 * 338 * <p>It also adds a back-reference to the hfile back-reference directory 339 * to simplify the reference-count and the cleaning process. 340 * 341 * @param conf {@link Configuration} to read for the archive directory name 342 * @param fs {@link FileSystem} on which to write the HFileLink 343 * @param dstFamilyPath - Destination path (table/region/cf/) 344 * @param hfileRegionInfo - Linked HFile Region Info 345 * @param hfileName - Linked HFile name 346 * @param createBackRef - Whether back reference should be created. Defaults to true. 347 * @return true if the file is created, otherwise the file exists. 348 * @throws IOException on file or parent directory creation failure 349 */ 350 public static boolean create(final Configuration conf, final FileSystem fs, 351 final Path dstFamilyPath, final RegionInfo hfileRegionInfo, 352 final String hfileName, final boolean createBackRef) throws IOException { 353 TableName linkedTable = hfileRegionInfo.getTable(); 354 String linkedRegion = hfileRegionInfo.getEncodedName(); 355 return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef); 356 } 357 358 /** 359 * Create a new HFileLink 360 * 361 * <p>It also adds a back-reference to the hfile back-reference directory 362 * to simplify the reference-count and the cleaning process. 363 * 364 * @param conf {@link Configuration} to read for the archive directory name 365 * @param fs {@link FileSystem} on which to write the HFileLink 366 * @param dstFamilyPath - Destination path (table/region/cf/) 367 * @param linkedTable - Linked Table Name 368 * @param linkedRegion - Linked Region Name 369 * @param hfileName - Linked HFile name 370 * @return true if the file is created, otherwise the file exists. 371 * @throws IOException on file or parent directory creation failure 372 */ 373 public static boolean create(final Configuration conf, final FileSystem fs, 374 final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion, 375 final String hfileName) throws IOException { 376 return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true); 377 } 378 379 /** 380 * Create a new HFileLink 381 * 382 * <p>It also adds a back-reference to the hfile back-reference directory 383 * to simplify the reference-count and the cleaning process. 384 * 385 * @param conf {@link Configuration} to read for the archive directory name 386 * @param fs {@link FileSystem} on which to write the HFileLink 387 * @param dstFamilyPath - Destination path (table/region/cf/) 388 * @param linkedTable - Linked Table Name 389 * @param linkedRegion - Linked Region Name 390 * @param hfileName - Linked HFile name 391 * @param createBackRef - Whether back reference should be created. Defaults to true. 392 * @return true if the file is created, otherwise the file exists. 393 * @throws IOException on file or parent directory creation failure 394 */ 395 public static boolean create(final Configuration conf, final FileSystem fs, 396 final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion, 397 final String hfileName, final boolean createBackRef) throws IOException { 398 String familyName = dstFamilyPath.getName(); 399 String regionName = dstFamilyPath.getParent().getName(); 400 String tableName = CommonFSUtils.getTableName(dstFamilyPath.getParent().getParent()) 401 .getNameAsString(); 402 403 String name = createHFileLinkName(linkedTable, linkedRegion, hfileName); 404 String refName = createBackReferenceName(tableName, regionName); 405 406 // Make sure the destination directory exists 407 fs.mkdirs(dstFamilyPath); 408 409 // Make sure the FileLink reference directory exists 410 Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf, 411 linkedTable, linkedRegion, familyName); 412 Path backRefPath = null; 413 if (createBackRef) { 414 Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName); 415 fs.mkdirs(backRefssDir); 416 417 // Create the reference for the link 418 backRefPath = new Path(backRefssDir, refName); 419 fs.createNewFile(backRefPath); 420 } 421 try { 422 // Create the link 423 return fs.createNewFile(new Path(dstFamilyPath, name)); 424 } catch (IOException e) { 425 LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e); 426 // Revert the reference if the link creation failed 427 if (createBackRef) { 428 fs.delete(backRefPath, false); 429 } 430 throw e; 431 } 432 } 433 434 /** 435 * Create a new HFileLink starting from a hfileLink name 436 * 437 * <p>It also adds a back-reference to the hfile back-reference directory 438 * to simplify the reference-count and the cleaning process. 439 * 440 * @param conf {@link Configuration} to read for the archive directory name 441 * @param fs {@link FileSystem} on which to write the HFileLink 442 * @param dstFamilyPath - Destination path (table/region/cf/) 443 * @param hfileLinkName - HFileLink name (it contains hfile-region-table) 444 * @return true if the file is created, otherwise the file exists. 445 * @throws IOException on file or parent directory creation failure 446 */ 447 public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs, 448 final Path dstFamilyPath, final String hfileLinkName) 449 throws IOException { 450 return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true); 451 } 452 453 /** 454 * Create a new HFileLink starting from a hfileLink name 455 * 456 * <p>It also adds a back-reference to the hfile back-reference directory 457 * to simplify the reference-count and the cleaning process. 458 * 459 * @param conf {@link Configuration} to read for the archive directory name 460 * @param fs {@link FileSystem} on which to write the HFileLink 461 * @param dstFamilyPath - Destination path (table/region/cf/) 462 * @param hfileLinkName - HFileLink name (it contains hfile-region-table) 463 * @param createBackRef - Whether back reference should be created. Defaults to true. 464 * @return true if the file is created, otherwise the file exists. 465 * @throws IOException on file or parent directory creation failure 466 */ 467 public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs, 468 final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef) 469 throws IOException { 470 Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName); 471 if (!m.matches()) { 472 throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!"); 473 } 474 return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)), 475 m.group(3), m.group(4), createBackRef); 476 } 477 478 /** 479 * Create the back reference name 480 */ 481 //package-private for testing 482 static String createBackReferenceName(final String tableNameStr, 483 final String regionName) { 484 485 return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '='); 486 } 487 488 /** 489 * Get the full path of the HFile referenced by the back reference 490 * 491 * @param rootDir root hbase directory 492 * @param linkRefPath Link Back Reference path 493 * @return full path of the referenced hfile 494 */ 495 public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) { 496 Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName()); 497 TableName linkTableName = p.getFirst(); 498 String linkRegionName = p.getSecond(); 499 500 String hfileName = getBackReferenceFileName(linkRefPath.getParent()); 501 Path familyPath = linkRefPath.getParent().getParent(); 502 Path regionPath = familyPath.getParent(); 503 Path tablePath = regionPath.getParent(); 504 505 String linkName = createHFileLinkName(CommonFSUtils.getTableName(tablePath), 506 regionPath.getName(), hfileName); 507 Path linkTableDir = CommonFSUtils.getTableDir(rootDir, linkTableName); 508 Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName); 509 return new Path(new Path(regionDir, familyPath.getName()), linkName); 510 } 511 512 public static Pair<TableName, String> parseBackReferenceName(String name) { 513 int separatorIndex = name.indexOf('.'); 514 String linkRegionName = name.substring(0, separatorIndex); 515 String tableSubstr = name.substring(separatorIndex + 1) 516 .replace('=', TableName.NAMESPACE_DELIM); 517 TableName linkTableName = TableName.valueOf(tableSubstr); 518 return new Pair<>(linkTableName, linkRegionName); 519 } 520 521 /** 522 * Get the full path of the HFile referenced by the back reference 523 * 524 * @param conf {@link Configuration} to read for the archive directory name 525 * @param linkRefPath Link Back Reference path 526 * @return full path of the referenced hfile 527 * @throws IOException on unexpected error. 528 */ 529 public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath) 530 throws IOException { 531 return getHFileFromBackReference(CommonFSUtils.getRootDir(conf), linkRefPath); 532 } 533 534}