/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.io;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.mob.MobConstants;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * HFileLink describes a link to an hfile.
 *
 * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive).
 * HFileLink allows access to the referenced hfile regardless of where it currently is.
 *
 * <p>Searches for hfiles in the following order and locations:
 * <ul>
 *  <li>/hbase/table/region/cf/hfile</li>
 *  <li>/hbase/.archive/table/region/cf/hfile</li>
 * </ul>
 *
 * The link checks the original path first; if the file is not present there
 * it falls back to the archived path.
 *
 * <p>In addition to the two locations above, the constructor also registers a temp path and a
 * mob path with the parent {@link FileLink} (in the order origin, temp, mob, archive).
 */
@InterfaceAudience.Private
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_DOESNT_OVERRIDE_EQUALS",
  justification="To be fixed but warning suppressed for now")
public class HFileLink extends FileLink {
  private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class);

  /**
   * A non-capture group, for HFileLink, so that this can be embedded.
   * The HFileLink describes a link to an hfile in a different table/region
   * and the name is in the form: table=region-hfile.
   * <p>
   * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid
   * character for the table name.
   * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name.
   * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid)
   * and the bulk loaded (_SeqId_[0-9]+_) hfiles.
   *
   * <p>Here is an example name: /hbase/test/0123/cf/testtb=4567-abcd where 'testtb' is table name
   * and '4567' is region name and 'abcd' is filename.
   */
  public static final String LINK_NAME_REGEX =
    String.format("(?:(?:%s=)?)%s=%s-%s",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX);

  /**
   * Define the HFile Link name parser in the form of: table=region-hfile.
   * Capturing groups: 1 = namespace (optional), 2 = table qualifier, 3 = region, 4 = hfile.
   */
  //made package private for testing
  static final Pattern LINK_NAME_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX));

  /**
   * The pattern should be used for hfile and reference links
   * that can be found in /hbase/table/region/family/.
   * It uses the same group layout as {@link #LINK_NAME_PATTERN}, but group 4 accepts any
   * non-empty trailing text instead of a strict hfile name.
   */
  private static final Pattern REF_OR_HFILE_LINK_PATTERN =
    Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$",
      TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX,
      RegionInfoBuilder.ENCODED_REGION_NAME_REGEX));

  private final Path archivePath;
  private final Path originPath;
  private final Path mobPath;
  private final Path tempPath;

  /**
   * Dead simple hfile link constructor.
   *
   * @param originPath path of the hfile in its original table/region/family directory
   * @param tempPath path of the hfile under the temp directory
   * @param mobPath path of the hfile under the mob directory
   * @param archivePath path of the hfile under the archive directory
   */
  public HFileLink(final Path originPath, final Path tempPath, final Path mobPath,
      final Path archivePath) {
    this.tempPath = tempPath;
    this.originPath = originPath;
    this.mobPath = mobPath;
    this.archivePath = archivePath;
    setLocations(originPath, tempPath, mobPath, archivePath);
  }

  /**
   * Build an HFileLink from a path ending with a HFileLink name, resolving the root and archive
   * directories from the configuration.
   *
   * @param conf {@link Configuration} from which to extract specific archive locations
   * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile)
   * @return the link pointing at the referenced hfile locations
   * @throws IOException on unexpected error.
   */
  public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern)
      throws IOException {
    return buildFromHFileLinkPattern(FSUtils.getRootDir(conf),
      HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern);
  }

  /**
   * Build an HFileLink from a path ending with a HFileLink name. The referenced
   * table/region/family/hfile relative path is resolved against the root directory, the temp
   * directory, the mob directory and the archive directory.
   *
   * @param rootDir Path to the root directory where hbase files are stored
   * @param archiveDir Path to the hbase archive directory
   * @param hFileLinkPattern The path of the HFile Link.
   * @return the link pointing at the referenced hfile locations
   * @throws IllegalArgumentException if the last path component is not a valid link name
   */
  public static final HFileLink buildFromHFileLinkPattern(final Path rootDir,
      final Path archiveDir, final Path hFileLinkPattern) {
    Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern);
    Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath);
    Path originPath = new Path(rootDir, hfilePath);
    Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath);
    Path archivePath = new Path(archiveDir, hfilePath);
    return new HFileLink(originPath, tempPath, mobPath, archivePath);
  }

  /**
   * Create an HFileLink relative path for the table/region/family/hfile location.
   * If {@code hfile} is already a HFileLink name it is used as is, otherwise a link name is
   * built from the table, region and hfile names.
   *
   * @param table Table name
   * @param region Region Name
   * @param family Family Name
   * @param hfile HFile Name
   * @return the relative Path to open the specified table/region/family/hfile link
   */
  public static Path createPath(final TableName table, final String region,
      final String family, final String hfile) {
    if (HFileLink.isHFileLink(hfile)) {
      return new Path(family, hfile);
    }
    return new Path(family, HFileLink.createHFileLinkName(table, region, hfile));
  }

  /**
   * Create an HFileLink instance from table/region/family/hfile location.
   *
   * @param conf {@link Configuration} from which to extract specific archive locations
   * @param table Table name
   * @param region Region Name
   * @param family Family Name
   * @param hfile HFile Name
   * @return Link to the file with the specified table/region/family/hfile location
   * @throws IOException on unexpected error.
   */
  public static HFileLink build(final Configuration conf, final TableName table,
      final String region, final String family, final String hfile)
      throws IOException {
    return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile));
  }

  /**
   * @return the origin path of the hfile.
   */
  public Path getOriginPath() {
    return this.originPath;
  }

  /**
   * @return the path of the archived hfile.
   */
  public Path getArchivePath() {
    return this.archivePath;
  }

  /**
   * @return the path of the mob hfiles.
   */
  public Path getMobPath() {
    return this.mobPath;
  }

  /**
   * @param path Path to check.
   * @return True if the last component of the path is a HFileLink name.
   */
  public static boolean isHFileLink(final Path path) {
    return isHFileLink(path.getName());
  }

  /**
   * @param fileName File name to check.
   * @return True if the file name is a HFileLink name.
   */
  public static boolean isHFileLink(String fileName) {
    Matcher m = LINK_NAME_PATTERN.matcher(fileName);
    if (!m.matches()) {
      return false;
    }
    // The namespace (group 1) is optional; table, region and hfile must all be present.
    return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null;
  }

  /**
   * Convert a HFileLink path to a table relative path.
   * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd
   *      becomes: /hbase/testtb/4567/cf/abcd
   *
   * @param path HFileLink path
   * @return Relative table path
   * @throws IllegalArgumentException if the last path component is not a valid link name
   */
  private static Path getHFileLinkPatternRelativePath(final Path path) {
    // table=region-hfile
    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName());
    if (!m.matches()) {
      throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!");
    }

    // Convert the HFileLink name into a real table/region/cf/hfile path.
    TableName tableName = TableName.valueOf(m.group(1), m.group(2));
    String regionName = m.group(3);
    String hfileName = m.group(4);
    // The family is taken from the directory that contains the link file.
    String familyName = path.getParent().getName();
    Path tableDir = FSUtils.getTableDir(new Path("./"), tableName);
    return new Path(tableDir, new Path(regionName, new Path(familyName, hfileName)));
  }

  /**
   * Get the HFile name of the referenced link.
   *
   * @param fileName HFileLink file name
   * @return the name of the referenced HFile
   * @throws IllegalArgumentException if the name is not a valid link name
   */
  public static String getReferencedHFileName(final String fileName) {
    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
    if (!m.matches()) {
      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
    }
    return m.group(4);
  }

  /**
   * Get the Region name of the referenced link.
   *
   * @param fileName HFileLink file name
   * @return the name of the referenced Region
   * @throws IllegalArgumentException if the name is not a valid link name
   */
  public static String getReferencedRegionName(final String fileName) {
    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
    if (!m.matches()) {
      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
    }
    return m.group(3);
  }

  /**
   * Get the Table name of the referenced link.
   *
   * @param fileName HFileLink file name
   * @return the name of the referenced Table
   * @throws IllegalArgumentException if the name is not a valid link name
   */
  public static TableName getReferencedTableName(final String fileName) {
    Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName);
    if (!m.matches()) {
      throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!");
    }
    return TableName.valueOf(m.group(1), m.group(2));
  }

  /**
   * Create a new HFileLink name.
   *
   * @param hfileRegionInfo - Linked HFile Region Info
   * @param hfileName - Linked HFile name
   * @return file name of the HFile Link
   */
  public static String createHFileLinkName(final RegionInfo hfileRegionInfo,
      final String hfileName) {
    return createHFileLinkName(hfileRegionInfo.getTable(),
      hfileRegionInfo.getEncodedName(), hfileName);
  }

  /**
   * Create a new HFileLink name in the form table=region-hfile. The namespace delimiter of the
   * table name is replaced by '='.
   *
   * @param tableName - Linked HFile table name
   * @param regionName - Linked HFile region name
   * @param hfileName - Linked HFile name
   * @return file name of the HFile Link
   */
  public static String createHFileLinkName(final TableName tableName,
      final String regionName, final String hfileName) {
    return String.format("%s=%s-%s",
      tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='),
      regionName, hfileName);
  }

  /**
   * Create a new HFileLink.
   *
   * <p>It also adds a back-reference to the hfile back-reference directory
   * to simplify the reference-count and the cleaning process.
   *
   * @param conf {@link Configuration} to read for the archive directory name
   * @param fs {@link FileSystem} on which to write the HFileLink
   * @param dstFamilyPath - Destination path (table/region/cf/)
   * @param hfileRegionInfo - Linked HFile Region Info
   * @param hfileName - Linked HFile name
   * @return true if the file is created, otherwise the file exists.
   * @throws IOException on file or parent directory creation failure
   */
  public static boolean create(final Configuration conf, final FileSystem fs,
      final Path dstFamilyPath, final RegionInfo hfileRegionInfo,
      final String hfileName) throws IOException {
    return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true);
  }

  /**
   * Create a new HFileLink.
   *
   * <p>It also adds a back-reference to the hfile back-reference directory
   * to simplify the reference-count and the cleaning process.
   *
   * @param conf {@link Configuration} to read for the archive directory name
   * @param fs {@link FileSystem} on which to write the HFileLink
   * @param dstFamilyPath - Destination path (table/region/cf/)
   * @param hfileRegionInfo - Linked HFile Region Info
   * @param hfileName - Linked HFile name
   * @param createBackRef - Whether back reference should be created. Defaults to true.
   * @return true if the file is created, otherwise the file exists.
   * @throws IOException on file or parent directory creation failure
   */
  public static boolean create(final Configuration conf, final FileSystem fs,
      final Path dstFamilyPath, final RegionInfo hfileRegionInfo,
      final String hfileName, final boolean createBackRef) throws IOException {
    TableName linkedTable = hfileRegionInfo.getTable();
    String linkedRegion = hfileRegionInfo.getEncodedName();
    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef);
  }

  /**
   * Create a new HFileLink.
   *
   * <p>It also adds a back-reference to the hfile back-reference directory
   * to simplify the reference-count and the cleaning process.
   *
   * @param conf {@link Configuration} to read for the archive directory name
   * @param fs {@link FileSystem} on which to write the HFileLink
   * @param dstFamilyPath - Destination path (table/region/cf/)
   * @param linkedTable - Linked Table Name
   * @param linkedRegion - Linked Region Name
   * @param hfileName - Linked HFile name
   * @return true if the file is created, otherwise the file exists.
   * @throws IOException on file or parent directory creation failure
   */
  public static boolean create(final Configuration conf, final FileSystem fs,
      final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
      final String hfileName) throws IOException {
    return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true);
  }

  /**
   * Create a new HFileLink.
   *
   * <p>When {@code createBackRef} is true it also adds a back-reference to the hfile
   * back-reference directory to simplify the reference-count and the cleaning process.
   * If the link file cannot be created, the back-reference is removed again before the
   * failure is propagated.
   *
   * @param conf {@link Configuration} to read for the archive directory name
   * @param fs {@link FileSystem} on which to write the HFileLink
   * @param dstFamilyPath - Destination path (table/region/cf/)
   * @param linkedTable - Linked Table Name
   * @param linkedRegion - Linked Region Name
   * @param hfileName - Linked HFile name
   * @param createBackRef - Whether back reference should be created. Defaults to true.
   * @return true if the file is created, otherwise the file exists.
   * @throws IOException on file or parent directory creation failure
   */
  public static boolean create(final Configuration conf, final FileSystem fs,
      final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion,
      final String hfileName, final boolean createBackRef) throws IOException {
    // The destination table/region/family names are derived from the destination directory.
    String familyName = dstFamilyPath.getName();
    String regionName = dstFamilyPath.getParent().getName();
    String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent())
        .getNameAsString();

    String name = createHFileLinkName(linkedTable, linkedRegion, hfileName);
    String refName = createBackReferenceName(tableName, regionName);

    // Make sure the destination directory exists
    fs.mkdirs(dstFamilyPath);

    // Make sure the FileLink reference directory exists
    Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf,
          linkedTable, linkedRegion, familyName);
    Path backRefPath = null;
    if (createBackRef) {
      Path backRefsDir = getBackReferencesDir(archiveStoreDir, hfileName);
      fs.mkdirs(backRefsDir);

      // Create the reference for the link
      backRefPath = new Path(backRefsDir, refName);
      fs.createNewFile(backRefPath);
    }
    try {
      // Create the link
      return fs.createNewFile(new Path(dstFamilyPath, name));
    } catch (IOException e) {
      LOG.error("couldn't create the link={} for {}", name, dstFamilyPath, e);
      // Revert the reference if the link creation failed
      if (createBackRef) {
        try {
          fs.delete(backRefPath, false);
        } catch (IOException cleanupFailure) {
          // Keep the link creation failure as the primary error; a failed cleanup must not
          // hide the reason why the link could not be created.
          e.addSuppressed(cleanupFailure);
        }
      }
      throw e;
    }
  }

  /**
   * Create a new HFileLink starting from a hfileLink name.
   *
   * <p>It also adds a back-reference to the hfile back-reference directory
   * to simplify the reference-count and the cleaning process.
   *
   * @param conf {@link Configuration} to read for the archive directory name
   * @param fs {@link FileSystem} on which to write the HFileLink
   * @param dstFamilyPath - Destination path (table/region/cf/)
   * @param hfileLinkName - HFileLink name (in the form table=region-hfile)
   * @return true if the file is created, otherwise the file exists.
   * @throws IOException on file or parent directory creation failure
   */
  public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
      final Path dstFamilyPath, final String hfileLinkName)
          throws IOException {
    return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true);
  }

  /**
   * Create a new HFileLink starting from a hfileLink name.
   *
   * <p>It also adds a back-reference to the hfile back-reference directory
   * to simplify the reference-count and the cleaning process.
   *
   * @param conf {@link Configuration} to read for the archive directory name
   * @param fs {@link FileSystem} on which to write the HFileLink
   * @param dstFamilyPath - Destination path (table/region/cf/)
   * @param hfileLinkName - HFileLink name (in the form table=region-hfile)
   * @param createBackRef - Whether back reference should be created. Defaults to true.
   * @return true if the file is created, otherwise the file exists.
   * @throws IOException on file or parent directory creation failure
   * @throws IllegalArgumentException if {@code hfileLinkName} is not a valid link name
   */
  public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs,
      final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef)
          throws IOException {
    Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName);
    if (!m.matches()) {
      throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!");
    }
    return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)),
        m.group(3), m.group(4), createBackRef);
  }

  /**
   * Create the back reference name, in the form region.table where the namespace delimiter of
   * the table name is replaced by '='.
   *
   * @param tableNameStr name of the table that holds the link
   * @param regionName name of the region that holds the link
   * @return the back reference file name
   */
  //package-private for testing
  static String createBackReferenceName(final String tableNameStr,
                                        final String regionName) {
    return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '=');
  }

  /**
   * Get the full path of the HFile referenced by the back reference.
   *
   * @param rootDir root hbase directory
   * @param linkRefPath Link Back Reference path
   * @return full path of the referenced hfile
   * @throws IllegalArgumentException if the back reference file name cannot be parsed
   */
  public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) {
    Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName());
    TableName linkTableName = p.getFirst();
    String linkRegionName = p.getSecond();

    // Walk up from the back reference file: back-refs dir -> family -> region -> table.
    String hfileName = getBackReferenceFileName(linkRefPath.getParent());
    Path familyPath = linkRefPath.getParent().getParent();
    Path regionPath = familyPath.getParent();
    Path tablePath = regionPath.getParent();

    String linkName = createHFileLinkName(FSUtils.getTableName(tablePath),
            regionPath.getName(), hfileName);
    Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName);
    Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName);
    return new Path(new Path(regionDir, familyPath.getName()), linkName);
  }

  /**
   * Parse a back reference name as produced by
   * {@link #createBackReferenceName(String, String)}: the text before the first '.' is the
   * region name, the remainder is the table name with '=' standing for the namespace delimiter.
   *
   * @param name back reference file name
   * @return a pair of the linking table name and the linking region name
   * @throws IllegalArgumentException if the name does not contain the '.' separator
   */
  public static Pair<TableName, String> parseBackReferenceName(String name) {
    int separatorIndex = name.indexOf('.');
    if (separatorIndex < 0) {
      // Without this check substring() would fail with an unhelpful index error.
      throw new IllegalArgumentException(
        name + " is not a valid HFileLink back reference name!");
    }
    String linkRegionName = name.substring(0, separatorIndex);
    String tableSubstr = name.substring(separatorIndex + 1)
        .replace('=', TableName.NAMESPACE_DELIM);
    TableName linkTableName = TableName.valueOf(tableSubstr);
    return new Pair<>(linkTableName, linkRegionName);
  }

  /**
   * Get the full path of the HFile referenced by the back reference.
   *
   * @param conf {@link Configuration} to read for the archive directory name
   * @param linkRefPath Link Back Reference path
   * @return full path of the referenced hfile
   * @throws IOException on unexpected error.
   */
  public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath)
      throws IOException {
    return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath);
  }

}