001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hbase.io; 020 021import java.io.IOException; 022import java.util.regex.Matcher; 023import java.util.regex.Pattern; 024 025import org.apache.hadoop.conf.Configuration; 026import org.apache.hadoop.fs.FileSystem; 027import org.apache.hadoop.fs.Path; 028import org.apache.hadoop.hbase.HConstants; 029import org.apache.hadoop.hbase.TableName; 030import org.apache.hadoop.hbase.client.RegionInfo; 031import org.apache.hadoop.hbase.client.RegionInfoBuilder; 032import org.apache.hadoop.hbase.mob.MobConstants; 033import org.apache.hadoop.hbase.regionserver.HRegion; 034import org.apache.hadoop.hbase.regionserver.StoreFileInfo; 035import org.apache.hadoop.hbase.util.FSUtils; 036import org.apache.hadoop.hbase.util.HFileArchiveUtil; 037import org.apache.hadoop.hbase.util.Pair; 038import org.apache.yetus.audience.InterfaceAudience; 039import org.slf4j.Logger; 040import org.slf4j.LoggerFactory; 041 042/** 043 * HFileLink describes a link to an hfile. 044 * 045 * An hfile can be served from a region or from the hfile archive directory (/hbase/.archive) 046 * HFileLink allows to access the referenced hfile regardless of the location where it is. 047 * 048 * <p>Searches for hfiles in the following order and locations: 049 * <ul> 050 * <li>/hbase/table/region/cf/hfile</li> 051 * <li>/hbase/.archive/table/region/cf/hfile</li> 052 * </ul> 053 * 054 * The link checks first in the original path if it is not present 055 * it fallbacks to the archived path. 056 */ 057@InterfaceAudience.Private 058@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="EQ_DOESNT_OVERRIDE_EQUALS", 059 justification="To be fixed but warning suppressed for now") 060public class HFileLink extends FileLink { 061 private static final Logger LOG = LoggerFactory.getLogger(HFileLink.class); 062 063 /** 064 * A non-capture group, for HFileLink, so that this can be embedded. 065 * The HFileLink describe a link to an hfile in a different table/region 066 * and the name is in the form: table=region-hfile. 067 * <p> 068 * Table name is ([\p{IsAlphabetic}\p{Digit}][\p{IsAlphabetic}\p{Digit}.-]*), so '=' is an invalid 069 * character for the table name. 070 * Region name is ([a-f0-9]+), so '-' is an invalid character for the region name. 071 * HFile is ([0-9a-f]+(?:_SeqId_[0-9]+_)?) covering the plain hfiles (uuid) 072 * and the bulk loaded (_SeqId_[0-9]+_) hfiles. 073 */ 074 public static final String LINK_NAME_REGEX = 075 String.format("(?:(?:%s=)?)%s=%s-%s", 076 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 077 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX); 078 079 /** Define the HFile Link name parser in the form of: table=region-hfile */ 080 //made package private for testing 081 static final Pattern LINK_NAME_PATTERN = 082 Pattern.compile(String.format("^(?:(%s)(?:\\=))?(%s)=(%s)-(%s)$", 083 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 084 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX, StoreFileInfo.HFILE_NAME_REGEX)); 085 086 /** 087 * The pattern should be used for hfile and reference links 088 * that can be found in /hbase/table/region/family/ 089 */ 090 private static final Pattern REF_OR_HFILE_LINK_PATTERN = 091 Pattern.compile(String.format("^(?:(%s)(?:=))?(%s)=(%s)-(.+)$", 092 TableName.VALID_NAMESPACE_REGEX, TableName.VALID_TABLE_QUALIFIER_REGEX, 093 RegionInfoBuilder.ENCODED_REGION_NAME_REGEX)); 094 095 private final Path archivePath; 096 private final Path originPath; 097 private final Path mobPath; 098 private final Path tempPath; 099 100 /** 101 * Dead simple hfile link constructor 102 */ 103 public HFileLink(final Path originPath, final Path tempPath, final Path mobPath, 104 final Path archivePath) { 105 this.tempPath = tempPath; 106 this.originPath = originPath; 107 this.mobPath = mobPath; 108 this.archivePath = archivePath; 109 setLocations(originPath, tempPath, mobPath, archivePath); 110 } 111 112 113 /** 114 * @param conf {@link Configuration} from which to extract specific archive locations 115 * @param hFileLinkPattern The path ending with a HFileLink pattern. (table=region-hfile) 116 * @throws IOException on unexpected error. 117 */ 118 public static final HFileLink buildFromHFileLinkPattern(Configuration conf, Path hFileLinkPattern) 119 throws IOException { 120 return buildFromHFileLinkPattern(FSUtils.getRootDir(conf), 121 HFileArchiveUtil.getArchivePath(conf), hFileLinkPattern); 122 } 123 124 125 126 /** 127 * @param rootDir Path to the root directory where hbase files are stored 128 * @param archiveDir Path to the hbase archive directory 129 * @param hFileLinkPattern The path of the HFile Link. 130 */ 131 public final static HFileLink buildFromHFileLinkPattern(final Path rootDir, 132 final Path archiveDir, 133 final Path hFileLinkPattern) { 134 Path hfilePath = getHFileLinkPatternRelativePath(hFileLinkPattern); 135 Path tempPath = new Path(new Path(rootDir, HConstants.HBASE_TEMP_DIRECTORY), hfilePath); 136 Path originPath = new Path(rootDir, hfilePath); 137 Path mobPath = new Path(new Path(rootDir, MobConstants.MOB_DIR_NAME), hfilePath); 138 Path archivePath = new Path(archiveDir, hfilePath); 139 return new HFileLink(originPath, tempPath, mobPath, archivePath); 140 } 141 142 /** 143 * Create an HFileLink relative path for the table/region/family/hfile location 144 * @param table Table name 145 * @param region Region Name 146 * @param family Family Name 147 * @param hfile HFile Name 148 * @return the relative Path to open the specified table/region/family/hfile link 149 */ 150 public static Path createPath(final TableName table, final String region, 151 final String family, final String hfile) { 152 if (HFileLink.isHFileLink(hfile)) { 153 return new Path(family, hfile); 154 } 155 return new Path(family, HFileLink.createHFileLinkName(table, region, hfile)); 156 } 157 158 /** 159 * Create an HFileLink instance from table/region/family/hfile location 160 * @param conf {@link Configuration} from which to extract specific archive locations 161 * @param table Table name 162 * @param region Region Name 163 * @param family Family Name 164 * @param hfile HFile Name 165 * @return Link to the file with the specified table/region/family/hfile location 166 * @throws IOException on unexpected error. 167 */ 168 public static HFileLink build(final Configuration conf, final TableName table, 169 final String region, final String family, final String hfile) 170 throws IOException { 171 return HFileLink.buildFromHFileLinkPattern(conf, createPath(table, region, family, hfile)); 172 } 173 174 /** 175 * @return the origin path of the hfile. 176 */ 177 public Path getOriginPath() { 178 return this.originPath; 179 } 180 181 /** 182 * @return the path of the archived hfile. 183 */ 184 public Path getArchivePath() { 185 return this.archivePath; 186 } 187 188 /** 189 * @return the path of the mob hfiles. 190 */ 191 public Path getMobPath() { 192 return this.mobPath; 193 } 194 195 /** 196 * @param path Path to check. 197 * @return True if the path is a HFileLink. 198 */ 199 public static boolean isHFileLink(final Path path) { 200 return isHFileLink(path.getName()); 201 } 202 203 204 /** 205 * @param fileName File name to check. 206 * @return True if the path is a HFileLink. 207 */ 208 public static boolean isHFileLink(String fileName) { 209 Matcher m = LINK_NAME_PATTERN.matcher(fileName); 210 if (!m.matches()) return false; 211 return m.groupCount() > 2 && m.group(4) != null && m.group(3) != null && m.group(2) != null; 212 } 213 214 /** 215 * Convert a HFileLink path to a table relative path. 216 * e.g. the link: /hbase/test/0123/cf/testtb=4567-abcd 217 * becomes: /hbase/testtb/4567/cf/abcd 218 * 219 * @param path HFileLink path 220 * @return Relative table path 221 * @throws IOException on unexpected error. 222 */ 223 private static Path getHFileLinkPatternRelativePath(final Path path) { 224 // table=region-hfile 225 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(path.getName()); 226 if (!m.matches()) { 227 throw new IllegalArgumentException(path.getName() + " is not a valid HFileLink pattern!"); 228 } 229 230 // Convert the HFileLink name into a real table/region/cf/hfile path. 231 TableName tableName = TableName.valueOf(m.group(1), m.group(2)); 232 String regionName = m.group(3); 233 String hfileName = m.group(4); 234 String familyName = path.getParent().getName(); 235 Path tableDir = FSUtils.getTableDir(new Path("./"), tableName); 236 return new Path(tableDir, new Path(regionName, new Path(familyName, 237 hfileName))); 238 } 239 240 /** 241 * Get the HFile name of the referenced link 242 * 243 * @param fileName HFileLink file name 244 * @return the name of the referenced HFile 245 */ 246 public static String getReferencedHFileName(final String fileName) { 247 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 248 if (!m.matches()) { 249 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 250 } 251 return(m.group(4)); 252 } 253 254 /** 255 * Get the Region name of the referenced link 256 * 257 * @param fileName HFileLink file name 258 * @return the name of the referenced Region 259 */ 260 public static String getReferencedRegionName(final String fileName) { 261 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 262 if (!m.matches()) { 263 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 264 } 265 return(m.group(3)); 266 } 267 268 /** 269 * Get the Table name of the referenced link 270 * 271 * @param fileName HFileLink file name 272 * @return the name of the referenced Table 273 */ 274 public static TableName getReferencedTableName(final String fileName) { 275 Matcher m = REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 276 if (!m.matches()) { 277 throw new IllegalArgumentException(fileName + " is not a valid HFileLink name!"); 278 } 279 return(TableName.valueOf(m.group(1), m.group(2))); 280 } 281 282 /** 283 * Create a new HFileLink name 284 * 285 * @param hfileRegionInfo - Linked HFile Region Info 286 * @param hfileName - Linked HFile name 287 * @return file name of the HFile Link 288 */ 289 public static String createHFileLinkName(final RegionInfo hfileRegionInfo, 290 final String hfileName) { 291 return createHFileLinkName(hfileRegionInfo.getTable(), 292 hfileRegionInfo.getEncodedName(), hfileName); 293 } 294 295 /** 296 * Create a new HFileLink name 297 * 298 * @param tableName - Linked HFile table name 299 * @param regionName - Linked HFile region name 300 * @param hfileName - Linked HFile name 301 * @return file name of the HFile Link 302 */ 303 public static String createHFileLinkName(final TableName tableName, 304 final String regionName, final String hfileName) { 305 String s = String.format("%s=%s-%s", 306 tableName.getNameAsString().replace(TableName.NAMESPACE_DELIM, '='), 307 regionName, hfileName); 308 return s; 309 } 310 311 /** 312 * Create a new HFileLink 313 * 314 * <p>It also adds a back-reference to the hfile back-reference directory 315 * to simplify the reference-count and the cleaning process. 316 * 317 * @param conf {@link Configuration} to read for the archive directory name 318 * @param fs {@link FileSystem} on which to write the HFileLink 319 * @param dstFamilyPath - Destination path (table/region/cf/) 320 * @param hfileRegionInfo - Linked HFile Region Info 321 * @param hfileName - Linked HFile name 322 * @return true if the file is created, otherwise the file exists. 323 * @throws IOException on file or parent directory creation failure 324 */ 325 public static boolean create(final Configuration conf, final FileSystem fs, 326 final Path dstFamilyPath, final RegionInfo hfileRegionInfo, 327 final String hfileName) throws IOException { 328 return create(conf, fs, dstFamilyPath, hfileRegionInfo, hfileName, true); 329 } 330 331 /** 332 * Create a new HFileLink 333 * 334 * <p>It also adds a back-reference to the hfile back-reference directory 335 * to simplify the reference-count and the cleaning process. 336 * 337 * @param conf {@link Configuration} to read for the archive directory name 338 * @param fs {@link FileSystem} on which to write the HFileLink 339 * @param dstFamilyPath - Destination path (table/region/cf/) 340 * @param hfileRegionInfo - Linked HFile Region Info 341 * @param hfileName - Linked HFile name 342 * @param createBackRef - Whether back reference should be created. Defaults to true. 343 * @return true if the file is created, otherwise the file exists. 344 * @throws IOException on file or parent directory creation failure 345 */ 346 public static boolean create(final Configuration conf, final FileSystem fs, 347 final Path dstFamilyPath, final RegionInfo hfileRegionInfo, 348 final String hfileName, final boolean createBackRef) throws IOException { 349 TableName linkedTable = hfileRegionInfo.getTable(); 350 String linkedRegion = hfileRegionInfo.getEncodedName(); 351 return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, createBackRef); 352 } 353 354 /** 355 * Create a new HFileLink 356 * 357 * <p>It also adds a back-reference to the hfile back-reference directory 358 * to simplify the reference-count and the cleaning process. 359 * 360 * @param conf {@link Configuration} to read for the archive directory name 361 * @param fs {@link FileSystem} on which to write the HFileLink 362 * @param dstFamilyPath - Destination path (table/region/cf/) 363 * @param linkedTable - Linked Table Name 364 * @param linkedRegion - Linked Region Name 365 * @param hfileName - Linked HFile name 366 * @return true if the file is created, otherwise the file exists. 367 * @throws IOException on file or parent directory creation failure 368 */ 369 public static boolean create(final Configuration conf, final FileSystem fs, 370 final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion, 371 final String hfileName) throws IOException { 372 return create(conf, fs, dstFamilyPath, linkedTable, linkedRegion, hfileName, true); 373 } 374 375 /** 376 * Create a new HFileLink 377 * 378 * <p>It also adds a back-reference to the hfile back-reference directory 379 * to simplify the reference-count and the cleaning process. 380 * 381 * @param conf {@link Configuration} to read for the archive directory name 382 * @param fs {@link FileSystem} on which to write the HFileLink 383 * @param dstFamilyPath - Destination path (table/region/cf/) 384 * @param linkedTable - Linked Table Name 385 * @param linkedRegion - Linked Region Name 386 * @param hfileName - Linked HFile name 387 * @param createBackRef - Whether back reference should be created. Defaults to true. 388 * @return true if the file is created, otherwise the file exists. 389 * @throws IOException on file or parent directory creation failure 390 */ 391 public static boolean create(final Configuration conf, final FileSystem fs, 392 final Path dstFamilyPath, final TableName linkedTable, final String linkedRegion, 393 final String hfileName, final boolean createBackRef) throws IOException { 394 String familyName = dstFamilyPath.getName(); 395 String regionName = dstFamilyPath.getParent().getName(); 396 String tableName = FSUtils.getTableName(dstFamilyPath.getParent().getParent()) 397 .getNameAsString(); 398 399 String name = createHFileLinkName(linkedTable, linkedRegion, hfileName); 400 String refName = createBackReferenceName(tableName, regionName); 401 402 // Make sure the destination directory exists 403 fs.mkdirs(dstFamilyPath); 404 405 // Make sure the FileLink reference directory exists 406 Path archiveStoreDir = HFileArchiveUtil.getStoreArchivePath(conf, 407 linkedTable, linkedRegion, familyName); 408 Path backRefPath = null; 409 if (createBackRef) { 410 Path backRefssDir = getBackReferencesDir(archiveStoreDir, hfileName); 411 fs.mkdirs(backRefssDir); 412 413 // Create the reference for the link 414 backRefPath = new Path(backRefssDir, refName); 415 fs.createNewFile(backRefPath); 416 } 417 try { 418 // Create the link 419 return fs.createNewFile(new Path(dstFamilyPath, name)); 420 } catch (IOException e) { 421 LOG.error("couldn't create the link=" + name + " for " + dstFamilyPath, e); 422 // Revert the reference if the link creation failed 423 if (createBackRef) { 424 fs.delete(backRefPath, false); 425 } 426 throw e; 427 } 428 } 429 430 /** 431 * Create a new HFileLink starting from a hfileLink name 432 * 433 * <p>It also adds a back-reference to the hfile back-reference directory 434 * to simplify the reference-count and the cleaning process. 435 * 436 * @param conf {@link Configuration} to read for the archive directory name 437 * @param fs {@link FileSystem} on which to write the HFileLink 438 * @param dstFamilyPath - Destination path (table/region/cf/) 439 * @param hfileLinkName - HFileLink name (it contains hfile-region-table) 440 * @return true if the file is created, otherwise the file exists. 441 * @throws IOException on file or parent directory creation failure 442 */ 443 public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs, 444 final Path dstFamilyPath, final String hfileLinkName) 445 throws IOException { 446 return createFromHFileLink(conf, fs, dstFamilyPath, hfileLinkName, true); 447 } 448 449 /** 450 * Create a new HFileLink starting from a hfileLink name 451 * 452 * <p>It also adds a back-reference to the hfile back-reference directory 453 * to simplify the reference-count and the cleaning process. 454 * 455 * @param conf {@link Configuration} to read for the archive directory name 456 * @param fs {@link FileSystem} on which to write the HFileLink 457 * @param dstFamilyPath - Destination path (table/region/cf/) 458 * @param hfileLinkName - HFileLink name (it contains hfile-region-table) 459 * @param createBackRef - Whether back reference should be created. Defaults to true. 460 * @return true if the file is created, otherwise the file exists. 461 * @throws IOException on file or parent directory creation failure 462 */ 463 public static boolean createFromHFileLink(final Configuration conf, final FileSystem fs, 464 final Path dstFamilyPath, final String hfileLinkName, final boolean createBackRef) 465 throws IOException { 466 Matcher m = LINK_NAME_PATTERN.matcher(hfileLinkName); 467 if (!m.matches()) { 468 throw new IllegalArgumentException(hfileLinkName + " is not a valid HFileLink name!"); 469 } 470 return create(conf, fs, dstFamilyPath, TableName.valueOf(m.group(1), m.group(2)), 471 m.group(3), m.group(4), createBackRef); 472 } 473 474 /** 475 * Create the back reference name 476 */ 477 //package-private for testing 478 static String createBackReferenceName(final String tableNameStr, 479 final String regionName) { 480 481 return regionName + "." + tableNameStr.replace(TableName.NAMESPACE_DELIM, '='); 482 } 483 484 /** 485 * Get the full path of the HFile referenced by the back reference 486 * 487 * @param rootDir root hbase directory 488 * @param linkRefPath Link Back Reference path 489 * @return full path of the referenced hfile 490 */ 491 public static Path getHFileFromBackReference(final Path rootDir, final Path linkRefPath) { 492 Pair<TableName, String> p = parseBackReferenceName(linkRefPath.getName()); 493 TableName linkTableName = p.getFirst(); 494 String linkRegionName = p.getSecond(); 495 496 String hfileName = getBackReferenceFileName(linkRefPath.getParent()); 497 Path familyPath = linkRefPath.getParent().getParent(); 498 Path regionPath = familyPath.getParent(); 499 Path tablePath = regionPath.getParent(); 500 501 String linkName = createHFileLinkName(FSUtils.getTableName(tablePath), 502 regionPath.getName(), hfileName); 503 Path linkTableDir = FSUtils.getTableDir(rootDir, linkTableName); 504 Path regionDir = HRegion.getRegionDir(linkTableDir, linkRegionName); 505 return new Path(new Path(regionDir, familyPath.getName()), linkName); 506 } 507 508 static Pair<TableName, String> parseBackReferenceName(String name) { 509 int separatorIndex = name.indexOf('.'); 510 String linkRegionName = name.substring(0, separatorIndex); 511 String tableSubstr = name.substring(separatorIndex + 1) 512 .replace('=', TableName.NAMESPACE_DELIM); 513 TableName linkTableName = TableName.valueOf(tableSubstr); 514 return new Pair<>(linkTableName, linkRegionName); 515 } 516 517 /** 518 * Get the full path of the HFile referenced by the back reference 519 * 520 * @param conf {@link Configuration} to read for the archive directory name 521 * @param linkRefPath Link Back Reference path 522 * @return full path of the referenced hfile 523 * @throws IOException on unexpected error. 524 */ 525 public static Path getHFileFromBackReference(final Configuration conf, final Path linkRefPath) 526 throws IOException { 527 return getHFileFromBackReference(FSUtils.getRootDir(conf), linkRefPath); 528 } 529 530}