001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.apache.hadoop.hbase.io.HFileLink.LINK_NAME_PATTERN; 021 022import edu.umd.cs.findbugs.annotations.Nullable; 023import java.io.FileNotFoundException; 024import java.io.IOException; 025import java.io.InterruptedIOException; 026import java.util.ArrayList; 027import java.util.Collection; 028import java.util.HashMap; 029import java.util.List; 030import java.util.Map; 031import java.util.Objects; 032import java.util.Optional; 033import java.util.UUID; 034import java.util.regex.Matcher; 035import org.apache.hadoop.conf.Configuration; 036import org.apache.hadoop.fs.FSDataInputStream; 037import org.apache.hadoop.fs.FSDataOutputStream; 038import org.apache.hadoop.fs.FileStatus; 039import org.apache.hadoop.fs.FileSystem; 040import org.apache.hadoop.fs.FileUtil; 041import org.apache.hadoop.fs.LocatedFileStatus; 042import org.apache.hadoop.fs.Path; 043import org.apache.hadoop.fs.permission.FsPermission; 044import org.apache.hadoop.hbase.Cell; 045import org.apache.hadoop.hbase.ExtendedCell; 046import org.apache.hadoop.hbase.HConstants; 047import 
org.apache.hadoop.hbase.PrivateCellUtil; 048import org.apache.hadoop.hbase.TableName; 049import org.apache.hadoop.hbase.backup.HFileArchiver; 050import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 051import org.apache.hadoop.hbase.client.RegionInfo; 052import org.apache.hadoop.hbase.client.TableDescriptor; 053import org.apache.hadoop.hbase.fs.HFileSystem; 054import org.apache.hadoop.hbase.io.HFileLink; 055import org.apache.hadoop.hbase.io.Reference; 056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 057import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; 058import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; 059import org.apache.hadoop.hbase.util.Bytes; 060import org.apache.hadoop.hbase.util.CommonFSUtils; 061import org.apache.hadoop.hbase.util.FSUtils; 062import org.apache.hadoop.hbase.util.Pair; 063import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; 064import org.apache.yetus.audience.InterfaceAudience; 065import org.slf4j.Logger; 066import org.slf4j.LoggerFactory; 067 068import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 069 070/** 071 * View to an on-disk Region. Provides the set of methods necessary to interact with the on-disk 072 * region data. 073 */ 074@InterfaceAudience.Private 075public class HRegionFileSystem { 076 private static final Logger LOG = LoggerFactory.getLogger(HRegionFileSystem.class); 077 078 /** Name of the region info file that resides just under the region directory. */ 079 public final static String REGION_INFO_FILE = ".regioninfo"; 080 081 /** Temporary subdirectory of the region directory used for merges. */ 082 public static final String REGION_MERGES_DIR = ".merges"; 083 084 /** Temporary subdirectory of the region directory used for splits. */ 085 public static final String REGION_SPLITS_DIR = ".splits"; 086 087 /** Temporary subdirectory of the region directory used for compaction output. 
*/ 088 static final String REGION_TEMP_DIR = ".tmp"; 089 090 private final RegionInfo regionInfo; 091 // regionInfo for interacting with FS (getting encodedName, etc) 092 final RegionInfo regionInfoForFs; 093 final Configuration conf; 094 private final Path tableDir; 095 final FileSystem fs; 096 private final Path regionDir; 097 098 /** 099 * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the 100 * client level. 101 */ 102 private final int hdfsClientRetriesNumber; 103 private final int baseSleepBeforeRetries; 104 private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10; 105 private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000; 106 107 /** 108 * Create a view to the on-disk region 109 * @param conf the {@link Configuration} to use 110 * @param fs {@link FileSystem} that contains the region 111 * @param tableDir {@link Path} to where the table is being stored 112 * @param regionInfo {@link RegionInfo} for region 113 */ 114 HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir, 115 final RegionInfo regionInfo) { 116 this.fs = fs; 117 this.conf = conf; 118 this.tableDir = Objects.requireNonNull(tableDir, "tableDir is null"); 119 this.regionInfo = Objects.requireNonNull(regionInfo, "regionInfo is null"); 120 this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo); 121 this.regionDir = FSUtils.getRegionDirFromTableDir(tableDir, regionInfo); 122 this.hdfsClientRetriesNumber = 123 conf.getInt("hdfs.client.retries.number", DEFAULT_HDFS_CLIENT_RETRIES_NUMBER); 124 this.baseSleepBeforeRetries = 125 conf.getInt("hdfs.client.sleep.before.retries", DEFAULT_BASE_SLEEP_BEFORE_RETRIES); 126 } 127 128 /** Returns the underlying {@link FileSystem} */ 129 public FileSystem getFileSystem() { 130 return this.fs; 131 } 132 133 /** Returns the {@link RegionInfo} that describe this on-disk region view */ 134 public RegionInfo getRegionInfo() { 135 return this.regionInfo; 
136 } 137 138 public RegionInfo getRegionInfoForFS() { 139 return this.regionInfoForFs; 140 } 141 142 /** Returns {@link Path} to the region's root directory. */ 143 public Path getTableDir() { 144 return this.tableDir; 145 } 146 147 /** Returns {@link Path} to the region directory. */ 148 public Path getRegionDir() { 149 return regionDir; 150 } 151 152 // =========================================================================== 153 // Temp Helpers 154 // =========================================================================== 155 /** Returns {@link Path} to the region's temp directory, used for file creations */ 156 public Path getTempDir() { 157 return new Path(getRegionDir(), REGION_TEMP_DIR); 158 } 159 160 /** 161 * Clean up any temp detritus that may have been left around from previous operation attempts. 162 */ 163 void cleanupTempDir() throws IOException { 164 deleteDir(getTempDir()); 165 } 166 167 // =========================================================================== 168 // Store/StoreFile Helpers 169 // =========================================================================== 170 /** 171 * Returns the directory path of the specified family 172 * @param familyName Column Family Name 173 * @return {@link Path} to the directory of the specified family 174 */ 175 public Path getStoreDir(final String familyName) { 176 return new Path(this.getRegionDir(), familyName); 177 } 178 179 /** 180 * @param tabledir {@link Path} to where the table is being stored 181 * @param hri {@link RegionInfo} for the region. 182 * @param family {@link ColumnFamilyDescriptor} describing the column family 183 * @return Path to family/Store home directory. 184 */ 185 public static Path getStoreHomedir(final Path tabledir, final RegionInfo hri, 186 final byte[] family) { 187 return getStoreHomedir(tabledir, hri.getEncodedName(), family); 188 } 189 190 /** 191 * @param tabledir {@link Path} to where the table is being stored 192 * @param encodedName Encoded region name. 
193 * @param family {@link ColumnFamilyDescriptor} describing the column family 194 * @return Path to family/Store home directory. 195 */ 196 public static Path getStoreHomedir(final Path tabledir, final String encodedName, 197 final byte[] family) { 198 return new Path(tabledir, new Path(encodedName, Bytes.toString(family))); 199 } 200 201 /** 202 * Create the store directory for the specified family name 203 * @param familyName Column Family Name 204 * @return {@link Path} to the directory of the specified family 205 * @throws IOException if the directory creation fails. 206 */ 207 Path createStoreDir(final String familyName) throws IOException { 208 Path storeDir = getStoreDir(familyName); 209 if (!fs.exists(storeDir) && !createDir(storeDir)) 210 throw new IOException("Failed creating " + storeDir); 211 return storeDir; 212 } 213 214 /** 215 * Set the directory of CF to the specified storage policy. <br> 216 * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>, 217 * <i>"COLD"</i> <br> 218 * <br> 219 * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details. 220 * @param familyName The name of column family. 221 * @param policyName The name of the storage policy: 'HOT', 'COLD', etc. See hadoop 2.6+ 222 * org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g 'COLD', 223 * 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'. 224 */ 225 public void setStoragePolicy(String familyName, String policyName) { 226 CommonFSUtils.setStoragePolicy(this.fs, getStoreDir(familyName), policyName); 227 } 228 229 /** 230 * Set storage policy for a whole region. <br> 231 * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>, 232 * <i>"COLD"</i> <br> 233 * <br> 234 * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details. 235 * @param policyName The name of the storage policy: 'HOT', 'COLD', etc. 
See hadoop 2.6+ 236 * org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g 'COLD', 237 * 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'. 238 */ 239 public void setStoragePolicy(String policyName) { 240 CommonFSUtils.setStoragePolicy(this.fs, getRegionDir(), policyName); 241 } 242 243 /** 244 * Get the storage policy of the directory of CF. 245 * @param familyName The name of column family. 246 * @return Storage policy name, or {@code null} if not using {@link HFileSystem} or exception 247 * thrown when trying to get policy 248 */ 249 @Nullable 250 public String getStoragePolicyName(String familyName) { 251 if (this.fs instanceof HFileSystem) { 252 Path storeDir = getStoreDir(familyName); 253 return ((HFileSystem) this.fs).getStoragePolicyName(storeDir); 254 } 255 256 return null; 257 } 258 259 /** 260 * Returns the store files' LocatedFileStatus which available for the family. This methods 261 * performs the filtering based on the valid store files. 262 * @param familyName Column Family Name 263 * @return a list of store files' LocatedFileStatus for the specified family. 
264 */ 265 public static List<LocatedFileStatus> getStoreFilesLocatedStatus(final HRegionFileSystem regionfs, 266 final String familyName, final boolean validate) throws IOException { 267 Path familyDir = regionfs.getStoreDir(familyName); 268 List<LocatedFileStatus> locatedFileStatuses = 269 CommonFSUtils.listLocatedStatus(regionfs.getFileSystem(), familyDir); 270 if (locatedFileStatuses == null) { 271 if (LOG.isTraceEnabled()) { 272 LOG.trace("No StoreFiles for: " + familyDir); 273 } 274 return null; 275 } 276 277 List<LocatedFileStatus> validStoreFiles = Lists.newArrayList(); 278 for (LocatedFileStatus status : locatedFileStatuses) { 279 if (validate && !StoreFileInfo.isValid(status)) { 280 // recovered.hfiles directory is expected inside CF path when hbase.wal.split.to.hfile to 281 // true, refer HBASE-23740 282 if (!HConstants.RECOVERED_HFILES_DIR.equals(status.getPath().getName())) { 283 LOG.warn("Invalid StoreFile: {}", status.getPath()); 284 } 285 } else { 286 validStoreFiles.add(status); 287 } 288 } 289 return validStoreFiles; 290 } 291 292 /** 293 * Return Qualified Path of the specified family/file 294 * @param familyName Column Family Name 295 * @param fileName File Name 296 * @return The qualified Path for the specified family/file 297 */ 298 Path getStoreFilePath(final String familyName, final String fileName) { 299 Path familyDir = getStoreDir(familyName); 300 return new Path(familyDir, fileName).makeQualified(fs.getUri(), fs.getWorkingDirectory()); 301 } 302 303 /** 304 * Return the store file information of the specified family/file. 
305 * @param familyName Column Family Name 306 * @param fileName File Name 307 * @return The {@link StoreFileInfo} for the specified family/file 308 */ 309 StoreFileInfo getStoreFileInfo(final String familyName, final String fileName, 310 final StoreFileTracker tracker) throws IOException { 311 Path familyDir = getStoreDir(familyName); 312 return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo, regionInfoForFs, 313 familyName, new Path(familyDir, fileName), tracker); 314 } 315 316 /** Returns the set of families present on disk n */ 317 public Collection<String> getFamilies() throws IOException { 318 FileStatus[] fds = 319 CommonFSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs)); 320 if (fds == null) return null; 321 322 ArrayList<String> families = new ArrayList<>(fds.length); 323 for (FileStatus status : fds) { 324 families.add(status.getPath().getName()); 325 } 326 327 return families; 328 } 329 330 /** 331 * Remove the region family from disk, archiving the store files. 
332 * @param familyName Column Family Name 333 * @throws IOException if an error occours during the archiving 334 */ 335 public void deleteFamily(final String familyName) throws IOException { 336 // archive family store files 337 HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName)); 338 339 // delete the family folder 340 Path familyDir = getStoreDir(familyName); 341 if (fs.exists(familyDir) && !deleteDir(familyDir)) 342 throw new IOException("Could not delete family " + familyName + " from FileSystem for region " 343 + regionInfoForFs.getRegionNameAsString() + "(" + regionInfoForFs.getEncodedName() + ")"); 344 } 345 346 /** 347 * Generate a unique file name, used by createTempName() and commitStoreFile() 348 * @param suffix extra information to append to the generated name 349 * @return Unique file name 350 */ 351 private static String generateUniqueName(final String suffix) { 352 String name = UUID.randomUUID().toString().replaceAll("-", ""); 353 if (suffix != null) name += suffix; 354 return name; 355 } 356 357 /** 358 * Generate a unique temporary Path. Used in conjuction with commitStoreFile() to get a safer file 359 * creation. <code> 360 * Path file = fs.createTempName(); 361 * ...StoreFile.Writer(file)... 362 * fs.commitStoreFile("family", file); 363 * </code> 364 * @return Unique {@link Path} of the temporary file 365 */ 366 public Path createTempName() { 367 return createTempName(null); 368 } 369 370 /** 371 * Generate a unique temporary Path. Used in conjuction with commitStoreFile() to get a safer file 372 * creation. <code> 373 * Path file = fs.createTempName(); 374 * ...StoreFile.Writer(file)... 
375 * fs.commitStoreFile("family", file); 376 * </code> 377 * @param suffix extra information to append to the generated name 378 * @return Unique {@link Path} of the temporary file 379 */ 380 public Path createTempName(final String suffix) { 381 return new Path(getTempDir(), generateUniqueName(suffix)); 382 } 383 384 /** 385 * Move the file from a build/temp location to the main family store directory. 386 * @param familyName Family that will gain the file 387 * @param buildPath {@link Path} to the file to commit. 388 * @return The new {@link Path} of the committed file 389 */ 390 public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException { 391 Path dstPath = preCommitStoreFile(familyName, buildPath, -1, false); 392 return commitStoreFile(buildPath, dstPath); 393 } 394 395 /** 396 * Generate the filename in the main family store directory for moving the file from a build/temp 397 * location. 398 * @param familyName Family that will gain the file 399 * @param buildPath {@link Path} to the file to commit. 400 * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence 401 * number) 402 * @param generateNewName False if you want to keep the buildPath name 403 * @return The new {@link Path} of the to be committed file 404 */ 405 private Path preCommitStoreFile(final String familyName, final Path buildPath, final long seqNum, 406 final boolean generateNewName) throws IOException { 407 Path storeDir = getStoreDir(familyName); 408 if (!fs.exists(storeDir) && !createDir(storeDir)) 409 throw new IOException("Failed creating " + storeDir); 410 411 String name = buildPath.getName(); 412 if (generateNewName) { 413 name = generateUniqueName((seqNum < 0) ? 
null : StoreFileInfo.formatBulkloadSeqId(seqNum)); 414 } 415 Path dstPath = new Path(storeDir, name); 416 if (!fs.exists(buildPath)) { 417 throw new FileNotFoundException(buildPath.toString()); 418 } 419 if (LOG.isDebugEnabled()) { 420 LOG.debug("Committing " + buildPath + " as " + dstPath); 421 } 422 return dstPath; 423 } 424 425 /* 426 * Moves file from staging dir to region dir 427 * @param buildPath {@link Path} to the file to commit. 428 * @param dstPath {@link Path} to the file under region dir 429 * @return The {@link Path} of the committed file 430 */ 431 Path commitStoreFile(final Path buildPath, Path dstPath) throws IOException { 432 // rename is not necessary in case of direct-insert stores 433 if (buildPath.equals(dstPath)) { 434 return dstPath; 435 } 436 // buildPath exists, therefore not doing an exists() check. 437 if (!rename(buildPath, dstPath)) { 438 throw new IOException("Failed rename of " + buildPath + " to " + dstPath); 439 } 440 return dstPath; 441 } 442 443 /** 444 * Archives the specified store file from the specified family. 445 * @param familyName Family that contains the store files 446 * @param filePath {@link Path} to the store file to remove 447 * @throws IOException if the archiving fails 448 */ 449 public void removeStoreFile(final String familyName, final Path filePath) throws IOException { 450 HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs, this.tableDir, 451 Bytes.toBytes(familyName), filePath); 452 } 453 454 /** 455 * Closes and archives the specified store files from the specified family. 
456 * @param familyName Family that contains the store files 457 * @param storeFiles set of store files to remove 458 * @throws IOException if the archiving fails 459 */ 460 public void removeStoreFiles(String familyName, Collection<HStoreFile> storeFiles) 461 throws IOException { 462 HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs, this.tableDir, 463 Bytes.toBytes(familyName), storeFiles); 464 } 465 466 /** 467 * Bulk load: Add a specified store file to the specified family. If the source file is on the 468 * same different file-system is moved from the source location to the destination location, 469 * otherwise is copied over. 470 * @param familyName Family that will gain the file 471 * @param srcPath {@link Path} to the file to import 472 * @param seqNum Bulk Load sequence number 473 * @return The destination {@link Path} of the bulk loaded file 474 */ 475 Pair<Path, Path> bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum) 476 throws IOException { 477 // Copy the file if it's on another filesystem 478 FileSystem srcFs = srcPath.getFileSystem(conf); 479 srcPath = srcFs.resolvePath(srcPath); 480 FileSystem realSrcFs = srcPath.getFileSystem(conf); 481 FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem) fs).getBackingFs() : fs; 482 483 // We can't compare FileSystem instances as equals() includes UGI instance 484 // as part of the comparison and won't work when doing SecureBulkLoad 485 // TODO deal with viewFS 486 if (!FSUtils.isSameHdfs(conf, realSrcFs, desFs)) { 487 LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " 488 + "the destination store. 
Copying file over to destination filesystem."); 489 Path tmpPath = createTempName(); 490 FileUtil.copy(realSrcFs, srcPath, fs, tmpPath, false, conf); 491 LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath); 492 srcPath = tmpPath; 493 } 494 495 return new Pair<>(srcPath, preCommitStoreFile(familyName, srcPath, seqNum, true)); 496 } 497 498 // =========================================================================== 499 // Splits Helpers 500 // =========================================================================== 501 502 public Path getSplitsDir(final RegionInfo hri) { 503 return new Path(getTableDir(), hri.getEncodedName()); 504 } 505 506 /** 507 * Remove daughter region 508 * @param regionInfo daughter {@link RegionInfo} 509 */ 510 void cleanupDaughterRegion(final RegionInfo regionInfo) throws IOException { 511 Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName()); 512 if (this.fs.exists(regionDir) && !deleteDir(regionDir)) { 513 throw new IOException("Failed delete of " + regionDir); 514 } 515 } 516 517 /** 518 * Commit a daughter region, moving it from the split temporary directory to the proper location 519 * in the filesystem. 
520 * @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo} 521 */ 522 public Path commitDaughterRegion(final RegionInfo regionInfo, List<Path> allRegionFiles, 523 MasterProcedureEnv env) throws IOException { 524 Path regionDir = this.getSplitsDir(regionInfo); 525 if (fs.exists(regionDir)) { 526 // Write HRI to a file in case we need to recover hbase:meta 527 Path regionInfoFile = new Path(regionDir, REGION_INFO_FILE); 528 byte[] regionInfoContent = getRegionInfoFileContent(regionInfo); 529 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent); 530 HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( 531 env.getMasterConfiguration(), fs, getTableDir(), regionInfo, false); 532 insertRegionFilesIntoStoreTracker(allRegionFiles, env, regionFs); 533 } 534 return regionDir; 535 } 536 537 private void insertRegionFilesIntoStoreTracker(List<Path> allFiles, MasterProcedureEnv env, 538 HRegionFileSystem regionFs) throws IOException { 539 TableDescriptor tblDesc = 540 env.getMasterServices().getTableDescriptors().get(regionInfo.getTable()); 541 // we need to map trackers per store 542 Map<String, StoreFileTracker> trackerMap = new HashMap<>(); 543 // we need to map store files per store 544 Map<String, List<StoreFileInfo>> fileInfoMap = new HashMap<>(); 545 for (Path file : allFiles) { 546 String familyName = file.getParent().getName(); 547 trackerMap.computeIfAbsent(familyName, t -> StoreFileTrackerFactory.create(conf, tblDesc, 548 tblDesc.getColumnFamily(Bytes.toBytes(familyName)), regionFs)); 549 fileInfoMap.computeIfAbsent(familyName, l -> new ArrayList<>()); 550 List<StoreFileInfo> infos = fileInfoMap.get(familyName); 551 infos.add(trackerMap.get(familyName).getStoreFileInfo(file, true)); 552 } 553 for (Map.Entry<String, StoreFileTracker> entry : trackerMap.entrySet()) { 554 entry.getValue().add(fileInfoMap.get(entry.getKey())); 555 } 556 } 557 558 /** 559 * Creates region split daughter directories under the 
table dir. If the daughter regions already 560 * exist, for example, in the case of a recovery from a previous failed split procedure, this 561 * method deletes the given region dir recursively, then recreates it again. 562 */ 563 public void createSplitsDir(RegionInfo daughterA, RegionInfo daughterB) throws IOException { 564 Path daughterADir = getSplitsDir(daughterA); 565 if (fs.exists(daughterADir) && !deleteDir(daughterADir)) { 566 throw new IOException("Failed deletion of " + daughterADir + " before creating them again."); 567 568 } 569 if (!createDir(daughterADir)) { 570 throw new IOException("Failed create of " + daughterADir); 571 } 572 Path daughterBDir = getSplitsDir(daughterB); 573 if (fs.exists(daughterBDir) && !deleteDir(daughterBDir)) { 574 throw new IOException("Failed deletion of " + daughterBDir + " before creating them again."); 575 576 } 577 if (!createDir(daughterBDir)) { 578 throw new IOException("Failed create of " + daughterBDir); 579 } 580 } 581 582 /** 583 * Write out a split reference. Package local so it doesnt leak out of regionserver. 584 * @param hri {@link RegionInfo} of the destination 585 * @param familyName Column Family Name 586 * @param f File to split. 587 * @param splitRow Split Row 588 * @param top True if we are referring to the top half of the hfile. 589 * @param splitPolicy A split policy instance; be careful! May not be full populated; e.g. if this 590 * method is invoked on the Master side, then the RegionSplitPolicy will NOT 591 * have a reference to a Region. 592 * @return Path to created reference. 593 */ 594 public Path splitStoreFile(RegionInfo hri, String familyName, HStoreFile f, byte[] splitRow, 595 boolean top, RegionSplitPolicy splitPolicy, StoreFileTracker tracker) throws IOException { 596 Path splitDir = new Path(getSplitsDir(hri), familyName); 597 // Add the referred-to regions name as a dot separated suffix. 598 // See REF_NAME_REGEX regex above. 
The referred-to regions name is 599 // up in the path of the passed in <code>f</code> -- parentdir is family, 600 // then the directory above is the region name. 601 String parentRegionName = regionInfoForFs.getEncodedName(); 602 // Write reference with same file id only with the other region name as 603 // suffix and into the new region location (under same family). 604 Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName); 605 if (fs.exists(p)) { 606 LOG.warn("Found an already existing split file for {}. Assuming this is a recovery.", p); 607 return p; 608 } 609 boolean createLinkFile = false; 610 if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) { 611 // Check whether the split row lies in the range of the store file 612 // If it is outside the range, return directly. 613 f.initReader(); 614 try { 615 Cell splitKey = PrivateCellUtil.createFirstOnRow(splitRow); 616 Optional<ExtendedCell> lastKey = f.getLastKey(); 617 Optional<ExtendedCell> firstKey = f.getFirstKey(); 618 if (top) { 619 // check if larger than last key. 620 // If lastKey is null means storefile is empty. 621 if (!lastKey.isPresent()) { 622 return null; 623 } 624 if (f.getComparator().compare(splitKey, lastKey.get()) > 0) { 625 return null; 626 } 627 if (firstKey.isPresent() && f.getComparator().compare(splitKey, firstKey.get()) <= 0) { 628 LOG.debug("Will create HFileLink file for {}, top=true", f.getPath()); 629 createLinkFile = true; 630 } 631 } else { 632 // check if smaller than first key 633 // If firstKey is null means storefile is empty. 
634 if (!firstKey.isPresent()) { 635 return null; 636 } 637 if (f.getComparator().compare(splitKey, firstKey.get()) < 0) { 638 return null; 639 } 640 if (lastKey.isPresent() && f.getComparator().compare(splitKey, lastKey.get()) >= 0) { 641 LOG.debug("Will create HFileLink file for {}, top=false", f.getPath()); 642 createLinkFile = true; 643 } 644 } 645 } finally { 646 f.closeStoreFile(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true); 647 } 648 } 649 if (createLinkFile) { 650 // create HFileLink file instead of Reference file for child 651 String hfileName = f.getPath().getName(); 652 TableName linkedTable = regionInfoForFs.getTable(); 653 String linkedRegion = regionInfoForFs.getEncodedName(); 654 try { 655 if (HFileLink.isHFileLink(hfileName)) { 656 Matcher m = LINK_NAME_PATTERN.matcher(hfileName); 657 if (!m.matches()) { 658 throw new IllegalArgumentException(hfileName + " is not a valid HFileLink name!"); 659 } 660 linkedTable = TableName.valueOf(m.group(1), m.group(2)); 661 linkedRegion = m.group(3); 662 hfileName = m.group(4); 663 } 664 // must create back reference here 665 tracker.createHFileLink(linkedTable, linkedRegion, hfileName, true); 666 Path path = 667 new Path(splitDir, HFileLink.createHFileLinkName(linkedTable, linkedRegion, hfileName)); 668 LOG.info("Created linkFile:" + path.toString() + " for child: " + hri.getEncodedName() 669 + ", parent: " + regionInfoForFs.getEncodedName()); 670 return path; 671 } catch (IOException e) { 672 // if create HFileLink file failed, then just skip the error and create Reference file 673 LOG.error("Create link file for " + hfileName + " for child " + hri.getEncodedName() 674 + "failed, will create Reference file", e); 675 } 676 } 677 // A reference to the bottom half of the hsf store file. 678 Reference r = 679 top ? 
Reference.createTopReference(splitRow) : Reference.createBottomReference(splitRow); 680 tracker.createReference(r, p); 681 return p; 682 } 683 684 // =========================================================================== 685 // Merge Helpers 686 // =========================================================================== 687 688 Path getMergesDir(final RegionInfo hri) { 689 return new Path(getTableDir(), hri.getEncodedName()); 690 } 691 692 /** 693 * Remove merged region 694 * @param mergedRegion {@link RegionInfo} 695 */ 696 public void cleanupMergedRegion(final RegionInfo mergedRegion) throws IOException { 697 Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName()); 698 if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) { 699 throw new IOException("Failed delete of " + regionDir); 700 } 701 } 702 703 static boolean mkdirs(FileSystem fs, Configuration conf, Path dir) throws IOException { 704 if ( 705 FSUtils.isDistributedFileSystem(fs) 706 || !conf.getBoolean(HConstants.ENABLE_DATA_FILE_UMASK, false) 707 ) { 708 return fs.mkdirs(dir); 709 } 710 FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY); 711 return fs.mkdirs(dir, perms); 712 } 713 714 /** 715 * Write out a merge reference under the given merges directory. 716 * @param mergingRegion {@link RegionInfo} for one of the regions being merged. 717 * @param familyName Column Family Name 718 * @param f File to create reference. 719 * @return Path to created reference. 720 * @throws IOException if the merge write fails. 721 */ 722 public Path mergeStoreFile(RegionInfo mergingRegion, String familyName, HStoreFile f, 723 StoreFileTracker tracker) throws IOException { 724 Path referenceDir = new Path(getMergesDir(regionInfoForFs), familyName); 725 // A whole reference to the store file. 726 Reference r = Reference.createTopReference(mergingRegion.getStartKey()); 727 // Add the referred-to regions name as a dot separated suffix. 
728 // See REF_NAME_REGEX regex above. The referred-to regions name is 729 // up in the path of the passed in <code>f</code> -- parentdir is family, 730 // then the directory above is the region name. 731 String mergingRegionName = mergingRegion.getEncodedName(); 732 // Write reference with same file id only with the other region name as 733 // suffix and into the new region location (under same family). 734 Path p = new Path(referenceDir, f.getPath().getName() + "." + mergingRegionName); 735 tracker.createReference(r, p); 736 return p; 737 } 738 739 /** 740 * Commit a merged region, making it ready for use. 741 */ 742 public void commitMergedRegion(List<Path> allMergedFiles, MasterProcedureEnv env) 743 throws IOException { 744 Path regionDir = getMergesDir(regionInfoForFs); 745 if (regionDir != null && fs.exists(regionDir)) { 746 // Write HRI to a file in case we need to recover hbase:meta 747 Path regionInfoFile = new Path(regionDir, REGION_INFO_FILE); 748 byte[] regionInfoContent = getRegionInfoFileContent(regionInfo); 749 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent); 750 insertRegionFilesIntoStoreTracker(allMergedFiles, env, this); 751 } 752 } 753 754 // =========================================================================== 755 // Create/Open/Delete Helpers 756 // =========================================================================== 757 758 /** Returns Content of the file we write out to the filesystem under a region */ 759 private static byte[] getRegionInfoFileContent(final RegionInfo hri) throws IOException { 760 return RegionInfo.toDelimitedByteArray(hri); 761 } 762 763 /** 764 * Create a {@link RegionInfo} from the serialized version on-disk. 765 * @param fs {@link FileSystem} that contains the Region Info file 766 * @param regionDir {@link Path} to the Region Directory that contains the Info file 767 * @return An {@link RegionInfo} instance gotten from the Region Info file. 
768 * @throws IOException if an error occurred during file open/read operation. 769 */ 770 public static RegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir) 771 throws IOException { 772 FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE)); 773 try { 774 return RegionInfo.parseFrom(in); 775 } finally { 776 in.close(); 777 } 778 } 779 780 /** 781 * Write the .regioninfo file on-disk. 782 * <p/> 783 * Overwrites if exists already. 784 */ 785 private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs, 786 final Path regionInfoFile, final byte[] content) throws IOException { 787 // First check to get the permissions 788 FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY); 789 // Write the RegionInfo file content 790 try (FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null)) { 791 out.write(content); 792 } 793 } 794 795 /** 796 * Write out an info file under the stored region directory. Useful recovering mangled regions. If 797 * the regionInfo already exists on-disk, then we fast exit. 798 */ 799 void checkRegionInfoOnFilesystem() throws IOException { 800 // Compose the content of the file so we can compare to length in filesystem. If not same, 801 // rewrite it (it may have been written in the old format using Writables instead of pb). The 802 // pb version is much shorter -- we write now w/o the toString version -- so checking length 803 // only should be sufficient. I don't want to read the file every time to check if it pb 804 // serialized. 805 byte[] content = getRegionInfoFileContent(regionInfoForFs); 806 807 // Verify if the region directory exists before opening a region. We need to do this since if 808 // the region directory doesn't exist we will re-create the region directory and a new HRI 809 // when HRegion.openHRegion() is called. 
810 try { 811 FileStatus status = fs.getFileStatus(getRegionDir()); 812 } catch (FileNotFoundException e) { 813 LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() 814 + " on table " + regionInfo.getTable()); 815 } 816 817 try { 818 Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE); 819 FileStatus status = fs.getFileStatus(regionInfoFile); 820 if (status != null && status.getLen() == content.length) { 821 // Then assume the content good and move on. 822 // NOTE: that the length is not sufficient to define the the content matches. 823 return; 824 } 825 826 LOG.info("Rewriting .regioninfo file at: " + regionInfoFile); 827 if (!fs.delete(regionInfoFile, false)) { 828 throw new IOException("Unable to remove existing " + regionInfoFile); 829 } 830 } catch (FileNotFoundException e) { 831 LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() 832 + " on table " + regionInfo.getTable()); 833 } 834 835 // Write HRI to a file in case we need to recover hbase:meta 836 writeRegionInfoOnFilesystem(content, true); 837 } 838 839 /** 840 * Write out an info file under the region directory. Useful recovering mangled regions. 841 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation. 842 */ 843 private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException { 844 byte[] content = getRegionInfoFileContent(regionInfoForFs); 845 writeRegionInfoOnFilesystem(content, useTempDir); 846 } 847 848 /** 849 * Write out an info file under the region directory. Useful recovering mangled regions. 850 * @param regionInfoContent serialized version of the {@link RegionInfo} 851 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file 852 * creation. 
853 */ 854 private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent, final boolean useTempDir) 855 throws IOException { 856 Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE); 857 if (useTempDir) { 858 // Create in tmpDir and then move into place in case we crash after 859 // create but before close. If we don't successfully close the file, 860 // subsequent region reopens will fail the below because create is 861 // registered in NN. 862 863 // And then create the file 864 Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE); 865 866 // If datanode crashes or if the RS goes down just before the close is called while trying to 867 // close the created regioninfo file in the .tmp directory then on next 868 // creation we will be getting AlreadyCreatedException. 869 // Hence delete and create the file if exists. 870 if (CommonFSUtils.isExists(fs, tmpPath)) { 871 CommonFSUtils.delete(fs, tmpPath, true); 872 } 873 874 // Write HRI to a file in case we need to recover hbase:meta 875 writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent); 876 877 // Move the created file to the original path 878 if (fs.exists(tmpPath) && !rename(tmpPath, regionInfoFile)) { 879 throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile); 880 } 881 } else { 882 // Write HRI to a file in case we need to recover hbase:meta 883 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent); 884 } 885 } 886 887 /** 888 * Create a new Region on file-system. 889 * @param conf the {@link Configuration} to use 890 * @param fs {@link FileSystem} from which to add the region 891 * @param tableDir {@link Path} to where the table is being stored 892 * @param regionInfo {@link RegionInfo} for region to be added 893 * @throws IOException if the region creation fails due to a FileSystem exception. 
894 */ 895 public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf, 896 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException { 897 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 898 899 // We only create a .regioninfo and the region directory if this is the default region replica 900 if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 901 Path regionDir = regionFs.getRegionDir(); 902 if (fs.exists(regionDir)) { 903 LOG.warn("Trying to create a region that already exists on disk: " + regionDir); 904 } else { 905 // Create the region directory 906 if (!createDirOnFileSystem(fs, conf, regionDir)) { 907 LOG.warn("Unable to create the region directory: " + regionDir); 908 throw new IOException("Unable to create region directory: " + regionDir); 909 } 910 } 911 912 // Write HRI to a file in case we need to recover hbase:meta 913 regionFs.writeRegionInfoOnFilesystem(false); 914 } else { 915 if (LOG.isDebugEnabled()) 916 LOG.debug("Skipping creation of .regioninfo file for " + regionInfo); 917 } 918 return regionFs; 919 } 920 921 /** 922 * Open Region from file-system. 923 * @param conf the {@link Configuration} to use 924 * @param fs {@link FileSystem} from which to add the region 925 * @param tableDir {@link Path} to where the table is being stored 926 * @param regionInfo {@link RegionInfo} for region to be added 927 * @param readOnly True if you don't want to edit the region data 928 * @throws IOException if the region creation fails due to a FileSystem exception. 
929 */ 930 public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf, 931 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo, boolean readOnly) 932 throws IOException { 933 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 934 Path regionDir = regionFs.getRegionDir(); 935 936 if (!fs.exists(regionDir)) { 937 LOG.warn("Trying to open a region that do not exists on disk: " + regionDir); 938 throw new IOException("The specified region do not exists on disk: " + regionDir); 939 } 940 941 if (!readOnly) { 942 // Cleanup temporary directories 943 regionFs.cleanupTempDir(); 944 945 // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta 946 // Only create HRI if we are the default replica 947 if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 948 regionFs.checkRegionInfoOnFilesystem(); 949 } else { 950 if (LOG.isDebugEnabled()) { 951 LOG.debug("Skipping creation of .regioninfo file for " + regionInfo); 952 } 953 } 954 } 955 956 return regionFs; 957 } 958 959 /** 960 * Remove the region from the table directory, archiving the region's hfiles. 
961 * @param conf the {@link Configuration} to use 962 * @param fs {@link FileSystem} from which to remove the region 963 * @param tableDir {@link Path} to where the table is being stored 964 * @param regionInfo {@link RegionInfo} for region to be deleted 965 * @throws IOException if the request cannot be completed 966 */ 967 public static void deleteRegionFromFileSystem(final Configuration conf, final FileSystem fs, 968 final Path tableDir, final RegionInfo regionInfo) throws IOException { 969 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 970 Path regionDir = regionFs.getRegionDir(); 971 972 if (!fs.exists(regionDir)) { 973 LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir); 974 return; 975 } 976 977 if (LOG.isDebugEnabled()) { 978 LOG.debug("DELETING region " + regionDir); 979 } 980 981 // Archive region 982 Path rootDir = CommonFSUtils.getRootDir(conf); 983 HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir); 984 985 // Delete empty region dir 986 if (!fs.delete(regionDir, true)) { 987 LOG.warn("Failed delete of " + regionDir); 988 } 989 } 990 991 /** 992 * Creates a directory. Assumes the user has already checked for this directory existence. 993 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks 994 * whether the directory exists or not, and returns true if it exists. 
995 */ 996 boolean createDir(Path dir) throws IOException { 997 int i = 0; 998 IOException lastIOE = null; 999 do { 1000 try { 1001 return mkdirs(fs, conf, dir); 1002 } catch (IOException ioe) { 1003 lastIOE = ioe; 1004 if (fs.exists(dir)) return true; // directory is present 1005 try { 1006 sleepBeforeRetry("Create Directory", i + 1); 1007 } catch (InterruptedException e) { 1008 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 1009 } 1010 } 1011 } while (++i <= hdfsClientRetriesNumber); 1012 throw new IOException("Exception in createDir", lastIOE); 1013 } 1014 1015 /** 1016 * Renames a directory. Assumes the user has already checked for this directory existence. 1017 * @return true if rename is successful. 1018 */ 1019 boolean rename(Path srcpath, Path dstPath) throws IOException { 1020 IOException lastIOE = null; 1021 int i = 0; 1022 do { 1023 try { 1024 return fs.rename(srcpath, dstPath); 1025 } catch (IOException ioe) { 1026 lastIOE = ioe; 1027 if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move 1028 // dir is not there, retry after some time. 1029 try { 1030 sleepBeforeRetry("Rename Directory", i + 1); 1031 } catch (InterruptedException e) { 1032 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 1033 } 1034 } 1035 } while (++i <= hdfsClientRetriesNumber); 1036 1037 throw new IOException("Exception in rename", lastIOE); 1038 } 1039 1040 /** 1041 * Deletes a directory. Assumes the user has already checked for this directory existence. 1042 * @return true if the directory is deleted. 1043 */ 1044 boolean deleteDir(Path dir) throws IOException { 1045 IOException lastIOE = null; 1046 int i = 0; 1047 do { 1048 try { 1049 return fs.delete(dir, true); 1050 } catch (IOException ioe) { 1051 lastIOE = ioe; 1052 if (!fs.exists(dir)) return true; 1053 // dir is there, retry deleting after some time. 
1054 try { 1055 sleepBeforeRetry("Delete Directory", i + 1); 1056 } catch (InterruptedException e) { 1057 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 1058 } 1059 } 1060 } while (++i <= hdfsClientRetriesNumber); 1061 1062 throw new IOException("Exception in DeleteDir", lastIOE); 1063 } 1064 1065 /** 1066 * sleeping logic; handles the interrupt exception. 1067 */ 1068 private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException { 1069 sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber); 1070 } 1071 1072 /** 1073 * Creates a directory for a filesystem and configuration object. Assumes the user has already 1074 * checked for this directory existence. 1075 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks 1076 * whether the directory exists or not, and returns true if it exists. 1077 */ 1078 private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir) 1079 throws IOException { 1080 int i = 0; 1081 IOException lastIOE = null; 1082 int hdfsClientRetriesNumber = 1083 conf.getInt("hdfs.client.retries.number", DEFAULT_HDFS_CLIENT_RETRIES_NUMBER); 1084 int baseSleepBeforeRetries = 1085 conf.getInt("hdfs.client.sleep.before.retries", DEFAULT_BASE_SLEEP_BEFORE_RETRIES); 1086 do { 1087 try { 1088 return fs.mkdirs(dir); 1089 } catch (IOException ioe) { 1090 lastIOE = ioe; 1091 if (fs.exists(dir)) return true; // directory is present 1092 try { 1093 sleepBeforeRetry("Create Directory", i + 1, baseSleepBeforeRetries, 1094 hdfsClientRetriesNumber); 1095 } catch (InterruptedException e) { 1096 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 1097 } 1098 } 1099 } while (++i <= hdfsClientRetriesNumber); 1100 1101 throw new IOException("Exception in createDir", lastIOE); 1102 } 1103 1104 /** 1105 * sleeping logic for static methods; handles the interrupt exception. 
Keeping a static version
   * for this to avoid re-looking for the integer values.
   */
  private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
    int hdfsClientRetriesNumber) throws InterruptedException {
    if (sleepMultiplier <= hdfsClientRetriesNumber) {
      if (LOG.isDebugEnabled()) {
        LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
      }
      // Widen before multiplying so the product is computed as a long.
      final long pause = (long) baseSleepBeforeRetries * sleepMultiplier;
      Thread.sleep(pause);
    } else if (LOG.isDebugEnabled()) {
      // Past the retry budget: do not sleep, only report it.
      LOG.debug(msg + ", retries exhausted");
    }
  }

  /**
   * Builds an {@link HRegionFileSystem} for the given region by calling the constructor with the
   * arguments as passed.
   * @param conf       the {@link Configuration} to use
   * @param fs         {@link FileSystem} holding the region
   * @param tableDir   {@link Path} to where the table is being stored
   * @param regionInfo {@link RegionInfo} of the region
   * @return the newly constructed instance
   */
  public static HRegionFileSystem create(final Configuration conf, final FileSystem fs,
    final Path tableDir, final RegionInfo regionInfo) throws IOException {
    final HRegionFileSystem instance = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
    return instance;
  }
}