001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020package org.apache.hadoop.hbase.regionserver; 021 022import edu.umd.cs.findbugs.annotations.Nullable; 023import java.io.FileNotFoundException; 024import java.io.IOException; 025import java.io.InterruptedIOException; 026import java.util.ArrayList; 027import java.util.Collection; 028import java.util.List; 029import java.util.Optional; 030import java.util.UUID; 031import org.apache.hadoop.conf.Configuration; 032import org.apache.hadoop.fs.FSDataInputStream; 033import org.apache.hadoop.fs.FSDataOutputStream; 034import org.apache.hadoop.fs.FileStatus; 035import org.apache.hadoop.fs.FileSystem; 036import org.apache.hadoop.fs.FileUtil; 037import org.apache.hadoop.fs.LocatedFileStatus; 038import org.apache.hadoop.fs.Path; 039import org.apache.hadoop.fs.permission.FsPermission; 040import org.apache.hadoop.hbase.Cell; 041import org.apache.hadoop.hbase.HConstants; 042import org.apache.hadoop.hbase.PrivateCellUtil; 043import org.apache.hadoop.hbase.backup.HFileArchiver; 044import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 045import org.apache.hadoop.hbase.client.RegionInfo; 046import org.apache.hadoop.hbase.client.TableDescriptor; 047import org.apache.hadoop.hbase.fs.HFileSystem; 048import org.apache.hadoop.hbase.io.Reference; 049import org.apache.hadoop.hbase.util.Bytes; 050import org.apache.hadoop.hbase.util.FSHDFSUtils; 051import org.apache.hadoop.hbase.util.FSUtils; 052import org.apache.hadoop.hbase.util.Pair; 053import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; 054import org.apache.yetus.audience.InterfaceAudience; 055import org.slf4j.Logger; 056import org.slf4j.LoggerFactory; 057 058import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; 059import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 060 061/** 062 * View to an on-disk Region. 063 * Provides the set of methods necessary to interact with the on-disk region data. 064 */ 065@InterfaceAudience.Private 066public class HRegionFileSystem { 067 private static final Logger LOG = LoggerFactory.getLogger(HRegionFileSystem.class); 068 069 /** Name of the region info file that resides just under the region directory. */ 070 public final static String REGION_INFO_FILE = ".regioninfo"; 071 072 /** Temporary subdirectory of the region directory used for merges. */ 073 public static final String REGION_MERGES_DIR = ".merges"; 074 075 /** Temporary subdirectory of the region directory used for splits. */ 076 public static final String REGION_SPLITS_DIR = ".splits"; 077 078 /** Temporary subdirectory of the region directory used for compaction output. */ 079 @VisibleForTesting static final String REGION_TEMP_DIR = ".tmp"; 080 081 private final RegionInfo regionInfo; 082 //regionInfo for interacting with FS (getting encodedName, etc) 083 private final RegionInfo regionInfoForFs; 084 private final Configuration conf; 085 private final Path tableDir; 086 private final FileSystem fs; 087 088 /** 089 * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the 090 * client level. 091 */ 092 private final int hdfsClientRetriesNumber; 093 private final int baseSleepBeforeRetries; 094 private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10; 095 private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000; 096 097 /** 098 * Create a view to the on-disk region 099 * @param conf the {@link Configuration} to use 100 * @param fs {@link FileSystem} that contains the region 101 * @param tableDir {@link Path} to where the table is being stored 102 * @param regionInfo {@link RegionInfo} for region 103 */ 104 HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir, 105 final RegionInfo regionInfo) { 106 this.fs = fs; 107 this.conf = conf; 108 this.tableDir = tableDir; 109 this.regionInfo = regionInfo; 110 this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo); 111 this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number", 112 DEFAULT_HDFS_CLIENT_RETRIES_NUMBER); 113 this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries", 114 DEFAULT_BASE_SLEEP_BEFORE_RETRIES); 115 } 116 117 /** @return the underlying {@link FileSystem} */ 118 public FileSystem getFileSystem() { 119 return this.fs; 120 } 121 122 /** @return the {@link RegionInfo} that describe this on-disk region view */ 123 public RegionInfo getRegionInfo() { 124 return this.regionInfo; 125 } 126 127 public RegionInfo getRegionInfoForFS() { 128 return this.regionInfoForFs; 129 } 130 131 /** @return {@link Path} to the region's root directory. */ 132 public Path getTableDir() { 133 return this.tableDir; 134 } 135 136 /** @return {@link Path} to the region directory. */ 137 public Path getRegionDir() { 138 return new Path(this.tableDir, this.regionInfoForFs.getEncodedName()); 139 } 140 141 // =========================================================================== 142 // Temp Helpers 143 // =========================================================================== 144 /** @return {@link Path} to the region's temp directory, used for file creations */ 145 Path getTempDir() { 146 return new Path(getRegionDir(), REGION_TEMP_DIR); 147 } 148 149 /** 150 * Clean up any temp detritus that may have been left around from previous operation attempts. 151 */ 152 void cleanupTempDir() throws IOException { 153 deleteDir(getTempDir()); 154 } 155 156 // =========================================================================== 157 // Store/StoreFile Helpers 158 // =========================================================================== 159 /** 160 * Returns the directory path of the specified family 161 * @param familyName Column Family Name 162 * @return {@link Path} to the directory of the specified family 163 */ 164 public Path getStoreDir(final String familyName) { 165 return new Path(this.getRegionDir(), familyName); 166 } 167 168 /** 169 * Create the store directory for the specified family name 170 * @param familyName Column Family Name 171 * @return {@link Path} to the directory of the specified family 172 * @throws IOException if the directory creation fails. 173 */ 174 Path createStoreDir(final String familyName) throws IOException { 175 Path storeDir = getStoreDir(familyName); 176 if(!fs.exists(storeDir) && !createDir(storeDir)) 177 throw new IOException("Failed creating "+storeDir); 178 return storeDir; 179 } 180 181 /** 182 * Set the directory of CF to the specified storage policy. <br> 183 * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>, 184 * <i>"COLD"</i> <br> 185 * <br> 186 * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details. 187 * @param familyName The name of column family. 188 * @param policyName The name of the storage policy: 'HOT', 'COLD', etc. 189 * See see hadoop 2.6+ org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g 190 * 'COLD', 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'. 191 */ 192 public void setStoragePolicy(String familyName, String policyName) { 193 FSUtils.setStoragePolicy(this.fs, getStoreDir(familyName), policyName); 194 } 195 196 /** 197 * Get the storage policy of the directory of CF. 198 * @param familyName The name of column family. 199 * @return Storage policy name, or {@code null} if not using {@link HFileSystem} or exception 200 * thrown when trying to get policy 201 */ 202 @Nullable 203 public String getStoragePolicyName(String familyName) { 204 if (this.fs instanceof HFileSystem) { 205 Path storeDir = getStoreDir(familyName); 206 return ((HFileSystem) this.fs).getStoragePolicyName(storeDir); 207 } 208 209 return null; 210 } 211 212 /** 213 * Returns the store files available for the family. 214 * This methods performs the filtering based on the valid store files. 215 * @param familyName Column Family Name 216 * @return a set of {@link StoreFileInfo} for the specified family. 217 */ 218 public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException { 219 return getStoreFiles(Bytes.toString(familyName)); 220 } 221 222 public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException { 223 return getStoreFiles(familyName, true); 224 } 225 226 /** 227 * Returns the store files available for the family. 228 * This methods performs the filtering based on the valid store files. 229 * @param familyName Column Family Name 230 * @return a set of {@link StoreFileInfo} for the specified family. 231 */ 232 public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate) 233 throws IOException { 234 Path familyDir = getStoreDir(familyName); 235 FileStatus[] files = FSUtils.listStatus(this.fs, familyDir); 236 if (files == null) { 237 if (LOG.isTraceEnabled()) { 238 LOG.trace("No StoreFiles for: " + familyDir); 239 } 240 return null; 241 } 242 243 ArrayList<StoreFileInfo> storeFiles = new ArrayList<>(files.length); 244 for (FileStatus status: files) { 245 if (validate && !StoreFileInfo.isValid(status)) { 246 LOG.warn("Invalid StoreFile: " + status.getPath()); 247 continue; 248 } 249 StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo, 250 regionInfoForFs, familyName, status.getPath()); 251 storeFiles.add(info); 252 253 } 254 return storeFiles; 255 } 256 257 /** 258 * Returns the store files' LocatedFileStatus which available for the family. 259 * This methods performs the filtering based on the valid store files. 260 * @param familyName Column Family Name 261 * @return a list of store files' LocatedFileStatus for the specified family. 262 */ 263 public static List<LocatedFileStatus> getStoreFilesLocatedStatus( 264 final HRegionFileSystem regionfs, final String familyName, 265 final boolean validate) throws IOException { 266 Path familyDir = regionfs.getStoreDir(familyName); 267 List<LocatedFileStatus> locatedFileStatuses = FSUtils.listLocatedStatus( 268 regionfs.getFileSystem(), familyDir); 269 if (locatedFileStatuses == null) { 270 if (LOG.isTraceEnabled()) { 271 LOG.trace("No StoreFiles for: " + familyDir); 272 } 273 return null; 274 } 275 276 List<LocatedFileStatus> validStoreFiles = Lists.newArrayList(); 277 for (LocatedFileStatus status : locatedFileStatuses) { 278 if (validate && !StoreFileInfo.isValid(status)) { 279 LOG.warn("Invalid StoreFile: " + status.getPath()); 280 } else { 281 validStoreFiles.add(status); 282 } 283 } 284 return validStoreFiles; 285 } 286 287 /** 288 * Return Qualified Path of the specified family/file 289 * 290 * @param familyName Column Family Name 291 * @param fileName File Name 292 * @return The qualified Path for the specified family/file 293 */ 294 Path getStoreFilePath(final String familyName, final String fileName) { 295 Path familyDir = getStoreDir(familyName); 296 return new Path(familyDir, fileName).makeQualified(fs.getUri(), fs.getWorkingDirectory()); 297 } 298 299 /** 300 * Return the store file information of the specified family/file. 301 * 302 * @param familyName Column Family Name 303 * @param fileName File Name 304 * @return The {@link StoreFileInfo} for the specified family/file 305 */ 306 StoreFileInfo getStoreFileInfo(final String familyName, final String fileName) 307 throws IOException { 308 Path familyDir = getStoreDir(familyName); 309 return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo, 310 regionInfoForFs, familyName, new Path(familyDir, fileName)); 311 } 312 313 /** 314 * Returns true if the specified family has reference files 315 * @param familyName Column Family Name 316 * @return true if family contains reference files 317 * @throws IOException 318 */ 319 public boolean hasReferences(final String familyName) throws IOException { 320 Path storeDir = getStoreDir(familyName); 321 FileStatus[] files = FSUtils.listStatus(fs, storeDir); 322 if (files != null) { 323 for(FileStatus stat: files) { 324 if(stat.isDirectory()) { 325 continue; 326 } 327 if (StoreFileInfo.isReference(stat.getPath())) { 328 LOG.trace("Reference {}", stat.getPath()); 329 return true; 330 } 331 } 332 } 333 return false; 334 } 335 336 /** 337 * Check whether region has Reference file 338 * @param htd table desciptor of the region 339 * @return true if region has reference file 340 * @throws IOException 341 */ 342 public boolean hasReferences(final TableDescriptor htd) throws IOException { 343 for (ColumnFamilyDescriptor family : htd.getColumnFamilies()) { 344 if (hasReferences(family.getNameAsString())) { 345 return true; 346 } 347 } 348 return false; 349 } 350 351 /** 352 * @return the set of families present on disk 353 * @throws IOException 354 */ 355 public Collection<String> getFamilies() throws IOException { 356 FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs)); 357 if (fds == null) return null; 358 359 ArrayList<String> families = new ArrayList<>(fds.length); 360 for (FileStatus status: fds) { 361 families.add(status.getPath().getName()); 362 } 363 364 return families; 365 } 366 367 /** 368 * Remove the region family from disk, archiving the store files. 369 * @param familyName Column Family Name 370 * @throws IOException if an error occours during the archiving 371 */ 372 public void deleteFamily(final String familyName) throws IOException { 373 // archive family store files 374 HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName)); 375 376 // delete the family folder 377 Path familyDir = getStoreDir(familyName); 378 if(fs.exists(familyDir) && !deleteDir(familyDir)) 379 throw new IOException("Could not delete family " + familyName 380 + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "(" 381 + regionInfoForFs.getEncodedName() + ")"); 382 } 383 384 /** 385 * Generate a unique file name, used by createTempName() and commitStoreFile() 386 * @param suffix extra information to append to the generated name 387 * @return Unique file name 388 */ 389 private static String generateUniqueName(final String suffix) { 390 String name = UUID.randomUUID().toString().replaceAll("-", ""); 391 if (suffix != null) name += suffix; 392 return name; 393 } 394 395 /** 396 * Generate a unique temporary Path. Used in conjuction with commitStoreFile() 397 * to get a safer file creation. 398 * <code> 399 * Path file = fs.createTempName(); 400 * ...StoreFile.Writer(file)... 401 * fs.commitStoreFile("family", file); 402 * </code> 403 * 404 * @return Unique {@link Path} of the temporary file 405 */ 406 public Path createTempName() { 407 return createTempName(null); 408 } 409 410 /** 411 * Generate a unique temporary Path. Used in conjuction with commitStoreFile() 412 * to get a safer file creation. 413 * <code> 414 * Path file = fs.createTempName(); 415 * ...StoreFile.Writer(file)... 416 * fs.commitStoreFile("family", file); 417 * </code> 418 * 419 * @param suffix extra information to append to the generated name 420 * @return Unique {@link Path} of the temporary file 421 */ 422 public Path createTempName(final String suffix) { 423 return new Path(getTempDir(), generateUniqueName(suffix)); 424 } 425 426 /** 427 * Move the file from a build/temp location to the main family store directory. 428 * @param familyName Family that will gain the file 429 * @param buildPath {@link Path} to the file to commit. 430 * @return The new {@link Path} of the committed file 431 * @throws IOException 432 */ 433 public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException { 434 Path dstPath = preCommitStoreFile(familyName, buildPath, -1, false); 435 return commitStoreFile(buildPath, dstPath); 436 } 437 438 /** 439 * Generate the filename in the main family store directory for moving the file from a build/temp 440 * location. 441 * @param familyName Family that will gain the file 442 * @param buildPath {@link Path} to the file to commit. 443 * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence number) 444 * @param generateNewName False if you want to keep the buildPath name 445 * @return The new {@link Path} of the to be committed file 446 * @throws IOException 447 */ 448 private Path preCommitStoreFile(final String familyName, final Path buildPath, 449 final long seqNum, final boolean generateNewName) throws IOException { 450 Path storeDir = getStoreDir(familyName); 451 if(!fs.exists(storeDir) && !createDir(storeDir)) 452 throw new IOException("Failed creating " + storeDir); 453 454 String name = buildPath.getName(); 455 if (generateNewName) { 456 name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_"); 457 } 458 Path dstPath = new Path(storeDir, name); 459 if (!fs.exists(buildPath)) { 460 throw new FileNotFoundException(buildPath.toString()); 461 } 462 if (LOG.isDebugEnabled()) { 463 LOG.debug("Committing " + buildPath + " as " + dstPath); 464 } 465 return dstPath; 466 } 467 468 /* 469 * Moves file from staging dir to region dir 470 * @param buildPath {@link Path} to the file to commit. 471 * @param dstPath {@link Path} to the file under region dir 472 * @return The {@link Path} of the committed file 473 * @throws IOException 474 */ 475 Path commitStoreFile(final Path buildPath, Path dstPath) throws IOException { 476 // buildPath exists, therefore not doing an exists() check. 477 if (!rename(buildPath, dstPath)) { 478 throw new IOException("Failed rename of " + buildPath + " to " + dstPath); 479 } 480 return dstPath; 481 } 482 483 /** 484 * Archives the specified store file from the specified family. 485 * @param familyName Family that contains the store files 486 * @param filePath {@link Path} to the store file to remove 487 * @throws IOException if the archiving fails 488 */ 489 public void removeStoreFile(final String familyName, final Path filePath) 490 throws IOException { 491 HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs, 492 this.tableDir, Bytes.toBytes(familyName), filePath); 493 } 494 495 /** 496 * Closes and archives the specified store files from the specified family. 497 * @param familyName Family that contains the store files 498 * @param storeFiles set of store files to remove 499 * @throws IOException if the archiving fails 500 */ 501 public void removeStoreFiles(String familyName, Collection<HStoreFile> storeFiles) 502 throws IOException { 503 HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs, 504 this.tableDir, Bytes.toBytes(familyName), storeFiles); 505 } 506 507 /** 508 * Bulk load: Add a specified store file to the specified family. 509 * If the source file is on the same different file-system is moved from the 510 * source location to the destination location, otherwise is copied over. 511 * 512 * @param familyName Family that will gain the file 513 * @param srcPath {@link Path} to the file to import 514 * @param seqNum Bulk Load sequence number 515 * @return The destination {@link Path} of the bulk loaded file 516 * @throws IOException 517 */ 518 Pair<Path, Path> bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum) 519 throws IOException { 520 // Copy the file if it's on another filesystem 521 FileSystem srcFs = srcPath.getFileSystem(conf); 522 srcPath = srcFs.resolvePath(srcPath); 523 FileSystem realSrcFs = srcPath.getFileSystem(conf); 524 FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs; 525 526 // We can't compare FileSystem instances as equals() includes UGI instance 527 // as part of the comparison and won't work when doing SecureBulkLoad 528 // TODO deal with viewFS 529 if (!FSHDFSUtils.isSameHdfs(conf, realSrcFs, desFs)) { 530 LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " + 531 "the destination store. Copying file over to destination filesystem."); 532 Path tmpPath = createTempName(); 533 FileUtil.copy(realSrcFs, srcPath, fs, tmpPath, false, conf); 534 LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath); 535 srcPath = tmpPath; 536 } 537 538 return new Pair<>(srcPath, preCommitStoreFile(familyName, srcPath, seqNum, true)); 539 } 540 541 // =========================================================================== 542 // Splits Helpers 543 // =========================================================================== 544 /** @return {@link Path} to the temp directory used during split operations */ 545 Path getSplitsDir() { 546 return new Path(getRegionDir(), REGION_SPLITS_DIR); 547 } 548 549 public Path getSplitsDir(final RegionInfo hri) { 550 return new Path(getSplitsDir(), hri.getEncodedName()); 551 } 552 553 /** 554 * Clean up any split detritus that may have been left around from previous split attempts. 555 */ 556 void cleanupSplitsDir() throws IOException { 557 deleteDir(getSplitsDir()); 558 } 559 560 /** 561 * Clean up any split detritus that may have been left around from previous 562 * split attempts. 563 * Call this method on initial region deploy. 564 * @throws IOException 565 */ 566 void cleanupAnySplitDetritus() throws IOException { 567 Path splitdir = this.getSplitsDir(); 568 if (!fs.exists(splitdir)) return; 569 // Look at the splitdir. It could have the encoded names of the daughter 570 // regions we tried to make. See if the daughter regions actually got made 571 // out under the tabledir. If here under splitdir still, then the split did 572 // not complete. Try and do cleanup. This code WILL NOT catch the case 573 // where we successfully created daughter a but regionserver crashed during 574 // the creation of region b. In this case, there'll be an orphan daughter 575 // dir in the filesystem. TOOD: Fix. 576 FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs)); 577 if (daughters != null) { 578 for (FileStatus daughter: daughters) { 579 Path daughterDir = new Path(getTableDir(), daughter.getPath().getName()); 580 if (fs.exists(daughterDir) && !deleteDir(daughterDir)) { 581 throw new IOException("Failed delete of " + daughterDir); 582 } 583 } 584 } 585 cleanupSplitsDir(); 586 LOG.info("Cleaned up old failed split transaction detritus: " + splitdir); 587 } 588 589 /** 590 * Remove daughter region 591 * @param regionInfo daughter {@link RegionInfo} 592 * @throws IOException 593 */ 594 void cleanupDaughterRegion(final RegionInfo regionInfo) throws IOException { 595 Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName()); 596 if (this.fs.exists(regionDir) && !deleteDir(regionDir)) { 597 throw new IOException("Failed delete of " + regionDir); 598 } 599 } 600 601 /** 602 * Commit a daughter region, moving it from the split temporary directory 603 * to the proper location in the filesystem. 604 * 605 * @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo} 606 * @throws IOException 607 */ 608 public Path commitDaughterRegion(final RegionInfo regionInfo) 609 throws IOException { 610 Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName()); 611 Path daughterTmpDir = this.getSplitsDir(regionInfo); 612 613 if (fs.exists(daughterTmpDir)) { 614 615 // Write HRI to a file in case we need to recover hbase:meta 616 Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE); 617 byte[] regionInfoContent = getRegionInfoFileContent(regionInfo); 618 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent); 619 620 // Move the daughter temp dir to the table dir 621 if (!rename(daughterTmpDir, regionDir)) { 622 throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir); 623 } 624 } 625 626 return regionDir; 627 } 628 629 /** 630 * Create the region splits directory. 631 */ 632 public void createSplitsDir(RegionInfo daughterA, RegionInfo daughterB) throws IOException { 633 Path splitdir = getSplitsDir(); 634 if (fs.exists(splitdir)) { 635 LOG.info("The " + splitdir + " directory exists. Hence deleting it to recreate it"); 636 if (!deleteDir(splitdir)) { 637 throw new IOException("Failed deletion of " + splitdir + " before creating them again."); 638 } 639 } 640 // splitDir doesn't exists now. No need to do an exists() call for it. 641 if (!createDir(splitdir)) { 642 throw new IOException("Failed create of " + splitdir); 643 } 644 Path daughterATmpDir = getSplitsDir(daughterA); 645 if (!createDir(daughterATmpDir)) { 646 throw new IOException("Failed create of " + daughterATmpDir); 647 } 648 Path daughterBTmpDir = getSplitsDir(daughterB); 649 if (!createDir(daughterBTmpDir)) { 650 throw new IOException("Failed create of " + daughterBTmpDir); 651 } 652 } 653 654 /** 655 * Write out a split reference. Package local so it doesnt leak out of 656 * regionserver. 657 * @param hri {@link RegionInfo} of the destination 658 * @param familyName Column Family Name 659 * @param f File to split. 660 * @param splitRow Split Row 661 * @param top True if we are referring to the top half of the hfile. 662 * @param splitPolicy A split policy instance; be careful! May not be full populated; e.g. if 663 * this method is invoked on the Master side, then the RegionSplitPolicy will 664 * NOT have a reference to a Region. 665 * @return Path to created reference. 666 * @throws IOException 667 */ 668 public Path splitStoreFile(RegionInfo hri, String familyName, HStoreFile f, byte[] splitRow, 669 boolean top, RegionSplitPolicy splitPolicy) throws IOException { 670 if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) { 671 // Check whether the split row lies in the range of the store file 672 // If it is outside the range, return directly. 673 f.initReader(); 674 try { 675 if (top) { 676 //check if larger than last key. 677 Cell splitKey = PrivateCellUtil.createFirstOnRow(splitRow); 678 Optional<Cell> lastKey = f.getLastKey(); 679 // If lastKey is null means storefile is empty. 680 if (!lastKey.isPresent()) { 681 return null; 682 } 683 if (f.getComparator().compare(splitKey, lastKey.get()) > 0) { 684 return null; 685 } 686 } else { 687 //check if smaller than first key 688 Cell splitKey = PrivateCellUtil.createLastOnRow(splitRow); 689 Optional<Cell> firstKey = f.getFirstKey(); 690 // If firstKey is null means storefile is empty. 691 if (!firstKey.isPresent()) { 692 return null; 693 } 694 if (f.getComparator().compare(splitKey, firstKey.get()) < 0) { 695 return null; 696 } 697 } 698 } finally { 699 f.closeStoreFile(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true); 700 } 701 } 702 703 Path splitDir = new Path(getSplitsDir(hri), familyName); 704 // A reference to the bottom half of the hsf store file. 705 Reference r = 706 top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow); 707 // Add the referred-to regions name as a dot separated suffix. 708 // See REF_NAME_REGEX regex above. The referred-to regions name is 709 // up in the path of the passed in <code>f</code> -- parentdir is family, 710 // then the directory above is the region name. 711 String parentRegionName = regionInfoForFs.getEncodedName(); 712 // Write reference with same file id only with the other region name as 713 // suffix and into the new region location (under same family). 714 Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName); 715 return r.write(fs, p); 716 } 717 718 // =========================================================================== 719 // Merge Helpers 720 // =========================================================================== 721 /** @return {@link Path} to the temp directory used during merge operations */ 722 public Path getMergesDir() { 723 return new Path(getRegionDir(), REGION_MERGES_DIR); 724 } 725 726 Path getMergesDir(final RegionInfo hri) { 727 return new Path(getMergesDir(), hri.getEncodedName()); 728 } 729 730 /** 731 * Clean up any merge detritus that may have been left around from previous merge attempts. 732 */ 733 void cleanupMergesDir() throws IOException { 734 deleteDir(getMergesDir()); 735 } 736 737 /** 738 * Remove merged region 739 * @param mergedRegion {@link RegionInfo} 740 * @throws IOException 741 */ 742 public void cleanupMergedRegion(final RegionInfo mergedRegion) throws IOException { 743 Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName()); 744 if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) { 745 throw new IOException("Failed delete of " + regionDir); 746 } 747 } 748 749 static boolean mkdirs(FileSystem fs, Configuration conf, Path dir) throws IOException { 750 if (FSUtils.isDistributedFileSystem(fs) || 751 !conf.getBoolean(HConstants.ENABLE_DATA_FILE_UMASK, false)) { 752 return fs.mkdirs(dir); 753 } 754 FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY); 755 return fs.mkdirs(dir, perms); 756 } 757 758 /** 759 * Create the region merges directory, a temporary directory to accumulate 760 * merges in. 761 * @throws IOException If merges dir already exists or we fail to create it. 762 * @see HRegionFileSystem#cleanupMergesDir() 763 */ 764 public void createMergesDir() throws IOException { 765 Path mergesdir = getMergesDir(); 766 if (fs.exists(mergesdir)) { 767 LOG.info("{} directory exists. Deleting it to recreate it anew", mergesdir); 768 if (!fs.delete(mergesdir, true)) { 769 throw new IOException("Failed deletion of " + mergesdir + " before recreate."); 770 } 771 } 772 if (!mkdirs(fs, conf, mergesdir)) { 773 throw new IOException("Failed create of " + mergesdir); 774 } 775 } 776 777 /** 778 * Write out a merge reference under the given merges directory. Package local 779 * so it doesnt leak out of regionserver. 780 * @param mergedRegion {@link RegionInfo} of the merged region 781 * @param familyName Column Family Name 782 * @param f File to create reference. 783 * @param mergedDir 784 * @return Path to created reference. 785 * @throws IOException 786 */ 787 public Path mergeStoreFile(RegionInfo mergedRegion, String familyName, HStoreFile f, 788 Path mergedDir) throws IOException { 789 Path referenceDir = new Path(new Path(mergedDir, 790 mergedRegion.getEncodedName()), familyName); 791 // A whole reference to the store file. 792 Reference r = Reference.createTopReference(regionInfoForFs.getStartKey()); 793 // Add the referred-to regions name as a dot separated suffix. 794 // See REF_NAME_REGEX regex above. The referred-to regions name is 795 // up in the path of the passed in <code>f</code> -- parentdir is family, 796 // then the directory above is the region name. 797 String mergingRegionName = regionInfoForFs.getEncodedName(); 798 // Write reference with same file id only with the other region name as 799 // suffix and into the new region location (under same family). 800 Path p = new Path(referenceDir, f.getPath().getName() + "." 801 + mergingRegionName); 802 return r.write(fs, p); 803 } 804 805 /** 806 * Commit a merged region, moving it from the merges temporary directory to 807 * the proper location in the filesystem. 808 * @param mergedRegionInfo merged region {@link RegionInfo} 809 * @throws IOException 810 */ 811 public void commitMergedRegion(final RegionInfo mergedRegionInfo) throws IOException { 812 Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName()); 813 Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo); 814 // Move the tmp dir to the expected location 815 if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) { 816 if (!fs.rename(mergedRegionTmpDir, regionDir)) { 817 throw new IOException("Unable to rename " + mergedRegionTmpDir + " to " 818 + regionDir); 819 } 820 } 821 } 822 823 // =========================================================================== 824 // Create/Open/Delete Helpers 825 // =========================================================================== 826 /** 827 * Log the current state of the region 828 * @param LOG log to output information 829 * @throws IOException if an unexpected exception occurs 830 */ 831 void logFileSystemState(final Logger LOG) throws IOException { 832 FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG); 833 } 834 835 /** 836 * @param hri 837 * @return Content of the file we write out to the filesystem under a region 838 * @throws IOException 839 */ 840 private static byte[] getRegionInfoFileContent(final RegionInfo hri) throws IOException { 841 return RegionInfo.toDelimitedByteArray(hri); 842 } 843 844 /** 845 * Create a {@link RegionInfo} from the serialized version on-disk. 846 * @param fs {@link FileSystem} that contains the Region Info file 847 * @param regionDir {@link Path} to the Region Directory that contains the Info file 848 * @return An {@link RegionInfo} instance gotten from the Region Info file. 849 * @throws IOException if an error occurred during file open/read operation. 850 */ 851 public static RegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir) 852 throws IOException { 853 FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE)); 854 try { 855 return RegionInfo.parseFrom(in); 856 } finally { 857 in.close(); 858 } 859 } 860 861 /** 862 * Write the .regioninfo file on-disk. 863 * Overwrites if exists already. 864 */ 865 private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs, 866 final Path regionInfoFile, final byte[] content) throws IOException { 867 // First check to get the permissions 868 FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY); 869 // Write the RegionInfo file content 870 FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null); 871 try { 872 out.write(content); 873 } finally { 874 out.close(); 875 } 876 } 877 878 /** 879 * Write out an info file under the stored region directory. Useful recovering mangled regions. 880 * If the regionInfo already exists on-disk, then we fast exit. 881 */ 882 void checkRegionInfoOnFilesystem() throws IOException { 883 // Compose the content of the file so we can compare to length in filesystem. If not same, 884 // rewrite it (it may have been written in the old format using Writables instead of pb). The 885 // pb version is much shorter -- we write now w/o the toString version -- so checking length 886 // only should be sufficient. I don't want to read the file every time to check if it pb 887 // serialized. 888 byte[] content = getRegionInfoFileContent(regionInfoForFs); 889 890 // Verify if the region directory exists before opening a region. We need to do this since if 891 // the region directory doesn't exist we will re-create the region directory and a new HRI 892 // when HRegion.openHRegion() is called. 893 try { 894 FileStatus status = fs.getFileStatus(getRegionDir()); 895 } catch (FileNotFoundException e) { 896 LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() + 897 " on table " + regionInfo.getTable()); 898 } 899 900 try { 901 Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE); 902 FileStatus status = fs.getFileStatus(regionInfoFile); 903 if (status != null && status.getLen() == content.length) { 904 // Then assume the content good and move on. 905 // NOTE: that the length is not sufficient to define the the content matches. 906 return; 907 } 908 909 LOG.info("Rewriting .regioninfo file at: " + regionInfoFile); 910 if (!fs.delete(regionInfoFile, false)) { 911 throw new IOException("Unable to remove existing " + regionInfoFile); 912 } 913 } catch (FileNotFoundException e) { 914 LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() + 915 " on table " + regionInfo.getTable()); 916 } 917 918 // Write HRI to a file in case we need to recover hbase:meta 919 writeRegionInfoOnFilesystem(content, true); 920 } 921 922 /** 923 * Write out an info file under the region directory. Useful recovering mangled regions. 924 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation. 925 */ 926 private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException { 927 byte[] content = getRegionInfoFileContent(regionInfoForFs); 928 writeRegionInfoOnFilesystem(content, useTempDir); 929 } 930 931 /** 932 * Write out an info file under the region directory. Useful recovering mangled regions. 933 * @param regionInfoContent serialized version of the {@link RegionInfo} 934 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation. 935 */ 936 private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent, 937 final boolean useTempDir) throws IOException { 938 Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE); 939 if (useTempDir) { 940 // Create in tmpDir and then move into place in case we crash after 941 // create but before close. If we don't successfully close the file, 942 // subsequent region reopens will fail the below because create is 943 // registered in NN. 944 945 // And then create the file 946 Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE); 947 948 // If datanode crashes or if the RS goes down just before the close is called while trying to 949 // close the created regioninfo file in the .tmp directory then on next 950 // creation we will be getting AlreadyCreatedException. 951 // Hence delete and create the file if exists. 952 if (FSUtils.isExists(fs, tmpPath)) { 953 FSUtils.delete(fs, tmpPath, true); 954 } 955 956 // Write HRI to a file in case we need to recover hbase:meta 957 writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent); 958 959 // Move the created file to the original path 960 if (fs.exists(tmpPath) && !rename(tmpPath, regionInfoFile)) { 961 throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile); 962 } 963 } else { 964 // Write HRI to a file in case we need to recover hbase:meta 965 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent); 966 } 967 } 968 969 /** 970 * Create a new Region on file-system. 971 * @param conf the {@link Configuration} to use 972 * @param fs {@link FileSystem} from which to add the region 973 * @param tableDir {@link Path} to where the table is being stored 974 * @param regionInfo {@link RegionInfo} for region to be added 975 * @throws IOException if the region creation fails due to a FileSystem exception. 976 */ 977 public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf, 978 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException { 979 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 980 981 // We only create a .regioninfo and the region directory if this is the default region replica 982 if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 983 Path regionDir = regionFs.getRegionDir(); 984 if (fs.exists(regionDir)) { 985 LOG.warn("Trying to create a region that already exists on disk: " + regionDir); 986 } else { 987 // Create the region directory 988 if (!createDirOnFileSystem(fs, conf, regionDir)) { 989 LOG.warn("Unable to create the region directory: " + regionDir); 990 throw new IOException("Unable to create region directory: " + regionDir); 991 } 992 } 993 994 // Write HRI to a file in case we need to recover hbase:meta 995 regionFs.writeRegionInfoOnFilesystem(false); 996 } else { 997 if (LOG.isDebugEnabled()) 998 LOG.debug("Skipping creation of .regioninfo file for " + regionInfo); 999 } 1000 return regionFs; 1001 } 1002 1003 /** 1004 * Open Region from file-system. 1005 * @param conf the {@link Configuration} to use 1006 * @param fs {@link FileSystem} from which to add the region 1007 * @param tableDir {@link Path} to where the table is being stored 1008 * @param regionInfo {@link RegionInfo} for region to be added 1009 * @param readOnly True if you don't want to edit the region data 1010 * @throws IOException if the region creation fails due to a FileSystem exception. 1011 */ 1012 public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf, 1013 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo, boolean readOnly) 1014 throws IOException { 1015 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 1016 Path regionDir = regionFs.getRegionDir(); 1017 1018 if (!fs.exists(regionDir)) { 1019 LOG.warn("Trying to open a region that do not exists on disk: " + regionDir); 1020 throw new IOException("The specified region do not exists on disk: " + regionDir); 1021 } 1022 1023 if (!readOnly) { 1024 // Cleanup temporary directories 1025 regionFs.cleanupTempDir(); 1026 regionFs.cleanupSplitsDir(); 1027 regionFs.cleanupMergesDir(); 1028 1029 // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta 1030 // Only create HRI if we are the default replica 1031 if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 1032 regionFs.checkRegionInfoOnFilesystem(); 1033 } else { 1034 if (LOG.isDebugEnabled()) { 1035 LOG.debug("Skipping creation of .regioninfo file for " + regionInfo); 1036 } 1037 } 1038 } 1039 1040 return regionFs; 1041 } 1042 1043 /** 1044 * Remove the region from the table directory, archiving the region's hfiles. 1045 * @param conf the {@link Configuration} to use 1046 * @param fs {@link FileSystem} from which to remove the region 1047 * @param tableDir {@link Path} to where the table is being stored 1048 * @param regionInfo {@link RegionInfo} for region to be deleted 1049 * @throws IOException if the request cannot be completed 1050 */ 1051 public static void deleteRegionFromFileSystem(final Configuration conf, 1052 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException { 1053 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 1054 Path regionDir = regionFs.getRegionDir(); 1055 1056 if (!fs.exists(regionDir)) { 1057 LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir); 1058 return; 1059 } 1060 1061 if (LOG.isDebugEnabled()) { 1062 LOG.debug("DELETING region " + regionDir); 1063 } 1064 1065 // Archive region 1066 Path rootDir = FSUtils.getRootDir(conf); 1067 HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir); 1068 1069 // Delete empty region dir 1070 if (!fs.delete(regionDir, true)) { 1071 LOG.warn("Failed delete of " + regionDir); 1072 } 1073 } 1074 1075 /** 1076 * Creates a directory. Assumes the user has already checked for this directory existence. 1077 * @param dir 1078 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks 1079 * whether the directory exists or not, and returns true if it exists. 1080 * @throws IOException 1081 */ 1082 boolean createDir(Path dir) throws IOException { 1083 int i = 0; 1084 IOException lastIOE = null; 1085 do { 1086 try { 1087 return mkdirs(fs, conf, dir); 1088 } catch (IOException ioe) { 1089 lastIOE = ioe; 1090 if (fs.exists(dir)) return true; // directory is present 1091 try { 1092 sleepBeforeRetry("Create Directory", i+1); 1093 } catch (InterruptedException e) { 1094 throw (InterruptedIOException)new InterruptedIOException().initCause(e); 1095 } 1096 } 1097 } while (++i <= hdfsClientRetriesNumber); 1098 throw new IOException("Exception in createDir", lastIOE); 1099 } 1100 1101 /** 1102 * Renames a directory. Assumes the user has already checked for this directory existence. 1103 * @param srcpath 1104 * @param dstPath 1105 * @return true if rename is successful. 1106 * @throws IOException 1107 */ 1108 boolean rename(Path srcpath, Path dstPath) throws IOException { 1109 IOException lastIOE = null; 1110 int i = 0; 1111 do { 1112 try { 1113 return fs.rename(srcpath, dstPath); 1114 } catch (IOException ioe) { 1115 lastIOE = ioe; 1116 if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move 1117 // dir is not there, retry after some time. 1118 try { 1119 sleepBeforeRetry("Rename Directory", i+1); 1120 } catch (InterruptedException e) { 1121 throw (InterruptedIOException)new InterruptedIOException().initCause(e); 1122 } 1123 } 1124 } while (++i <= hdfsClientRetriesNumber); 1125 1126 throw new IOException("Exception in rename", lastIOE); 1127 } 1128 1129 /** 1130 * Deletes a directory. Assumes the user has already checked for this directory existence. 1131 * @param dir 1132 * @return true if the directory is deleted. 1133 * @throws IOException 1134 */ 1135 boolean deleteDir(Path dir) throws IOException { 1136 IOException lastIOE = null; 1137 int i = 0; 1138 do { 1139 try { 1140 return fs.delete(dir, true); 1141 } catch (IOException ioe) { 1142 lastIOE = ioe; 1143 if (!fs.exists(dir)) return true; 1144 // dir is there, retry deleting after some time. 1145 try { 1146 sleepBeforeRetry("Delete Directory", i+1); 1147 } catch (InterruptedException e) { 1148 throw (InterruptedIOException)new InterruptedIOException().initCause(e); 1149 } 1150 } 1151 } while (++i <= hdfsClientRetriesNumber); 1152 1153 throw new IOException("Exception in DeleteDir", lastIOE); 1154 } 1155 1156 /** 1157 * sleeping logic; handles the interrupt exception. 1158 */ 1159 private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException { 1160 sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber); 1161 } 1162 1163 /** 1164 * Creates a directory for a filesystem and configuration object. Assumes the user has already 1165 * checked for this directory existence. 1166 * @param fs 1167 * @param conf 1168 * @param dir 1169 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks 1170 * whether the directory exists or not, and returns true if it exists. 1171 * @throws IOException 1172 */ 1173 private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir) 1174 throws IOException { 1175 int i = 0; 1176 IOException lastIOE = null; 1177 int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number", 1178 DEFAULT_HDFS_CLIENT_RETRIES_NUMBER); 1179 int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries", 1180 DEFAULT_BASE_SLEEP_BEFORE_RETRIES); 1181 do { 1182 try { 1183 return fs.mkdirs(dir); 1184 } catch (IOException ioe) { 1185 lastIOE = ioe; 1186 if (fs.exists(dir)) return true; // directory is present 1187 try { 1188 sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber); 1189 } catch (InterruptedException e) { 1190 throw (InterruptedIOException)new InterruptedIOException().initCause(e); 1191 } 1192 } 1193 } while (++i <= hdfsClientRetriesNumber); 1194 1195 throw new IOException("Exception in createDir", lastIOE); 1196 } 1197 1198 /** 1199 * sleeping logic for static methods; handles the interrupt exception. Keeping a static version 1200 * for this to avoid re-looking for the integer values. 1201 */ 1202 private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries, 1203 int hdfsClientRetriesNumber) throws InterruptedException { 1204 if (sleepMultiplier > hdfsClientRetriesNumber) { 1205 if (LOG.isDebugEnabled()) { 1206 LOG.debug(msg + ", retries exhausted"); 1207 } 1208 return; 1209 } 1210 if (LOG.isDebugEnabled()) { 1211 LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier); 1212 } 1213 Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier); 1214 } 1215}