001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import static org.apache.hadoop.hbase.io.HFileLink.LINK_NAME_PATTERN; 021 022import edu.umd.cs.findbugs.annotations.Nullable; 023import java.io.FileNotFoundException; 024import java.io.IOException; 025import java.io.InterruptedIOException; 026import java.util.ArrayList; 027import java.util.Collection; 028import java.util.HashMap; 029import java.util.List; 030import java.util.Map; 031import java.util.Objects; 032import java.util.Optional; 033import java.util.UUID; 034import java.util.regex.Matcher; 035import org.apache.hadoop.conf.Configuration; 036import org.apache.hadoop.fs.FSDataInputStream; 037import org.apache.hadoop.fs.FSDataOutputStream; 038import org.apache.hadoop.fs.FileStatus; 039import org.apache.hadoop.fs.FileSystem; 040import org.apache.hadoop.fs.FileUtil; 041import org.apache.hadoop.fs.LocatedFileStatus; 042import org.apache.hadoop.fs.Path; 043import org.apache.hadoop.fs.permission.FsPermission; 044import org.apache.hadoop.hbase.Cell; 045import org.apache.hadoop.hbase.ExtendedCell; 046import org.apache.hadoop.hbase.HConstants; 047import 
org.apache.hadoop.hbase.PrivateCellUtil; 048import org.apache.hadoop.hbase.TableName; 049import org.apache.hadoop.hbase.backup.HFileArchiver; 050import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 051import org.apache.hadoop.hbase.client.RegionInfo; 052import org.apache.hadoop.hbase.client.TableDescriptor; 053import org.apache.hadoop.hbase.fs.HFileSystem; 054import org.apache.hadoop.hbase.io.HFileLink; 055import org.apache.hadoop.hbase.io.Reference; 056import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; 057import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; 058import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory; 059import org.apache.hadoop.hbase.util.Bytes; 060import org.apache.hadoop.hbase.util.CommonFSUtils; 061import org.apache.hadoop.hbase.util.FSUtils; 062import org.apache.hadoop.hbase.util.Pair; 063import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil; 064import org.apache.yetus.audience.InterfaceAudience; 065import org.slf4j.Logger; 066import org.slf4j.LoggerFactory; 067 068import org.apache.hbase.thirdparty.com.google.common.collect.Lists; 069 070/** 071 * View to an on-disk Region. Provides the set of methods necessary to interact with the on-disk 072 * region data. 073 */ 074@InterfaceAudience.Private 075public class HRegionFileSystem { 076 private static final Logger LOG = LoggerFactory.getLogger(HRegionFileSystem.class); 077 078 /** Name of the region info file that resides just under the region directory. */ 079 public final static String REGION_INFO_FILE = ".regioninfo"; 080 081 /** Temporary subdirectory of the region directory used for merges. */ 082 public static final String REGION_MERGES_DIR = ".merges"; 083 084 /** Temporary subdirectory of the region directory used for splits. */ 085 public static final String REGION_SPLITS_DIR = ".splits"; 086 087 /** Temporary subdirectory of the region directory used for compaction output. 
*/ 088 static final String REGION_TEMP_DIR = ".tmp"; 089 090 private final RegionInfo regionInfo; 091 // regionInfo for interacting with FS (getting encodedName, etc) 092 final RegionInfo regionInfoForFs; 093 final Configuration conf; 094 private final Path tableDir; 095 final FileSystem fs; 096 private final Path regionDir; 097 098 /** 099 * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the 100 * client level. 101 */ 102 private final int hdfsClientRetriesNumber; 103 private final int baseSleepBeforeRetries; 104 private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10; 105 private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000; 106 107 /** 108 * Create a view to the on-disk region 109 * @param conf the {@link Configuration} to use 110 * @param fs {@link FileSystem} that contains the region 111 * @param tableDir {@link Path} to where the table is being stored 112 * @param regionInfo {@link RegionInfo} for region 113 */ 114 HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir, 115 final RegionInfo regionInfo) { 116 this.fs = fs; 117 this.conf = conf; 118 this.tableDir = Objects.requireNonNull(tableDir, "tableDir is null"); 119 this.regionInfo = Objects.requireNonNull(regionInfo, "regionInfo is null"); 120 this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo); 121 this.regionDir = FSUtils.getRegionDirFromTableDir(tableDir, regionInfo); 122 this.hdfsClientRetriesNumber = 123 conf.getInt("hdfs.client.retries.number", DEFAULT_HDFS_CLIENT_RETRIES_NUMBER); 124 this.baseSleepBeforeRetries = 125 conf.getInt("hdfs.client.sleep.before.retries", DEFAULT_BASE_SLEEP_BEFORE_RETRIES); 126 } 127 128 /** Returns the underlying {@link FileSystem} */ 129 public FileSystem getFileSystem() { 130 return this.fs; 131 } 132 133 /** Returns the {@link RegionInfo} that describe this on-disk region view */ 134 public RegionInfo getRegionInfo() { 135 return this.regionInfo; 
136 } 137 138 public RegionInfo getRegionInfoForFS() { 139 return this.regionInfoForFs; 140 } 141 142 /** Returns {@link Path} to the region's root directory. */ 143 public Path getTableDir() { 144 return this.tableDir; 145 } 146 147 /** Returns {@link Path} to the region directory. */ 148 public Path getRegionDir() { 149 return regionDir; 150 } 151 152 // =========================================================================== 153 // Temp Helpers 154 // =========================================================================== 155 /** Returns {@link Path} to the region's temp directory, used for file creations */ 156 public Path getTempDir() { 157 return new Path(getRegionDir(), REGION_TEMP_DIR); 158 } 159 160 /** 161 * Clean up any temp detritus that may have been left around from previous operation attempts. 162 */ 163 void cleanupTempDir() throws IOException { 164 deleteDir(getTempDir()); 165 } 166 167 // =========================================================================== 168 // Store/StoreFile Helpers 169 // =========================================================================== 170 /** 171 * Returns the directory path of the specified family 172 * @param familyName Column Family Name 173 * @return {@link Path} to the directory of the specified family 174 */ 175 public Path getStoreDir(final String familyName) { 176 return new Path(this.getRegionDir(), familyName); 177 } 178 179 /** 180 * @param tabledir {@link Path} to where the table is being stored 181 * @param hri {@link RegionInfo} for the region. 182 * @param family {@link ColumnFamilyDescriptor} describing the column family 183 * @return Path to family/Store home directory. 184 */ 185 public static Path getStoreHomedir(final Path tabledir, final RegionInfo hri, 186 final byte[] family) { 187 return getStoreHomedir(tabledir, hri.getEncodedName(), family); 188 } 189 190 /** 191 * @param tabledir {@link Path} to where the table is being stored 192 * @param encodedName Encoded region name. 
193 * @param family {@link ColumnFamilyDescriptor} describing the column family 194 * @return Path to family/Store home directory. 195 */ 196 public static Path getStoreHomedir(final Path tabledir, final String encodedName, 197 final byte[] family) { 198 return new Path(tabledir, new Path(encodedName, Bytes.toString(family))); 199 } 200 201 /** 202 * Create the store directory for the specified family name 203 * @param familyName Column Family Name 204 * @return {@link Path} to the directory of the specified family 205 * @throws IOException if the directory creation fails. 206 */ 207 Path createStoreDir(final String familyName) throws IOException { 208 Path storeDir = getStoreDir(familyName); 209 if (!fs.exists(storeDir) && !createDir(storeDir)) 210 throw new IOException("Failed creating " + storeDir); 211 return storeDir; 212 } 213 214 /** 215 * Set the directory of CF to the specified storage policy. <br> 216 * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>, 217 * <i>"COLD"</i> <br> 218 * <br> 219 * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details. 220 * @param familyName The name of column family. 221 * @param policyName The name of the storage policy: 'HOT', 'COLD', etc. See hadoop 2.6+ 222 * org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g 'COLD', 223 * 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'. 224 */ 225 public void setStoragePolicy(String familyName, String policyName) { 226 CommonFSUtils.setStoragePolicy(this.fs, getStoreDir(familyName), policyName); 227 } 228 229 /** 230 * Set storage policy for a whole region. <br> 231 * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>, 232 * <i>"COLD"</i> <br> 233 * <br> 234 * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details. 235 * @param policyName The name of the storage policy: 'HOT', 'COLD', etc. 
See hadoop 2.6+ 236 * org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g 'COLD', 237 * 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'. 238 */ 239 public void setStoragePolicy(String policyName) { 240 CommonFSUtils.setStoragePolicy(this.fs, getRegionDir(), policyName); 241 } 242 243 /** 244 * Get the storage policy of the directory of CF. 245 * @param familyName The name of column family. 246 * @return Storage policy name, or {@code null} if not using {@link HFileSystem} or exception 247 * thrown when trying to get policy 248 */ 249 @Nullable 250 public String getStoragePolicyName(String familyName) { 251 if (this.fs instanceof HFileSystem) { 252 Path storeDir = getStoreDir(familyName); 253 return ((HFileSystem) this.fs).getStoragePolicyName(storeDir); 254 } 255 256 return null; 257 } 258 259 /** 260 * Returns the store files' LocatedFileStatus which available for the family. This methods 261 * performs the filtering based on the valid store files. 262 * @param familyName Column Family Name 263 * @return a list of store files' LocatedFileStatus for the specified family. 
264 */ 265 public static List<LocatedFileStatus> getStoreFilesLocatedStatus(final HRegionFileSystem regionfs, 266 final String familyName, final boolean validate) throws IOException { 267 Path familyDir = regionfs.getStoreDir(familyName); 268 List<LocatedFileStatus> locatedFileStatuses = 269 CommonFSUtils.listLocatedStatus(regionfs.getFileSystem(), familyDir); 270 if (locatedFileStatuses == null) { 271 if (LOG.isTraceEnabled()) { 272 LOG.trace("No StoreFiles for: " + familyDir); 273 } 274 return null; 275 } 276 277 List<LocatedFileStatus> validStoreFiles = Lists.newArrayList(); 278 for (LocatedFileStatus status : locatedFileStatuses) { 279 if (validate && !StoreFileInfo.isValid(status)) { 280 // recovered.hfiles directory is expected inside CF path when hbase.wal.split.to.hfile to 281 // true, refer HBASE-23740 282 if (!HConstants.RECOVERED_HFILES_DIR.equals(status.getPath().getName())) { 283 LOG.warn("Invalid StoreFile: {}", status.getPath()); 284 } 285 } else { 286 validStoreFiles.add(status); 287 } 288 } 289 return validStoreFiles; 290 } 291 292 /** 293 * Return Qualified Path of the specified family/file 294 * @param familyName Column Family Name 295 * @param fileName File Name 296 * @return The qualified Path for the specified family/file 297 */ 298 Path getStoreFilePath(final String familyName, final String fileName) { 299 Path familyDir = getStoreDir(familyName); 300 return new Path(familyDir, fileName).makeQualified(fs.getUri(), fs.getWorkingDirectory()); 301 } 302 303 /** 304 * Return the store file information of the specified family/file. 
305 * @param familyName Column Family Name 306 * @param fileName File Name 307 * @return The {@link StoreFileInfo} for the specified family/file 308 */ 309 StoreFileInfo getStoreFileInfo(final String familyName, final String fileName, 310 final StoreFileTracker tracker) throws IOException { 311 Path familyDir = getStoreDir(familyName); 312 return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo, regionInfoForFs, 313 familyName, new Path(familyDir, fileName), tracker); 314 } 315 316 /** Returns the set of families present on disk n */ 317 public Collection<String> getFamilies() throws IOException { 318 FileStatus[] fds = 319 CommonFSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs)); 320 if (fds == null) return null; 321 322 ArrayList<String> families = new ArrayList<>(fds.length); 323 for (FileStatus status : fds) { 324 families.add(status.getPath().getName()); 325 } 326 327 return families; 328 } 329 330 /** 331 * Remove the region family from disk, archiving the store files. 
332 * @param familyName Column Family Name 333 * @throws IOException if an error occours during the archiving 334 */ 335 public void deleteFamily(final String familyName) throws IOException { 336 // archive family store files 337 HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName)); 338 339 // delete the family folder 340 Path familyDir = getStoreDir(familyName); 341 if (fs.exists(familyDir) && !deleteDir(familyDir)) 342 throw new IOException("Could not delete family " + familyName + " from FileSystem for region " 343 + regionInfoForFs.getRegionNameAsString() + "(" + regionInfoForFs.getEncodedName() + ")"); 344 } 345 346 /** 347 * Generate a unique file name, used by createTempName() and commitStoreFile() 348 * @param suffix extra information to append to the generated name 349 * @return Unique file name 350 */ 351 private static String generateUniqueName(final String suffix) { 352 String name = UUID.randomUUID().toString().replaceAll("-", ""); 353 if (suffix != null) name += suffix; 354 return name; 355 } 356 357 /** 358 * Generate a unique temporary Path. Used in conjuction with commitStoreFile() to get a safer file 359 * creation. <code> 360 * Path file = fs.createTempName(); 361 * ...StoreFile.Writer(file)... 362 * fs.commitStoreFile("family", file); 363 * </code> 364 * @return Unique {@link Path} of the temporary file 365 */ 366 public Path createTempName() { 367 return createTempName(null); 368 } 369 370 /** 371 * Generate a unique temporary Path. Used in conjuction with commitStoreFile() to get a safer file 372 * creation. <code> 373 * Path file = fs.createTempName(); 374 * ...StoreFile.Writer(file)... 
375 * fs.commitStoreFile("family", file); 376 * </code> 377 * @param suffix extra information to append to the generated name 378 * @return Unique {@link Path} of the temporary file 379 */ 380 public Path createTempName(final String suffix) { 381 return new Path(getTempDir(), generateUniqueName(suffix)); 382 } 383 384 /** 385 * Move the file from a build/temp location to the main family store directory. 386 * @param familyName Family that will gain the file 387 * @param buildPath {@link Path} to the file to commit. 388 * @return The new {@link Path} of the committed file 389 */ 390 public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException { 391 Path dstPath = preCommitStoreFile(familyName, buildPath, -1, false); 392 return commitStoreFile(buildPath, dstPath); 393 } 394 395 /** 396 * Generate the filename in the main family store directory for moving the file from a build/temp 397 * location. 398 * @param familyName Family that will gain the file 399 * @param buildPath {@link Path} to the file to commit. 400 * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence 401 * number) 402 * @param generateNewName False if you want to keep the buildPath name 403 * @return The new {@link Path} of the to be committed file 404 */ 405 private Path preCommitStoreFile(final String familyName, final Path buildPath, final long seqNum, 406 final boolean generateNewName) throws IOException { 407 Path storeDir = getStoreDir(familyName); 408 if (!fs.exists(storeDir) && !createDir(storeDir)) 409 throw new IOException("Failed creating " + storeDir); 410 411 String name = buildPath.getName(); 412 if (generateNewName) { 413 name = generateUniqueName((seqNum < 0) ? 
null : StoreFileInfo.formatBulkloadSeqId(seqNum)); 414 } 415 Path dstPath = new Path(storeDir, name); 416 if (!fs.exists(buildPath)) { 417 throw new FileNotFoundException(buildPath.toString()); 418 } 419 if (LOG.isDebugEnabled()) { 420 LOG.debug("Committing " + buildPath + " as " + dstPath); 421 } 422 return dstPath; 423 } 424 425 /* 426 * Moves file from staging dir to region dir 427 * @param buildPath {@link Path} to the file to commit. 428 * @param dstPath {@link Path} to the file under region dir 429 * @return The {@link Path} of the committed file 430 */ 431 Path commitStoreFile(final Path buildPath, Path dstPath) throws IOException { 432 // rename is not necessary in case of direct-insert stores 433 if (buildPath.equals(dstPath)) { 434 return dstPath; 435 } 436 // buildPath exists, therefore not doing an exists() check. 437 if (!rename(buildPath, dstPath)) { 438 throw new IOException("Failed rename of " + buildPath + " to " + dstPath); 439 } 440 return dstPath; 441 } 442 443 /** 444 * Bulk load: Add a specified store file to the specified family. If the source file is on the 445 * same different file-system is moved from the source location to the destination location, 446 * otherwise is copied over. 447 * @param familyName Family that will gain the file 448 * @param srcPath {@link Path} to the file to import 449 * @param seqNum Bulk Load sequence number 450 * @return The destination {@link Path} of the bulk loaded file 451 */ 452 Pair<Path, Path> bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum) 453 throws IOException { 454 // Copy the file if it's on another filesystem 455 FileSystem srcFs = srcPath.getFileSystem(conf); 456 srcPath = srcFs.resolvePath(srcPath); 457 FileSystem realSrcFs = srcPath.getFileSystem(conf); 458 FileSystem desFs = fs instanceof HFileSystem ? 
((HFileSystem) fs).getBackingFs() : fs; 459 460 // We can't compare FileSystem instances as equals() includes UGI instance 461 // as part of the comparison and won't work when doing SecureBulkLoad 462 // TODO deal with viewFS 463 if (!FSUtils.isSameHdfs(conf, realSrcFs, desFs)) { 464 LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " 465 + "the destination store. Copying file over to destination filesystem."); 466 Path tmpPath = createTempName(); 467 FileUtil.copy(realSrcFs, srcPath, fs, tmpPath, false, conf); 468 LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath); 469 srcPath = tmpPath; 470 } 471 472 return new Pair<>(srcPath, preCommitStoreFile(familyName, srcPath, seqNum, true)); 473 } 474 475 // =========================================================================== 476 // Splits Helpers 477 // =========================================================================== 478 479 public Path getSplitsDir(final RegionInfo hri) { 480 return new Path(getTableDir(), hri.getEncodedName()); 481 } 482 483 /** 484 * Remove daughter region 485 * @param regionInfo daughter {@link RegionInfo} 486 */ 487 void cleanupDaughterRegion(final RegionInfo regionInfo) throws IOException { 488 Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName()); 489 if (this.fs.exists(regionDir) && !deleteDir(regionDir)) { 490 throw new IOException("Failed delete of " + regionDir); 491 } 492 } 493 494 /** 495 * Commit a daughter region, moving it from the split temporary directory to the proper location 496 * in the filesystem. 
497 * @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo} 498 */ 499 public Path commitDaughterRegion(final RegionInfo regionInfo, List<Path> allRegionFiles, 500 MasterProcedureEnv env) throws IOException { 501 Path regionDir = this.getSplitsDir(regionInfo); 502 if (fs.exists(regionDir)) { 503 // Write HRI to a file in case we need to recover hbase:meta 504 Path regionInfoFile = new Path(regionDir, REGION_INFO_FILE); 505 byte[] regionInfoContent = getRegionInfoFileContent(regionInfo); 506 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent); 507 HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem( 508 env.getMasterConfiguration(), fs, getTableDir(), regionInfo, false); 509 insertRegionFilesIntoStoreTracker(allRegionFiles, env, regionFs); 510 } 511 return regionDir; 512 } 513 514 private void insertRegionFilesIntoStoreTracker(List<Path> allFiles, MasterProcedureEnv env, 515 HRegionFileSystem regionFs) throws IOException { 516 TableDescriptor tblDesc = 517 env.getMasterServices().getTableDescriptors().get(regionInfo.getTable()); 518 // we need to map trackers per store 519 Map<String, StoreFileTracker> trackerMap = new HashMap<>(); 520 // we need to map store files per store 521 Map<String, List<StoreFileInfo>> fileInfoMap = new HashMap<>(); 522 for (Path file : allFiles) { 523 String familyName = file.getParent().getName(); 524 trackerMap.computeIfAbsent(familyName, t -> StoreFileTrackerFactory.create(conf, tblDesc, 525 tblDesc.getColumnFamily(Bytes.toBytes(familyName)), regionFs)); 526 fileInfoMap.computeIfAbsent(familyName, l -> new ArrayList<>()); 527 List<StoreFileInfo> infos = fileInfoMap.get(familyName); 528 infos.add(trackerMap.get(familyName).getStoreFileInfo(file, true)); 529 } 530 for (Map.Entry<String, StoreFileTracker> entry : trackerMap.entrySet()) { 531 entry.getValue().add(fileInfoMap.get(entry.getKey())); 532 } 533 } 534 535 /** 536 * Creates region split daughter directories under the 
table dir. If the daughter regions already 537 * exist, for example, in the case of a recovery from a previous failed split procedure, this 538 * method deletes the given region dir recursively, then recreates it again. 539 */ 540 public void createSplitsDir(RegionInfo daughterA, RegionInfo daughterB) throws IOException { 541 Path daughterADir = getSplitsDir(daughterA); 542 if (fs.exists(daughterADir) && !deleteDir(daughterADir)) { 543 throw new IOException("Failed deletion of " + daughterADir + " before creating them again."); 544 545 } 546 if (!createDir(daughterADir)) { 547 throw new IOException("Failed create of " + daughterADir); 548 } 549 Path daughterBDir = getSplitsDir(daughterB); 550 if (fs.exists(daughterBDir) && !deleteDir(daughterBDir)) { 551 throw new IOException("Failed deletion of " + daughterBDir + " before creating them again."); 552 553 } 554 if (!createDir(daughterBDir)) { 555 throw new IOException("Failed create of " + daughterBDir); 556 } 557 } 558 559 /** 560 * Write out a split reference. Package local so it doesnt leak out of regionserver. 561 * @param hri {@link RegionInfo} of the destination 562 * @param familyName Column Family Name 563 * @param f File to split. 564 * @param splitRow Split Row 565 * @param top True if we are referring to the top half of the hfile. 566 * @param splitPolicy A split policy instance; be careful! May not be full populated; e.g. if this 567 * method is invoked on the Master side, then the RegionSplitPolicy will NOT 568 * have a reference to a Region. 569 * @return Path to created reference. 570 */ 571 public Path splitStoreFile(RegionInfo hri, String familyName, HStoreFile f, byte[] splitRow, 572 boolean top, RegionSplitPolicy splitPolicy, StoreFileTracker tracker) throws IOException { 573 Path splitDir = new Path(getSplitsDir(hri), familyName); 574 // Add the referred-to regions name as a dot separated suffix. 575 // See REF_NAME_REGEX regex above. 
The referred-to regions name is 576 // up in the path of the passed in <code>f</code> -- parentdir is family, 577 // then the directory above is the region name. 578 String parentRegionName = regionInfoForFs.getEncodedName(); 579 // Write reference with same file id only with the other region name as 580 // suffix and into the new region location (under same family). 581 Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName); 582 if (fs.exists(p)) { 583 LOG.warn("Found an already existing split file for {}. Assuming this is a recovery.", p); 584 return p; 585 } 586 boolean createLinkFile = false; 587 if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) { 588 // Check whether the split row lies in the range of the store file 589 // If it is outside the range, return directly. 590 f.initReader(); 591 try { 592 Cell splitKey = PrivateCellUtil.createFirstOnRow(splitRow); 593 Optional<ExtendedCell> lastKey = f.getLastKey(); 594 Optional<ExtendedCell> firstKey = f.getFirstKey(); 595 if (top) { 596 // check if larger than last key. 597 // If lastKey is null means storefile is empty. 598 if (!lastKey.isPresent()) { 599 return null; 600 } 601 if (f.getComparator().compare(splitKey, lastKey.get()) > 0) { 602 return null; 603 } 604 if (firstKey.isPresent() && f.getComparator().compare(splitKey, firstKey.get()) <= 0) { 605 LOG.debug("Will create HFileLink file for {}, top=true", f.getPath()); 606 createLinkFile = true; 607 } 608 } else { 609 // check if smaller than first key 610 // If firstKey is null means storefile is empty. 
611 if (!firstKey.isPresent()) { 612 return null; 613 } 614 if (f.getComparator().compare(splitKey, firstKey.get()) < 0) { 615 return null; 616 } 617 if (lastKey.isPresent() && f.getComparator().compare(splitKey, lastKey.get()) >= 0) { 618 LOG.debug("Will create HFileLink file for {}, top=false", f.getPath()); 619 createLinkFile = true; 620 } 621 } 622 } finally { 623 f.closeStoreFile(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true); 624 } 625 } 626 if (createLinkFile) { 627 // create HFileLink file instead of Reference file for child 628 String hfileName = f.getPath().getName(); 629 TableName linkedTable = regionInfoForFs.getTable(); 630 String linkedRegion = regionInfoForFs.getEncodedName(); 631 try { 632 if (HFileLink.isHFileLink(hfileName)) { 633 Matcher m = LINK_NAME_PATTERN.matcher(hfileName); 634 if (!m.matches()) { 635 throw new IllegalArgumentException(hfileName + " is not a valid HFileLink name!"); 636 } 637 linkedTable = TableName.valueOf(m.group(1), m.group(2)); 638 linkedRegion = m.group(3); 639 hfileName = m.group(4); 640 } 641 // must create back reference here 642 tracker.createHFileLink(linkedTable, linkedRegion, hfileName, true); 643 Path path = 644 new Path(splitDir, HFileLink.createHFileLinkName(linkedTable, linkedRegion, hfileName)); 645 LOG.info("Created linkFile:" + path.toString() + " for child: " + hri.getEncodedName() 646 + ", parent: " + regionInfoForFs.getEncodedName()); 647 return path; 648 } catch (IOException e) { 649 // if create HFileLink file failed, then just skip the error and create Reference file 650 LOG.error("Create link file for " + hfileName + " for child " + hri.getEncodedName() 651 + "failed, will create Reference file", e); 652 } 653 } 654 // A reference to the bottom half of the hsf store file. 655 Reference r = 656 top ? 
Reference.createTopReference(splitRow) : Reference.createBottomReference(splitRow); 657 tracker.createReference(r, p); 658 return p; 659 } 660 661 // =========================================================================== 662 // Merge Helpers 663 // =========================================================================== 664 665 Path getMergesDir(final RegionInfo hri) { 666 return new Path(getTableDir(), hri.getEncodedName()); 667 } 668 669 /** 670 * Remove merged region 671 * @param mergedRegion {@link RegionInfo} 672 */ 673 public void cleanupMergedRegion(final RegionInfo mergedRegion) throws IOException { 674 Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName()); 675 if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) { 676 throw new IOException("Failed delete of " + regionDir); 677 } 678 } 679 680 static boolean mkdirs(FileSystem fs, Configuration conf, Path dir) throws IOException { 681 if ( 682 FSUtils.isDistributedFileSystem(fs) 683 || !conf.getBoolean(HConstants.ENABLE_DATA_FILE_UMASK, false) 684 ) { 685 return fs.mkdirs(dir); 686 } 687 FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY); 688 return fs.mkdirs(dir, perms); 689 } 690 691 /** 692 * Write out a merge reference under the given merges directory. 693 * @param mergingRegion {@link RegionInfo} for one of the regions being merged. 694 * @param familyName Column Family Name 695 * @param f File to create reference. 696 * @return Path to created reference. 697 * @throws IOException if the merge write fails. 698 */ 699 public Path mergeStoreFile(RegionInfo mergingRegion, String familyName, HStoreFile f, 700 StoreFileTracker tracker) throws IOException { 701 Path referenceDir = new Path(getMergesDir(regionInfoForFs), familyName); 702 // A whole reference to the store file. 703 Reference r = Reference.createTopReference(mergingRegion.getStartKey()); 704 // Add the referred-to regions name as a dot separated suffix. 
705 // See REF_NAME_REGEX regex above. The referred-to regions name is 706 // up in the path of the passed in <code>f</code> -- parentdir is family, 707 // then the directory above is the region name. 708 String mergingRegionName = mergingRegion.getEncodedName(); 709 // Write reference with same file id only with the other region name as 710 // suffix and into the new region location (under same family). 711 Path p = new Path(referenceDir, f.getPath().getName() + "." + mergingRegionName); 712 tracker.createReference(r, p); 713 return p; 714 } 715 716 /** 717 * Commit a merged region, making it ready for use. 718 */ 719 public void commitMergedRegion(List<Path> allMergedFiles, MasterProcedureEnv env) 720 throws IOException { 721 Path regionDir = getMergesDir(regionInfoForFs); 722 if (regionDir != null && fs.exists(regionDir)) { 723 // Write HRI to a file in case we need to recover hbase:meta 724 Path regionInfoFile = new Path(regionDir, REGION_INFO_FILE); 725 byte[] regionInfoContent = getRegionInfoFileContent(regionInfo); 726 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent); 727 insertRegionFilesIntoStoreTracker(allMergedFiles, env, this); 728 } 729 } 730 731 // =========================================================================== 732 // Create/Open/Delete Helpers 733 // =========================================================================== 734 735 /** Returns Content of the file we write out to the filesystem under a region */ 736 private static byte[] getRegionInfoFileContent(final RegionInfo hri) throws IOException { 737 return RegionInfo.toDelimitedByteArray(hri); 738 } 739 740 /** 741 * Create a {@link RegionInfo} from the serialized version on-disk. 742 * @param fs {@link FileSystem} that contains the Region Info file 743 * @param regionDir {@link Path} to the Region Directory that contains the Info file 744 * @return An {@link RegionInfo} instance gotten from the Region Info file. 
745 * @throws IOException if an error occurred during file open/read operation. 746 */ 747 public static RegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir) 748 throws IOException { 749 FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE)); 750 try { 751 return RegionInfo.parseFrom(in); 752 } finally { 753 in.close(); 754 } 755 } 756 757 /** 758 * Write the .regioninfo file on-disk. 759 * <p/> 760 * Overwrites if exists already. 761 */ 762 private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs, 763 final Path regionInfoFile, final byte[] content) throws IOException { 764 // First check to get the permissions 765 FsPermission perms = CommonFSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY); 766 // Write the RegionInfo file content 767 try (FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null)) { 768 out.write(content); 769 } 770 } 771 772 /** 773 * Write out an info file under the stored region directory. Useful recovering mangled regions. If 774 * the regionInfo already exists on-disk, then we fast exit. 775 */ 776 void checkRegionInfoOnFilesystem() throws IOException { 777 // Compose the content of the file so we can compare to length in filesystem. If not same, 778 // rewrite it (it may have been written in the old format using Writables instead of pb). The 779 // pb version is much shorter -- we write now w/o the toString version -- so checking length 780 // only should be sufficient. I don't want to read the file every time to check if it pb 781 // serialized. 782 byte[] content = getRegionInfoFileContent(regionInfoForFs); 783 784 // Verify if the region directory exists before opening a region. We need to do this since if 785 // the region directory doesn't exist we will re-create the region directory and a new HRI 786 // when HRegion.openHRegion() is called. 
787 try { 788 FileStatus status = fs.getFileStatus(getRegionDir()); 789 } catch (FileNotFoundException e) { 790 LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() 791 + " on table " + regionInfo.getTable()); 792 } 793 794 try { 795 Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE); 796 FileStatus status = fs.getFileStatus(regionInfoFile); 797 if (status != null && status.getLen() == content.length) { 798 // Then assume the content good and move on. 799 // NOTE: that the length is not sufficient to define the the content matches. 800 return; 801 } 802 803 LOG.info("Rewriting .regioninfo file at: " + regionInfoFile); 804 if (!fs.delete(regionInfoFile, false)) { 805 throw new IOException("Unable to remove existing " + regionInfoFile); 806 } 807 } catch (FileNotFoundException e) { 808 LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() 809 + " on table " + regionInfo.getTable()); 810 } 811 812 // Write HRI to a file in case we need to recover hbase:meta 813 writeRegionInfoOnFilesystem(content, true); 814 } 815 816 /** 817 * Write out an info file under the region directory. Useful recovering mangled regions. 818 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation. 819 */ 820 private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException { 821 byte[] content = getRegionInfoFileContent(regionInfoForFs); 822 writeRegionInfoOnFilesystem(content, useTempDir); 823 } 824 825 /** 826 * Write out an info file under the region directory. Useful recovering mangled regions. 827 * @param regionInfoContent serialized version of the {@link RegionInfo} 828 * @param useTempDir indicate whether or not using the region .tmp dir for a safer file 829 * creation. 
830 */ 831 private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent, final boolean useTempDir) 832 throws IOException { 833 Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE); 834 if (useTempDir) { 835 // Create in tmpDir and then move into place in case we crash after 836 // create but before close. If we don't successfully close the file, 837 // subsequent region reopens will fail the below because create is 838 // registered in NN. 839 840 // And then create the file 841 Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE); 842 843 // If datanode crashes or if the RS goes down just before the close is called while trying to 844 // close the created regioninfo file in the .tmp directory then on next 845 // creation we will be getting AlreadyCreatedException. 846 // Hence delete and create the file if exists. 847 if (CommonFSUtils.isExists(fs, tmpPath)) { 848 CommonFSUtils.delete(fs, tmpPath, true); 849 } 850 851 // Write HRI to a file in case we need to recover hbase:meta 852 writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent); 853 854 // Move the created file to the original path 855 if (fs.exists(tmpPath) && !rename(tmpPath, regionInfoFile)) { 856 throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile); 857 } 858 } else { 859 // Write HRI to a file in case we need to recover hbase:meta 860 writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent); 861 } 862 } 863 864 /** 865 * Create a new Region on file-system. 866 * @param conf the {@link Configuration} to use 867 * @param fs {@link FileSystem} from which to add the region 868 * @param tableDir {@link Path} to where the table is being stored 869 * @param regionInfo {@link RegionInfo} for region to be added 870 * @throws IOException if the region creation fails due to a FileSystem exception. 
871 */ 872 public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf, 873 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException { 874 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 875 876 // We only create a .regioninfo and the region directory if this is the default region replica 877 if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 878 Path regionDir = regionFs.getRegionDir(); 879 if (fs.exists(regionDir)) { 880 LOG.warn("Trying to create a region that already exists on disk: " + regionDir); 881 } else { 882 // Create the region directory 883 if (!createDirOnFileSystem(fs, conf, regionDir)) { 884 LOG.warn("Unable to create the region directory: " + regionDir); 885 throw new IOException("Unable to create region directory: " + regionDir); 886 } 887 } 888 889 // Write HRI to a file in case we need to recover hbase:meta 890 regionFs.writeRegionInfoOnFilesystem(false); 891 } else { 892 if (LOG.isDebugEnabled()) 893 LOG.debug("Skipping creation of .regioninfo file for " + regionInfo); 894 } 895 return regionFs; 896 } 897 898 /** 899 * Open Region from file-system. 900 * @param conf the {@link Configuration} to use 901 * @param fs {@link FileSystem} from which to add the region 902 * @param tableDir {@link Path} to where the table is being stored 903 * @param regionInfo {@link RegionInfo} for region to be added 904 * @param readOnly True if you don't want to edit the region data 905 * @throws IOException if the region creation fails due to a FileSystem exception. 
906 */ 907 public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf, 908 final FileSystem fs, final Path tableDir, final RegionInfo regionInfo, boolean readOnly) 909 throws IOException { 910 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 911 Path regionDir = regionFs.getRegionDir(); 912 913 if (!fs.exists(regionDir)) { 914 LOG.warn("Trying to open a region that do not exists on disk: " + regionDir); 915 throw new IOException("The specified region do not exists on disk: " + regionDir); 916 } 917 918 if (!readOnly) { 919 // Cleanup temporary directories 920 regionFs.cleanupTempDir(); 921 922 // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta 923 // Only create HRI if we are the default replica 924 if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 925 regionFs.checkRegionInfoOnFilesystem(); 926 } else { 927 if (LOG.isDebugEnabled()) { 928 LOG.debug("Skipping creation of .regioninfo file for " + regionInfo); 929 } 930 } 931 } 932 933 return regionFs; 934 } 935 936 /** 937 * Remove the region from the table directory, archiving the region's hfiles. 
938 * @param conf the {@link Configuration} to use 939 * @param fs {@link FileSystem} from which to remove the region 940 * @param tableDir {@link Path} to where the table is being stored 941 * @param regionInfo {@link RegionInfo} for region to be deleted 942 * @throws IOException if the request cannot be completed 943 */ 944 public static void deleteRegionFromFileSystem(final Configuration conf, final FileSystem fs, 945 final Path tableDir, final RegionInfo regionInfo) throws IOException { 946 HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo); 947 Path regionDir = regionFs.getRegionDir(); 948 949 if (!fs.exists(regionDir)) { 950 LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir); 951 return; 952 } 953 954 if (LOG.isDebugEnabled()) { 955 LOG.debug("DELETING region " + regionDir); 956 } 957 958 // Archive region 959 Path rootDir = CommonFSUtils.getRootDir(conf); 960 HFileArchiver.archiveRegion(conf, fs, rootDir, tableDir, regionDir); 961 962 // Delete empty region dir 963 if (!fs.delete(regionDir, true)) { 964 LOG.warn("Failed delete of " + regionDir); 965 } 966 } 967 968 /** 969 * Creates a directory. Assumes the user has already checked for this directory existence. 970 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks 971 * whether the directory exists or not, and returns true if it exists. 
972 */ 973 boolean createDir(Path dir) throws IOException { 974 int i = 0; 975 IOException lastIOE = null; 976 do { 977 try { 978 return mkdirs(fs, conf, dir); 979 } catch (IOException ioe) { 980 lastIOE = ioe; 981 if (fs.exists(dir)) return true; // directory is present 982 try { 983 sleepBeforeRetry("Create Directory", i + 1); 984 } catch (InterruptedException e) { 985 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 986 } 987 } 988 } while (++i <= hdfsClientRetriesNumber); 989 throw new IOException("Exception in createDir", lastIOE); 990 } 991 992 /** 993 * Renames a directory. Assumes the user has already checked for this directory existence. 994 * @return true if rename is successful. 995 */ 996 boolean rename(Path srcpath, Path dstPath) throws IOException { 997 IOException lastIOE = null; 998 int i = 0; 999 do { 1000 try { 1001 return fs.rename(srcpath, dstPath); 1002 } catch (IOException ioe) { 1003 lastIOE = ioe; 1004 if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move 1005 // dir is not there, retry after some time. 1006 try { 1007 sleepBeforeRetry("Rename Directory", i + 1); 1008 } catch (InterruptedException e) { 1009 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 1010 } 1011 } 1012 } while (++i <= hdfsClientRetriesNumber); 1013 1014 throw new IOException("Exception in rename", lastIOE); 1015 } 1016 1017 /** 1018 * Deletes a directory. Assumes the user has already checked for this directory existence. 1019 * @return true if the directory is deleted. 1020 */ 1021 boolean deleteDir(Path dir) throws IOException { 1022 IOException lastIOE = null; 1023 int i = 0; 1024 do { 1025 try { 1026 return fs.delete(dir, true); 1027 } catch (IOException ioe) { 1028 lastIOE = ioe; 1029 if (!fs.exists(dir)) return true; 1030 // dir is there, retry deleting after some time. 
1031 try { 1032 sleepBeforeRetry("Delete Directory", i + 1); 1033 } catch (InterruptedException e) { 1034 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 1035 } 1036 } 1037 } while (++i <= hdfsClientRetriesNumber); 1038 1039 throw new IOException("Exception in DeleteDir", lastIOE); 1040 } 1041 1042 /** 1043 * sleeping logic; handles the interrupt exception. 1044 */ 1045 private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException { 1046 sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber); 1047 } 1048 1049 /** 1050 * Creates a directory for a filesystem and configuration object. Assumes the user has already 1051 * checked for this directory existence. 1052 * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks 1053 * whether the directory exists or not, and returns true if it exists. 1054 */ 1055 private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir) 1056 throws IOException { 1057 int i = 0; 1058 IOException lastIOE = null; 1059 int hdfsClientRetriesNumber = 1060 conf.getInt("hdfs.client.retries.number", DEFAULT_HDFS_CLIENT_RETRIES_NUMBER); 1061 int baseSleepBeforeRetries = 1062 conf.getInt("hdfs.client.sleep.before.retries", DEFAULT_BASE_SLEEP_BEFORE_RETRIES); 1063 do { 1064 try { 1065 return fs.mkdirs(dir); 1066 } catch (IOException ioe) { 1067 lastIOE = ioe; 1068 if (fs.exists(dir)) return true; // directory is present 1069 try { 1070 sleepBeforeRetry("Create Directory", i + 1, baseSleepBeforeRetries, 1071 hdfsClientRetriesNumber); 1072 } catch (InterruptedException e) { 1073 throw (InterruptedIOException) new InterruptedIOException().initCause(e); 1074 } 1075 } 1076 } while (++i <= hdfsClientRetriesNumber); 1077 1078 throw new IOException("Exception in createDir", lastIOE); 1079 } 1080 1081 /** 1082 * sleeping logic for static methods; handles the interrupt exception. 
Keeping a static version
   * for this to avoid re-looking for the integer values.
   */
  private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
    int hdfsClientRetriesNumber) throws InterruptedException {
    final boolean debug = LOG.isDebugEnabled();
    if (sleepMultiplier <= hdfsClientRetriesNumber) {
      if (debug) {
        LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
      }
      // Widen to long before multiplying so the product cannot overflow int.
      Thread.sleep((long) baseSleepBeforeRetries * sleepMultiplier);
    } else if (debug) {
      // Past the last retry: do not sleep, just note it.
      LOG.debug(msg + ", retries exhausted");
    }
  }

  /**
   * Builds an {@link HRegionFileSystem} for the given region. This method itself only invokes
   * the constructor.
   * @param conf       the {@link Configuration} to use
   * @param fs         {@link FileSystem} that holds the region
   * @param tableDir   {@link Path} to where the table is being stored
   * @param regionInfo {@link RegionInfo} of the region
   * @return a new {@link HRegionFileSystem} instance
   */
  public static HRegionFileSystem create(final Configuration conf, final FileSystem fs,
    final Path tableDir, final RegionInfo regionInfo) throws IOException {
    final HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
    return regionFs;
  }
}