001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.FileNotFoundException;
021import java.io.IOException;
022import java.io.InterruptedIOException;
023import java.util.ArrayList;
024import java.util.Collection;
025import java.util.List;
026import java.util.Objects;
027import java.util.Optional;
028import java.util.UUID;
029
030import org.apache.hadoop.conf.Configuration;
031import org.apache.hadoop.fs.FSDataInputStream;
032import org.apache.hadoop.fs.FSDataOutputStream;
033import org.apache.hadoop.fs.FileStatus;
034import org.apache.hadoop.fs.FileSystem;
035import org.apache.hadoop.fs.FileUtil;
036import org.apache.hadoop.fs.LocatedFileStatus;
037import org.apache.hadoop.fs.Path;
038import org.apache.hadoop.fs.permission.FsPermission;
039import org.apache.hadoop.hbase.Cell;
040import org.apache.hadoop.hbase.HConstants;
041import org.apache.hadoop.hbase.PrivateCellUtil;
042import org.apache.hadoop.hbase.backup.HFileArchiver;
043import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
044import org.apache.hadoop.hbase.client.RegionInfo;
045import org.apache.hadoop.hbase.client.TableDescriptor;
046import org.apache.hadoop.hbase.fs.HFileSystem;
047import org.apache.hadoop.hbase.io.Reference;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.FSHDFSUtils;
050import org.apache.hadoop.hbase.util.FSUtils;
051import org.apache.hadoop.hbase.util.Pair;
052import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
053import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
054import org.apache.yetus.audience.InterfaceAudience;
055import org.slf4j.Logger;
056import org.slf4j.LoggerFactory;
057import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
058
059import edu.umd.cs.findbugs.annotations.Nullable;
060
061/**
062 * View to an on-disk Region.
063 * Provides the set of methods necessary to interact with the on-disk region data.
064 */
065@InterfaceAudience.Private
066public class HRegionFileSystem {
  private static final Logger LOG = LoggerFactory.getLogger(HRegionFileSystem.class);

  /** Name of the region info file that resides just under the region directory. */
  public final static String REGION_INFO_FILE = ".regioninfo";

  /** Temporary subdirectory of the region directory used for merges. */
  public static final String REGION_MERGES_DIR = ".merges";

  /** Temporary subdirectory of the region directory used for splits. */
  public static final String REGION_SPLITS_DIR = ".splits";

  /** Temporary subdirectory of the region directory used for compaction output. */
  @VisibleForTesting static final String REGION_TEMP_DIR = ".tmp";

  // The region this view was created for (as passed in by the caller).
  private final RegionInfo regionInfo;
  //regionInfo for interacting with FS (getting encodedName, etc); resolved via
  //ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo) in the constructor.
  final RegionInfo regionInfoForFs;
  final Configuration conf;
  // Root directory of the table this region belongs to.
  private final Path tableDir;
  final FileSystem fs;
  // Region directory computed from tableDir + regionInfo (see constructor).
  private final Path regionDir;

  /**
   * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the
   * client level.
   */
  private final int hdfsClientRetriesNumber;
  private final int baseSleepBeforeRetries;
  private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
  private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
097
098  /**
099   * Create a view to the on-disk region
100   * @param conf the {@link Configuration} to use
101   * @param fs {@link FileSystem} that contains the region
102   * @param tableDir {@link Path} to where the table is being stored
103   * @param regionInfo {@link RegionInfo} for region
104   */
105  HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
106      final RegionInfo regionInfo) {
107    this.fs = fs;
108    this.conf = conf;
109    this.tableDir = Objects.requireNonNull(tableDir, "tableDir is null");
110    this.regionInfo = Objects.requireNonNull(regionInfo, "regionInfo is null");
111    this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo);
112    this.regionDir = FSUtils.getRegionDirFromTableDir(tableDir, regionInfo);
113    this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
114      DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
115    this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
116      DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
117 }
118
119  /** @return the underlying {@link FileSystem} */
120  public FileSystem getFileSystem() {
121    return this.fs;
122  }
123
124  /** @return the {@link RegionInfo} that describe this on-disk region view */
125  public RegionInfo getRegionInfo() {
126    return this.regionInfo;
127  }
128
  /**
   * @return the {@link RegionInfo} used for filesystem interactions (resolved via
   *         {@code ServerRegionReplicaUtil.getRegionInfoForFs} in the constructor)
   */
  public RegionInfo getRegionInfoForFS() {
    return this.regionInfoForFs;
  }
132
  /** @return {@link Path} to the root directory of the table this region belongs to. */
  public Path getTableDir() {
    return this.tableDir;
  }
137
138  /** @return {@link Path} to the region directory. */
139  public Path getRegionDir() {
140    return regionDir;
141  }
142
143  // ===========================================================================
144  //  Temp Helpers
145  // ===========================================================================
146  /** @return {@link Path} to the region's temp directory, used for file creations */
147  Path getTempDir() {
148    return new Path(getRegionDir(), REGION_TEMP_DIR);
149  }
150
151  /**
152   * Clean up any temp detritus that may have been left around from previous operation attempts.
153   */
154  void cleanupTempDir() throws IOException {
155    deleteDir(getTempDir());
156  }
157
158  // ===========================================================================
159  //  Store/StoreFile Helpers
160  // ===========================================================================
161  /**
162   * Returns the directory path of the specified family
163   * @param familyName Column Family Name
164   * @return {@link Path} to the directory of the specified family
165   */
166  public Path getStoreDir(final String familyName) {
167    return new Path(this.getRegionDir(), familyName);
168  }
169
170  /**
171   * Create the store directory for the specified family name
172   * @param familyName Column Family Name
173   * @return {@link Path} to the directory of the specified family
174   * @throws IOException if the directory creation fails.
175   */
176  Path createStoreDir(final String familyName) throws IOException {
177    Path storeDir = getStoreDir(familyName);
178    if(!fs.exists(storeDir) && !createDir(storeDir))
179      throw new IOException("Failed creating "+storeDir);
180    return storeDir;
181  }
182
183  /**
184   * Set the directory of CF to the specified storage policy. <br>
185   * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>,
186   * <i>"COLD"</i> <br>
187   * <br>
188   * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details.
189   * @param familyName The name of column family.
190   * @param policyName The name of the storage policy: 'HOT', 'COLD', etc.
191   * See see hadoop 2.6+ org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g
192   * 'COLD', 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'.
193   */
194  public void setStoragePolicy(String familyName, String policyName) {
195    FSUtils.setStoragePolicy(this.fs, getStoreDir(familyName), policyName);
196  }
197
198  /**
199   * Get the storage policy of the directory of CF.
200   * @param familyName The name of column family.
201   * @return Storage policy name, or {@code null} if not using {@link HFileSystem} or exception
202   *         thrown when trying to get policy
203   */
204  @Nullable
205  public String getStoragePolicyName(String familyName) {
206    if (this.fs instanceof HFileSystem) {
207      Path storeDir = getStoreDir(familyName);
208      return ((HFileSystem) this.fs).getStoragePolicyName(storeDir);
209    }
210
211    return null;
212  }
213
214  /**
215   * Returns the store files available for the family.
216   * This methods performs the filtering based on the valid store files.
217   * @param familyName Column Family Name
218   * @return a set of {@link StoreFileInfo} for the specified family.
219   */
220  public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
221    return getStoreFiles(Bytes.toString(familyName));
222  }
223
  /**
   * Returns the valid store files for the family (validation enabled).
   * @param familyName Column Family Name
   * @return a set of {@link StoreFileInfo} for the specified family.
   */
  public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
    return getStoreFiles(familyName, true);
  }
227
228  /**
229   * Returns the store files available for the family.
230   * This methods performs the filtering based on the valid store files.
231   * @param familyName Column Family Name
232   * @return a set of {@link StoreFileInfo} for the specified family.
233   */
234  public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate)
235      throws IOException {
236    Path familyDir = getStoreDir(familyName);
237    FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
238    if (files == null) {
239      if (LOG.isTraceEnabled()) {
240        LOG.trace("No StoreFiles for: " + familyDir);
241      }
242      return null;
243    }
244
245    ArrayList<StoreFileInfo> storeFiles = new ArrayList<>(files.length);
246    for (FileStatus status: files) {
247      if (validate && !StoreFileInfo.isValid(status)) {
248        LOG.warn("Invalid StoreFile: " + status.getPath());
249        continue;
250      }
251      StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
252        regionInfoForFs, familyName, status.getPath());
253      storeFiles.add(info);
254
255    }
256    return storeFiles;
257  }
258
259  /**
260   * Returns the store files' LocatedFileStatus which available for the family.
261   * This methods performs the filtering based on the valid store files.
262   * @param familyName Column Family Name
263   * @return a list of store files' LocatedFileStatus for the specified family.
264   */
265  public static List<LocatedFileStatus> getStoreFilesLocatedStatus(
266      final HRegionFileSystem regionfs, final String familyName,
267      final boolean validate) throws IOException {
268    Path familyDir = regionfs.getStoreDir(familyName);
269    List<LocatedFileStatus> locatedFileStatuses = FSUtils.listLocatedStatus(
270        regionfs.getFileSystem(), familyDir);
271    if (locatedFileStatuses == null) {
272      if (LOG.isTraceEnabled()) {
273        LOG.trace("No StoreFiles for: " + familyDir);
274      }
275      return null;
276    }
277
278    List<LocatedFileStatus> validStoreFiles = Lists.newArrayList();
279    for (LocatedFileStatus status : locatedFileStatuses) {
280      if (validate && !StoreFileInfo.isValid(status)) {
281        LOG.warn("Invalid StoreFile: " + status.getPath());
282      } else {
283        validStoreFiles.add(status);
284      }
285    }
286    return validStoreFiles;
287  }
288
289  /**
290   * Return Qualified Path of the specified family/file
291   *
292   * @param familyName Column Family Name
293   * @param fileName File Name
294   * @return The qualified Path for the specified family/file
295   */
296  Path getStoreFilePath(final String familyName, final String fileName) {
297    Path familyDir = getStoreDir(familyName);
298    return new Path(familyDir, fileName).makeQualified(fs.getUri(), fs.getWorkingDirectory());
299  }
300
301  /**
302   * Return the store file information of the specified family/file.
303   *
304   * @param familyName Column Family Name
305   * @param fileName File Name
306   * @return The {@link StoreFileInfo} for the specified family/file
307   */
308  StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
309      throws IOException {
310    Path familyDir = getStoreDir(familyName);
311    return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
312      regionInfoForFs, familyName, new Path(familyDir, fileName));
313  }
314
315  /**
316   * Returns true if the specified family has reference files
317   * @param familyName Column Family Name
318   * @return true if family contains reference files
319   * @throws IOException
320   */
321  public boolean hasReferences(final String familyName) throws IOException {
322    Path storeDir = getStoreDir(familyName);
323    FileStatus[] files = FSUtils.listStatus(fs, storeDir);
324    if (files != null) {
325      for(FileStatus stat: files) {
326        if(stat.isDirectory()) {
327          continue;
328        }
329        if (StoreFileInfo.isReference(stat.getPath())) {
330          LOG.trace("Reference {}", stat.getPath());
331          return true;
332        }
333      }
334    }
335    return false;
336  }
337
338  /**
339   * Check whether region has Reference file
340   * @param htd table desciptor of the region
341   * @return true if region has reference file
342   * @throws IOException
343   */
344  public boolean hasReferences(final TableDescriptor htd) throws IOException {
345    for (ColumnFamilyDescriptor family : htd.getColumnFamilies()) {
346      if (hasReferences(family.getNameAsString())) {
347        return true;
348      }
349    }
350    return false;
351  }
352
353  /**
354   * @return the set of families present on disk
355   * @throws IOException
356   */
357  public Collection<String> getFamilies() throws IOException {
358    FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
359    if (fds == null) return null;
360
361    ArrayList<String> families = new ArrayList<>(fds.length);
362    for (FileStatus status: fds) {
363      families.add(status.getPath().getName());
364    }
365
366    return families;
367  }
368
369  /**
370   * Remove the region family from disk, archiving the store files.
371   * @param familyName Column Family Name
372   * @throws IOException if an error occours during the archiving
373   */
374  public void deleteFamily(final String familyName) throws IOException {
375    // archive family store files
376    HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName));
377
378    // delete the family folder
379    Path familyDir = getStoreDir(familyName);
380    if(fs.exists(familyDir) && !deleteDir(familyDir))
381      throw new IOException("Could not delete family " + familyName
382          + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "("
383          + regionInfoForFs.getEncodedName() + ")");
384  }
385
386  /**
387   * Generate a unique file name, used by createTempName() and commitStoreFile()
388   * @param suffix extra information to append to the generated name
389   * @return Unique file name
390   */
391  private static String generateUniqueName(final String suffix) {
392    String name = UUID.randomUUID().toString().replaceAll("-", "");
393    if (suffix != null) name += suffix;
394    return name;
395  }
396
397  /**
398   * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
399   * to get a safer file creation.
400   * <code>
401   * Path file = fs.createTempName();
402   * ...StoreFile.Writer(file)...
403   * fs.commitStoreFile("family", file);
404   * </code>
405   *
406   * @return Unique {@link Path} of the temporary file
407   */
408  public Path createTempName() {
409    return createTempName(null);
410  }
411
412  /**
413   * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
414   * to get a safer file creation.
415   * <code>
416   * Path file = fs.createTempName();
417   * ...StoreFile.Writer(file)...
418   * fs.commitStoreFile("family", file);
419   * </code>
420   *
421   * @param suffix extra information to append to the generated name
422   * @return Unique {@link Path} of the temporary file
423   */
424  public Path createTempName(final String suffix) {
425    return new Path(getTempDir(), generateUniqueName(suffix));
426  }
427
428  /**
429   * Move the file from a build/temp location to the main family store directory.
430   * @param familyName Family that will gain the file
431   * @param buildPath {@link Path} to the file to commit.
432   * @return The new {@link Path} of the committed file
433   * @throws IOException
434   */
435  public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
436    Path dstPath = preCommitStoreFile(familyName, buildPath, -1, false);
437    return commitStoreFile(buildPath, dstPath);
438  }
439
440  /**
441   * Generate the filename in the main family store directory for moving the file from a build/temp
442   *  location.
443   * @param familyName Family that will gain the file
444   * @param buildPath {@link Path} to the file to commit.
445   * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence number)
446   * @param generateNewName False if you want to keep the buildPath name
447   * @return The new {@link Path} of the to be committed file
448   * @throws IOException
449   */
450  private Path preCommitStoreFile(final String familyName, final Path buildPath,
451      final long seqNum, final boolean generateNewName) throws IOException {
452    Path storeDir = getStoreDir(familyName);
453    if(!fs.exists(storeDir) && !createDir(storeDir))
454      throw new IOException("Failed creating " + storeDir);
455
456    String name = buildPath.getName();
457    if (generateNewName) {
458      name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
459    }
460    Path dstPath = new Path(storeDir, name);
461    if (!fs.exists(buildPath)) {
462      throw new FileNotFoundException(buildPath.toString());
463    }
464    if (LOG.isDebugEnabled()) {
465      LOG.debug("Committing " + buildPath + " as " + dstPath);
466    }
467    return dstPath;
468  }
469
470  /*
471   * Moves file from staging dir to region dir
472   * @param buildPath {@link Path} to the file to commit.
473   * @param dstPath {@link Path} to the file under region dir
474   * @return The {@link Path} of the committed file
475   * @throws IOException
476   */
477  Path commitStoreFile(final Path buildPath, Path dstPath) throws IOException {
478    // buildPath exists, therefore not doing an exists() check.
479    if (!rename(buildPath, dstPath)) {
480      throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
481    }
482    return dstPath;
483  }
484
485  /**
486   * Archives the specified store file from the specified family.
487   * @param familyName Family that contains the store files
488   * @param filePath {@link Path} to the store file to remove
489   * @throws IOException if the archiving fails
490   */
491  public void removeStoreFile(final String familyName, final Path filePath)
492      throws IOException {
493    HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs,
494        this.tableDir, Bytes.toBytes(familyName), filePath);
495  }
496
497  /**
498   * Closes and archives the specified store files from the specified family.
499   * @param familyName Family that contains the store files
500   * @param storeFiles set of store files to remove
501   * @throws IOException if the archiving fails
502   */
503  public void removeStoreFiles(String familyName, Collection<HStoreFile> storeFiles)
504      throws IOException {
505    HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs,
506        this.tableDir, Bytes.toBytes(familyName), storeFiles);
507  }
508
509  /**
510   * Bulk load: Add a specified store file to the specified family.
511   * If the source file is on the same different file-system is moved from the
512   * source location to the destination location, otherwise is copied over.
513   *
514   * @param familyName Family that will gain the file
515   * @param srcPath {@link Path} to the file to import
516   * @param seqNum Bulk Load sequence number
517   * @return The destination {@link Path} of the bulk loaded file
518   * @throws IOException
519   */
520  Pair<Path, Path> bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
521      throws IOException {
522    // Copy the file if it's on another filesystem
523    FileSystem srcFs = srcPath.getFileSystem(conf);
524    srcPath = srcFs.resolvePath(srcPath);
525    FileSystem realSrcFs = srcPath.getFileSystem(conf);
526    FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;
527
528    // We can't compare FileSystem instances as equals() includes UGI instance
529    // as part of the comparison and won't work when doing SecureBulkLoad
530    // TODO deal with viewFS
531    if (!FSHDFSUtils.isSameHdfs(conf, realSrcFs, desFs)) {
532      LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
533          "the destination store. Copying file over to destination filesystem.");
534      Path tmpPath = createTempName();
535      FileUtil.copy(realSrcFs, srcPath, fs, tmpPath, false, conf);
536      LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
537      srcPath = tmpPath;
538    }
539
540    return new Pair<>(srcPath, preCommitStoreFile(familyName, srcPath, seqNum, true));
541  }
542
543  // ===========================================================================
544  //  Splits Helpers
545  // ===========================================================================
546  /** @return {@link Path} to the temp directory used during split operations */
547  Path getSplitsDir() {
548    return new Path(getRegionDir(), REGION_SPLITS_DIR);
549  }
550
  /**
   * @param hri daughter {@link RegionInfo}
   * @return {@link Path} to the given daughter's temp dir under the splits directory
   */
  public Path getSplitsDir(final RegionInfo hri) {
    return new Path(getSplitsDir(), hri.getEncodedName());
  }
554
555  /**
556   * Clean up any split detritus that may have been left around from previous split attempts.
557   */
558  void cleanupSplitsDir() throws IOException {
559    deleteDir(getSplitsDir());
560  }
561
562  /**
563   * Clean up any split detritus that may have been left around from previous
564   * split attempts.
565   * Call this method on initial region deploy.
566   * @throws IOException
567   */
568  void cleanupAnySplitDetritus() throws IOException {
569    Path splitdir = this.getSplitsDir();
570    if (!fs.exists(splitdir)) return;
571    // Look at the splitdir.  It could have the encoded names of the daughter
572    // regions we tried to make.  See if the daughter regions actually got made
573    // out under the tabledir.  If here under splitdir still, then the split did
574    // not complete.  Try and do cleanup.  This code WILL NOT catch the case
575    // where we successfully created daughter a but regionserver crashed during
576    // the creation of region b.  In this case, there'll be an orphan daughter
577    // dir in the filesystem.  TOOD: Fix.
578    FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
579    if (daughters != null) {
580      for (FileStatus daughter: daughters) {
581        Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
582        if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
583          throw new IOException("Failed delete of " + daughterDir);
584        }
585      }
586    }
587    cleanupSplitsDir();
588    LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
589  }
590
591  /**
592   * Remove daughter region
593   * @param regionInfo daughter {@link RegionInfo}
594   * @throws IOException
595   */
596  void cleanupDaughterRegion(final RegionInfo regionInfo) throws IOException {
597    Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
598    if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
599      throw new IOException("Failed delete of " + regionDir);
600    }
601  }
602
603  /**
604   * Commit a daughter region, moving it from the split temporary directory
605   * to the proper location in the filesystem.
606   *
607   * @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo}
608   * @throws IOException
609   */
610  public Path commitDaughterRegion(final RegionInfo regionInfo)
611      throws IOException {
612    Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
613    Path daughterTmpDir = this.getSplitsDir(regionInfo);
614
615    if (fs.exists(daughterTmpDir)) {
616
617      // Write HRI to a file in case we need to recover hbase:meta
618      Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
619      byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
620      writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
621
622      // Move the daughter temp dir to the table dir
623      if (!rename(daughterTmpDir, regionDir)) {
624        throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
625      }
626    }
627
628    return regionDir;
629  }
630
631  /**
632   * Create the region splits directory.
633   */
634  public void createSplitsDir(RegionInfo daughterA, RegionInfo daughterB) throws IOException {
635    Path splitdir = getSplitsDir();
636    if (fs.exists(splitdir)) {
637      LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
638      if (!deleteDir(splitdir)) {
639        throw new IOException("Failed deletion of " + splitdir + " before creating them again.");
640      }
641    }
642    // splitDir doesn't exists now. No need to do an exists() call for it.
643    if (!createDir(splitdir)) {
644      throw new IOException("Failed create of " + splitdir);
645    }
646    Path daughterATmpDir = getSplitsDir(daughterA);
647    if (!createDir(daughterATmpDir)) {
648      throw new IOException("Failed create of " + daughterATmpDir);
649    }
650    Path daughterBTmpDir = getSplitsDir(daughterB);
651    if (!createDir(daughterBTmpDir)) {
652      throw new IOException("Failed create of " + daughterBTmpDir);
653    }
654  }
655
656  /**
657   * Write out a split reference. Package local so it doesnt leak out of
658   * regionserver.
659   * @param hri {@link RegionInfo} of the destination
660   * @param familyName Column Family Name
661   * @param f File to split.
662   * @param splitRow Split Row
663   * @param top True if we are referring to the top half of the hfile.
664   * @param splitPolicy A split policy instance; be careful! May not be full populated; e.g. if
665   *                    this method is invoked on the Master side, then the RegionSplitPolicy will
666   *                    NOT have a reference to a Region.
667   * @return Path to created reference.
668   * @throws IOException
669   */
670  public Path splitStoreFile(RegionInfo hri, String familyName, HStoreFile f, byte[] splitRow,
671      boolean top, RegionSplitPolicy splitPolicy) throws IOException {
672    if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) {
673      // Check whether the split row lies in the range of the store file
674      // If it is outside the range, return directly.
675      f.initReader();
676      try {
677        if (top) {
678          //check if larger than last key.
679          Cell splitKey = PrivateCellUtil.createFirstOnRow(splitRow);
680          Optional<Cell> lastKey = f.getLastKey();
681          // If lastKey is null means storefile is empty.
682          if (!lastKey.isPresent()) {
683            return null;
684          }
685          if (f.getComparator().compare(splitKey, lastKey.get()) > 0) {
686            return null;
687          }
688        } else {
689          //check if smaller than first key
690          Cell splitKey = PrivateCellUtil.createLastOnRow(splitRow);
691          Optional<Cell> firstKey = f.getFirstKey();
692          // If firstKey is null means storefile is empty.
693          if (!firstKey.isPresent()) {
694            return null;
695          }
696          if (f.getComparator().compare(splitKey, firstKey.get()) < 0) {
697            return null;
698          }
699        }
700      } finally {
701        f.closeStoreFile(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true);
702      }
703    }
704
705    Path splitDir = new Path(getSplitsDir(hri), familyName);
706    // A reference to the bottom half of the hsf store file.
707    Reference r =
708      top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
709    // Add the referred-to regions name as a dot separated suffix.
710    // See REF_NAME_REGEX regex above.  The referred-to regions name is
711    // up in the path of the passed in <code>f</code> -- parentdir is family,
712    // then the directory above is the region name.
713    String parentRegionName = regionInfoForFs.getEncodedName();
714    // Write reference with same file id only with the other region name as
715    // suffix and into the new region location (under same family).
716    Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
717    return r.write(fs, p);
718  }
719
720  // ===========================================================================
721  //  Merge Helpers
722  // ===========================================================================
723  /** @return {@link Path} to the temp directory used during merge operations */
724  public Path getMergesDir() {
725    return new Path(getRegionDir(), REGION_MERGES_DIR);
726  }
727
  /**
   * @param hri merged region {@link RegionInfo}
   * @return {@link Path} to the given region's temp dir under the merges directory
   */
  Path getMergesDir(final RegionInfo hri) {
    return new Path(getMergesDir(), hri.getEncodedName());
  }
731
732  /**
733   * Clean up any merge detritus that may have been left around from previous merge attempts.
734   */
735  void cleanupMergesDir() throws IOException {
736    deleteDir(getMergesDir());
737  }
738
739  /**
740   * Remove merged region
741   * @param mergedRegion {@link RegionInfo}
742   * @throws IOException
743   */
744  public void cleanupMergedRegion(final RegionInfo mergedRegion) throws IOException {
745    Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
746    if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
747      throw new IOException("Failed delete of " + regionDir);
748    }
749  }
750
751  static boolean mkdirs(FileSystem fs, Configuration conf, Path dir) throws IOException {
752    if (FSUtils.isDistributedFileSystem(fs) ||
753        !conf.getBoolean(HConstants.ENABLE_DATA_FILE_UMASK, false)) {
754      return fs.mkdirs(dir);
755    }
756    FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
757    return fs.mkdirs(dir, perms);
758  }
759
760  /**
761   * Create the region merges directory, a temporary directory to accumulate
762   * merges in.
763   * @throws IOException If merges dir already exists or we fail to create it.
764   * @see HRegionFileSystem#cleanupMergesDir()
765   */
766  public void createMergesDir() throws IOException {
767    Path mergesdir = getMergesDir();
768    if (fs.exists(mergesdir)) {
769      LOG.info("{} directory exists. Deleting it to recreate it anew", mergesdir);
770      if (!fs.delete(mergesdir, true)) {
771        throw new IOException("Failed deletion of " + mergesdir + " before recreate.");
772      }
773    }
774    if (!mkdirs(fs, conf, mergesdir)) {
775      throw new IOException("Failed create of " + mergesdir);
776    }
777  }
778
779  /**
780   * Write out a merge reference under the given merges directory. Package local
781   * so it doesnt leak out of regionserver.
782   * @param mergedRegion {@link RegionInfo} of the merged region
783   * @param familyName Column Family Name
784   * @param f File to create reference.
785   * @param mergedDir
786   * @return Path to created reference.
787   * @throws IOException
788   */
789  public Path mergeStoreFile(RegionInfo mergedRegion, String familyName, HStoreFile f,
790      Path mergedDir) throws IOException {
791    Path referenceDir = new Path(new Path(mergedDir,
792        mergedRegion.getEncodedName()), familyName);
793    // A whole reference to the store file.
794    Reference r = Reference.createTopReference(regionInfoForFs.getStartKey());
795    // Add the referred-to regions name as a dot separated suffix.
796    // See REF_NAME_REGEX regex above. The referred-to regions name is
797    // up in the path of the passed in <code>f</code> -- parentdir is family,
798    // then the directory above is the region name.
799    String mergingRegionName = regionInfoForFs.getEncodedName();
800    // Write reference with same file id only with the other region name as
801    // suffix and into the new region location (under same family).
802    Path p = new Path(referenceDir, f.getPath().getName() + "."
803        + mergingRegionName);
804    return r.write(fs, p);
805  }
806
807  /**
808   * Commit a merged region, moving it from the merges temporary directory to
809   * the proper location in the filesystem.
810   * @param mergedRegionInfo merged region {@link RegionInfo}
811   * @throws IOException
812   */
813  public void commitMergedRegion(final RegionInfo mergedRegionInfo) throws IOException {
814    Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
815    Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
816    // Move the tmp dir to the expected location
817    if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
818      if (!fs.rename(mergedRegionTmpDir, regionDir)) {
819        throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
820            + regionDir);
821      }
822    }
823  }
824
825  // ===========================================================================
826  //  Create/Open/Delete Helpers
827  // ===========================================================================
828  /**
829   * Log the current state of the region
830   * @param LOG log to output information
831   * @throws IOException if an unexpected exception occurs
832   */
833  void logFileSystemState(final Logger LOG) throws IOException {
834    FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
835  }
836
837  /**
838   * @param hri
839   * @return Content of the file we write out to the filesystem under a region
840   * @throws IOException
841   */
842  private static byte[] getRegionInfoFileContent(final RegionInfo hri) throws IOException {
843    return RegionInfo.toDelimitedByteArray(hri);
844  }
845
846  /**
847   * Create a {@link RegionInfo} from the serialized version on-disk.
848   * @param fs {@link FileSystem} that contains the Region Info file
849   * @param regionDir {@link Path} to the Region Directory that contains the Info file
850   * @return An {@link RegionInfo} instance gotten from the Region Info file.
851   * @throws IOException if an error occurred during file open/read operation.
852   */
853  public static RegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
854      throws IOException {
855    FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
856    try {
857      return RegionInfo.parseFrom(in);
858    } finally {
859      in.close();
860    }
861  }
862
863  /**
864   * Write the .regioninfo file on-disk.
865   * Overwrites if exists already.
866   */
867  private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
868      final Path regionInfoFile, final byte[] content) throws IOException {
869    // First check to get the permissions
870    FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
871    // Write the RegionInfo file content
872    FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null);
873    try {
874      out.write(content);
875    } finally {
876      out.close();
877    }
878  }
879
880  /**
881   * Write out an info file under the stored region directory. Useful recovering mangled regions.
882   * If the regionInfo already exists on-disk, then we fast exit.
883   */
884  void checkRegionInfoOnFilesystem() throws IOException {
885    // Compose the content of the file so we can compare to length in filesystem. If not same,
886    // rewrite it (it may have been written in the old format using Writables instead of pb). The
887    // pb version is much shorter -- we write now w/o the toString version -- so checking length
888    // only should be sufficient. I don't want to read the file every time to check if it pb
889    // serialized.
890    byte[] content = getRegionInfoFileContent(regionInfoForFs);
891
892    // Verify if the region directory exists before opening a region. We need to do this since if
893    // the region directory doesn't exist we will re-create the region directory and a new HRI
894    // when HRegion.openHRegion() is called.
895    try {
896      FileStatus status = fs.getFileStatus(getRegionDir());
897    } catch (FileNotFoundException e) {
898      LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() +
899          " on table " + regionInfo.getTable());
900    }
901
902    try {
903      Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
904      FileStatus status = fs.getFileStatus(regionInfoFile);
905      if (status != null && status.getLen() == content.length) {
906        // Then assume the content good and move on.
907        // NOTE: that the length is not sufficient to define the the content matches.
908        return;
909      }
910
911      LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
912      if (!fs.delete(regionInfoFile, false)) {
913        throw new IOException("Unable to remove existing " + regionInfoFile);
914      }
915    } catch (FileNotFoundException e) {
916      LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() +
917          " on table " + regionInfo.getTable());
918    }
919
920    // Write HRI to a file in case we need to recover hbase:meta
921    writeRegionInfoOnFilesystem(content, true);
922  }
923
924  /**
925   * Write out an info file under the region directory. Useful recovering mangled regions.
926   * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
927   */
928  private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
929    byte[] content = getRegionInfoFileContent(regionInfoForFs);
930    writeRegionInfoOnFilesystem(content, useTempDir);
931  }
932
933  /**
934   * Write out an info file under the region directory. Useful recovering mangled regions.
935   * @param regionInfoContent serialized version of the {@link RegionInfo}
936   * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
937   */
938  private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
939      final boolean useTempDir) throws IOException {
940    Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
941    if (useTempDir) {
942      // Create in tmpDir and then move into place in case we crash after
943      // create but before close. If we don't successfully close the file,
944      // subsequent region reopens will fail the below because create is
945      // registered in NN.
946
947      // And then create the file
948      Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
949
950      // If datanode crashes or if the RS goes down just before the close is called while trying to
951      // close the created regioninfo file in the .tmp directory then on next
952      // creation we will be getting AlreadyCreatedException.
953      // Hence delete and create the file if exists.
954      if (FSUtils.isExists(fs, tmpPath)) {
955        FSUtils.delete(fs, tmpPath, true);
956      }
957
958      // Write HRI to a file in case we need to recover hbase:meta
959      writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
960
961      // Move the created file to the original path
962      if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
963        throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
964      }
965    } else {
966      // Write HRI to a file in case we need to recover hbase:meta
967      writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
968    }
969  }
970
971  /**
972   * Create a new Region on file-system.
973   * @param conf the {@link Configuration} to use
974   * @param fs {@link FileSystem} from which to add the region
975   * @param tableDir {@link Path} to where the table is being stored
976   * @param regionInfo {@link RegionInfo} for region to be added
977   * @throws IOException if the region creation fails due to a FileSystem exception.
978   */
979  public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
980      final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException {
981    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
982
983    // We only create a .regioninfo and the region directory if this is the default region replica
984    if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
985      Path regionDir = regionFs.getRegionDir();
986      if (fs.exists(regionDir)) {
987        LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
988      } else {
989        // Create the region directory
990        if (!createDirOnFileSystem(fs, conf, regionDir)) {
991          LOG.warn("Unable to create the region directory: " + regionDir);
992          throw new IOException("Unable to create region directory: " + regionDir);
993        }
994      }
995
996      // Write HRI to a file in case we need to recover hbase:meta
997      regionFs.writeRegionInfoOnFilesystem(false);
998    } else {
999      if (LOG.isDebugEnabled())
1000        LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
1001    }
1002    return regionFs;
1003  }
1004
1005  /**
1006   * Open Region from file-system.
1007   * @param conf the {@link Configuration} to use
1008   * @param fs {@link FileSystem} from which to add the region
1009   * @param tableDir {@link Path} to where the table is being stored
1010   * @param regionInfo {@link RegionInfo} for region to be added
1011   * @param readOnly True if you don't want to edit the region data
1012   * @throws IOException if the region creation fails due to a FileSystem exception.
1013   */
1014  public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
1015      final FileSystem fs, final Path tableDir, final RegionInfo regionInfo, boolean readOnly)
1016      throws IOException {
1017    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
1018    Path regionDir = regionFs.getRegionDir();
1019
1020    if (!fs.exists(regionDir)) {
1021      LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
1022      throw new IOException("The specified region do not exists on disk: " + regionDir);
1023    }
1024
1025    if (!readOnly) {
1026      // Cleanup temporary directories
1027      regionFs.cleanupTempDir();
1028      regionFs.cleanupSplitsDir();
1029      regionFs.cleanupMergesDir();
1030
1031      // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
1032      // Only create HRI if we are the default replica
1033      if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1034        regionFs.checkRegionInfoOnFilesystem();
1035      } else {
1036        if (LOG.isDebugEnabled()) {
1037          LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
1038        }
1039      }
1040    }
1041
1042    return regionFs;
1043  }
1044
1045  /**
1046   * Remove the region from the table directory, archiving the region's hfiles.
1047   * @param conf the {@link Configuration} to use
1048   * @param fs {@link FileSystem} from which to remove the region
1049   * @param tableDir {@link Path} to where the table is being stored
1050   * @param regionInfo {@link RegionInfo} for region to be deleted
1051   * @throws IOException if the request cannot be completed
1052   */
1053  public static void deleteRegionFromFileSystem(final Configuration conf,
1054      final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException {
1055    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
1056    Path regionDir = regionFs.getRegionDir();
1057
1058    if (!fs.exists(regionDir)) {
1059      LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
1060      return;
1061    }
1062
1063    if (LOG.isDebugEnabled()) {
1064      LOG.debug("DELETING region " + regionDir);
1065    }
1066
1067    // Archive region
1068    Path rootDir = FSUtils.getRootDir(conf);
1069    HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
1070
1071    // Delete empty region dir
1072    if (!fs.delete(regionDir, true)) {
1073      LOG.warn("Failed delete of " + regionDir);
1074    }
1075  }
1076
1077  /**
1078   * Creates a directory. Assumes the user has already checked for this directory existence.
1079   * @param dir
1080   * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1081   *         whether the directory exists or not, and returns true if it exists.
1082   * @throws IOException
1083   */
1084  boolean createDir(Path dir) throws IOException {
1085    int i = 0;
1086    IOException lastIOE = null;
1087    do {
1088      try {
1089        return mkdirs(fs, conf, dir);
1090      } catch (IOException ioe) {
1091        lastIOE = ioe;
1092        if (fs.exists(dir)) return true; // directory is present
1093        try {
1094          sleepBeforeRetry("Create Directory", i+1);
1095        } catch (InterruptedException e) {
1096          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1097        }
1098      }
1099    } while (++i <= hdfsClientRetriesNumber);
1100    throw new IOException("Exception in createDir", lastIOE);
1101  }
1102
1103  /**
1104   * Renames a directory. Assumes the user has already checked for this directory existence.
1105   * @param srcpath
1106   * @param dstPath
1107   * @return true if rename is successful.
1108   * @throws IOException
1109   */
1110  boolean rename(Path srcpath, Path dstPath) throws IOException {
1111    IOException lastIOE = null;
1112    int i = 0;
1113    do {
1114      try {
1115        return fs.rename(srcpath, dstPath);
1116      } catch (IOException ioe) {
1117        lastIOE = ioe;
1118        if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
1119        // dir is not there, retry after some time.
1120        try {
1121          sleepBeforeRetry("Rename Directory", i+1);
1122        } catch (InterruptedException e) {
1123          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1124        }
1125      }
1126    } while (++i <= hdfsClientRetriesNumber);
1127
1128    throw new IOException("Exception in rename", lastIOE);
1129  }
1130
1131  /**
1132   * Deletes a directory. Assumes the user has already checked for this directory existence.
1133   * @param dir
1134   * @return true if the directory is deleted.
1135   * @throws IOException
1136   */
1137  boolean deleteDir(Path dir) throws IOException {
1138    IOException lastIOE = null;
1139    int i = 0;
1140    do {
1141      try {
1142        return fs.delete(dir, true);
1143      } catch (IOException ioe) {
1144        lastIOE = ioe;
1145        if (!fs.exists(dir)) return true;
1146        // dir is there, retry deleting after some time.
1147        try {
1148          sleepBeforeRetry("Delete Directory", i+1);
1149        } catch (InterruptedException e) {
1150          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1151        }
1152      }
1153    } while (++i <= hdfsClientRetriesNumber);
1154
1155    throw new IOException("Exception in DeleteDir", lastIOE);
1156  }
1157
1158  /**
1159   * sleeping logic; handles the interrupt exception.
1160   */
1161  private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
1162    sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1163  }
1164
1165  /**
1166   * Creates a directory for a filesystem and configuration object. Assumes the user has already
1167   * checked for this directory existence.
1168   * @param fs
1169   * @param conf
1170   * @param dir
1171   * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1172   *         whether the directory exists or not, and returns true if it exists.
1173   * @throws IOException
1174   */
1175  private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1176      throws IOException {
1177    int i = 0;
1178    IOException lastIOE = null;
1179    int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1180      DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1181    int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1182      DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1183    do {
1184      try {
1185        return fs.mkdirs(dir);
1186      } catch (IOException ioe) {
1187        lastIOE = ioe;
1188        if (fs.exists(dir)) return true; // directory is present
1189        try {
1190          sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1191        } catch (InterruptedException e) {
1192          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1193        }
1194      }
1195    } while (++i <= hdfsClientRetriesNumber);
1196
1197    throw new IOException("Exception in createDir", lastIOE);
1198  }
1199
1200  /**
1201   * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1202   * for this to avoid re-looking for the integer values.
1203   */
1204  private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1205      int hdfsClientRetriesNumber) throws InterruptedException {
1206    if (sleepMultiplier > hdfsClientRetriesNumber) {
1207      if (LOG.isDebugEnabled()) {
1208        LOG.debug(msg + ", retries exhausted");
1209      }
1210      return;
1211    }
1212    if (LOG.isDebugEnabled()) {
1213      LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1214    }
1215    Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1216  }
1217}