001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019
020package org.apache.hadoop.hbase.regionserver;
021
022import edu.umd.cs.findbugs.annotations.Nullable;
023import java.io.FileNotFoundException;
024import java.io.IOException;
025import java.io.InterruptedIOException;
026import java.util.ArrayList;
027import java.util.Collection;
028import java.util.List;
029import java.util.Optional;
030import java.util.UUID;
031import org.apache.hadoop.conf.Configuration;
032import org.apache.hadoop.fs.FSDataInputStream;
033import org.apache.hadoop.fs.FSDataOutputStream;
034import org.apache.hadoop.fs.FileStatus;
035import org.apache.hadoop.fs.FileSystem;
036import org.apache.hadoop.fs.FileUtil;
037import org.apache.hadoop.fs.LocatedFileStatus;
038import org.apache.hadoop.fs.Path;
039import org.apache.hadoop.fs.permission.FsPermission;
040import org.apache.hadoop.hbase.Cell;
041import org.apache.hadoop.hbase.HConstants;
042import org.apache.hadoop.hbase.PrivateCellUtil;
043import org.apache.hadoop.hbase.backup.HFileArchiver;
044import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
045import org.apache.hadoop.hbase.client.RegionInfo;
046import org.apache.hadoop.hbase.client.TableDescriptor;
047import org.apache.hadoop.hbase.fs.HFileSystem;
048import org.apache.hadoop.hbase.io.Reference;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.FSHDFSUtils;
051import org.apache.hadoop.hbase.util.FSUtils;
052import org.apache.hadoop.hbase.util.Pair;
053import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
054import org.apache.yetus.audience.InterfaceAudience;
055import org.slf4j.Logger;
056import org.slf4j.LoggerFactory;
057
058import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
059import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
060
061/**
062 * View to an on-disk Region.
063 * Provides the set of methods necessary to interact with the on-disk region data.
064 */
065@InterfaceAudience.Private
066public class HRegionFileSystem {
067  private static final Logger LOG = LoggerFactory.getLogger(HRegionFileSystem.class);
068
069  /** Name of the region info file that resides just under the region directory. */
070  public final static String REGION_INFO_FILE = ".regioninfo";
071
072  /** Temporary subdirectory of the region directory used for merges. */
073  public static final String REGION_MERGES_DIR = ".merges";
074
075  /** Temporary subdirectory of the region directory used for splits. */
076  public static final String REGION_SPLITS_DIR = ".splits";
077
078  /** Temporary subdirectory of the region directory used for compaction output. */
079  @VisibleForTesting static final String REGION_TEMP_DIR = ".tmp";
080
081  private final RegionInfo regionInfo;
082  //regionInfo for interacting with FS (getting encodedName, etc)
083  private final RegionInfo regionInfoForFs;
084  private final Configuration conf;
085  private final Path tableDir;
086  private final FileSystem fs;
087
088  /**
089   * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the
090   * client level.
091   */
092  private final int hdfsClientRetriesNumber;
093  private final int baseSleepBeforeRetries;
094  private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
095  private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
096
097  /**
098   * Create a view to the on-disk region
099   * @param conf the {@link Configuration} to use
100   * @param fs {@link FileSystem} that contains the region
101   * @param tableDir {@link Path} to where the table is being stored
102   * @param regionInfo {@link RegionInfo} for region
103   */
104  HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
105      final RegionInfo regionInfo) {
106    this.fs = fs;
107    this.conf = conf;
108    this.tableDir = tableDir;
109    this.regionInfo = regionInfo;
110    this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo);
111    this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
112      DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
113    this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
114      DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
115 }
116
117  /** @return the underlying {@link FileSystem} */
118  public FileSystem getFileSystem() {
119    return this.fs;
120  }
121
122  /** @return the {@link RegionInfo} that describe this on-disk region view */
123  public RegionInfo getRegionInfo() {
124    return this.regionInfo;
125  }
126
127  public RegionInfo getRegionInfoForFS() {
128    return this.regionInfoForFs;
129  }
130
131  /** @return {@link Path} to the region's root directory. */
132  public Path getTableDir() {
133    return this.tableDir;
134  }
135
136  /** @return {@link Path} to the region directory. */
137  public Path getRegionDir() {
138    return new Path(this.tableDir, this.regionInfoForFs.getEncodedName());
139  }
140
141  // ===========================================================================
142  //  Temp Helpers
143  // ===========================================================================
144  /** @return {@link Path} to the region's temp directory, used for file creations */
145  Path getTempDir() {
146    return new Path(getRegionDir(), REGION_TEMP_DIR);
147  }
148
149  /**
150   * Clean up any temp detritus that may have been left around from previous operation attempts.
151   */
152  void cleanupTempDir() throws IOException {
153    deleteDir(getTempDir());
154  }
155
156  // ===========================================================================
157  //  Store/StoreFile Helpers
158  // ===========================================================================
159  /**
160   * Returns the directory path of the specified family
161   * @param familyName Column Family Name
162   * @return {@link Path} to the directory of the specified family
163   */
164  public Path getStoreDir(final String familyName) {
165    return new Path(this.getRegionDir(), familyName);
166  }
167
168  /**
169   * Create the store directory for the specified family name
170   * @param familyName Column Family Name
171   * @return {@link Path} to the directory of the specified family
172   * @throws IOException if the directory creation fails.
173   */
174  Path createStoreDir(final String familyName) throws IOException {
175    Path storeDir = getStoreDir(familyName);
176    if(!fs.exists(storeDir) && !createDir(storeDir))
177      throw new IOException("Failed creating "+storeDir);
178    return storeDir;
179  }
180
181  /**
182   * Set the directory of CF to the specified storage policy. <br>
183   * <i>"LAZY_PERSIST"</i>, <i>"ALL_SSD"</i>, <i>"ONE_SSD"</i>, <i>"HOT"</i>, <i>"WARM"</i>,
184   * <i>"COLD"</i> <br>
185   * <br>
186   * See {@link org.apache.hadoop.hdfs.protocol.HdfsConstants} for more details.
187   * @param familyName The name of column family.
188   * @param policyName The name of the storage policy: 'HOT', 'COLD', etc.
189   * See see hadoop 2.6+ org.apache.hadoop.hdfs.protocol.HdfsConstants for possible list e.g
190   * 'COLD', 'WARM', 'HOT', 'ONE_SSD', 'ALL_SSD', 'LAZY_PERSIST'.
191   */
192  public void setStoragePolicy(String familyName, String policyName) {
193    FSUtils.setStoragePolicy(this.fs, getStoreDir(familyName), policyName);
194  }
195
196  /**
197   * Get the storage policy of the directory of CF.
198   * @param familyName The name of column family.
199   * @return Storage policy name, or {@code null} if not using {@link HFileSystem} or exception
200   *         thrown when trying to get policy
201   */
202  @Nullable
203  public String getStoragePolicyName(String familyName) {
204    if (this.fs instanceof HFileSystem) {
205      Path storeDir = getStoreDir(familyName);
206      return ((HFileSystem) this.fs).getStoragePolicyName(storeDir);
207    }
208
209    return null;
210  }
211
212  /**
213   * Returns the store files available for the family.
214   * This methods performs the filtering based on the valid store files.
215   * @param familyName Column Family Name
216   * @return a set of {@link StoreFileInfo} for the specified family.
217   */
218  public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
219    return getStoreFiles(Bytes.toString(familyName));
220  }
221
222  public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
223    return getStoreFiles(familyName, true);
224  }
225
226  /**
227   * Returns the store files available for the family.
228   * This methods performs the filtering based on the valid store files.
229   * @param familyName Column Family Name
230   * @return a set of {@link StoreFileInfo} for the specified family.
231   */
232  public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate)
233      throws IOException {
234    Path familyDir = getStoreDir(familyName);
235    FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
236    if (files == null) {
237      if (LOG.isTraceEnabled()) {
238        LOG.trace("No StoreFiles for: " + familyDir);
239      }
240      return null;
241    }
242
243    ArrayList<StoreFileInfo> storeFiles = new ArrayList<>(files.length);
244    for (FileStatus status: files) {
245      if (validate && !StoreFileInfo.isValid(status)) {
246        LOG.warn("Invalid StoreFile: " + status.getPath());
247        continue;
248      }
249      StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
250        regionInfoForFs, familyName, status.getPath());
251      storeFiles.add(info);
252
253    }
254    return storeFiles;
255  }
256
257  /**
258   * Returns the store files' LocatedFileStatus which available for the family.
259   * This methods performs the filtering based on the valid store files.
260   * @param familyName Column Family Name
261   * @return a list of store files' LocatedFileStatus for the specified family.
262   */
263  public static List<LocatedFileStatus> getStoreFilesLocatedStatus(
264      final HRegionFileSystem regionfs, final String familyName,
265      final boolean validate) throws IOException {
266    Path familyDir = regionfs.getStoreDir(familyName);
267    List<LocatedFileStatus> locatedFileStatuses = FSUtils.listLocatedStatus(
268        regionfs.getFileSystem(), familyDir);
269    if (locatedFileStatuses == null) {
270      if (LOG.isTraceEnabled()) {
271        LOG.trace("No StoreFiles for: " + familyDir);
272      }
273      return null;
274    }
275
276    List<LocatedFileStatus> validStoreFiles = Lists.newArrayList();
277    for (LocatedFileStatus status : locatedFileStatuses) {
278      if (validate && !StoreFileInfo.isValid(status)) {
279        LOG.warn("Invalid StoreFile: " + status.getPath());
280      } else {
281        validStoreFiles.add(status);
282      }
283    }
284    return validStoreFiles;
285  }
286
287  /**
288   * Return Qualified Path of the specified family/file
289   *
290   * @param familyName Column Family Name
291   * @param fileName File Name
292   * @return The qualified Path for the specified family/file
293   */
294  Path getStoreFilePath(final String familyName, final String fileName) {
295    Path familyDir = getStoreDir(familyName);
296    return new Path(familyDir, fileName).makeQualified(fs.getUri(), fs.getWorkingDirectory());
297  }
298
299  /**
300   * Return the store file information of the specified family/file.
301   *
302   * @param familyName Column Family Name
303   * @param fileName File Name
304   * @return The {@link StoreFileInfo} for the specified family/file
305   */
306  StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
307      throws IOException {
308    Path familyDir = getStoreDir(familyName);
309    return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
310      regionInfoForFs, familyName, new Path(familyDir, fileName));
311  }
312
313  /**
314   * Returns true if the specified family has reference files
315   * @param familyName Column Family Name
316   * @return true if family contains reference files
317   * @throws IOException
318   */
319  public boolean hasReferences(final String familyName) throws IOException {
320    Path storeDir = getStoreDir(familyName);
321    FileStatus[] files = FSUtils.listStatus(fs, storeDir);
322    if (files != null) {
323      for(FileStatus stat: files) {
324        if(stat.isDirectory()) {
325          continue;
326        }
327        if (StoreFileInfo.isReference(stat.getPath())) {
328          LOG.trace("Reference {}", stat.getPath());
329          return true;
330        }
331      }
332    }
333    return false;
334  }
335
336  /**
337   * Check whether region has Reference file
338   * @param htd table desciptor of the region
339   * @return true if region has reference file
340   * @throws IOException
341   */
342  public boolean hasReferences(final TableDescriptor htd) throws IOException {
343    for (ColumnFamilyDescriptor family : htd.getColumnFamilies()) {
344      if (hasReferences(family.getNameAsString())) {
345        return true;
346      }
347    }
348    return false;
349  }
350
351  /**
352   * @return the set of families present on disk
353   * @throws IOException
354   */
355  public Collection<String> getFamilies() throws IOException {
356    FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
357    if (fds == null) return null;
358
359    ArrayList<String> families = new ArrayList<>(fds.length);
360    for (FileStatus status: fds) {
361      families.add(status.getPath().getName());
362    }
363
364    return families;
365  }
366
367  /**
368   * Remove the region family from disk, archiving the store files.
369   * @param familyName Column Family Name
370   * @throws IOException if an error occours during the archiving
371   */
372  public void deleteFamily(final String familyName) throws IOException {
373    // archive family store files
374    HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName));
375
376    // delete the family folder
377    Path familyDir = getStoreDir(familyName);
378    if(fs.exists(familyDir) && !deleteDir(familyDir))
379      throw new IOException("Could not delete family " + familyName
380          + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "("
381          + regionInfoForFs.getEncodedName() + ")");
382  }
383
384  /**
385   * Generate a unique file name, used by createTempName() and commitStoreFile()
386   * @param suffix extra information to append to the generated name
387   * @return Unique file name
388   */
389  private static String generateUniqueName(final String suffix) {
390    String name = UUID.randomUUID().toString().replaceAll("-", "");
391    if (suffix != null) name += suffix;
392    return name;
393  }
394
395  /**
396   * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
397   * to get a safer file creation.
398   * <code>
399   * Path file = fs.createTempName();
400   * ...StoreFile.Writer(file)...
401   * fs.commitStoreFile("family", file);
402   * </code>
403   *
404   * @return Unique {@link Path} of the temporary file
405   */
406  public Path createTempName() {
407    return createTempName(null);
408  }
409
410  /**
411   * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
412   * to get a safer file creation.
413   * <code>
414   * Path file = fs.createTempName();
415   * ...StoreFile.Writer(file)...
416   * fs.commitStoreFile("family", file);
417   * </code>
418   *
419   * @param suffix extra information to append to the generated name
420   * @return Unique {@link Path} of the temporary file
421   */
422  public Path createTempName(final String suffix) {
423    return new Path(getTempDir(), generateUniqueName(suffix));
424  }
425
426  /**
427   * Move the file from a build/temp location to the main family store directory.
428   * @param familyName Family that will gain the file
429   * @param buildPath {@link Path} to the file to commit.
430   * @return The new {@link Path} of the committed file
431   * @throws IOException
432   */
433  public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
434    Path dstPath = preCommitStoreFile(familyName, buildPath, -1, false);
435    return commitStoreFile(buildPath, dstPath);
436  }
437
438  /**
439   * Generate the filename in the main family store directory for moving the file from a build/temp
440   *  location.
441   * @param familyName Family that will gain the file
442   * @param buildPath {@link Path} to the file to commit.
443   * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence number)
444   * @param generateNewName False if you want to keep the buildPath name
445   * @return The new {@link Path} of the to be committed file
446   * @throws IOException
447   */
448  private Path preCommitStoreFile(final String familyName, final Path buildPath,
449      final long seqNum, final boolean generateNewName) throws IOException {
450    Path storeDir = getStoreDir(familyName);
451    if(!fs.exists(storeDir) && !createDir(storeDir))
452      throw new IOException("Failed creating " + storeDir);
453
454    String name = buildPath.getName();
455    if (generateNewName) {
456      name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
457    }
458    Path dstPath = new Path(storeDir, name);
459    if (!fs.exists(buildPath)) {
460      throw new FileNotFoundException(buildPath.toString());
461    }
462    if (LOG.isDebugEnabled()) {
463      LOG.debug("Committing " + buildPath + " as " + dstPath);
464    }
465    return dstPath;
466  }
467
468  /*
469   * Moves file from staging dir to region dir
470   * @param buildPath {@link Path} to the file to commit.
471   * @param dstPath {@link Path} to the file under region dir
472   * @return The {@link Path} of the committed file
473   * @throws IOException
474   */
475  Path commitStoreFile(final Path buildPath, Path dstPath) throws IOException {
476    // buildPath exists, therefore not doing an exists() check.
477    if (!rename(buildPath, dstPath)) {
478      throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
479    }
480    return dstPath;
481  }
482
483  /**
484   * Archives the specified store file from the specified family.
485   * @param familyName Family that contains the store files
486   * @param filePath {@link Path} to the store file to remove
487   * @throws IOException if the archiving fails
488   */
489  public void removeStoreFile(final String familyName, final Path filePath)
490      throws IOException {
491    HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs,
492        this.tableDir, Bytes.toBytes(familyName), filePath);
493  }
494
495  /**
496   * Closes and archives the specified store files from the specified family.
497   * @param familyName Family that contains the store files
498   * @param storeFiles set of store files to remove
499   * @throws IOException if the archiving fails
500   */
501  public void removeStoreFiles(String familyName, Collection<HStoreFile> storeFiles)
502      throws IOException {
503    HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs,
504        this.tableDir, Bytes.toBytes(familyName), storeFiles);
505  }
506
507  /**
508   * Bulk load: Add a specified store file to the specified family.
509   * If the source file is on the same different file-system is moved from the
510   * source location to the destination location, otherwise is copied over.
511   *
512   * @param familyName Family that will gain the file
513   * @param srcPath {@link Path} to the file to import
514   * @param seqNum Bulk Load sequence number
515   * @return The destination {@link Path} of the bulk loaded file
516   * @throws IOException
517   */
518  Pair<Path, Path> bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
519      throws IOException {
520    // Copy the file if it's on another filesystem
521    FileSystem srcFs = srcPath.getFileSystem(conf);
522    srcPath = srcFs.resolvePath(srcPath);
523    FileSystem realSrcFs = srcPath.getFileSystem(conf);
524    FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;
525
526    // We can't compare FileSystem instances as equals() includes UGI instance
527    // as part of the comparison and won't work when doing SecureBulkLoad
528    // TODO deal with viewFS
529    if (!FSHDFSUtils.isSameHdfs(conf, realSrcFs, desFs)) {
530      LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
531          "the destination store. Copying file over to destination filesystem.");
532      Path tmpPath = createTempName();
533      FileUtil.copy(realSrcFs, srcPath, fs, tmpPath, false, conf);
534      LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
535      srcPath = tmpPath;
536    }
537
538    return new Pair<>(srcPath, preCommitStoreFile(familyName, srcPath, seqNum, true));
539  }
540
541  // ===========================================================================
542  //  Splits Helpers
543  // ===========================================================================
544  /** @return {@link Path} to the temp directory used during split operations */
545  Path getSplitsDir() {
546    return new Path(getRegionDir(), REGION_SPLITS_DIR);
547  }
548
549  public Path getSplitsDir(final RegionInfo hri) {
550    return new Path(getSplitsDir(), hri.getEncodedName());
551  }
552
553  /**
554   * Clean up any split detritus that may have been left around from previous split attempts.
555   */
556  void cleanupSplitsDir() throws IOException {
557    deleteDir(getSplitsDir());
558  }
559
560  /**
561   * Clean up any split detritus that may have been left around from previous
562   * split attempts.
563   * Call this method on initial region deploy.
564   * @throws IOException
565   */
566  void cleanupAnySplitDetritus() throws IOException {
567    Path splitdir = this.getSplitsDir();
568    if (!fs.exists(splitdir)) return;
569    // Look at the splitdir.  It could have the encoded names of the daughter
570    // regions we tried to make.  See if the daughter regions actually got made
571    // out under the tabledir.  If here under splitdir still, then the split did
572    // not complete.  Try and do cleanup.  This code WILL NOT catch the case
573    // where we successfully created daughter a but regionserver crashed during
574    // the creation of region b.  In this case, there'll be an orphan daughter
575    // dir in the filesystem.  TOOD: Fix.
576    FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
577    if (daughters != null) {
578      for (FileStatus daughter: daughters) {
579        Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
580        if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
581          throw new IOException("Failed delete of " + daughterDir);
582        }
583      }
584    }
585    cleanupSplitsDir();
586    LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
587  }
588
589  /**
590   * Remove daughter region
591   * @param regionInfo daughter {@link RegionInfo}
592   * @throws IOException
593   */
594  void cleanupDaughterRegion(final RegionInfo regionInfo) throws IOException {
595    Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
596    if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
597      throw new IOException("Failed delete of " + regionDir);
598    }
599  }
600
601  /**
602   * Commit a daughter region, moving it from the split temporary directory
603   * to the proper location in the filesystem.
604   *
605   * @param regionInfo daughter {@link org.apache.hadoop.hbase.client.RegionInfo}
606   * @throws IOException
607   */
608  public Path commitDaughterRegion(final RegionInfo regionInfo)
609      throws IOException {
610    Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
611    Path daughterTmpDir = this.getSplitsDir(regionInfo);
612
613    if (fs.exists(daughterTmpDir)) {
614
615      // Write HRI to a file in case we need to recover hbase:meta
616      Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
617      byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
618      writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
619
620      // Move the daughter temp dir to the table dir
621      if (!rename(daughterTmpDir, regionDir)) {
622        throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
623      }
624    }
625
626    return regionDir;
627  }
628
629  /**
630   * Create the region splits directory.
631   */
632  public void createSplitsDir(RegionInfo daughterA, RegionInfo daughterB) throws IOException {
633    Path splitdir = getSplitsDir();
634    if (fs.exists(splitdir)) {
635      LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
636      if (!deleteDir(splitdir)) {
637        throw new IOException("Failed deletion of " + splitdir + " before creating them again.");
638      }
639    }
640    // splitDir doesn't exists now. No need to do an exists() call for it.
641    if (!createDir(splitdir)) {
642      throw new IOException("Failed create of " + splitdir);
643    }
644    Path daughterATmpDir = getSplitsDir(daughterA);
645    if (!createDir(daughterATmpDir)) {
646      throw new IOException("Failed create of " + daughterATmpDir);
647    }
648    Path daughterBTmpDir = getSplitsDir(daughterB);
649    if (!createDir(daughterBTmpDir)) {
650      throw new IOException("Failed create of " + daughterBTmpDir);
651    }
652  }
653
654  /**
655   * Write out a split reference. Package local so it doesnt leak out of
656   * regionserver.
657   * @param hri {@link RegionInfo} of the destination
658   * @param familyName Column Family Name
659   * @param f File to split.
660   * @param splitRow Split Row
661   * @param top True if we are referring to the top half of the hfile.
662   * @param splitPolicy A split policy instance; be careful! May not be full populated; e.g. if
663   *                    this method is invoked on the Master side, then the RegionSplitPolicy will
664   *                    NOT have a reference to a Region.
665   * @return Path to created reference.
666   * @throws IOException
667   */
668  public Path splitStoreFile(RegionInfo hri, String familyName, HStoreFile f, byte[] splitRow,
669      boolean top, RegionSplitPolicy splitPolicy) throws IOException {
670    if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) {
671      // Check whether the split row lies in the range of the store file
672      // If it is outside the range, return directly.
673      f.initReader();
674      try {
675        if (top) {
676          //check if larger than last key.
677          Cell splitKey = PrivateCellUtil.createFirstOnRow(splitRow);
678          Optional<Cell> lastKey = f.getLastKey();
679          // If lastKey is null means storefile is empty.
680          if (!lastKey.isPresent()) {
681            return null;
682          }
683          if (f.getComparator().compare(splitKey, lastKey.get()) > 0) {
684            return null;
685          }
686        } else {
687          //check if smaller than first key
688          Cell splitKey = PrivateCellUtil.createLastOnRow(splitRow);
689          Optional<Cell> firstKey = f.getFirstKey();
690          // If firstKey is null means storefile is empty.
691          if (!firstKey.isPresent()) {
692            return null;
693          }
694          if (f.getComparator().compare(splitKey, firstKey.get()) < 0) {
695            return null;
696          }
697        }
698      } finally {
699        f.closeStoreFile(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true);
700      }
701    }
702
703    Path splitDir = new Path(getSplitsDir(hri), familyName);
704    // A reference to the bottom half of the hsf store file.
705    Reference r =
706      top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
707    // Add the referred-to regions name as a dot separated suffix.
708    // See REF_NAME_REGEX regex above.  The referred-to regions name is
709    // up in the path of the passed in <code>f</code> -- parentdir is family,
710    // then the directory above is the region name.
711    String parentRegionName = regionInfoForFs.getEncodedName();
712    // Write reference with same file id only with the other region name as
713    // suffix and into the new region location (under same family).
714    Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
715    return r.write(fs, p);
716  }
717
718  // ===========================================================================
719  //  Merge Helpers
720  // ===========================================================================
721  /** @return {@link Path} to the temp directory used during merge operations */
722  public Path getMergesDir() {
723    return new Path(getRegionDir(), REGION_MERGES_DIR);
724  }
725
726  Path getMergesDir(final RegionInfo hri) {
727    return new Path(getMergesDir(), hri.getEncodedName());
728  }
729
730  /**
731   * Clean up any merge detritus that may have been left around from previous merge attempts.
732   */
733  void cleanupMergesDir() throws IOException {
734    deleteDir(getMergesDir());
735  }
736
737  /**
738   * Remove merged region
739   * @param mergedRegion {@link RegionInfo}
740   * @throws IOException
741   */
742  public void cleanupMergedRegion(final RegionInfo mergedRegion) throws IOException {
743    Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
744    if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
745      throw new IOException("Failed delete of " + regionDir);
746    }
747  }
748
749  static boolean mkdirs(FileSystem fs, Configuration conf, Path dir) throws IOException {
750    if (FSUtils.isDistributedFileSystem(fs) ||
751        !conf.getBoolean(HConstants.ENABLE_DATA_FILE_UMASK, false)) {
752      return fs.mkdirs(dir);
753    }
754    FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
755    return fs.mkdirs(dir, perms);
756  }
757
758  /**
759   * Create the region merges directory, a temporary directory to accumulate
760   * merges in.
761   * @throws IOException If merges dir already exists or we fail to create it.
762   * @see HRegionFileSystem#cleanupMergesDir()
763   */
764  public void createMergesDir() throws IOException {
765    Path mergesdir = getMergesDir();
766    if (fs.exists(mergesdir)) {
767      LOG.info("{} directory exists. Deleting it to recreate it anew", mergesdir);
768      if (!fs.delete(mergesdir, true)) {
769        throw new IOException("Failed deletion of " + mergesdir + " before recreate.");
770      }
771    }
772    if (!mkdirs(fs, conf, mergesdir)) {
773      throw new IOException("Failed create of " + mergesdir);
774    }
775  }
776
777  /**
778   * Write out a merge reference under the given merges directory. Package local
779   * so it doesnt leak out of regionserver.
780   * @param mergedRegion {@link RegionInfo} of the merged region
781   * @param familyName Column Family Name
782   * @param f File to create reference.
783   * @param mergedDir
784   * @return Path to created reference.
785   * @throws IOException
786   */
787  public Path mergeStoreFile(RegionInfo mergedRegion, String familyName, HStoreFile f,
788      Path mergedDir) throws IOException {
789    Path referenceDir = new Path(new Path(mergedDir,
790        mergedRegion.getEncodedName()), familyName);
791    // A whole reference to the store file.
792    Reference r = Reference.createTopReference(regionInfoForFs.getStartKey());
793    // Add the referred-to regions name as a dot separated suffix.
794    // See REF_NAME_REGEX regex above. The referred-to regions name is
795    // up in the path of the passed in <code>f</code> -- parentdir is family,
796    // then the directory above is the region name.
797    String mergingRegionName = regionInfoForFs.getEncodedName();
798    // Write reference with same file id only with the other region name as
799    // suffix and into the new region location (under same family).
800    Path p = new Path(referenceDir, f.getPath().getName() + "."
801        + mergingRegionName);
802    return r.write(fs, p);
803  }
804
805  /**
806   * Commit a merged region, moving it from the merges temporary directory to
807   * the proper location in the filesystem.
808   * @param mergedRegionInfo merged region {@link RegionInfo}
809   * @throws IOException
810   */
811  public void commitMergedRegion(final RegionInfo mergedRegionInfo) throws IOException {
812    Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
813    Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
814    // Move the tmp dir to the expected location
815    if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
816      if (!fs.rename(mergedRegionTmpDir, regionDir)) {
817        throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
818            + regionDir);
819      }
820    }
821  }
822
823  // ===========================================================================
824  //  Create/Open/Delete Helpers
825  // ===========================================================================
826  /**
827   * Log the current state of the region
828   * @param LOG log to output information
829   * @throws IOException if an unexpected exception occurs
830   */
831  void logFileSystemState(final Logger LOG) throws IOException {
832    FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
833  }
834
835  /**
836   * @param hri
837   * @return Content of the file we write out to the filesystem under a region
838   * @throws IOException
839   */
840  private static byte[] getRegionInfoFileContent(final RegionInfo hri) throws IOException {
841    return RegionInfo.toDelimitedByteArray(hri);
842  }
843
844  /**
845   * Create a {@link RegionInfo} from the serialized version on-disk.
846   * @param fs {@link FileSystem} that contains the Region Info file
847   * @param regionDir {@link Path} to the Region Directory that contains the Info file
848   * @return An {@link RegionInfo} instance gotten from the Region Info file.
849   * @throws IOException if an error occurred during file open/read operation.
850   */
851  public static RegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
852      throws IOException {
853    FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
854    try {
855      return RegionInfo.parseFrom(in);
856    } finally {
857      in.close();
858    }
859  }
860
861  /**
862   * Write the .regioninfo file on-disk.
863   * Overwrites if exists already.
864   */
865  private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
866      final Path regionInfoFile, final byte[] content) throws IOException {
867    // First check to get the permissions
868    FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
869    // Write the RegionInfo file content
870    FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null);
871    try {
872      out.write(content);
873    } finally {
874      out.close();
875    }
876  }
877
878  /**
879   * Write out an info file under the stored region directory. Useful recovering mangled regions.
880   * If the regionInfo already exists on-disk, then we fast exit.
881   */
882  void checkRegionInfoOnFilesystem() throws IOException {
883    // Compose the content of the file so we can compare to length in filesystem. If not same,
884    // rewrite it (it may have been written in the old format using Writables instead of pb). The
885    // pb version is much shorter -- we write now w/o the toString version -- so checking length
886    // only should be sufficient. I don't want to read the file every time to check if it pb
887    // serialized.
888    byte[] content = getRegionInfoFileContent(regionInfoForFs);
889
890    // Verify if the region directory exists before opening a region. We need to do this since if
891    // the region directory doesn't exist we will re-create the region directory and a new HRI
892    // when HRegion.openHRegion() is called.
893    try {
894      FileStatus status = fs.getFileStatus(getRegionDir());
895    } catch (FileNotFoundException e) {
896      LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() +
897          " on table " + regionInfo.getTable());
898    }
899
900    try {
901      Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
902      FileStatus status = fs.getFileStatus(regionInfoFile);
903      if (status != null && status.getLen() == content.length) {
904        // Then assume the content good and move on.
905        // NOTE: that the length is not sufficient to define the the content matches.
906        return;
907      }
908
909      LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
910      if (!fs.delete(regionInfoFile, false)) {
911        throw new IOException("Unable to remove existing " + regionInfoFile);
912      }
913    } catch (FileNotFoundException e) {
914      LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() +
915          " on table " + regionInfo.getTable());
916    }
917
918    // Write HRI to a file in case we need to recover hbase:meta
919    writeRegionInfoOnFilesystem(content, true);
920  }
921
922  /**
923   * Write out an info file under the region directory. Useful recovering mangled regions.
924   * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
925   */
926  private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
927    byte[] content = getRegionInfoFileContent(regionInfoForFs);
928    writeRegionInfoOnFilesystem(content, useTempDir);
929  }
930
931  /**
932   * Write out an info file under the region directory. Useful recovering mangled regions.
933   * @param regionInfoContent serialized version of the {@link RegionInfo}
934   * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
935   */
936  private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
937      final boolean useTempDir) throws IOException {
938    Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
939    if (useTempDir) {
940      // Create in tmpDir and then move into place in case we crash after
941      // create but before close. If we don't successfully close the file,
942      // subsequent region reopens will fail the below because create is
943      // registered in NN.
944
945      // And then create the file
946      Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
947
948      // If datanode crashes or if the RS goes down just before the close is called while trying to
949      // close the created regioninfo file in the .tmp directory then on next
950      // creation we will be getting AlreadyCreatedException.
951      // Hence delete and create the file if exists.
952      if (FSUtils.isExists(fs, tmpPath)) {
953        FSUtils.delete(fs, tmpPath, true);
954      }
955
956      // Write HRI to a file in case we need to recover hbase:meta
957      writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
958
959      // Move the created file to the original path
960      if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
961        throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
962      }
963    } else {
964      // Write HRI to a file in case we need to recover hbase:meta
965      writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
966    }
967  }
968
969  /**
970   * Create a new Region on file-system.
971   * @param conf the {@link Configuration} to use
972   * @param fs {@link FileSystem} from which to add the region
973   * @param tableDir {@link Path} to where the table is being stored
974   * @param regionInfo {@link RegionInfo} for region to be added
975   * @throws IOException if the region creation fails due to a FileSystem exception.
976   */
977  public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
978      final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException {
979    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
980
981    // We only create a .regioninfo and the region directory if this is the default region replica
982    if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
983      Path regionDir = regionFs.getRegionDir();
984      if (fs.exists(regionDir)) {
985        LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
986      } else {
987        // Create the region directory
988        if (!createDirOnFileSystem(fs, conf, regionDir)) {
989          LOG.warn("Unable to create the region directory: " + regionDir);
990          throw new IOException("Unable to create region directory: " + regionDir);
991        }
992      }
993
994      // Write HRI to a file in case we need to recover hbase:meta
995      regionFs.writeRegionInfoOnFilesystem(false);
996    } else {
997      if (LOG.isDebugEnabled())
998        LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
999    }
1000    return regionFs;
1001  }
1002
1003  /**
1004   * Open Region from file-system.
1005   * @param conf the {@link Configuration} to use
1006   * @param fs {@link FileSystem} from which to add the region
1007   * @param tableDir {@link Path} to where the table is being stored
1008   * @param regionInfo {@link RegionInfo} for region to be added
1009   * @param readOnly True if you don't want to edit the region data
1010   * @throws IOException if the region creation fails due to a FileSystem exception.
1011   */
1012  public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
1013      final FileSystem fs, final Path tableDir, final RegionInfo regionInfo, boolean readOnly)
1014      throws IOException {
1015    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
1016    Path regionDir = regionFs.getRegionDir();
1017
1018    if (!fs.exists(regionDir)) {
1019      LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
1020      throw new IOException("The specified region do not exists on disk: " + regionDir);
1021    }
1022
1023    if (!readOnly) {
1024      // Cleanup temporary directories
1025      regionFs.cleanupTempDir();
1026      regionFs.cleanupSplitsDir();
1027      regionFs.cleanupMergesDir();
1028
1029      // If it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
1030      // Only create HRI if we are the default replica
1031      if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) {
1032        regionFs.checkRegionInfoOnFilesystem();
1033      } else {
1034        if (LOG.isDebugEnabled()) {
1035          LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
1036        }
1037      }
1038    }
1039
1040    return regionFs;
1041  }
1042
1043  /**
1044   * Remove the region from the table directory, archiving the region's hfiles.
1045   * @param conf the {@link Configuration} to use
1046   * @param fs {@link FileSystem} from which to remove the region
1047   * @param tableDir {@link Path} to where the table is being stored
1048   * @param regionInfo {@link RegionInfo} for region to be deleted
1049   * @throws IOException if the request cannot be completed
1050   */
1051  public static void deleteRegionFromFileSystem(final Configuration conf,
1052      final FileSystem fs, final Path tableDir, final RegionInfo regionInfo) throws IOException {
1053    HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
1054    Path regionDir = regionFs.getRegionDir();
1055
1056    if (!fs.exists(regionDir)) {
1057      LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
1058      return;
1059    }
1060
1061    if (LOG.isDebugEnabled()) {
1062      LOG.debug("DELETING region " + regionDir);
1063    }
1064
1065    // Archive region
1066    Path rootDir = FSUtils.getRootDir(conf);
1067    HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
1068
1069    // Delete empty region dir
1070    if (!fs.delete(regionDir, true)) {
1071      LOG.warn("Failed delete of " + regionDir);
1072    }
1073  }
1074
1075  /**
1076   * Creates a directory. Assumes the user has already checked for this directory existence.
1077   * @param dir
1078   * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1079   *         whether the directory exists or not, and returns true if it exists.
1080   * @throws IOException
1081   */
1082  boolean createDir(Path dir) throws IOException {
1083    int i = 0;
1084    IOException lastIOE = null;
1085    do {
1086      try {
1087        return mkdirs(fs, conf, dir);
1088      } catch (IOException ioe) {
1089        lastIOE = ioe;
1090        if (fs.exists(dir)) return true; // directory is present
1091        try {
1092          sleepBeforeRetry("Create Directory", i+1);
1093        } catch (InterruptedException e) {
1094          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1095        }
1096      }
1097    } while (++i <= hdfsClientRetriesNumber);
1098    throw new IOException("Exception in createDir", lastIOE);
1099  }
1100
1101  /**
1102   * Renames a directory. Assumes the user has already checked for this directory existence.
1103   * @param srcpath
1104   * @param dstPath
1105   * @return true if rename is successful.
1106   * @throws IOException
1107   */
1108  boolean rename(Path srcpath, Path dstPath) throws IOException {
1109    IOException lastIOE = null;
1110    int i = 0;
1111    do {
1112      try {
1113        return fs.rename(srcpath, dstPath);
1114      } catch (IOException ioe) {
1115        lastIOE = ioe;
1116        if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
1117        // dir is not there, retry after some time.
1118        try {
1119          sleepBeforeRetry("Rename Directory", i+1);
1120        } catch (InterruptedException e) {
1121          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1122        }
1123      }
1124    } while (++i <= hdfsClientRetriesNumber);
1125
1126    throw new IOException("Exception in rename", lastIOE);
1127  }
1128
1129  /**
1130   * Deletes a directory. Assumes the user has already checked for this directory existence.
1131   * @param dir
1132   * @return true if the directory is deleted.
1133   * @throws IOException
1134   */
1135  boolean deleteDir(Path dir) throws IOException {
1136    IOException lastIOE = null;
1137    int i = 0;
1138    do {
1139      try {
1140        return fs.delete(dir, true);
1141      } catch (IOException ioe) {
1142        lastIOE = ioe;
1143        if (!fs.exists(dir)) return true;
1144        // dir is there, retry deleting after some time.
1145        try {
1146          sleepBeforeRetry("Delete Directory", i+1);
1147        } catch (InterruptedException e) {
1148          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1149        }
1150      }
1151    } while (++i <= hdfsClientRetriesNumber);
1152
1153    throw new IOException("Exception in DeleteDir", lastIOE);
1154  }
1155
1156  /**
1157   * sleeping logic; handles the interrupt exception.
1158   */
1159  private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
1160    sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1161  }
1162
1163  /**
1164   * Creates a directory for a filesystem and configuration object. Assumes the user has already
1165   * checked for this directory existence.
1166   * @param fs
1167   * @param conf
1168   * @param dir
1169   * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1170   *         whether the directory exists or not, and returns true if it exists.
1171   * @throws IOException
1172   */
1173  private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1174      throws IOException {
1175    int i = 0;
1176    IOException lastIOE = null;
1177    int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1178      DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1179    int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1180      DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1181    do {
1182      try {
1183        return fs.mkdirs(dir);
1184      } catch (IOException ioe) {
1185        lastIOE = ioe;
1186        if (fs.exists(dir)) return true; // directory is present
1187        try {
1188          sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1189        } catch (InterruptedException e) {
1190          throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1191        }
1192      }
1193    } while (++i <= hdfsClientRetriesNumber);
1194
1195    throw new IOException("Exception in createDir", lastIOE);
1196  }
1197
1198  /**
1199   * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1200   * for this to avoid re-looking for the integer values.
1201   */
1202  private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1203      int hdfsClientRetriesNumber) throws InterruptedException {
1204    if (sleepMultiplier > hdfsClientRetriesNumber) {
1205      if (LOG.isDebugEnabled()) {
1206        LOG.debug(msg + ", retries exhausted");
1207      }
1208      return;
1209    }
1210    if (LOG.isDebugEnabled()) {
1211      LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1212    }
1213    Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1214  }
1215}