1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.io.InterruptedIOException;
25  import java.util.ArrayList;
26  import java.util.Collection;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.UUID;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataInputStream;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileStatus;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.FileUtil;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.fs.PathFilter;
42  import org.apache.hadoop.fs.permission.FsPermission;
43  import org.apache.hadoop.hbase.HColumnDescriptor;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HRegionInfo;
46  import org.apache.hadoop.hbase.HTableDescriptor;
47  import org.apache.hadoop.hbase.KeyValue;
48  import org.apache.hadoop.hbase.KeyValueUtil;
49  import org.apache.hadoop.hbase.backup.HFileArchiver;
50  import org.apache.hadoop.hbase.fs.HFileSystem;
51  import org.apache.hadoop.hbase.io.Reference;
52  import org.apache.hadoop.hbase.util.Bytes;
53  import org.apache.hadoop.hbase.util.FSHDFSUtils;
54  import org.apache.hadoop.hbase.util.FSUtils;
55  import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
56  
57  /**
58   * View to an on-disk Region.
59   * Provides the set of methods necessary to interact with the on-disk region data.
60   */
61  @InterfaceAudience.Private
62  public class HRegionFileSystem {
63    public static final Log LOG = LogFactory.getLog(HRegionFileSystem.class);
64  
65    /** Name of the region info file that resides just under the region directory. */
66    public final static String REGION_INFO_FILE = ".regioninfo";
67  
68    /** Temporary subdirectory of the region directory used for merges. */
69    public static final String REGION_MERGES_DIR = ".merges";
70  
71    /** Temporary subdirectory of the region directory used for splits. */
72    public static final String REGION_SPLITS_DIR = ".splits";
73  
74    /** Temporary subdirectory of the region directory used for compaction output. */
75    private static final String REGION_TEMP_DIR = ".tmp";
76  
77    private final HRegionInfo regionInfo;
78    //regionInfo for interacting with FS (getting encodedName, etc)
79    private final HRegionInfo regionInfoForFs;
80    private final Configuration conf;
81    private final Path tableDir;
82    private final FileSystem fs;
83  
84    /**
85     * In order to handle NN connectivity hiccups, one needs to retry non-idempotent operations at the
86     * client level.
87     */
88    private final int hdfsClientRetriesNumber;
89    private final int baseSleepBeforeRetries;
90    private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
91    private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
92  
93    /**
94     * Create a view to the on-disk region
95     * @param conf the {@link Configuration} to use
96     * @param fs {@link FileSystem} that contains the region
97     * @param tableDir {@link Path} to where the table is being stored
98     * @param regionInfo {@link HRegionInfo} for region
99     */
100   HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
101       final HRegionInfo regionInfo) {
102     this.fs = fs;
103     this.conf = conf;
104     this.tableDir = tableDir;
105     this.regionInfo = regionInfo;
106     this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo);
107     this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
108       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
109     this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
110       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
111   }
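  /*
   * Illustrative sketch (not part of this class): the retry knobs read by the constructor
   * above can be tuned through the Configuration before an HRegionFileSystem is built.
   * The property names and defaults are taken from this file; the values are only examples.
   *
   *   Configuration conf = HBaseConfiguration.create();
   *   conf.setInt("hdfs.client.retries.number", 10);         // retry attempts, default 10
   *   conf.setInt("hdfs.client.sleep.before.retries", 1000); // base sleep in ms, default 1000
   */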
112 
113   /** @return the underlying {@link FileSystem} */
114   public FileSystem getFileSystem() {
115     return this.fs;
116   }
117 
118   /** @return the {@link HRegionInfo} that describes this on-disk region view */
119   public HRegionInfo getRegionInfo() {
120     return this.regionInfo;
121   }
122 
123   /** @return {@link Path} to the table directory that contains this region. */
124   public Path getTableDir() {
125     return this.tableDir;
126   }
127 
128   /** @return {@link Path} to the region directory. */
129   public Path getRegionDir() {
130     return new Path(this.tableDir, this.regionInfoForFs.getEncodedName());
131   }
132 
133   // ===========================================================================
134   //  Temp Helpers
135   // ===========================================================================
136   /** @return {@link Path} to the region's temp directory, used for file creations */
137   Path getTempDir() {
138     return new Path(getRegionDir(), REGION_TEMP_DIR);
139   }
140 
141   /**
142    * Clean up any temp detritus that may have been left around from previous operation attempts.
143    */
144   void cleanupTempDir() throws IOException {
145     deleteDir(getTempDir());
146   }
147 
148   // ===========================================================================
149   //  Store/StoreFile Helpers
150   // ===========================================================================
151   /**
152    * Returns the directory path of the specified family
153    * @param familyName Column Family Name
154    * @return {@link Path} to the directory of the specified family
155    */
156   public Path getStoreDir(final String familyName) {
157     return new Path(this.getRegionDir(), familyName);
158   }
159 
160   /**
161    * Create the store directory for the specified family name
162    * @param familyName Column Family Name
163    * @return {@link Path} to the directory of the specified family
164    * @throws IOException if the directory creation fails.
165    */
166   Path createStoreDir(final String familyName) throws IOException {
167     Path storeDir = getStoreDir(familyName);
168     if(!fs.exists(storeDir) && !createDir(storeDir))
169       throw new IOException("Failed creating "+storeDir);
170     return storeDir;
171   }
172 
173   /**
174    * Returns the store files available for the family.
175    * This method filters out invalid store files.
176    * @param familyName Column Family Name
177    * @return a set of {@link StoreFileInfo} for the specified family.
178    */
179   public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
180     return getStoreFiles(Bytes.toString(familyName));
181   }
182 
183   public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
184     return getStoreFiles(familyName, true);
185   }
186 
187   /**
188    * Returns the store files available for the family.
189    * When {@code validate} is true, invalid store files are filtered out.
190    * @param familyName Column Family Name
191    * @return a set of {@link StoreFileInfo} for the specified family.
192    */
193   public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate)
194       throws IOException {
195     Path familyDir = getStoreDir(familyName);
196     FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
197     if (files == null) {
198       LOG.debug("No StoreFiles for: " + familyDir);
199       return null;
200     }
201 
202     ArrayList<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>(files.length);
203     for (FileStatus status: files) {
204       if (validate && !StoreFileInfo.isValid(status)) {
205         LOG.warn("Invalid StoreFile: " + status.getPath());
206         continue;
207       }
208       StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
209         regionInfoForFs, familyName, status);
210       storeFiles.add(info);
211 
212     }
213     return storeFiles;
214   }
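  /*
   * Illustrative sketch (hypothetical caller code): walking every family and its store files
   * through getFamilies() and getStoreFiles(). Note that both return null, not an empty
   * collection, when nothing is found on disk.
   *
   *   Collection<String> families = regionFs.getFamilies();
   *   if (families != null) {
   *     for (String family : families) {
   *       Collection<StoreFileInfo> infos = regionFs.getStoreFiles(family);
   *       if (infos == null) continue;   // no store files for this family
   *       for (StoreFileInfo info : infos) {
   *         LOG.debug("Found store file " + info + " in family " + family);
   *       }
   *     }
   *   }
   */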
215 
216   /**
217    * Return Qualified Path of the specified family/file
218    *
219    * @param familyName Column Family Name
220    * @param fileName File Name
221    * @return The qualified Path for the specified family/file
222    */
223   Path getStoreFilePath(final String familyName, final String fileName) {
224     Path familyDir = getStoreDir(familyName);
225     return new Path(familyDir, fileName).makeQualified(this.fs);
226   }
227 
228   /**
229    * Return the store file information of the specified family/file.
230    *
231    * @param familyName Column Family Name
232    * @param fileName File Name
233    * @return The {@link StoreFileInfo} for the specified family/file
234    */
235   StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
236       throws IOException {
237     Path familyDir = getStoreDir(familyName);
238     FileStatus status = fs.getFileStatus(new Path(familyDir, fileName));
239     return new StoreFileInfo(this.conf, this.fs, status);
240   }
241 
242   /**
243    * Returns true if the specified family has reference files
244    * @param familyName Column Family Name
245    * @return true if family contains reference files
246    * @throws IOException
247    */
248   public boolean hasReferences(final String familyName) throws IOException {
249     FileStatus[] files = FSUtils.listStatus(fs, getStoreDir(familyName),
250       new PathFilter () {
251         @Override
252         public boolean accept(Path path) {
253           return StoreFileInfo.isReference(path);
254         }
255       }
256     );
257     return files != null && files.length > 0;
258   }
259 
260   /**
261    * Check whether region has Reference file
262    * @param htd table descriptor of the region
263    * @return true if region has reference file
264    * @throws IOException
265    */
266   public boolean hasReferences(final HTableDescriptor htd) throws IOException {
267     for (HColumnDescriptor family : htd.getFamilies()) {
268       if (hasReferences(family.getNameAsString())) {
269         return true;
270       }
271     }
272     return false;
273   }
274 
275   /**
276    * @return the set of families present on disk
277    * @throws IOException
278    */
279   public Collection<String> getFamilies() throws IOException {
280     FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
281     if (fds == null) return null;
282 
283     ArrayList<String> families = new ArrayList<String>(fds.length);
284     for (FileStatus status: fds) {
285       families.add(status.getPath().getName());
286     }
287 
288     return families;
289   }
290 
291   /**
292    * Remove the region family from disk, archiving the store files.
293    * @param familyName Column Family Name
294    * @throws IOException if an error occurs during the archiving
295    */
296   public void deleteFamily(final String familyName) throws IOException {
297     // archive family store files
298     HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName));
299 
300     // delete the family folder
301     Path familyDir = getStoreDir(familyName);
302     if(fs.exists(familyDir) && !deleteDir(familyDir))
303       throw new IOException("Could not delete family " + familyName
304           + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "("
305           + regionInfoForFs.getEncodedName() + ")");
306   }
307 
308   /**
309    * Generate a unique file name, used by createTempName() and commitStoreFile()
310    * @param suffix extra information to append to the generated name
311    * @return Unique file name
312    */
313   private static String generateUniqueName(final String suffix) {
314     String name = UUID.randomUUID().toString().replaceAll("-", "");
315     if (suffix != null) name += suffix;
316     return name;
317   }
318 
319   /**
320    * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
321    * for safer file creation.
322    * <code>
323    * Path file = fs.createTempName();
324    * ...StoreFile.Writer(file)...
325    * fs.commitStoreFile("family", file);
326    * </code>
327    *
328    * @return Unique {@link Path} of the temporary file
329    */
330   public Path createTempName() {
331     return createTempName(null);
332   }
333 
334   /**
335    * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
336    * for safer file creation.
337    * <code>
338    * Path file = fs.createTempName();
339    * ...StoreFile.Writer(file)...
340    * fs.commitStoreFile("family", file);
341    * </code>
342    *
343    * @param suffix extra information to append to the generated name
344    * @return Unique {@link Path} of the temporary file
345    */
346   public Path createTempName(final String suffix) {
347     return new Path(getTempDir(), generateUniqueName(suffix));
348   }
349 
350   /**
351    * Move the file from a build/temp location to the main family store directory.
352    * @param familyName Family that will gain the file
353    * @param buildPath {@link Path} to the file to commit.
354    * @return The new {@link Path} of the committed file
355    * @throws IOException
356    */
357   public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
358     return commitStoreFile(familyName, buildPath, -1, false);
359   }
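  /*
   * Illustrative sketch expanding the createTempName()/commitStoreFile() contract documented
   * above. Creating the actual store file writer is elided here since it is outside this
   * class; only the temp-then-commit flow is taken from this file.
   *
   *   Path tmp = regionFs.createTempName();
   *   // ... write and close the new store file at 'tmp' ...
   *   Path committed = regionFs.commitStoreFile("f", tmp);   // renamed into the "f" store directory
   */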
360 
361   /**
362    * Move the file from a build/temp location to the main family store directory.
363    * @param familyName Family that will gain the file
364    * @param buildPath {@link Path} to the file to commit.
365    * @param seqNum Sequence Number to append to the file name (less than 0 if no sequence number)
366    * @param generateNewName False if you want to keep the buildPath name
367    * @return The new {@link Path} of the committed file
368    * @throws IOException
369    */
370   private Path commitStoreFile(final String familyName, final Path buildPath,
371       final long seqNum, final boolean generateNewName) throws IOException {
372     Path storeDir = getStoreDir(familyName);
373     if(!fs.exists(storeDir) && !createDir(storeDir))
374       throw new IOException("Failed creating " + storeDir);
375 
376     String name = buildPath.getName();
377     if (generateNewName) {
378       name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
379     }
380     Path dstPath = new Path(storeDir, name);
381     if (!fs.exists(buildPath)) {
382       throw new FileNotFoundException(buildPath.toString());
383     }
384     LOG.debug("Committing store file " + buildPath + " as " + dstPath);
385     // buildPath exists, therefore not doing an exists() check.
386     if (!rename(buildPath, dstPath)) {
387       throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
388     }
389     return dstPath;
390   }
391 
392 
393   /**
394    * Moves multiple store files into the corresponding family store directories of this region.
395    * @param storeFiles list of store files divided by family
396    * @throws IOException
397    */
398   void commitStoreFiles(final Map<byte[], List<StoreFile>> storeFiles) throws IOException {
399     for (Map.Entry<byte[], List<StoreFile>> es: storeFiles.entrySet()) {
400       String familyName = Bytes.toString(es.getKey());
401       for (StoreFile sf: es.getValue()) {
402         commitStoreFile(familyName, sf.getPath());
403       }
404     }
405   }
406 
407   /**
408    * Archives the specified store file from the specified family.
409    * @param familyName Family that contains the store files
410    * @param filePath {@link Path} to the store file to remove
411    * @throws IOException if the archiving fails
412    */
413   public void removeStoreFile(final String familyName, final Path filePath)
414       throws IOException {
415     HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs,
416         this.tableDir, Bytes.toBytes(familyName), filePath);
417   }
418 
419   /**
420    * Closes and archives the specified store files from the specified family.
421    * @param familyName Family that contains the store files
422    * @param storeFiles set of store files to remove
423    * @throws IOException if the archiving fails
424    */
425   public void removeStoreFiles(final String familyName, final Collection<StoreFile> storeFiles)
426       throws IOException {
427     HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs,
428         this.tableDir, Bytes.toBytes(familyName), storeFiles);
429   }
430 
431   /**
432    * Bulk load: Add a specified store file to the specified family.
433    * If the source file is on the same file-system it is moved from the
434    * source location to the destination location; otherwise it is copied over.
435    *
436    * @param familyName Family that will gain the file
437    * @param srcPath {@link Path} to the file to import
438    * @param seqNum Bulk Load sequence number
439    * @return The destination {@link Path} of the bulk loaded file
440    * @throws IOException
441    */
442   Path bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
443       throws IOException {
444     // Copy the file if it's on another filesystem
445     FileSystem srcFs = srcPath.getFileSystem(conf);
446     FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;
447 
448     // We can't compare FileSystem instances as equals() includes UGI instance
449     // as part of the comparison and won't work when doing SecureBulkLoad
450     // TODO deal with viewFS
451     if (!FSHDFSUtils.isSameHdfs(conf, srcFs, desFs)) {
452       LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
453           "the destination store. Copying file over to destination filesystem.");
454       Path tmpPath = createTempName();
455       FileUtil.copy(srcFs, srcPath, fs, tmpPath, false, conf);
456       LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
457       srcPath = tmpPath;
458     }
459 
460     return commitStoreFile(familyName, srcPath, seqNum, true);
461   }
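  /*
   * Illustrative sketch (hypothetical caller code) for the bulk-load path above: a source file
   * on a different filesystem is first copied into the region .tmp directory, then committed
   * under a fresh name carrying a "_SeqId_<seqNum>_" suffix.
   *
   *   Path staged = new Path("hdfs://other-cluster/staging/f/hfile");   // hypothetical source
   *   Path loaded = regionFs.bulkLoadStoreFile("f", staged, 42L);       // 42 = bulk load seqNum
   */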
462 
463   // ===========================================================================
464   //  Splits Helpers
465   // ===========================================================================
466   /** @return {@link Path} to the temp directory used during split operations */
467   Path getSplitsDir() {
468     return new Path(getRegionDir(), REGION_SPLITS_DIR);
469   }
470 
471   Path getSplitsDir(final HRegionInfo hri) {
472     return new Path(getSplitsDir(), hri.getEncodedName());
473   }
474 
475   /**
476    * Clean up any split detritus that may have been left around from previous split attempts.
477    */
478   void cleanupSplitsDir() throws IOException {
479     deleteDir(getSplitsDir());
480   }
481 
482   /**
483    * Clean up any split detritus that may have been left around from previous
484    * split attempts.
485    * Call this method on initial region deploy.
486    * @throws IOException
487    */
488   void cleanupAnySplitDetritus() throws IOException {
489     Path splitdir = this.getSplitsDir();
490     if (!fs.exists(splitdir)) return;
491     // Look at the splitdir.  It could have the encoded names of the daughter
492     // regions we tried to make.  See if the daughter regions actually got made
493     // out under the tabledir.  If here under splitdir still, then the split did
494     // not complete.  Try and do cleanup.  This code WILL NOT catch the case
495     // where we successfully created daughter a but regionserver crashed during
496     // the creation of region b.  In this case, there'll be an orphan daughter
497     // dir in the filesystem.  TODO: Fix.
498     FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
499     if (daughters != null) {
500       for (FileStatus daughter: daughters) {
501         Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
502         if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
503           throw new IOException("Failed delete of " + daughterDir);
504         }
505       }
506     }
507     cleanupSplitsDir();
508     LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
509   }
510 
511   /**
512    * Remove daughter region
513    * @param regionInfo daughter {@link HRegionInfo}
514    * @throws IOException
515    */
516   void cleanupDaughterRegion(final HRegionInfo regionInfo) throws IOException {
517     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
518     if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
519       throw new IOException("Failed delete of " + regionDir);
520     }
521   }
522 
523   /**
524    * Commit a daughter region, moving it from the split temporary directory
525    * to the proper location in the filesystem.
526    * @param regionInfo daughter {@link HRegionInfo}
527    * @throws IOException
528    */
529   Path commitDaughterRegion(final HRegionInfo regionInfo) throws IOException {
530     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
531     Path daughterTmpDir = this.getSplitsDir(regionInfo);
532     if (fs.exists(daughterTmpDir)) {
533       // Write HRI to a file in case we need to recover hbase:meta
534       Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
535       byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
536       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
537 
538       // Move the daughter temp dir to the table dir
539       if (!rename(daughterTmpDir, regionDir)) {
540         throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
541       }
542     }
543     return regionDir;
544   }
545 
546   /**
547    * Create the region splits directory.
548    */
549   void createSplitsDir() throws IOException {
550     Path splitdir = getSplitsDir();
551     if (fs.exists(splitdir)) {
552       LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
553       if (!deleteDir(splitdir)) {
554         throw new IOException("Failed deletion of " + splitdir
555             + " before creating it again.");
556       }
557     }
558     // splitdir doesn't exist now. No need to do an exists() call for it.
559     if (!createDir(splitdir)) {
560       throw new IOException("Failed create of " + splitdir);
561     }
562   }
563 
564   /**
565    * Write out a split reference. Package local so it doesn't leak out of
566    * regionserver.
567    * @param hri {@link HRegionInfo} of the destination
568    * @param familyName Column Family Name
569    * @param f File to split.
570    * @param splitRow Split Row
571    * @param top True if we are referring to the top half of the hfile.
572    * @return Path to created reference.
573    * @throws IOException
574    */
575   Path splitStoreFile(final HRegionInfo hri, final String familyName,
576       final StoreFile f, final byte[] splitRow, final boolean top) throws IOException {
577 
578     // Check whether the split row lies in the range of the store file
579     // If it is outside the range, return directly.
580     if (top) {
581       //check if larger than last key.
582       KeyValue splitKey = KeyValueUtil.createFirstOnRow(splitRow);
583       byte[] lastKey = f.createReader().getLastKey();
584       // A null lastKey means the storefile is empty.
585       if (lastKey == null) return null;
586       if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
587           splitKey.getKeyOffset(), splitKey.getKeyLength(), lastKey, 0, lastKey.length) > 0) {
588         return null;
589       }
590     } else {
591       //check if smaller than first key
592       KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow);
593       byte[] firstKey = f.createReader().getFirstKey();
594       // A null firstKey means the storefile is empty.
595       if (firstKey == null) return null;
596       if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
597           splitKey.getKeyOffset(), splitKey.getKeyLength(), firstKey, 0, firstKey.length) < 0) {
598         return null;
599       }
600     }
601 
602     f.getReader().close(true);
603 
604     Path splitDir = new Path(getSplitsDir(hri), familyName);
605     // A reference to the top or bottom half of the store file, depending on the split side.
606     Reference r =
607       top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
608     // Add the referred-to region's name as a dot-separated suffix.
609     // See REF_NAME_REGEX regex above.  The referred-to region's name is
610     // up in the path of the passed in <code>f</code> -- parentdir is family,
611     // then the directory above is the region name.
612     String parentRegionName = regionInfoForFs.getEncodedName();
613     // Write reference with same file id only with the other region name as
614     // suffix and into the new region location (under same family).
615     Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
616     return r.write(fs, p);
617   }
618 
619   // ===========================================================================
620   //  Merge Helpers
621   // ===========================================================================
622   /** @return {@link Path} to the temp directory used during merge operations */
623   Path getMergesDir() {
624     return new Path(getRegionDir(), REGION_MERGES_DIR);
625   }
626 
627   Path getMergesDir(final HRegionInfo hri) {
628     return new Path(getMergesDir(), hri.getEncodedName());
629   }
630 
631   /**
632    * Clean up any merge detritus that may have been left around from previous merge attempts.
633    */
634   void cleanupMergesDir() throws IOException {
635     deleteDir(getMergesDir());
636   }
637 
638   /**
639    * Remove merged region
640    * @param mergedRegion {@link HRegionInfo}
641    * @throws IOException
642    */
643   void cleanupMergedRegion(final HRegionInfo mergedRegion) throws IOException {
644     Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
645     if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
646       throw new IOException("Failed delete of " + regionDir);
647     }
648   }
649 
650   /**
651    * Create the region merges directory.
652    * @throws IOException If merges dir already exists or we fail to create it.
653    * @see HRegionFileSystem#cleanupMergesDir()
654    */
655   void createMergesDir() throws IOException {
656     Path mergesdir = getMergesDir();
657     if (fs.exists(mergesdir)) {
658       LOG.info("The " + mergesdir
659           + " directory exists.  Hence deleting it to recreate it");
660       if (!fs.delete(mergesdir, true)) {
661         throw new IOException("Failed deletion of " + mergesdir
662             + " before creating it again.");
663       }
664     }
665     if (!fs.mkdirs(mergesdir))
666       throw new IOException("Failed create of " + mergesdir);
667   }
668 
669   /**
670    * Write out a merge reference under the given merges directory. Package local
671    * so it doesn't leak out of regionserver.
672    * @param mergedRegion {@link HRegionInfo} of the merged region
673    * @param familyName Column Family Name
674    * @param f File to create reference.
675    * @param mergedDir temporary merges directory under which the reference file is written
676    * @return Path to created reference.
677    * @throws IOException
678    */
679   Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName,
680       final StoreFile f, final Path mergedDir)
681       throws IOException {
682     Path referenceDir = new Path(new Path(mergedDir,
683         mergedRegion.getEncodedName()), familyName);
684     // A whole reference to the store file.
685     Reference r = Reference.createTopReference(regionInfoForFs.getStartKey());
686     // Add the referred-to region's name as a dot-separated suffix.
687     // See REF_NAME_REGEX regex above. The referred-to region's name is
688     // up in the path of the passed in <code>f</code> -- parentdir is family,
689     // then the directory above is the region name.
690     String mergingRegionName = regionInfoForFs.getEncodedName();
691     // Write reference with same file id only with the other region name as
692     // suffix and into the new region location (under same family).
693     Path p = new Path(referenceDir, f.getPath().getName() + "."
694         + mergingRegionName);
695     return r.write(fs, p);
696   }
697 
698   /**
699    * Commit a merged region, moving it from the merges temporary directory to
700    * the proper location in the filesystem.
701    * @param mergedRegionInfo merged region {@link HRegionInfo}
702    * @throws IOException
703    */
704   void commitMergedRegion(final HRegionInfo mergedRegionInfo) throws IOException {
705     Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
706     Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
707     // Move the tmp dir to the expected location
708     if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
709       if (!fs.rename(mergedRegionTmpDir, regionDir)) {
710         throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
711             + regionDir);
712       }
713     }
714   }
715 
716   // ===========================================================================
717   //  Create/Open/Delete Helpers
718   // ===========================================================================
719   /**
720    * Log the current state of the region
721    * @param LOG log to output information
722    * @throws IOException if an unexpected exception occurs
723    */
724   void logFileSystemState(final Log LOG) throws IOException {
725     FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
726   }
727 
728   /**
729    * @param hri
730    * @return Content of the file we write out to the filesystem under a region
731    * @throws IOException
732    */
733   private static byte[] getRegionInfoFileContent(final HRegionInfo hri) throws IOException {
734     return hri.toDelimitedByteArray();
735   }
736 
737   /**
738    * Create a {@link HRegionInfo} from the serialized version on-disk.
739    * @param fs {@link FileSystem} that contains the Region Info file
740    * @param regionDir {@link Path} to the Region Directory that contains the Info file
741    * @return An {@link HRegionInfo} instance gotten from the Region Info file.
742    * @throws IOException if an error occurred during file open/read operation.
743    */
744   public static HRegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
745       throws IOException {
746     FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
747     try {
748       return HRegionInfo.parseFrom(in);
749     } finally {
750       in.close();
751     }
752   }
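  /*
   * Illustrative sketch: recovering the HRegionInfo of an arbitrary region directory from its
   * .regioninfo file with the static helper above. The region directory path is hypothetical.
   *
   *   Path regionDir = new Path("/hbase/data/default/t1/0123456789abcdef0123456789abcdef");
   *   HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
   */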
753 
754   /**
755    * Write the .regioninfo file on-disk.
756    */
757   private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
758       final Path regionInfoFile, final byte[] content) throws IOException {
759     // First check to get the permissions
760     FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
761     // Write the RegionInfo file content
762     FSDataOutputStream out = FSUtils.create(fs, regionInfoFile, perms, null);
763     try {
764       out.write(content);
765     } finally {
766       out.close();
767     }
768   }
769 
770   /**
771    * Write out an info file under the stored region directory. Useful for recovering mangled regions.
772    * If the regionInfo already exists on-disk, then we fast exit.
773    */
774   void checkRegionInfoOnFilesystem() throws IOException {
775     // Compose the content of the file so we can compare to length in filesystem. If not same,
776     // rewrite it (it may have been written in the old format using Writables instead of pb). The
777     // pb version is much shorter -- we write now w/o the toString version -- so checking length
778     // only should be sufficient. I don't want to read the file every time to check if it pb
779     // serialized.
780     byte[] content = getRegionInfoFileContent(regionInfoForFs);
781     try {
782       Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
783 
784       FileStatus status = fs.getFileStatus(regionInfoFile);
785       if (status != null && status.getLen() == content.length) {
786       // Then assume the content is good and move on.
787       // NOTE: the length alone is not sufficient to verify that the content matches.
788         return;
789       }
790 
791       LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
792       if (!fs.delete(regionInfoFile, false)) {
793         throw new IOException("Unable to remove existing " + regionInfoFile);
794       }
795     } catch (FileNotFoundException e) {
796       LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() +
797           " on table " + regionInfo.getTable());
798     }
799 
800     // Write HRI to a file in case we need to recover hbase:meta
801     writeRegionInfoOnFilesystem(content, true);
802   }
803 
804   /**
805    * Write out an info file under the region directory. Useful for recovering mangled regions.
806    * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
807    */
808   private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
809     byte[] content = getRegionInfoFileContent(regionInfoForFs);
810     writeRegionInfoOnFilesystem(content, useTempDir);
811   }
812 
813   /**
814    * Write out an info file under the region directory. Useful for recovering mangled regions.
815    * @param regionInfoContent serialized version of the {@link HRegionInfo}
816    * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
817    */
818   private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
819       final boolean useTempDir) throws IOException {
820     Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
821     if (useTempDir) {
822       // Create in tmpDir and then move into place in case we crash after
823       // create but before close. If we don't successfully close the file,
824       // subsequent region reopens will fail below because the create is
825       // registered in the NN.
826 
827       // And then create the file
828       Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
829 
830       // If a datanode crashes, or the RS goes down just before close() is called on the
831       // regioninfo file created in the .tmp directory, then the next creation attempt
832       // will get an AlreadyCreatedException.
833       // Hence delete the file first if it already exists.
834       if (FSUtils.isExists(fs, tmpPath)) {
835         FSUtils.delete(fs, tmpPath, true);
836       }
837 
838       // Write HRI to a file in case we need to recover hbase:meta
839       writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
840 
841       // Move the created file to the original path
842       if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
843         throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
844       }
845     } else {
846       // Write HRI to a file in case we need to recover hbase:meta
847       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
848     }
849   }
850 
851   /**
852    * Create a new Region on file-system.
853    * @param conf the {@link Configuration} to use
854    * @param fs {@link FileSystem} from which to add the region
855    * @param tableDir {@link Path} to where the table is being stored
856    * @param regionInfo {@link HRegionInfo} for region to be added
857    * @throws IOException if the region creation fails due to a FileSystem exception.
858    */
859   public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
860       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
861     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
862     Path regionDir = regionFs.getRegionDir();
863 
864     if (fs.exists(regionDir)) {
865       LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
866       throw new IOException("The specified region already exists on disk: " + regionDir);
867     }
868 
869     // Create the region directory
870     if (!createDirOnFileSystem(fs, conf, regionDir)) {
871       LOG.warn("Unable to create the region directory: " + regionDir);
872       throw new IOException("Unable to create region directory: " + regionDir);
873     }
874 
875     // Write HRI to a file in case we need to recover hbase:meta
876     regionFs.writeRegionInfoOnFilesystem(false);
877     return regionFs;
878   }
879 
880   /**
881    * Open Region from file-system.
882    * @param conf the {@link Configuration} to use
883    * @param fs {@link FileSystem} from which to add the region
884    * @param tableDir {@link Path} to where the table is being stored
885    * @param regionInfo {@link HRegionInfo} for region to be added
886    * @param readOnly True if you don't want to edit the region data
887    * @throws IOException if the region creation fails due to a FileSystem exception.
888    */
889   public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
890       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo, boolean readOnly)
891       throws IOException {
892     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
893     Path regionDir = regionFs.getRegionDir();
894 
895     if (!fs.exists(regionDir)) {
896       LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
897       throw new IOException("The specified region do not exists on disk: " + regionDir);
898     }
899 
900     if (!readOnly) {
901       // Cleanup temporary directories
902       regionFs.cleanupTempDir();
903       regionFs.cleanupSplitsDir();
904       regionFs.cleanupMergesDir();
905 
906       // If it doesn't exist, write the HRI to a file, in case we need to recover hbase:meta
907       regionFs.checkRegionInfoOnFilesystem();
908     }
909 
910     return regionFs;
911   }
912 
913   /**
914    * Remove the region from the table directory, archiving the region's hfiles.
915    * @param conf the {@link Configuration} to use
916    * @param fs {@link FileSystem} from which to remove the region
917    * @param tableDir {@link Path} to where the table is being stored
918    * @param regionInfo {@link HRegionInfo} for region to be deleted
919    * @throws IOException if the request cannot be completed
920    */
921   public static void deleteRegionFromFileSystem(final Configuration conf,
922       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
923     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
924     Path regionDir = regionFs.getRegionDir();
925 
926     if (!fs.exists(regionDir)) {
927       LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
928       return;
929     }
930 
931     if (LOG.isDebugEnabled()) {
932       LOG.debug("DELETING region " + regionDir);
933     }
934 
935     // Archive region
936     Path rootDir = FSUtils.getRootDir(conf);
937     HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
938 
939     // Delete empty region dir
940     if (!fs.delete(regionDir, true)) {
941       LOG.warn("Failed delete of " + regionDir);
942     }
943   }
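  /*
   * Illustrative sketch of the region directory lifecycle driven by the static helpers above:
   * create, open (which also cleans up temp/splits/merges detritus and rewrites .regioninfo if
   * needed), and delete (which archives the hfiles before removing the directory).
   *
   *   HRegionFileSystem rfs = HRegionFileSystem.createRegionOnFileSystem(conf, fs, tableDir, hri);
   *   rfs = HRegionFileSystem.openRegionFromFileSystem(conf, fs, tableDir, hri, false);
   *   HRegionFileSystem.deleteRegionFromFileSystem(conf, fs, tableDir, hri);
   */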
944 
945   /**
946    * Creates a directory. Assumes the user has already checked for this directory's existence.
947    * @param dir
948    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
949    *         whether the directory exists or not, and returns true if it exists.
950    * @throws IOException
951    */
952   boolean createDir(Path dir) throws IOException {
953     int i = 0;
954     IOException lastIOE = null;
955     do {
956       try {
957         return fs.mkdirs(dir);
958       } catch (IOException ioe) {
959         lastIOE = ioe;
960         if (fs.exists(dir)) return true; // directory is present
961         try {
962           sleepBeforeRetry("Create Directory", i+1);
963         } catch (InterruptedException e) {
964           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
965         }
966       }
967     } while (++i <= hdfsClientRetriesNumber);
968     throw new IOException("Exception in createDir", lastIOE);
969   }
970 
971   /**
972    * Renames a file or directory. Assumes the user has already checked for this directory's existence.
973    * @param srcpath
974    * @param dstPath
975    * @return true if rename is successful.
976    * @throws IOException
977    */
978   boolean rename(Path srcpath, Path dstPath) throws IOException {
979     IOException lastIOE = null;
980     int i = 0;
981     do {
982       try {
983         return fs.rename(srcpath, dstPath);
984       } catch (IOException ioe) {
985         lastIOE = ioe;
986         if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
987         // dir is not there, retry after some time.
988         try {
989           sleepBeforeRetry("Rename Directory", i+1);
990         } catch (InterruptedException e) {
991           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
992         }
993       }
994     } while (++i <= hdfsClientRetriesNumber);
995 
996     throw new IOException("Exception in rename", lastIOE);
997   }
998 
999   /**
1000    * Deletes a directory. Assumes the user has already checked for this directory's existence.
1001    * @param dir
1002    * @return true if the directory is deleted.
1003    * @throws IOException
1004    */
1005   boolean deleteDir(Path dir) throws IOException {
1006     IOException lastIOE = null;
1007     int i = 0;
1008     do {
1009       try {
1010         return fs.delete(dir, true);
1011       } catch (IOException ioe) {
1012         lastIOE = ioe;
1013         if (!fs.exists(dir)) return true;
1014         // dir is there, retry deleting after some time.
1015         try {
1016           sleepBeforeRetry("Delete Directory", i+1);
1017         } catch (InterruptedException e) {
1018           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1019         }
1020       }
1021     } while (++i <= hdfsClientRetriesNumber);
1022 
1023     throw new IOException("Exception in DeleteDir", lastIOE);
1024   }
1025 
1026   /**
1027    * Sleeping logic; handles the interrupt exception.
1028    */
1029   private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
1030     sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1031   }
1032 
1033   /**
1034    * Creates a directory for a filesystem and configuration object. Assumes the user has already
1035    * checked for this directory's existence.
1036    * @param fs
1037    * @param conf
1038    * @param dir
1039    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1040    *         whether the directory exists or not, and returns true if it exists.
1041    * @throws IOException
1042    */
1043   private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1044       throws IOException {
1045     int i = 0;
1046     IOException lastIOE = null;
1047     int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1048       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1049     int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1050       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1051     do {
1052       try {
1053         return fs.mkdirs(dir);
1054       } catch (IOException ioe) {
1055         lastIOE = ioe;
1056         if (fs.exists(dir)) return true; // directory is present
1057         try {
1058           sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1059         } catch (InterruptedException e) {
1060           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1061         }
1062       }
1063     } while (++i <= hdfsClientRetriesNumber);
1064 
1065     throw new IOException("Exception in createDir", lastIOE);
1066   }
1067 
1068   /**
1069    * Sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1070    * to avoid re-reading the configuration values.
1071    */
1072   private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1073       int hdfsClientRetriesNumber) throws InterruptedException {
1074     if (sleepMultiplier > hdfsClientRetriesNumber) {
1075       LOG.debug(msg + ", retries exhausted");
1076       return;
1077     }
1078     LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1079     Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1080   }
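  /*
   * Worked example of the linear backoff above with the defaults from this file: base sleep
   * 1000 ms and 10 retries. Attempt i sleeps baseSleepBeforeRetries * i, i.e. 1000 ms, 2000 ms,
   * ... up to 10000 ms, after which the retries are reported exhausted.
   */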
1081 }