View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.io.InterruptedIOException;
25  import java.util.ArrayList;
26  import java.util.Collection;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.UUID;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataInputStream;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileStatus;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.FileUtil;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.fs.permission.FsPermission;
42  import org.apache.hadoop.hbase.HColumnDescriptor;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.HTableDescriptor;
46  import org.apache.hadoop.hbase.KeyValue;
47  import org.apache.hadoop.hbase.KeyValueUtil;
48  import org.apache.hadoop.hbase.backup.HFileArchiver;
49  import org.apache.hadoop.hbase.fs.HFileSystem;
50  import org.apache.hadoop.hbase.io.Reference;
51  import org.apache.hadoop.hbase.util.Bytes;
52  import org.apache.hadoop.hbase.util.FSHDFSUtils;
53  import org.apache.hadoop.hbase.util.FSUtils;
54  import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
55  
/**
 * View to an on-disk Region.
 * Provides the set of methods necessary to interact with the on-disk region data.
 */
@InterfaceAudience.Private
public class HRegionFileSystem {
  public static final Log LOG = LogFactory.getLog(HRegionFileSystem.class);

  /** Name of the region info file that resides just under the region directory. */
  public final static String REGION_INFO_FILE = ".regioninfo";

  /** Temporary subdirectory of the region directory used for merges. */
  public static final String REGION_MERGES_DIR = ".merges";

  /** Temporary subdirectory of the region directory used for splits. */
  public static final String REGION_SPLITS_DIR = ".splits";

  /** Temporary subdirectory of the region directory used for compaction output. */
  private static final String REGION_TEMP_DIR = ".tmp";

  // The region this object is a filesystem view of.
  private final HRegionInfo regionInfo;
  // regionInfo for interacting with FS (getting encodedName, etc).
  // NOTE(review): obtained via ServerRegionReplicaUtil.getRegionInfoForFs(); presumably
  // resolves secondary replicas to the primary's info so replicas share files — confirm.
  private final HRegionInfo regionInfoForFs;
  private final Configuration conf;
  // Directory of the table this region belongs to; the region dir lives under it.
  private final Path tableDir;
  private final FileSystem fs;

  /**
   * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the
   * client level.
   */
  private final int hdfsClientRetriesNumber;
  private final int baseSleepBeforeRetries;
  private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
  private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
91  
  /**
   * Create a view to the on-disk region.
   * @param conf the {@link Configuration} to use; also supplies the NN-retry knobs
   *        {@code "hdfs.client.retries.number"} (default 10) and
   *        {@code "hdfs.client.sleep.before.retries"} (default 1000 ms)
   * @param fs {@link FileSystem} that contains the region
   * @param tableDir {@link Path} to where the table is being stored
   * @param regionInfo {@link HRegionInfo} for region
   */
  HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
      final HRegionInfo regionInfo) {
    this.fs = fs;
    this.conf = conf;
    this.tableDir = tableDir;
    this.regionInfo = regionInfo;
    // Filesystem-facing region info may differ from regionInfo (see field comment).
    this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo);
    this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
      DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
    this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
      DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
  }
111 
112   /** @return the underlying {@link FileSystem} */
113   public FileSystem getFileSystem() {
114     return this.fs;
115   }
116 
117   /** @return the {@link HRegionInfo} that describe this on-disk region view */
118   public HRegionInfo getRegionInfo() {
119     return this.regionInfo;
120   }
121 
122   /** @return {@link Path} to the region's root directory. */
123   public Path getTableDir() {
124     return this.tableDir;
125   }
126 
127   /** @return {@link Path} to the region directory. */
128   public Path getRegionDir() {
129     return new Path(this.tableDir, this.regionInfoForFs.getEncodedName());
130   }
131 
132   // ===========================================================================
133   //  Temp Helpers
134   // ===========================================================================
135   /** @return {@link Path} to the region's temp directory, used for file creations */
136   Path getTempDir() {
137     return new Path(getRegionDir(), REGION_TEMP_DIR);
138   }
139 
140   /**
141    * Clean up any temp detritus that may have been left around from previous operation attempts.
142    */
143   void cleanupTempDir() throws IOException {
144     deleteDir(getTempDir());
145   }
146 
147   // ===========================================================================
148   //  Store/StoreFile Helpers
149   // ===========================================================================
150   /**
151    * Returns the directory path of the specified family
152    * @param familyName Column Family Name
153    * @return {@link Path} to the directory of the specified family
154    */
155   public Path getStoreDir(final String familyName) {
156     return new Path(this.getRegionDir(), familyName);
157   }
158 
159   /**
160    * Create the store directory for the specified family name
161    * @param familyName Column Family Name
162    * @return {@link Path} to the directory of the specified family
163    * @throws IOException if the directory creation fails.
164    */
165   Path createStoreDir(final String familyName) throws IOException {
166     Path storeDir = getStoreDir(familyName);
167     if(!fs.exists(storeDir) && !createDir(storeDir))
168       throw new IOException("Failed creating "+storeDir);
169     return storeDir;
170   }
171 
172   /**
173    * Returns the store files available for the family.
174    * This methods performs the filtering based on the valid store files.
175    * @param familyName Column Family Name
176    * @return a set of {@link StoreFileInfo} for the specified family.
177    */
178   public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
179     return getStoreFiles(Bytes.toString(familyName));
180   }
181 
182   public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
183     return getStoreFiles(familyName, true);
184   }
185 
186   /**
187    * Returns the store files available for the family.
188    * This methods performs the filtering based on the valid store files.
189    * @param familyName Column Family Name
190    * @return a set of {@link StoreFileInfo} for the specified family.
191    */
192   public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate)
193       throws IOException {
194     Path familyDir = getStoreDir(familyName);
195     FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
196     if (files == null) {
197       LOG.debug("No StoreFiles for: " + familyDir);
198       return null;
199     }
200 
201     ArrayList<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>(files.length);
202     for (FileStatus status: files) {
203       if (validate && !StoreFileInfo.isValid(status)) {
204         LOG.warn("Invalid StoreFile: " + status.getPath());
205         continue;
206       }
207       StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
208         regionInfoForFs, familyName, status);
209       storeFiles.add(info);
210 
211     }
212     return storeFiles;
213   }
214 
215   /**
216    * Return Qualified Path of the specified family/file
217    *
218    * @param familyName Column Family Name
219    * @param fileName File Name
220    * @return The qualified Path for the specified family/file
221    */
222   Path getStoreFilePath(final String familyName, final String fileName) {
223     Path familyDir = getStoreDir(familyName);
224     return new Path(familyDir, fileName).makeQualified(this.fs);
225   }
226 
227   /**
228    * Return the store file information of the specified family/file.
229    *
230    * @param familyName Column Family Name
231    * @param fileName File Name
232    * @return The {@link StoreFileInfo} for the specified family/file
233    */
234   StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
235       throws IOException {
236     Path familyDir = getStoreDir(familyName);
237     FileStatus status = fs.getFileStatus(new Path(familyDir, fileName));
238     return new StoreFileInfo(this.conf, this.fs, status);
239   }
240 
241   /**
242    * Returns true if the specified family has reference files
243    * @param familyName Column Family Name
244    * @return true if family contains reference files
245    * @throws IOException
246    */
247   public boolean hasReferences(final String familyName) throws IOException {
248     FileStatus[] files = FSUtils.listStatus(fs, getStoreDir(familyName),
249         new FSUtils.ReferenceFileFilter(fs));
250     return files != null && files.length > 0;
251   }
252 
253   /**
254    * Check whether region has Reference file
255    * @param htd table desciptor of the region
256    * @return true if region has reference file
257    * @throws IOException
258    */
259   public boolean hasReferences(final HTableDescriptor htd) throws IOException {
260     for (HColumnDescriptor family : htd.getFamilies()) {
261       if (hasReferences(family.getNameAsString())) {
262         return true;
263       }
264     }
265     return false;
266   }
267 
268   /**
269    * @return the set of families present on disk
270    * @throws IOException
271    */
272   public Collection<String> getFamilies() throws IOException {
273     FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
274     if (fds == null) return null;
275 
276     ArrayList<String> families = new ArrayList<String>(fds.length);
277     for (FileStatus status: fds) {
278       families.add(status.getPath().getName());
279     }
280 
281     return families;
282   }
283 
284   /**
285    * Remove the region family from disk, archiving the store files.
286    * @param familyName Column Family Name
287    * @throws IOException if an error occours during the archiving
288    */
289   public void deleteFamily(final String familyName) throws IOException {
290     // archive family store files
291     HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName));
292 
293     // delete the family folder
294     Path familyDir = getStoreDir(familyName);
295     if(fs.exists(familyDir) && !deleteDir(familyDir))
296       throw new IOException("Could not delete family " + familyName
297           + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "("
298           + regionInfoForFs.getEncodedName() + ")");
299   }
300 
301   /**
302    * Generate a unique file name, used by createTempName() and commitStoreFile()
303    * @param suffix extra information to append to the generated name
304    * @return Unique file name
305    */
306   private static String generateUniqueName(final String suffix) {
307     String name = UUID.randomUUID().toString().replaceAll("-", "");
308     if (suffix != null) name += suffix;
309     return name;
310   }
311 
312   /**
313    * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
314    * to get a safer file creation.
315    * <code>
316    * Path file = fs.createTempName();
317    * ...StoreFile.Writer(file)...
318    * fs.commitStoreFile("family", file);
319    * </code>
320    *
321    * @return Unique {@link Path} of the temporary file
322    */
323   public Path createTempName() {
324     return createTempName(null);
325   }
326 
327   /**
328    * Generate a unique temporary Path. Used in conjuction with commitStoreFile()
329    * to get a safer file creation.
330    * <code>
331    * Path file = fs.createTempName();
332    * ...StoreFile.Writer(file)...
333    * fs.commitStoreFile("family", file);
334    * </code>
335    *
336    * @param suffix extra information to append to the generated name
337    * @return Unique {@link Path} of the temporary file
338    */
339   public Path createTempName(final String suffix) {
340     return new Path(getTempDir(), generateUniqueName(suffix));
341   }
342 
343   /**
344    * Move the file from a build/temp location to the main family store directory.
345    * @param familyName Family that will gain the file
346    * @param buildPath {@link Path} to the file to commit.
347    * @return The new {@link Path} of the committed file
348    * @throws IOException
349    */
350   public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
351     return commitStoreFile(familyName, buildPath, -1, false);
352   }
353 
354   /**
355    * Move the file from a build/temp location to the main family store directory.
356    * @param familyName Family that will gain the file
357    * @param buildPath {@link Path} to the file to commit.
358    * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence number)
359    * @param generateNewName False if you want to keep the buildPath name
360    * @return The new {@link Path} of the committed file
361    * @throws IOException
362    */
363   private Path commitStoreFile(final String familyName, final Path buildPath,
364       final long seqNum, final boolean generateNewName) throws IOException {
365     Path storeDir = getStoreDir(familyName);
366     if(!fs.exists(storeDir) && !createDir(storeDir))
367       throw new IOException("Failed creating " + storeDir);
368 
369     String name = buildPath.getName();
370     if (generateNewName) {
371       name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
372     }
373     Path dstPath = new Path(storeDir, name);
374     if (!fs.exists(buildPath)) {
375       throw new FileNotFoundException(buildPath.toString());
376     }
377     LOG.debug("Committing store file " + buildPath + " as " + dstPath);
378     // buildPath exists, therefore not doing an exists() check.
379     if (!rename(buildPath, dstPath)) {
380       throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
381     }
382     return dstPath;
383   }
384 
385 
386   /**
387    * Moves multiple store files to the relative region's family store directory.
388    * @param storeFiles list of store files divided by family
389    * @throws IOException
390    */
391   void commitStoreFiles(final Map<byte[], List<StoreFile>> storeFiles) throws IOException {
392     for (Map.Entry<byte[], List<StoreFile>> es: storeFiles.entrySet()) {
393       String familyName = Bytes.toString(es.getKey());
394       for (StoreFile sf: es.getValue()) {
395         commitStoreFile(familyName, sf.getPath());
396       }
397     }
398   }
399 
400   /**
401    * Archives the specified store file from the specified family.
402    * @param familyName Family that contains the store files
403    * @param filePath {@link Path} to the store file to remove
404    * @throws IOException if the archiving fails
405    */
406   public void removeStoreFile(final String familyName, final Path filePath)
407       throws IOException {
408     HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs,
409         this.tableDir, Bytes.toBytes(familyName), filePath);
410   }
411 
412   /**
413    * Closes and archives the specified store files from the specified family.
414    * @param familyName Family that contains the store files
415    * @param storeFiles set of store files to remove
416    * @throws IOException if the archiving fails
417    */
418   public void removeStoreFiles(final String familyName, final Collection<StoreFile> storeFiles)
419       throws IOException {
420     HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs,
421         this.tableDir, Bytes.toBytes(familyName), storeFiles);
422   }
423 
  /**
   * Bulk load: Add a specified store file to the specified family.
   * If the source file is on the same file-system as the destination store it is
   * moved from the source location to the destination location; otherwise it is
   * first copied to a temp path on the destination filesystem.
   *
   * @param familyName Family that will gain the file
   * @param srcPath {@link Path} to the file to import
   * @param seqNum Bulk Load sequence number
   * @return The destination {@link Path} of the bulk loaded file
   * @throws IOException
   */
  Path bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
      throws IOException {
    // Copy the file if it's on another filesystem
    FileSystem srcFs = srcPath.getFileSystem(conf);
    // Unwrap HFileSystem so the comparison is against the actual backing filesystem.
    FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;

    // We can't compare FileSystem instances as equals() includes UGI instance
    // as part of the comparison and won't work when doing SecureBulkLoad
    // TODO deal with viewFS
    if (!FSHDFSUtils.isSameHdfs(conf, srcFs, desFs)) {
      LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
          "the destination store. Copying file over to destination filesystem.");
      Path tmpPath = createTempName();
      FileUtil.copy(srcFs, srcPath, fs, tmpPath, false, conf);
      LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
      srcPath = tmpPath;
    }

    // Commit under a freshly generated name carrying the _SeqId_ suffix.
    return commitStoreFile(familyName, srcPath, seqNum, true);
  }
455 
456   // ===========================================================================
457   //  Splits Helpers
458   // ===========================================================================
459   /** @return {@link Path} to the temp directory used during split operations */
460   Path getSplitsDir() {
461     return new Path(getRegionDir(), REGION_SPLITS_DIR);
462   }
463 
464   Path getSplitsDir(final HRegionInfo hri) {
465     return new Path(getSplitsDir(), hri.getEncodedName());
466   }
467 
468   /**
469    * Clean up any split detritus that may have been left around from previous split attempts.
470    */
471   void cleanupSplitsDir() throws IOException {
472     deleteDir(getSplitsDir());
473   }
474 
  /**
   * Clean up any split detritus that may have been left around from previous
   * split attempts.
   * Call this method on initial region deploy.
   * @throws IOException
   */
  void cleanupAnySplitDetritus() throws IOException {
    Path splitdir = this.getSplitsDir();
    if (!fs.exists(splitdir)) return;
    // Look at the splitdir.  It could have the encoded names of the daughter
    // regions we tried to make.  See if the daughter regions actually got made
    // out under the tabledir.  If here under splitdir still, then the split did
    // not complete.  Try and do cleanup.  This code WILL NOT catch the case
    // where we successfully created daughter a but regionserver crashed during
    // the creation of region b.  In this case, there'll be an orphan daughter
    // dir in the filesystem.  TODO: Fix.
    FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
    if (daughters != null) {
      // Delete each partially-made daughter dir found under the table dir.
      for (FileStatus daughter: daughters) {
        Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
        if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
          throw new IOException("Failed delete of " + daughterDir);
        }
      }
    }
    // Finally remove the splits dir itself.
    cleanupSplitsDir();
    LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
  }
503 
504   /**
505    * Remove daughter region
506    * @param regionInfo daughter {@link HRegionInfo}
507    * @throws IOException
508    */
509   void cleanupDaughterRegion(final HRegionInfo regionInfo) throws IOException {
510     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
511     if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
512       throw new IOException("Failed delete of " + regionDir);
513     }
514   }
515 
516   /**
517    * Commit a daughter region, moving it from the split temporary directory
518    * to the proper location in the filesystem.
519    *
520    * @param regionInfo                 daughter {@link org.apache.hadoop.hbase.HRegionInfo}
521    * @throws IOException
522    */
523   Path commitDaughterRegion(final HRegionInfo regionInfo)
524       throws IOException {
525     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
526     Path daughterTmpDir = this.getSplitsDir(regionInfo);
527 
528     if (fs.exists(daughterTmpDir)) {
529 
530       // Write HRI to a file in case we need to recover hbase:meta
531       Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
532       byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
533       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
534 
535       // Move the daughter temp dir to the table dir
536       if (!rename(daughterTmpDir, regionDir)) {
537         throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
538       }
539     }
540 
541     return regionDir;
542   }
543 
544   /**
545    * Create the region splits directory.
546    */
547   void createSplitsDir() throws IOException {
548     Path splitdir = getSplitsDir();
549     if (fs.exists(splitdir)) {
550       LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
551       if (!deleteDir(splitdir)) {
552         throw new IOException("Failed deletion of " + splitdir
553             + " before creating them again.");
554       }
555     }
556     // splitDir doesn't exists now. No need to do an exists() call for it.
557     if (!createDir(splitdir)) {
558       throw new IOException("Failed create of " + splitdir);
559     }
560   }
561 
  /**
   * Write out a split reference. Package local so it doesnt leak out of
   * regionserver.
   * @param hri {@link HRegionInfo} of the destination (daughter) region
   * @param familyName Column Family Name
   * @param f File to split.
   * @param splitRow Split Row
   * @param top True if we are referring to the top half of the hfile.
   * @param splitPolicy policy that may waive the split-row range check, may be null
   * @return Path to created reference, or null when the split row falls outside
   *         the store file's key range (no reference needed for this half).
   * @throws IOException
   */
  Path splitStoreFile(final HRegionInfo hri, final String familyName, final StoreFile f,
      final byte[] splitRow, final boolean top, RegionSplitPolicy splitPolicy)
          throws IOException {

    if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck()) {
      // Check whether the split row lies in the range of the store file
      // If it is outside the range, return directly.
      if (top) {
        //check if larger than last key.
        KeyValue splitKey = KeyValueUtil.createFirstOnRow(splitRow);
        byte[] lastKey = f.createReader().getLastKey();
        // If lastKey is null means storefile is empty.
        if (lastKey == null) return null;
        if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
          splitKey.getKeyOffset(), splitKey.getKeyLength(), lastKey, 0, lastKey.length) > 0) {
          // Split key is beyond the end of the file: top half would be empty.
          return null;
        }
      } else {
        //check if smaller than first key
        KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow);
        byte[] firstKey = f.createReader().getFirstKey();
        // If firstKey is null means storefile is empty.
        if (firstKey == null) return null;
        if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
          splitKey.getKeyOffset(), splitKey.getKeyLength(), firstKey, 0, firstKey.length) < 0) {
          // Split key is before the start of the file: bottom half would be empty.
          return null;
        }
      }
    }

    // Reader no longer needed; evict blocks on close.
    f.closeReader(true);

    Path splitDir = new Path(getSplitsDir(hri), familyName);
    // A reference to the bottom half of the hsf store file.
    Reference r =
      top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
    // Add the referred-to regions name as a dot separated suffix.
    // See REF_NAME_REGEX regex above.  The referred-to regions name is
    // up in the path of the passed in <code>f</code> -- parentdir is family,
    // then the directory above is the region name.
    String parentRegionName = regionInfoForFs.getEncodedName();
    // Write reference with same file id only with the other region name as
    // suffix and into the new region location (under same family).
    Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
    return r.write(fs, p);
  }
620 
621   // ===========================================================================
622   //  Merge Helpers
623   // ===========================================================================
624   /** @return {@link Path} to the temp directory used during merge operations */
625   Path getMergesDir() {
626     return new Path(getRegionDir(), REGION_MERGES_DIR);
627   }
628 
629   Path getMergesDir(final HRegionInfo hri) {
630     return new Path(getMergesDir(), hri.getEncodedName());
631   }
632 
633   /**
634    * Clean up any merge detritus that may have been left around from previous merge attempts.
635    */
636   void cleanupMergesDir() throws IOException {
637     deleteDir(getMergesDir());
638   }
639 
640   /**
641    * Remove merged region
642    * @param mergedRegion {@link HRegionInfo}
643    * @throws IOException
644    */
645   void cleanupMergedRegion(final HRegionInfo mergedRegion) throws IOException {
646     Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
647     if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
648       throw new IOException("Failed delete of " + regionDir);
649     }
650   }
651 
652   /**
653    * Create the region merges directory.
654    * @throws IOException If merges dir already exists or we fail to create it.
655    * @see HRegionFileSystem#cleanupMergesDir()
656    */
657   void createMergesDir() throws IOException {
658     Path mergesdir = getMergesDir();
659     if (fs.exists(mergesdir)) {
660       LOG.info("The " + mergesdir
661           + " directory exists.  Hence deleting it to recreate it");
662       if (!fs.delete(mergesdir, true)) {
663         throw new IOException("Failed deletion of " + mergesdir
664             + " before creating them again.");
665       }
666     }
667     if (!fs.mkdirs(mergesdir))
668       throw new IOException("Failed create of " + mergesdir);
669   }
670 
  /**
   * Write out a merge reference under the given merges directory. Package local
   * so it doesnt leak out of regionserver.
   * @param mergedRegion {@link HRegionInfo} of the merged region
   * @param familyName Column Family Name
   * @param f File to create reference.
   * @param mergedDir directory under which the merged region's files are staged
   * @return Path to created reference.
   * @throws IOException
   */
  Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName,
      final StoreFile f, final Path mergedDir)
      throws IOException {
    Path referenceDir = new Path(new Path(mergedDir,
        mergedRegion.getEncodedName()), familyName);
    // A whole reference to the store file: a top reference whose split key is this
    // region's start key covers the entire file.
    Reference r = Reference.createTopReference(regionInfoForFs.getStartKey());
    // Add the referred-to regions name as a dot separated suffix.
    // See REF_NAME_REGEX regex above. The referred-to regions name is
    // up in the path of the passed in <code>f</code> -- parentdir is family,
    // then the directory above is the region name.
    String mergingRegionName = regionInfoForFs.getEncodedName();
    // Write reference with same file id only with the other region name as
    // suffix and into the new region location (under same family).
    Path p = new Path(referenceDir, f.getPath().getName() + "."
        + mergingRegionName);
    return r.write(fs, p);
  }
699 
700   /**
701    * Commit a merged region, moving it from the merges temporary directory to
702    * the proper location in the filesystem.
703    * @param mergedRegionInfo merged region {@link HRegionInfo}
704    * @throws IOException
705    */
706   void commitMergedRegion(final HRegionInfo mergedRegionInfo) throws IOException {
707     Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
708     Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
709     // Move the tmp dir in the expected location
710     if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
711       if (!fs.rename(mergedRegionTmpDir, regionDir)) {
712         throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
713             + regionDir);
714       }
715     }
716   }
717 
718   // ===========================================================================
719   //  Create/Open/Delete Helpers
720   // ===========================================================================
721   /**
722    * Log the current state of the region
723    * @param LOG log to output information
724    * @throws IOException if an unexpected exception occurs
725    */
726   void logFileSystemState(final Log LOG) throws IOException {
727     FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
728   }
729 
730   /**
731    * @param hri
732    * @return Content of the file we write out to the filesystem under a region
733    * @throws IOException
734    */
735   private static byte[] getRegionInfoFileContent(final HRegionInfo hri) throws IOException {
736     return hri.toDelimitedByteArray();
737   }
738 
739   /**
740    * Create a {@link HRegionInfo} from the serialized version on-disk.
741    * @param fs {@link FileSystem} that contains the Region Info file
742    * @param regionDir {@link Path} to the Region Directory that contains the Info file
743    * @return An {@link HRegionInfo} instance gotten from the Region Info file.
744    * @throws IOException if an error occurred during file open/read operation.
745    */
746   public static HRegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
747       throws IOException {
748     FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
749     try {
750       return HRegionInfo.parseFrom(in);
751     } finally {
752       in.close();
753     }
754   }
755 
756   /**
757    * Write the .regioninfo file on-disk.
758    */
759   private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
760       final Path regionInfoFile, final byte[] content) throws IOException {
761     // First check to get the permissions
762     FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
763     // Write the RegionInfo file content
764     FSDataOutputStream out = FSUtils.create(fs, regionInfoFile, perms, null);
765     try {
766       out.write(content);
767     } finally {
768       out.close();
769     }
770   }
771 
  /**
   * Write out an info file under the stored region directory. Useful recovering mangled regions.
   * If the regionInfo already exists on-disk, then we fast exit.
   * @throws IOException if an existing stale file cannot be removed or the rewrite fails
   */
  void checkRegionInfoOnFilesystem() throws IOException {
    // Compose the content of the file so we can compare to length in filesystem. If not same,
    // rewrite it (it may have been written in the old format using Writables instead of pb). The
    // pb version is much shorter -- we write now w/o the toString version -- so checking length
    // only should be sufficient. I don't want to read the file every time to check if it pb
    // serialized.
    byte[] content = getRegionInfoFileContent(regionInfoForFs);
    try {
      Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);

      // NOTE(review): getFileStatus throws FileNotFoundException when the file is absent;
      // that case is handled by the catch below, which falls through to (re)write the file.
      FileStatus status = fs.getFileStatus(regionInfoFile);
      if (status != null && status.getLen() == content.length) {
        // Then assume the content good and move on.
        // NOTE: that the length is not sufficient to define that the content matches.
        return;
      }

      // Length mismatch: the on-disk copy is stale (likely old Writable format); replace it.
      LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
      if (!fs.delete(regionInfoFile, false)) {
        throw new IOException("Unable to remove existing " + regionInfoFile);
      }
    } catch (FileNotFoundException e) {
      // Missing file is not fatal: warn and fall through to write a fresh copy.
      LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() +
          " on table " + regionInfo.getTable());
    }

    // Write HRI to a file in case we need to recover hbase:meta
    writeRegionInfoOnFilesystem(content, true);
  }
805 
806   /**
807    * Write out an info file under the region directory. Useful recovering mangled regions.
808    * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
809    */
810   private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
811     byte[] content = getRegionInfoFileContent(regionInfoForFs);
812     writeRegionInfoOnFilesystem(content, useTempDir);
813   }
814 
815   /**
816    * Write out an info file under the region directory. Useful recovering mangled regions.
817    * @param regionInfoContent serialized version of the {@link HRegionInfo}
818    * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
819    */
820   private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
821       final boolean useTempDir) throws IOException {
822     Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
823     if (useTempDir) {
824       // Create in tmpDir and then move into place in case we crash after
825       // create but before close. If we don't successfully close the file,
826       // subsequent region reopens will fail the below because create is
827       // registered in NN.
828 
829       // And then create the file
830       Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
831 
832       // If datanode crashes or if the RS goes down just before the close is called while trying to
833       // close the created regioninfo file in the .tmp directory then on next
834       // creation we will be getting AlreadyCreatedException.
835       // Hence delete and create the file if exists.
836       if (FSUtils.isExists(fs, tmpPath)) {
837         FSUtils.delete(fs, tmpPath, true);
838       }
839 
840       // Write HRI to a file in case we need to recover hbase:meta
841       writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
842 
843       // Move the created file to the original path
844       if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
845         throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
846       }
847     } else {
848       // Write HRI to a file in case we need to recover hbase:meta
849       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
850     }
851   }
852 
853   /**
854    * Create a new Region on file-system.
855    * @param conf the {@link Configuration} to use
856    * @param fs {@link FileSystem} from which to add the region
857    * @param tableDir {@link Path} to where the table is being stored
858    * @param regionInfo {@link HRegionInfo} for region to be added
859    * @throws IOException if the region creation fails due to a FileSystem exception.
860    */
861   public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
862       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
863     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
864     Path regionDir = regionFs.getRegionDir();
865 
866     if (fs.exists(regionDir)) {
867       LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
868       throw new IOException("The specified region already exists on disk: " + regionDir);
869     }
870 
871     // Create the region directory
872     if (!createDirOnFileSystem(fs, conf, regionDir)) {
873       LOG.warn("Unable to create the region directory: " + regionDir);
874       throw new IOException("Unable to create region directory: " + regionDir);
875     }
876 
877     // Write HRI to a file in case we need to recover hbase:meta
878     regionFs.writeRegionInfoOnFilesystem(false);
879     return regionFs;
880   }
881 
882   /**
883    * Open Region from file-system.
884    * @param conf the {@link Configuration} to use
885    * @param fs {@link FileSystem} from which to add the region
886    * @param tableDir {@link Path} to where the table is being stored
887    * @param regionInfo {@link HRegionInfo} for region to be added
888    * @param readOnly True if you don't want to edit the region data
889    * @throws IOException if the region creation fails due to a FileSystem exception.
890    */
891   public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
892       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo, boolean readOnly)
893       throws IOException {
894     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
895     Path regionDir = regionFs.getRegionDir();
896 
897     if (!fs.exists(regionDir)) {
898       LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
899       throw new IOException("The specified region do not exists on disk: " + regionDir);
900     }
901 
902     if (!readOnly) {
903       // Cleanup temporary directories
904       regionFs.cleanupTempDir();
905       regionFs.cleanupSplitsDir();
906       regionFs.cleanupMergesDir();
907 
908       // if it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
909       regionFs.checkRegionInfoOnFilesystem();
910     }
911 
912     return regionFs;
913   }
914 
915   /**
916    * Remove the region from the table directory, archiving the region's hfiles.
917    * @param conf the {@link Configuration} to use
918    * @param fs {@link FileSystem} from which to remove the region
919    * @param tableDir {@link Path} to where the table is being stored
920    * @param regionInfo {@link HRegionInfo} for region to be deleted
921    * @throws IOException if the request cannot be completed
922    */
923   public static void deleteRegionFromFileSystem(final Configuration conf,
924       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
925     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
926     Path regionDir = regionFs.getRegionDir();
927 
928     if (!fs.exists(regionDir)) {
929       LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
930       return;
931     }
932 
933     if (LOG.isDebugEnabled()) {
934       LOG.debug("DELETING region " + regionDir);
935     }
936 
937     // Archive region
938     Path rootDir = FSUtils.getRootDir(conf);
939     HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
940 
941     // Delete empty region dir
942     if (!fs.delete(regionDir, true)) {
943       LOG.warn("Failed delete of " + regionDir);
944     }
945   }
946 
947   /**
948    * Creates a directory. Assumes the user has already checked for this directory existence.
949    * @param dir
950    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
951    *         whether the directory exists or not, and returns true if it exists.
952    * @throws IOException
953    */
954   boolean createDir(Path dir) throws IOException {
955     int i = 0;
956     IOException lastIOE = null;
957     do {
958       try {
959         return fs.mkdirs(dir);
960       } catch (IOException ioe) {
961         lastIOE = ioe;
962         if (fs.exists(dir)) return true; // directory is present
963         try {
964           sleepBeforeRetry("Create Directory", i+1);
965         } catch (InterruptedException e) {
966           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
967         }
968       }
969     } while (++i <= hdfsClientRetriesNumber);
970     throw new IOException("Exception in createDir", lastIOE);
971   }
972 
973   /**
974    * Renames a directory. Assumes the user has already checked for this directory existence.
975    * @param srcpath
976    * @param dstPath
977    * @return true if rename is successful.
978    * @throws IOException
979    */
980   boolean rename(Path srcpath, Path dstPath) throws IOException {
981     IOException lastIOE = null;
982     int i = 0;
983     do {
984       try {
985         return fs.rename(srcpath, dstPath);
986       } catch (IOException ioe) {
987         lastIOE = ioe;
988         if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
989         // dir is not there, retry after some time.
990         try {
991           sleepBeforeRetry("Rename Directory", i+1);
992         } catch (InterruptedException e) {
993           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
994         }
995       }
996     } while (++i <= hdfsClientRetriesNumber);
997 
998     throw new IOException("Exception in rename", lastIOE);
999   }
1000 
1001   /**
1002    * Deletes a directory. Assumes the user has already checked for this directory existence.
1003    * @param dir
1004    * @return true if the directory is deleted.
1005    * @throws IOException
1006    */
1007   boolean deleteDir(Path dir) throws IOException {
1008     IOException lastIOE = null;
1009     int i = 0;
1010     do {
1011       try {
1012         return fs.delete(dir, true);
1013       } catch (IOException ioe) {
1014         lastIOE = ioe;
1015         if (!fs.exists(dir)) return true;
1016         // dir is there, retry deleting after some time.
1017         try {
1018           sleepBeforeRetry("Delete Directory", i+1);
1019         } catch (InterruptedException e) {
1020           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1021         }
1022       }
1023     } while (++i <= hdfsClientRetriesNumber);
1024 
1025     throw new IOException("Exception in DeleteDir", lastIOE);
1026   }
1027 
  /**
   * sleeping logic; handles the interrupt exception.
   * Delegates to the static version using this instance's configured
   * base sleep time and retry count.
   * @param msg description of the operation being retried (used for logging)
   * @param sleepMultiplier current attempt number; scales the base sleep time
   * @throws InterruptedException if interrupted while sleeping
   */
  private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
    sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
  }
1034 
1035   /**
1036    * Creates a directory for a filesystem and configuration object. Assumes the user has already
1037    * checked for this directory existence.
1038    * @param fs
1039    * @param conf
1040    * @param dir
1041    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1042    *         whether the directory exists or not, and returns true if it exists.
1043    * @throws IOException
1044    */
1045   private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1046       throws IOException {
1047     int i = 0;
1048     IOException lastIOE = null;
1049     int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1050       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1051     int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1052       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1053     do {
1054       try {
1055         return fs.mkdirs(dir);
1056       } catch (IOException ioe) {
1057         lastIOE = ioe;
1058         if (fs.exists(dir)) return true; // directory is present
1059         try {
1060           sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1061         } catch (InterruptedException e) {
1062           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1063         }
1064       }
1065     } while (++i <= hdfsClientRetriesNumber);
1066 
1067     throw new IOException("Exception in createDir", lastIOE);
1068   }
1069 
1070   /**
1071    * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1072    * for this to avoid re-looking for the integer values.
1073    */
1074   private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1075       int hdfsClientRetriesNumber) throws InterruptedException {
1076     if (sleepMultiplier > hdfsClientRetriesNumber) {
1077       LOG.debug(msg + ", retries exhausted");
1078       return;
1079     }
1080     LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1081     Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1082   }
1083 }