1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.io.InterruptedIOException;
25  import java.util.ArrayList;
26  import java.util.Collection;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.UUID;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataInputStream;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileStatus;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.FileUtil;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.fs.permission.FsPermission;
42  import org.apache.hadoop.hbase.HColumnDescriptor;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.HTableDescriptor;
46  import org.apache.hadoop.hbase.KeyValue;
47  import org.apache.hadoop.hbase.KeyValueUtil;
48  import org.apache.hadoop.hbase.backup.HFileArchiver;
49  import org.apache.hadoop.hbase.fs.HFileSystem;
50  import org.apache.hadoop.hbase.io.Reference;
51  import org.apache.hadoop.hbase.util.Bytes;
52  import org.apache.hadoop.hbase.util.FSHDFSUtils;
53  import org.apache.hadoop.hbase.util.FSUtils;
54  import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
55  
56  /**
57   * View to an on-disk Region.
58   * Provides the set of methods necessary to interact with the on-disk region data.
59   */
60  @InterfaceAudience.Private
61  public class HRegionFileSystem {
62    public static final Log LOG = LogFactory.getLog(HRegionFileSystem.class);
63  
64    /** Name of the region info file that resides just under the region directory. */
65    public final static String REGION_INFO_FILE = ".regioninfo";
66  
67    /** Temporary subdirectory of the region directory used for merges. */
68    public static final String REGION_MERGES_DIR = ".merges";
69  
70    /** Temporary subdirectory of the region directory used for splits. */
71    public static final String REGION_SPLITS_DIR = ".splits";
72  
73    /** Temporary subdirectory of the region directory used for compaction output. */
74    private static final String REGION_TEMP_DIR = ".tmp";
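  // Illustrative sketch (not exhaustive) of the on-disk layout these constants describe,
  // assuming a single column family named "f":
  //
  //   <tableDir>/<encodedRegionName>/.regioninfo      serialized HRegionInfo (REGION_INFO_FILE)
  //   <tableDir>/<encodedRegionName>/.tmp/            temp files, e.g. compaction output
  //   <tableDir>/<encodedRegionName>/.splits/         daughter region dirs during a split
  //   <tableDir>/<encodedRegionName>/.merges/         merged region dir during a merge
  //   <tableDir>/<encodedRegionName>/f/<storefile>    committed store files for family "f"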
75  
76    private final HRegionInfo regionInfo;
77    //regionInfo for interacting with FS (getting encodedName, etc)
78    private final HRegionInfo regionInfoForFs;
79    private final Configuration conf;
80    private final Path tableDir;
81    private final FileSystem fs;
82  
83    /**
84     * In order to handle NameNode (NN) connectivity hiccups, one needs to retry non-idempotent
85     * operations at the client level.
86     */
87    private final int hdfsClientRetriesNumber;
88    private final int baseSleepBeforeRetries;
89    private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
90    private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
91  
92    /**
93     * Create a view to the on-disk region
94     * @param conf the {@link Configuration} to use
95     * @param fs {@link FileSystem} that contains the region
96     * @param tableDir {@link Path} to where the table is being stored
97     * @param regionInfo {@link HRegionInfo} for region
98     */
99    HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
100       final HRegionInfo regionInfo) {
101     this.fs = fs;
102     this.conf = conf;
103     this.tableDir = tableDir;
104     this.regionInfo = regionInfo;
105     this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo);
106     this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
107       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
108     this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
109       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
110  }
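  // A minimal sketch of how the retry knobs above could be tuned; the key names and defaults
  // come from this constructor, the concrete values are examples only:
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setInt("hdfs.client.retries.number", 15);          // default 10
  //   conf.setInt("hdfs.client.sleep.before.retries", 2000);  // base sleep in ms, default 1000
  //   // createDir()/rename()/deleteDir() below retry up to that many times, sleeping
  //   // baseSleep * attempt between tries, to ride over NameNode connectivity hiccups.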
111 
112   /** @return the underlying {@link FileSystem} */
113   public FileSystem getFileSystem() {
114     return this.fs;
115   }
116 
117   /** @return the {@link HRegionInfo} that describes this on-disk region view */
118   public HRegionInfo getRegionInfo() {
119     return this.regionInfo;
120   }
121 
122   public HRegionInfo getRegionInfoForFS() {
123     return this.regionInfoForFs;
124   }
125 
126   /** @return {@link Path} to the table directory under which this region is stored. */
127   public Path getTableDir() {
128     return this.tableDir;
129   }
130 
131   /** @return {@link Path} to the region directory. */
132   public Path getRegionDir() {
133     return new Path(this.tableDir, this.regionInfoForFs.getEncodedName());
134   }
135 
136   // ===========================================================================
137   //  Temp Helpers
138   // ===========================================================================
139   /** @return {@link Path} to the region's temp directory, used for file creations */
140   Path getTempDir() {
141     return new Path(getRegionDir(), REGION_TEMP_DIR);
142   }
143 
144   /**
145    * Clean up any temp detritus that may have been left around from previous operation attempts.
146    */
147   void cleanupTempDir() throws IOException {
148     deleteDir(getTempDir());
149   }
150 
151   // ===========================================================================
152   //  Store/StoreFile Helpers
153   // ===========================================================================
154   /**
155    * Returns the directory path of the specified family
156    * @param familyName Column Family Name
157    * @return {@link Path} to the directory of the specified family
158    */
159   public Path getStoreDir(final String familyName) {
160     return new Path(this.getRegionDir(), familyName);
161   }
162 
163   /**
164    * Create the store directory for the specified family name
165    * @param familyName Column Family Name
166    * @return {@link Path} to the directory of the specified family
167    * @throws IOException if the directory creation fails.
168    */
169   Path createStoreDir(final String familyName) throws IOException {
170     Path storeDir = getStoreDir(familyName);
171     if(!fs.exists(storeDir) && !createDir(storeDir))
172       throw new IOException("Failed creating "+storeDir);
173     return storeDir;
174   }
175 
176   /**
177    * Returns the store files available for the family.
178    * This method performs filtering based on the valid store files.
179    * @param familyName Column Family Name
180    * @return a set of {@link StoreFileInfo} for the specified family.
181    */
182   public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
183     return getStoreFiles(Bytes.toString(familyName));
184   }
185 
186   public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
187     return getStoreFiles(familyName, true);
188   }
189 
190   /**
191    * Returns the store files available for the family.
192    * This method performs filtering based on the valid store files when <code>validate</code> is true.
193    * @param familyName Column Family Name
194    * @return a set of {@link StoreFileInfo} for the specified family.
195    */
196   public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate)
197       throws IOException {
198     Path familyDir = getStoreDir(familyName);
199     FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
200     if (files == null) {
201       LOG.debug("No StoreFiles for: " + familyDir);
202       return null;
203     }
204 
205     ArrayList<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>(files.length);
206     for (FileStatus status: files) {
207       if (validate && !StoreFileInfo.isValid(status)) {
208         LOG.warn("Invalid StoreFile: " + status.getPath());
209         continue;
210       }
211       StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
212         regionInfoForFs, familyName, status.getPath());
213       storeFiles.add(info);
214 
215     }
216     return storeFiles;
217   }
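  // Hedged usage sketch: list the store files of a family and log their paths. The family
  // name "f" and the regionFs variable are assumptions for illustration only.
  //
  //   Collection<StoreFileInfo> infos = regionFs.getStoreFiles("f");
  //   if (infos != null) {                       // null means the family dir has no files
  //     for (StoreFileInfo info : infos) {
  //       LOG.debug("Found store file: " + info);
  //     }
  //   }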
218 
219   /**
220    * Return Qualified Path of the specified family/file
221    *
222    * @param familyName Column Family Name
223    * @param fileName File Name
224    * @return The qualified Path for the specified family/file
225    */
226   Path getStoreFilePath(final String familyName, final String fileName) {
227     Path familyDir = getStoreDir(familyName);
228     return new Path(familyDir, fileName).makeQualified(this.fs);
229   }
230 
231   /**
232    * Return the store file information of the specified family/file.
233    *
234    * @param familyName Column Family Name
235    * @param fileName File Name
236    * @return The {@link StoreFileInfo} for the specified family/file
237    */
238   StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
239       throws IOException {
240     Path familyDir = getStoreDir(familyName);
241     return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
242       regionInfoForFs, familyName, new Path(familyDir, fileName));
243   }
244 
245   /**
246    * Returns true if the specified family has reference files
247    * @param familyName Column Family Name
248    * @return true if family contains reference files
249    * @throws IOException
250    */
251   public boolean hasReferences(final String familyName) throws IOException {
252     FileStatus[] files = FSUtils.listStatus(fs, getStoreDir(familyName),
253         new FSUtils.ReferenceFileFilter(fs));
254     return files != null && files.length > 0;
255   }
256 
257   /**
258    * Check whether the region has reference files
259    * @param htd table descriptor of the region
260    * @return true if the region has reference files
261    * @throws IOException
262    */
263   public boolean hasReferences(final HTableDescriptor htd) throws IOException {
264     for (HColumnDescriptor family : htd.getFamilies()) {
265       if (hasReferences(family.getNameAsString())) {
266         return true;
267       }
268     }
269     return false;
270   }
271 
272   /**
273    * @return the set of families present on disk
274    * @throws IOException
275    */
276   public Collection<String> getFamilies() throws IOException {
277     FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
278     if (fds == null) return null;
279 
280     ArrayList<String> families = new ArrayList<String>(fds.length);
281     for (FileStatus status: fds) {
282       families.add(status.getPath().getName());
283     }
284 
285     return families;
286   }
287 
288   /**
289    * Remove the region family from disk, archiving the store files.
290    * @param familyName Column Family Name
291    * @throws IOException if an error occurs during the archiving
292    */
293   public void deleteFamily(final String familyName) throws IOException {
294     // archive family store files
295     HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName));
296 
297     // delete the family folder
298     Path familyDir = getStoreDir(familyName);
299     if(fs.exists(familyDir) && !deleteDir(familyDir))
300       throw new IOException("Could not delete family " + familyName
301           + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "("
302           + regionInfoForFs.getEncodedName() + ")");
303   }
304 
305   /**
306    * Generate a unique file name, used by createTempName() and commitStoreFile()
307    * @param suffix extra information to append to the generated name
308    * @return Unique file name
309    */
310   private static String generateUniqueName(final String suffix) {
311     String name = UUID.randomUUID().toString().replaceAll("-", "");
312     if (suffix != null) name += suffix;
313     return name;
314   }
315 
316   /**
317    * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
318    * for safer file creation.
319    * <code>
320    * Path file = fs.createTempName();
321    * ...StoreFile.Writer(file)...
322    * fs.commitStoreFile("family", file);
323    * </code>
324    *
325    * @return Unique {@link Path} of the temporary file
326    */
327   public Path createTempName() {
328     return createTempName(null);
329   }
330 
331   /**
332    * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
333    * for safer file creation.
334    * <code>
335    * Path file = fs.createTempName();
336    * ...StoreFile.Writer(file)...
337    * fs.commitStoreFile("family", file);
338    * </code>
339    *
340    * @param suffix extra information to append to the generated name
341    * @return Unique {@link Path} of the temporary file
342    */
343   public Path createTempName(final String suffix) {
344     return new Path(getTempDir(), generateUniqueName(suffix));
345   }
346 
347   /**
348    * Move the file from a build/temp location to the main family store directory.
349    * @param familyName Family that will gain the file
350    * @param buildPath {@link Path} to the file to commit.
351    * @return The new {@link Path} of the committed file
352    * @throws IOException
353    */
354   public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
355     return commitStoreFile(familyName, buildPath, -1, false);
356   }
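  // Sketch of the temp-then-commit pattern described in createTempName() above; the writer
  // setup is elided because it depends on the store configuration, and "f"/regionFs are
  // illustrative placeholders:
  //
  //   Path tmp = regionFs.createTempName();                  // unique file under <region>/.tmp
  //   // ... write the new HFile at tmp via a StoreFile.Writer ...
  //   Path committed = regionFs.commitStoreFile("f", tmp);   // renamed into <region>/f/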
357 
358   /**
359    * Move the file from a build/temp location to the main family store directory.
360    * @param familyName Family that will gain the file
361    * @param buildPath {@link Path} to the file to commit.
362    * @param seqNum Sequence Number to append to the file name (less than 0 if no sequence number)
363    * @param generateNewName False if you want to keep the buildPath name
364    * @return The new {@link Path} of the committed file
365    * @throws IOException
366    */
367   private Path commitStoreFile(final String familyName, final Path buildPath,
368       final long seqNum, final boolean generateNewName) throws IOException {
369     Path storeDir = getStoreDir(familyName);
370     if(!fs.exists(storeDir) && !createDir(storeDir))
371       throw new IOException("Failed creating " + storeDir);
372 
373     String name = buildPath.getName();
374     if (generateNewName) {
375       name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
376     }
377     Path dstPath = new Path(storeDir, name);
378     if (!fs.exists(buildPath)) {
379       throw new FileNotFoundException(buildPath.toString());
380     }
381     LOG.debug("Committing store file " + buildPath + " as " + dstPath);
382     // buildPath exists, therefore not doing an exists() check.
383     if (!rename(buildPath, dstPath)) {
384       throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
385     }
386     return dstPath;
387   }
388 
389 
390   /**
391    * Moves multiple store files into their respective family store directories of this region.
392    * @param storeFiles list of store files divided by family
393    * @throws IOException
394    */
395   void commitStoreFiles(final Map<byte[], List<StoreFile>> storeFiles) throws IOException {
396     for (Map.Entry<byte[], List<StoreFile>> es: storeFiles.entrySet()) {
397       String familyName = Bytes.toString(es.getKey());
398       for (StoreFile sf: es.getValue()) {
399         commitStoreFile(familyName, sf.getPath());
400       }
401     }
402   }
403 
404   /**
405    * Archives the specified store file from the specified family.
406    * @param familyName Family that contains the store files
407    * @param filePath {@link Path} to the store file to remove
408    * @throws IOException if the archiving fails
409    */
410   public void removeStoreFile(final String familyName, final Path filePath)
411       throws IOException {
412     HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs,
413         this.tableDir, Bytes.toBytes(familyName), filePath);
414   }
415 
416   /**
417    * Closes and archives the specified store files from the specified family.
418    * @param familyName Family that contains the store files
419    * @param storeFiles set of store files to remove
420    * @throws IOException if the archiving fails
421    */
422   public void removeStoreFiles(final String familyName, final Collection<StoreFile> storeFiles)
423       throws IOException {
424     HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs,
425         this.tableDir, Bytes.toBytes(familyName), storeFiles);
426   }
427 
428   /**
429    * Bulk load: Add a specified store file to the specified family.
430    * If the source file is on the same file-system, it is moved from the
431    * source location to the destination location; otherwise it is copied over.
432    *
433    * @param familyName Family that will gain the file
434    * @param srcPath {@link Path} to the file to import
435    * @param seqNum Bulk Load sequence number
436    * @return The destination {@link Path} of the bulk loaded file
437    * @throws IOException
438    */
439   Path bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
440       throws IOException {
441     // Copy the file if it's on another filesystem
442     FileSystem srcFs = srcPath.getFileSystem(conf);
443     FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;
444 
445     // We can't compare FileSystem instances as equals() includes UGI instance
446     // as part of the comparison and won't work when doing SecureBulkLoad
447     // TODO deal with viewFS
448     if (!FSHDFSUtils.isSameHdfs(conf, srcFs, desFs)) {
449       LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
450           "the destination store. Copying file over to destination filesystem.");
451       Path tmpPath = createTempName();
452       FileUtil.copy(srcFs, srcPath, fs, tmpPath, false, conf);
453       LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
454       srcPath = tmpPath;
455     }
456 
457     return commitStoreFile(familyName, srcPath, seqNum, true);
458   }
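  // In short: a bulk-load file already on the destination HDFS is simply renamed into place,
  // while a file on a different filesystem is first copied into the region .tmp dir.
  // Hypothetical invocation, from within this package (path and seqNum are made up):
  //
  //   Path dst = regionFs.bulkLoadStoreFile("f", new Path("/staging/f/hfile1"), 42L);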
459 
460   // ===========================================================================
461   //  Splits Helpers
462   // ===========================================================================
463   /** @return {@link Path} to the temp directory used during split operations */
464   Path getSplitsDir() {
465     return new Path(getRegionDir(), REGION_SPLITS_DIR);
466   }
467 
468   Path getSplitsDir(final HRegionInfo hri) {
469     return new Path(getSplitsDir(), hri.getEncodedName());
470   }
471 
472   /**
473    * Clean up any split detritus that may have been left around from previous split attempts.
474    */
475   void cleanupSplitsDir() throws IOException {
476     deleteDir(getSplitsDir());
477   }
478 
479   /**
480    * Clean up any split detritus that may have been left around from previous
481    * split attempts.
482    * Call this method on initial region deploy.
483    * @throws IOException
484    */
485   void cleanupAnySplitDetritus() throws IOException {
486     Path splitdir = this.getSplitsDir();
487     if (!fs.exists(splitdir)) return;
488     // Look at the splitdir.  It could have the encoded names of the daughter
489     // regions we tried to make.  See if the daughter regions actually got made
490     // out under the tabledir.  If here under splitdir still, then the split did
491     // not complete.  Try and do cleanup.  This code WILL NOT catch the case
492     // where we successfully created daughter a but regionserver crashed during
493     // the creation of region b.  In this case, there'll be an orphan daughter
494     // dir in the filesystem.  TODO: Fix.
495     FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
496     if (daughters != null) {
497       for (FileStatus daughter: daughters) {
498         Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
499         if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
500           throw new IOException("Failed delete of " + daughterDir);
501         }
502       }
503     }
504     cleanupSplitsDir();
505     LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
506   }
507 
508   /**
509    * Remove daughter region
510    * @param regionInfo daughter {@link HRegionInfo}
511    * @throws IOException
512    */
513   void cleanupDaughterRegion(final HRegionInfo regionInfo) throws IOException {
514     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
515     if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
516       throw new IOException("Failed delete of " + regionDir);
517     }
518   }
519 
520   /**
521    * Commit a daughter region, moving it from the split temporary directory
522    * to the proper location in the filesystem.
523    *
524    * @param regionInfo                 daughter {@link org.apache.hadoop.hbase.HRegionInfo}
525    * @throws IOException
526    */
527   Path commitDaughterRegion(final HRegionInfo regionInfo)
528       throws IOException {
529     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
530     Path daughterTmpDir = this.getSplitsDir(regionInfo);
531 
532     if (fs.exists(daughterTmpDir)) {
533 
534       // Write HRI to a file in case we need to recover hbase:meta
535       Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
536       byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
537       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
538 
539       // Move the daughter temp dir to the table dir
540       if (!rename(daughterTmpDir, regionDir)) {
541         throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
542       }
543     }
544 
545     return regionDir;
546   }
547 
548   /**
549    * Create the region splits directory.
550    */
551   void createSplitsDir() throws IOException {
552     Path splitdir = getSplitsDir();
553     if (fs.exists(splitdir)) {
554       LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
555       if (!deleteDir(splitdir)) {
556         throw new IOException("Failed deletion of " + splitdir
557             + " before creating them again.");
558       }
559     }
560     // splitDir doesn't exist now. No need to do an exists() call for it.
561     if (!createDir(splitdir)) {
562       throw new IOException("Failed create of " + splitdir);
563     }
564   }
565 
566   /**
567    * Write out a split reference. Package-local so it doesn't leak out of the
568    * regionserver.
569    * @param hri {@link HRegionInfo} of the destination
570    * @param familyName Column Family Name
571    * @param f File to split.
572    * @param splitRow Split Row
573    * @param top True if we are referring to the top half of the hfile.
574    * @param splitPolicy the {@link RegionSplitPolicy}; may allow skipping the store file range check
575    * @return Path to created reference.
576    * @throws IOException
577    */
578   Path splitStoreFile(final HRegionInfo hri, final String familyName, final StoreFile f,
579       final byte[] splitRow, final boolean top, RegionSplitPolicy splitPolicy)
580           throws IOException {
581 
582     if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck()) {
583       // Check whether the split row lies in the range of the store file
584       // If it is outside the range, return directly.
585       if (top) {
586         //check if larger than last key.
587         KeyValue splitKey = KeyValueUtil.createFirstOnRow(splitRow);
588         byte[] lastKey = f.createReader().getLastKey();
589         // If lastKey is null means storefile is empty.
590         if (lastKey == null) return null;
591         if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
592           splitKey.getKeyOffset(), splitKey.getKeyLength(), lastKey, 0, lastKey.length) > 0) {
593           return null;
594         }
595       } else {
596         //check if smaller than first key
597         KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow);
598         byte[] firstKey = f.createReader().getFirstKey();
599         // If firstKey is null means storefile is empty.
600         if (firstKey == null) return null;
601         if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(),
602           splitKey.getKeyOffset(), splitKey.getKeyLength(), firstKey, 0, firstKey.length) < 0) {
603           return null;
604         }
605       }
606     }
607 
608     f.closeReader(true);
609 
610     Path splitDir = new Path(getSplitsDir(hri), familyName);
611     // A reference to the top or bottom half of the store file, depending on the split side.
612     Reference r =
613       top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
614     // Add the referred-to region's name as a dot-separated suffix.
615     // See REF_NAME_REGEX regex above.  The referred-to region's name is
616     // up in the path of the passed in <code>f</code> -- parentdir is family,
617     // then the directory above is the region name.
618     String parentRegionName = regionInfoForFs.getEncodedName();
619     // Write reference with same file id only with the other region name as
620     // suffix and into the new region location (under same family).
621     Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
622     return r.write(fs, p);
623   }
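  // Hedged sketch of how the split helpers fit together (error handling omitted; the daughter
  // HRegionInfos, the store file f, the policy, and regionFs are assumptions for illustration):
  //
  //   regionFs.createSplitsDir();                                            // fresh .splits dir
  //   // the daughter covering [splitRow, endKey) gets a "top" reference, the other gets "bottom"
  //   Path topRef    = regionFs.splitStoreFile(upperDaughter, "f", f, splitRow, true,  policy);
  //   Path bottomRef = regionFs.splitStoreFile(lowerDaughter, "f", f, splitRow, false, policy);
  //   Path upperDir  = regionFs.commitDaughterRegion(upperDaughter);         // move out of .splits
  //   Path lowerDir  = regionFs.commitDaughterRegion(lowerDaughter);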
624 
625   // ===========================================================================
626   //  Merge Helpers
627   // ===========================================================================
628   /** @return {@link Path} to the temp directory used during merge operations */
629   Path getMergesDir() {
630     return new Path(getRegionDir(), REGION_MERGES_DIR);
631   }
632 
633   Path getMergesDir(final HRegionInfo hri) {
634     return new Path(getMergesDir(), hri.getEncodedName());
635   }
636 
637   /**
638    * Clean up any merge detritus that may have been left around from previous merge attempts.
639    */
640   void cleanupMergesDir() throws IOException {
641     deleteDir(getMergesDir());
642   }
643 
644   /**
645    * Remove merged region
646    * @param mergedRegion {@link HRegionInfo}
647    * @throws IOException
648    */
649   void cleanupMergedRegion(final HRegionInfo mergedRegion) throws IOException {
650     Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
651     if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
652       throw new IOException("Failed delete of " + regionDir);
653     }
654   }
655 
656   /**
657    * Create the region merges directory.
658    * @throws IOException If merges dir already exists or we fail to create it.
659    * @see HRegionFileSystem#cleanupMergesDir()
660    */
661   void createMergesDir() throws IOException {
662     Path mergesdir = getMergesDir();
663     if (fs.exists(mergesdir)) {
664       LOG.info("The " + mergesdir
665           + " directory exists.  Hence deleting it to recreate it");
666       if (!fs.delete(mergesdir, true)) {
667         throw new IOException("Failed deletion of " + mergesdir
668             + " before creating them again.");
669       }
670     }
671     if (!fs.mkdirs(mergesdir))
672       throw new IOException("Failed create of " + mergesdir);
673   }
674 
675   /**
676    * Write out a merge reference under the given merges directory. Package-local
677    * so it doesn't leak out of the regionserver.
678    * @param mergedRegion {@link HRegionInfo} of the merged region
679    * @param familyName Column Family Name
680    * @param f File to create reference.
681    * @param mergedDir the merges directory under which the reference is written
682    * @return Path to created reference.
683    * @throws IOException
684    */
685   Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName,
686       final StoreFile f, final Path mergedDir)
687       throws IOException {
688     Path referenceDir = new Path(new Path(mergedDir,
689         mergedRegion.getEncodedName()), familyName);
690     // A whole reference to the store file.
691     Reference r = Reference.createTopReference(regionInfoForFs.getStartKey());
692     // Add the referred-to region's name as a dot-separated suffix.
693     // See REF_NAME_REGEX regex above. The referred-to region's name is
694     // up in the path of the passed in <code>f</code> -- parentdir is family,
695     // then the directory above is the region name.
696     String mergingRegionName = regionInfoForFs.getEncodedName();
697     // Write reference with same file id only with the other region name as
698     // suffix and into the new region location (under same family).
699     Path p = new Path(referenceDir, f.getPath().getName() + "."
700         + mergingRegionName);
701     return r.write(fs, p);
702   }
703 
704   /**
705    * Commit a merged region, moving it from the merges temporary directory to
706    * the proper location in the filesystem.
707    * @param mergedRegionInfo merged region {@link HRegionInfo}
708    * @throws IOException
709    */
710   void commitMergedRegion(final HRegionInfo mergedRegionInfo) throws IOException {
711     Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
712     Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
713     // Move the tmp dir to the expected location
714     if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
715       if (!fs.rename(mergedRegionTmpDir, regionDir)) {
716         throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
717             + regionDir);
718       }
719     }
720   }
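  // Hedged sketch of the merge helpers above, loosely mirroring how a region merge uses them
  // (mergedRegion, the parent store file f, and regionFs are illustrative placeholders):
  //
  //   regionFs.createMergesDir();                                            // fresh .merges dir
  //   Path mergedDir = regionFs.getMergesDir();
  //   Path ref = regionFs.mergeStoreFile(mergedRegion, "f", f, mergedDir);   // reference file
  //   regionFs.commitMergedRegion(mergedRegion);                             // move into table dir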
721 
722   // ===========================================================================
723   //  Create/Open/Delete Helpers
724   // ===========================================================================
725   /**
726    * Log the current state of the region
727    * @param LOG log to output information
728    * @throws IOException if an unexpected exception occurs
729    */
730   void logFileSystemState(final Log LOG) throws IOException {
731     FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
732   }
733 
734   /**
735    * @param hri
736    * @return Content of the file we write out to the filesystem under a region
737    * @throws IOException
738    */
739   private static byte[] getRegionInfoFileContent(final HRegionInfo hri) throws IOException {
740     return hri.toDelimitedByteArray();
741   }
742 
743   /**
744    * Create an {@link HRegionInfo} from the serialized version on disk.
745    * @param fs {@link FileSystem} that contains the Region Info file
746    * @param regionDir {@link Path} to the Region Directory that contains the Info file
747    * @return An {@link HRegionInfo} instance read from the Region Info file.
748    * @throws IOException if an error occurred during file open/read operation.
749    */
750   public static HRegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
751       throws IOException {
752     FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
753     try {
754       return HRegionInfo.parseFrom(in);
755     } finally {
756       in.close();
757     }
758   }
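  // Minimal usage sketch, assuming regionDir points at an existing region directory:
  //
  //   Path regionDir = new Path(tableDir, encodedRegionName);
  //   HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
  //   LOG.info("Recovered region info: " + hri.getRegionNameAsString());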
759 
760   /**
761    * Write the .regioninfo file on-disk.
762    */
763   private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
764       final Path regionInfoFile, final byte[] content) throws IOException {
765     // First check to get the permissions
766     FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
767     // Write the RegionInfo file content
768     FSDataOutputStream out = FSUtils.create(fs, regionInfoFile, perms, null);
769     try {
770       out.write(content);
771     } finally {
772       out.close();
773     }
774   }
775 
776   /**
777    * Write out an info file under the region directory. Useful for recovering mangled regions.
778    * If the regionInfo already exists on-disk, then we fast exit.
779    */
780   void checkRegionInfoOnFilesystem() throws IOException {
781     // Compose the content of the file so we can compare it to the length in the filesystem. If
782     // not the same, rewrite it (it may have been written in the old format using Writables
783     // instead of pb). The pb version is much shorter -- we now write w/o the toString version --
784     // so checking length only should be sufficient. I don't want to read the file every time to
785     // check if it is pb serialized.
786     byte[] content = getRegionInfoFileContent(regionInfoForFs);
787     try {
788       Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
789 
790       FileStatus status = fs.getFileStatus(regionInfoFile);
791       if (status != null && status.getLen() == content.length) {
792         // Then assume the content is good and move on.
793         // NOTE: the length alone is not sufficient to verify that the content matches.
794         return;
795       }
796 
797       LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
798       if (!fs.delete(regionInfoFile, false)) {
799         throw new IOException("Unable to remove existing " + regionInfoFile);
800       }
801     } catch (FileNotFoundException e) {
802       LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() +
803           " on table " + regionInfo.getTable());
804     }
805 
806     // Write HRI to a file in case we need to recover hbase:meta
807     writeRegionInfoOnFilesystem(content, true);
808   }
809 
810   /**
811    * Write out an info file under the region directory. Useful for recovering mangled regions.
812    * @param useTempDir indicates whether or not to use the region .tmp dir for safer file creation.
813    */
814   private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
815     byte[] content = getRegionInfoFileContent(regionInfoForFs);
816     writeRegionInfoOnFilesystem(content, useTempDir);
817   }
818 
819   /**
820    * Write out an info file under the region directory. Useful for recovering mangled regions.
821    * @param regionInfoContent serialized version of the {@link HRegionInfo}
822    * @param useTempDir indicates whether or not to use the region .tmp dir for safer file creation.
823    */
824   private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
825       final boolean useTempDir) throws IOException {
826     Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
827     if (useTempDir) {
828       // Create in tmpDir and then move into place in case we crash after
829       // create but before close. If we don't successfully close the file,
830       // subsequent region reopens will fail below because the create is still
831       // registered in the NN.
832 
833       // And then create the file
834       Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
835 
836       // If the datanode crashes or if the RS goes down just before close is called while trying
837       // to close the created regioninfo file in the .tmp directory, then on the next
838       // creation we will get an AlreadyCreatedException.
839       // Hence delete the file if it already exists before creating it.
840       if (FSUtils.isExists(fs, tmpPath)) {
841         FSUtils.delete(fs, tmpPath, true);
842       }
843 
844       // Write HRI to a file in case we need to recover hbase:meta
845       writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
846 
847       // Move the created file to the original path
848       if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
849         throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
850       }
851     } else {
852       // Write HRI to a file in case we need to recover hbase:meta
853       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
854     }
855   }
856 
857   /**
858    * Create a new Region on file-system.
859    * @param conf the {@link Configuration} to use
860    * @param fs {@link FileSystem} on which to create the region
861    * @param tableDir {@link Path} to where the table is being stored
862    * @param regionInfo {@link HRegionInfo} for region to be added
863    * @throws IOException if the region creation fails due to a FileSystem exception.
864    */
865   public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
866       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
867     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
868     Path regionDir = regionFs.getRegionDir();
869 
870     if (fs.exists(regionDir)) {
871       LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
872       throw new IOException("The specified region already exists on disk: " + regionDir);
873     }
874 
875     // Create the region directory
876     if (!createDirOnFileSystem(fs, conf, regionDir)) {
877       LOG.warn("Unable to create the region directory: " + regionDir);
878       throw new IOException("Unable to create region directory: " + regionDir);
879     }
880 
881     // Write HRI to a file in case we need to recover hbase:meta
882     regionFs.writeRegionInfoOnFilesystem(false);
883     return regionFs;
884   }
885 
886   /**
887    * Open Region from file-system.
888    * @param conf the {@link Configuration} to use
889    * @param fs {@link FileSystem} that contains the region
890    * @param tableDir {@link Path} to where the table is being stored
891    * @param regionInfo {@link HRegionInfo} for region to be opened
892    * @param readOnly True if you don't want to edit the region data
893    * @throws IOException if the region cannot be opened due to a FileSystem exception.
894    */
895   public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
896       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo, boolean readOnly)
897       throws IOException {
898     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
899     Path regionDir = regionFs.getRegionDir();
900 
901     if (!fs.exists(regionDir)) {
902       LOG.warn("Trying to open a region that does not exist on disk: " + regionDir);
903       throw new IOException("The specified region does not exist on disk: " + regionDir);
904     }
905 
906     if (!readOnly) {
907       // Cleanup temporary directories
908       regionFs.cleanupTempDir();
909       regionFs.cleanupSplitsDir();
910       regionFs.cleanupMergesDir();
911 
912       // If it doesn't exist, write the HRI to a file in case we need to recover hbase:meta
913       regionFs.checkRegionInfoOnFilesystem();
914     }
915 
916     return regionFs;
917   }
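  // Hedged lifecycle sketch tying the static helpers together (conf/fs/tableDir/hri are assumed
  // to be valid for an existing table; error handling omitted):
  //
  //   HRegionFileSystem created = HRegionFileSystem.createRegionOnFileSystem(conf, fs, tableDir, hri);
  //   HRegionFileSystem opened  = HRegionFileSystem.openRegionFromFileSystem(conf, fs, tableDir, hri, false);
  //   // ... serve the region ...
  //   HRegionFileSystem.deleteRegionFromFileSystem(conf, fs, tableDir, hri);  // archives hfiles first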
918 
919   /**
920    * Remove the region from the table directory, archiving the region's hfiles.
921    * @param conf the {@link Configuration} to use
922    * @param fs {@link FileSystem} from which to remove the region
923    * @param tableDir {@link Path} to where the table is being stored
924    * @param regionInfo {@link HRegionInfo} for region to be deleted
925    * @throws IOException if the request cannot be completed
926    */
927   public static void deleteRegionFromFileSystem(final Configuration conf,
928       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
929     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
930     Path regionDir = regionFs.getRegionDir();
931 
932     if (!fs.exists(regionDir)) {
933       LOG.warn("Trying to delete a region that does not exist on disk: " + regionDir);
934       return;
935     }
936 
937     if (LOG.isDebugEnabled()) {
938       LOG.debug("DELETING region " + regionDir);
939     }
940 
941     // Archive region
942     Path rootDir = FSUtils.getRootDir(conf);
943     HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
944 
945     // Delete empty region dir
946     if (!fs.delete(regionDir, true)) {
947       LOG.warn("Failed delete of " + regionDir);
948     }
949   }
950 
951   /**
952    * Creates a directory. Assumes the user has already checked for this directory's existence.
953    * @param dir
954    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
955    *         whether the directory exists or not, and returns true if it exists.
956    * @throws IOException
957    */
958   boolean createDir(Path dir) throws IOException {
959     int i = 0;
960     IOException lastIOE = null;
961     do {
962       try {
963         return fs.mkdirs(dir);
964       } catch (IOException ioe) {
965         lastIOE = ioe;
966         if (fs.exists(dir)) return true; // directory is present
967         try {
968           sleepBeforeRetry("Create Directory", i+1);
969         } catch (InterruptedException e) {
970           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
971         }
972       }
973     } while (++i <= hdfsClientRetriesNumber);
974     throw new IOException("Exception in createDir", lastIOE);
975   }
976 
977   /**
978    * Renames a directory. Assumes the user has already checked for this directory's existence.
979    * @param srcpath
980    * @param dstPath
981    * @return true if rename is successful.
982    * @throws IOException
983    */
984   boolean rename(Path srcpath, Path dstPath) throws IOException {
985     IOException lastIOE = null;
986     int i = 0;
987     do {
988       try {
989         return fs.rename(srcpath, dstPath);
990       } catch (IOException ioe) {
991         lastIOE = ioe;
992         if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
993         // dir is not there, retry after some time.
994         try {
995           sleepBeforeRetry("Rename Directory", i+1);
996         } catch (InterruptedException e) {
997           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
998         }
999       }
1000     } while (++i <= hdfsClientRetriesNumber);
1001 
1002     throw new IOException("Exception in rename", lastIOE);
1003   }
1004 
1005   /**
1006    * Deletes a directory. Assumes the user has already checked for this directory's existence.
1007    * @param dir
1008    * @return true if the directory is deleted.
1009    * @throws IOException
1010    */
1011   boolean deleteDir(Path dir) throws IOException {
1012     IOException lastIOE = null;
1013     int i = 0;
1014     do {
1015       try {
1016         return fs.delete(dir, true);
1017       } catch (IOException ioe) {
1018         lastIOE = ioe;
1019         if (!fs.exists(dir)) return true;
1020         // dir is there, retry deleting after some time.
1021         try {
1022           sleepBeforeRetry("Delete Directory", i+1);
1023         } catch (InterruptedException e) {
1024           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1025         }
1026       }
1027     } while (++i <= hdfsClientRetriesNumber);
1028 
1029     throw new IOException("Exception in DeleteDir", lastIOE);
1030   }
1031 
1032   /**
1033    * sleeping logic; handles the interrupt exception.
1034    */
1035   private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
1036     sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1037   }
1038 
1039   /**
1040    * Creates a directory for a filesystem and configuration object. Assumes the user has already
1041    * checked for this directory's existence.
1042    * @param fs
1043    * @param conf
1044    * @param dir
1045    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1046    *         whether the directory exists or not, and returns true if it exists.
1047    * @throws IOException
1048    */
1049   private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1050       throws IOException {
1051     int i = 0;
1052     IOException lastIOE = null;
1053     int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1054       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1055     int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1056       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1057     do {
1058       try {
1059         return fs.mkdirs(dir);
1060       } catch (IOException ioe) {
1061         lastIOE = ioe;
1062         if (fs.exists(dir)) return true; // directory is present
1063         try {
1064           sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1065         } catch (InterruptedException e) {
1066           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1067         }
1068       }
1069     } while (++i <= hdfsClientRetriesNumber);
1070 
1071     throw new IOException("Exception in createDir", lastIOE);
1072   }
1073 
1074   /**
1075    * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1076    * for this to avoid re-reading the configured values.
1077    */
1078   private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1079       int hdfsClientRetriesNumber) throws InterruptedException {
1080     if (sleepMultiplier > hdfsClientRetriesNumber) {
1081       LOG.debug(msg + ", retries exhausted");
1082       return;
1083     }
1084     LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1085     Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1086   }
1087 }