
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.io.InterruptedIOException;
25  import java.util.ArrayList;
26  import java.util.Collection;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.UUID;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataInputStream;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileStatus;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.FileUtil;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.fs.permission.FsPermission;
42  import org.apache.hadoop.hbase.HColumnDescriptor;
43  import org.apache.hadoop.hbase.HConstants;
44  import org.apache.hadoop.hbase.HRegionInfo;
45  import org.apache.hadoop.hbase.HTableDescriptor;
46  import org.apache.hadoop.hbase.KeyValue;
47  import org.apache.hadoop.hbase.KeyValueUtil;
48  import org.apache.hadoop.hbase.backup.HFileArchiver;
49  import org.apache.hadoop.hbase.fs.HFileSystem;
50  import org.apache.hadoop.hbase.io.Reference;
51  import org.apache.hadoop.hbase.util.Bytes;
52  import org.apache.hadoop.hbase.util.FSHDFSUtils;
53  import org.apache.hadoop.hbase.util.FSUtils;
54  import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
55  
56  /**
57   * View to an on-disk Region.
58   * Provides the set of methods necessary to interact with the on-disk region data.
59   */
60  @InterfaceAudience.Private
61  public class HRegionFileSystem {
62    private static final Log LOG = LogFactory.getLog(HRegionFileSystem.class);
63  
64    /** Name of the region info file that resides just under the region directory. */
65    public final static String REGION_INFO_FILE = ".regioninfo";
66  
67    /** Temporary subdirectory of the region directory used for merges. */
68    public static final String REGION_MERGES_DIR = ".merges";
69  
70    /** Temporary subdirectory of the region directory used for splits. */
71    public static final String REGION_SPLITS_DIR = ".splits";
72  
73    /** Temporary subdirectory of the region directory used for compaction output. */
74    private static final String REGION_TEMP_DIR = ".tmp";
75  
76    private final HRegionInfo regionInfo;
77    //regionInfo for interacting with FS (getting encodedName, etc)
78    private final HRegionInfo regionInfoForFs;
79    private final Configuration conf;
80    private final Path tableDir;
81    private final FileSystem fs;
82  
83    /**
84     * In order to handle NN connectivity hiccups, one needs to retry non-idempotent operations at the
85     * client level.
86     */
87    private final int hdfsClientRetriesNumber;
88    private final int baseSleepBeforeRetries;
89    private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
90    private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
91  
92    /**
93     * Create a view to the on-disk region
94     * @param conf the {@link Configuration} to use
95     * @param fs {@link FileSystem} that contains the region
96     * @param tableDir {@link Path} to where the table is being stored
97     * @param regionInfo {@link HRegionInfo} for region
98     */
99    HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
100       final HRegionInfo regionInfo) {
101     this.fs = fs;
102     this.conf = conf;
103     this.tableDir = tableDir;
104     this.regionInfo = regionInfo;
105     this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo);
106     this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
107       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
108     this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
109       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
110  }
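
  // Retry tuning sketch (illustrative, not part of this class): the two retry knobs read in the
  // constructor above are plain Configuration keys, so a deployment that sees frequent NameNode
  // hiccups could raise them before starting the region server. The values below are hypothetical
  // examples, not recommendations; only the key names and defaults come from this class.
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   conf.setInt("hdfs.client.retries.number", 15);          // default 10
  //   conf.setInt("hdfs.client.sleep.before.retries", 2000);  // default 1000 ms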
111 
112   /** @return the underlying {@link FileSystem} */
113   public FileSystem getFileSystem() {
114     return this.fs;
115   }
116 
117   /** @return the {@link HRegionInfo} that describes this on-disk region view */
118   public HRegionInfo getRegionInfo() {
119     return this.regionInfo;
120   }
121 
122   public HRegionInfo getRegionInfoForFS() {
123     return this.regionInfoForFs;
124   }
125 
126   /** @return {@link Path} to the table directory that contains this region. */
127   public Path getTableDir() {
128     return this.tableDir;
129   }
130 
131   /** @return {@link Path} to the region directory. */
132   public Path getRegionDir() {
133     return new Path(this.tableDir, this.regionInfoForFs.getEncodedName());
134   }
135 
136   // ===========================================================================
137   //  Temp Helpers
138   // ===========================================================================
139   /** @return {@link Path} to the region's temp directory, used for file creations */
140   Path getTempDir() {
141     return new Path(getRegionDir(), REGION_TEMP_DIR);
142   }
143 
144   /**
145    * Clean up any temp detritus that may have been left around from previous operation attempts.
146    */
147   void cleanupTempDir() throws IOException {
148     deleteDir(getTempDir());
149   }
150 
151   // ===========================================================================
152   //  Store/StoreFile Helpers
153   // ===========================================================================
154   /**
155    * Returns the directory path of the specified family
156    * @param familyName Column Family Name
157    * @return {@link Path} to the directory of the specified family
158    */
159   public Path getStoreDir(final String familyName) {
160     return new Path(this.getRegionDir(), familyName);
161   }
162 
163   /**
164    * Create the store directory for the specified family name
165    * @param familyName Column Family Name
166    * @return {@link Path} to the directory of the specified family
167    * @throws IOException if the directory creation fails.
168    */
169   Path createStoreDir(final String familyName) throws IOException {
170     Path storeDir = getStoreDir(familyName);
171     if(!fs.exists(storeDir) && !createDir(storeDir))
172       throw new IOException("Failed creating "+storeDir);
173     return storeDir;
174   }
175 
176   /**
177    * Returns the store files available for the family.
178    * This method filters out invalid store files.
179    * @param familyName Column Family Name
180    * @return a set of {@link StoreFileInfo} for the specified family.
181    */
182   public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
183     return getStoreFiles(Bytes.toString(familyName));
184   }
185 
186   public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
187     return getStoreFiles(familyName, true);
188   }
189 
190   /**
191    * Returns the store files available for the family.
192    * When <code>validate</code> is true, this method filters out invalid store files.
193    * @param familyName Column Family Name
194    * @return a set of {@link StoreFileInfo} for the specified family.
195    */
196   public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate)
197       throws IOException {
198     Path familyDir = getStoreDir(familyName);
199     FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
200     if (files == null) {
201       LOG.debug("No StoreFiles for: " + familyDir);
202       return null;
203     }
204 
205     ArrayList<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>(files.length);
206     for (FileStatus status: files) {
207       if (validate && !StoreFileInfo.isValid(status)) {
208         LOG.warn("Invalid StoreFile: " + status.getPath());
209         continue;
210       }
211       StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
212         regionInfoForFs, familyName, status.getPath());
213       storeFiles.add(info);
214 
215     }
216     return storeFiles;
217   }
218 
219   /**
220    * Return Qualified Path of the specified family/file
221    *
222    * @param familyName Column Family Name
223    * @param fileName File Name
224    * @return The qualified Path for the specified family/file
225    */
226   Path getStoreFilePath(final String familyName, final String fileName) {
227     Path familyDir = getStoreDir(familyName);
228     return new Path(familyDir, fileName).makeQualified(this.fs);
229   }
230 
231   /**
232    * Return the store file information of the specified family/file.
233    *
234    * @param familyName Column Family Name
235    * @param fileName File Name
236    * @return The {@link StoreFileInfo} for the specified family/file
237    */
238   StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
239       throws IOException {
240     Path familyDir = getStoreDir(familyName);
241     return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
242       regionInfoForFs, familyName, new Path(familyDir, fileName));
243   }
244 
245   /**
246    * Returns true if the specified family has reference files
247    * @param familyName Column Family Name
248    * @return true if family contains reference files
249    * @throws IOException
250    */
251   public boolean hasReferences(final String familyName) throws IOException {
252     FileStatus[] files = FSUtils.listStatus(fs, getStoreDir(familyName));
253     if (files != null) {
254       for(FileStatus stat: files) {
255         if(stat.isDirectory()) {
256           continue;
257         }
258         if(StoreFileInfo.isReference(stat.getPath())) {
259           return true;
260         }
261       }
262     }
263     return false;
264   }
265 
266   /**
267    * Check whether the region has reference files
268    * @param htd table descriptor of the region
269    * @return true if the region has reference files
270    * @throws IOException
271    */
272   public boolean hasReferences(final HTableDescriptor htd) throws IOException {
273     for (HColumnDescriptor family : htd.getFamilies()) {
274       if (hasReferences(family.getNameAsString())) {
275         return true;
276       }
277     }
278     return false;
279   }
280 
281   /**
282    * @return the set of families present on disk
283    * @throws IOException
284    */
285   public Collection<String> getFamilies() throws IOException {
286     FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
287     if (fds == null) return null;
288 
289     ArrayList<String> families = new ArrayList<String>(fds.length);
290     for (FileStatus status: fds) {
291       families.add(status.getPath().getName());
292     }
293 
294     return families;
295   }
296 
297   /**
298    * Remove the region family from disk, archiving the store files.
299    * @param familyName Column Family Name
300    * @throws IOException if an error occurs during the archiving
301    */
302   public void deleteFamily(final String familyName) throws IOException {
303     // archive family store files
304     HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName));
305 
306     // delete the family folder
307     Path familyDir = getStoreDir(familyName);
308     if(fs.exists(familyDir) && !deleteDir(familyDir))
309       throw new IOException("Could not delete family " + familyName
310           + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "("
311           + regionInfoForFs.getEncodedName() + ")");
312   }
313 
314   /**
315    * Generate a unique file name, used by createTempName() and commitStoreFile()
316    * @param suffix extra information to append to the generated name
317    * @return Unique file name
318    */
319   private static String generateUniqueName(final String suffix) {
320     String name = UUID.randomUUID().toString().replaceAll("-", "");
321     if (suffix != null) name += suffix;
322     return name;
323   }
324 
325   /**
326    * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
327    * to get a safer file creation.
328    * <code>
329    * Path file = fs.createTempName();
330    * ...StoreFile.Writer(file)...
331    * fs.commitStoreFile("family", file);
332    * </code>
333    *
334    * @return Unique {@link Path} of the temporary file
335    */
336   public Path createTempName() {
337     return createTempName(null);
338   }
339 
340   /**
341    * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
342    * to get a safer file creation.
343    * <code>
344    * Path file = fs.createTempName();
345    * ...StoreFile.Writer(file)...
346    * fs.commitStoreFile("family", file);
347    * </code>
348    *
349    * @param suffix extra information to append to the generated name
350    * @return Unique {@link Path} of the temporary file
351    */
352   public Path createTempName(final String suffix) {
353     return new Path(getTempDir(), generateUniqueName(suffix));
354   }
355 
356   /**
357    * Move the file from a build/temp location to the main family store directory.
358    * @param familyName Family that will gain the file
359    * @param buildPath {@link Path} to the file to commit.
360    * @return The new {@link Path} of the committed file
361    * @throws IOException
362    */
363   public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
364     return commitStoreFile(familyName, buildPath, -1, false);
365   }
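
  // Usage sketch (illustrative): flushes and compactions typically write a new store file into the
  // region's .tmp directory and then move it into the family directory, following the pattern the
  // Javadoc above describes. Only createTempName() and commitStoreFile() come from this class; the
  // writer step and the family name "f" are hypothetical.
  //
  //   Path tmp = regionFs.createTempName();
  //   // ... write the new HFile at tmp, e.g. with a StoreFile.Writer ...
  //   Path committed = regionFs.commitStoreFile("f", tmp);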
366 
367   /**
368    * Move the file from a build/temp location to the main family store directory.
369    * @param familyName Family that will gain the file
370    * @param buildPath {@link Path} to the file to commit.
371    * @param seqNum Sequence Number to append to the file name (less than 0 if no sequence number)
372    * @param generateNewName False if you want to keep the buildPath name
373    * @return The new {@link Path} of the committed file
374    * @throws IOException
375    */
376   private Path commitStoreFile(final String familyName, final Path buildPath,
377       final long seqNum, final boolean generateNewName) throws IOException {
378     Path storeDir = getStoreDir(familyName);
379     if(!fs.exists(storeDir) && !createDir(storeDir))
380       throw new IOException("Failed creating " + storeDir);
381 
382     String name = buildPath.getName();
383     if (generateNewName) {
384       name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
385     }
386     Path dstPath = new Path(storeDir, name);
387     if (!fs.exists(buildPath)) {
388       throw new FileNotFoundException(buildPath.toString());
389     }
390     LOG.debug("Committing store file " + buildPath + " as " + dstPath);
391     // buildPath exists, therefore not doing an exists() check.
392     if (!rename(buildPath, dstPath)) {
393       throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
394     }
395     return dstPath;
396   }
397 
398 
399   /**
400    * Moves multiple store files into the region's family store directories.
401    * @param storeFiles list of store files divided by family
402    * @throws IOException
403    */
404   void commitStoreFiles(final Map<byte[], List<StoreFile>> storeFiles) throws IOException {
405     for (Map.Entry<byte[], List<StoreFile>> es: storeFiles.entrySet()) {
406       String familyName = Bytes.toString(es.getKey());
407       for (StoreFile sf: es.getValue()) {
408         commitStoreFile(familyName, sf.getPath());
409       }
410     }
411   }
412 
413   /**
414    * Archives the specified store file from the specified family.
415    * @param familyName Family that contains the store files
416    * @param filePath {@link Path} to the store file to remove
417    * @throws IOException if the archiving fails
418    */
419   public void removeStoreFile(final String familyName, final Path filePath)
420       throws IOException {
421     HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs,
422         this.tableDir, Bytes.toBytes(familyName), filePath);
423   }
424 
425   /**
426    * Closes and archives the specified store files from the specified family.
427    * @param familyName Family that contains the store files
428    * @param storeFiles set of store files to remove
429    * @throws IOException if the archiving fails
430    */
431   public void removeStoreFiles(final String familyName, final Collection<StoreFile> storeFiles)
432       throws IOException {
433     HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs,
434         this.tableDir, Bytes.toBytes(familyName), storeFiles);
435   }
436 
437   /**
438    * Bulk load: Add a specified store file to the specified family.
439    * If the source file is on the same file-system, it is moved from the
440    * source location to the destination location; otherwise it is copied over.
441    *
442    * @param familyName Family that will gain the file
443    * @param srcPath {@link Path} to the file to import
444    * @param seqNum Bulk Load sequence number
445    * @return The destination {@link Path} of the bulk loaded file
446    * @throws IOException
447    */
448   Path bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
449       throws IOException {
450     // Copy the file if it's on another filesystem
451     FileSystem srcFs = srcPath.getFileSystem(conf);
452     FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;
453 
454     // We can't compare FileSystem instances as equals() includes UGI instance
455     // as part of the comparison and won't work when doing SecureBulkLoad
456     // TODO deal with viewFS
457     if (!FSHDFSUtils.isSameHdfs(conf, srcFs, desFs)) {
458       LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
459           "the destination store. Copying file over to destination filesystem.");
460       Path tmpPath = createTempName();
461       FileUtil.copy(srcFs, srcPath, fs, tmpPath, false, conf);
462       LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
463       srcPath = tmpPath;
464     }
465 
466     return commitStoreFile(familyName, srcPath, seqNum, true);
467   }
468 
469   // ===========================================================================
470   //  Splits Helpers
471   // ===========================================================================
472   /** @return {@link Path} to the temp directory used during split operations */
473   Path getSplitsDir() {
474     return new Path(getRegionDir(), REGION_SPLITS_DIR);
475   }
476 
477   Path getSplitsDir(final HRegionInfo hri) {
478     return new Path(getSplitsDir(), hri.getEncodedName());
479   }
480 
481   /**
482    * Clean up any split detritus that may have been left around from previous split attempts.
483    */
484   void cleanupSplitsDir() throws IOException {
485     deleteDir(getSplitsDir());
486   }
487 
488   /**
489    * Clean up any split detritus that may have been left around from previous
490    * split attempts.
491    * Call this method on initial region deploy.
492    * @throws IOException
493    */
494   void cleanupAnySplitDetritus() throws IOException {
495     Path splitdir = this.getSplitsDir();
496     if (!fs.exists(splitdir)) return;
497     // Look at the splitdir.  It could have the encoded names of the daughter
498     // regions we tried to make.  See if the daughter regions actually got made
499     // out under the tabledir.  If here under splitdir still, then the split did
500     // not complete.  Try and do cleanup.  This code WILL NOT catch the case
501     // where we successfully created daughter A but the regionserver crashed during
502     // the creation of daughter B.  In this case, there'll be an orphan daughter
503     // dir in the filesystem.  TODO: Fix.
504     FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
505     if (daughters != null) {
506       for (FileStatus daughter: daughters) {
507         Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
508         if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
509           throw new IOException("Failed delete of " + daughterDir);
510         }
511       }
512     }
513     cleanupSplitsDir();
514     LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
515   }
516 
517   /**
518    * Remove daughter region
519    * @param regionInfo daughter {@link HRegionInfo}
520    * @throws IOException
521    */
522   void cleanupDaughterRegion(final HRegionInfo regionInfo) throws IOException {
523     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
524     if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
525       throw new IOException("Failed delete of " + regionDir);
526     }
527   }
528 
529   /**
530    * Commit a daughter region, moving it from the split temporary directory
531    * to the proper location in the filesystem.
532    *
533    * @param regionInfo                 daughter {@link org.apache.hadoop.hbase.HRegionInfo}
534    * @throws IOException
535    */
536   Path commitDaughterRegion(final HRegionInfo regionInfo)
537       throws IOException {
538     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
539     Path daughterTmpDir = this.getSplitsDir(regionInfo);
540 
541     if (fs.exists(daughterTmpDir)) {
542 
543       // Write HRI to a file in case we need to recover hbase:meta
544       Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
545       byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
546       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
547 
548       // Move the daughter temp dir to the table dir
549       if (!rename(daughterTmpDir, regionDir)) {
550         throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
551       }
552     }
553 
554     return regionDir;
555   }
556 
557   /**
558    * Create the region splits directory.
559    */
560   void createSplitsDir() throws IOException {
561     Path splitdir = getSplitsDir();
562     if (fs.exists(splitdir)) {
563       LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
564       if (!deleteDir(splitdir)) {
565         throw new IOException("Failed deletion of " + splitdir
566             + " before creating them again.");
567       }
568     }
569     // splitDir doesn't exists now. No need to do an exists() call for it.
570     if (!createDir(splitdir)) {
571       throw new IOException("Failed create of " + splitdir);
572     }
573   }
574 
575   /**
576    * Write out a split reference. Package local so it doesn't leak out of
577    * regionserver.
578    * @param hri {@link HRegionInfo} of the destination
579    * @param familyName Column Family Name
580    * @param f File to split.
581    * @param splitRow Split Row
582    * @param top True if we are referring to the top half of the hfile.
583    * @param splitPolicy the region's split policy; may be null
584    * @return Path to created reference.
585    * @throws IOException
586    */
587   Path splitStoreFile(final HRegionInfo hri, final String familyName, final StoreFile f,
588       final byte[] splitRow, final boolean top, RegionSplitPolicy splitPolicy) throws IOException {
589 
590     if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) {
591       // Check whether the split row lies in the range of the store file
592       // If it is outside the range, return directly.
593       try {
594         if (top) {
595           //check if larger than last key.
596           KeyValue splitKey = KeyValueUtil.createFirstOnRow(splitRow);
597           byte[] lastKey = f.getLastKey();
598           // If lastKey is null means storefile is empty.
599           if (lastKey == null) {
600             return null;
601           }
602           if (f.getComparator().compareFlatKey(splitKey.getBuffer(),
603             splitKey.getKeyOffset(), splitKey.getKeyLength(), lastKey, 0, lastKey.length) > 0) {
604             return null;
605           }
606         } else {
607           //check if smaller than first key
608           KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow);
609           byte[] firstKey = f.getFirstKey();
610           // If firstKey is null means storefile is empty.
611           if (firstKey == null) {
612             return null;
613           }
614           if (f.getComparator().compareFlatKey(splitKey.getBuffer(),
615             splitKey.getKeyOffset(), splitKey.getKeyLength(), firstKey, 0, firstKey.length) < 0) {
616             return null;
617           }
618         }
619       } finally {
620         f.closeReader(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true);
621       }
622     }
623 
624     Path splitDir = new Path(getSplitsDir(hri), familyName);
625     // A reference to the top or bottom half of the store file, depending on the split side.
626     Reference r =
627       top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
628     // Add the referred-to region's name as a dot-separated suffix.
629     // See the REF_NAME_REGEX regex.  The referred-to region's name is
630     // up in the path of the passed-in <code>f</code> -- the parent dir is the family,
631     // and the directory above that is the region name.
632     String parentRegionName = regionInfoForFs.getEncodedName();
633     // Write reference with same file id only with the other region name as
634     // suffix and into the new region location (under same family).
635     Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
636     return r.write(fs, p);
637   }
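
  // Layout sketch (illustrative, hypothetical names): for a parent region with encoded name
  // "abc123" splitting store file "hfile1" of family "f" for a daughter with encoded name "d1",
  // the reference written above lands at roughly
  //
  //   <tableDir>/abc123/.splits/d1/f/hfile1.abc123
  //
  // i.e. the parent's encoded name becomes the dot-separated suffix that lets the daughter region
  // resolve the referred-to file back in the parent.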
638 
639   // ===========================================================================
640   //  Merge Helpers
641   // ===========================================================================
642   /** @return {@link Path} to the temp directory used during merge operations */
643   Path getMergesDir() {
644     return new Path(getRegionDir(), REGION_MERGES_DIR);
645   }
646 
647   Path getMergesDir(final HRegionInfo hri) {
648     return new Path(getMergesDir(), hri.getEncodedName());
649   }
650 
651   /**
652    * Clean up any merge detritus that may have been left around from previous merge attempts.
653    */
654   void cleanupMergesDir() throws IOException {
655     deleteDir(getMergesDir());
656   }
657 
658   /**
659    * Remove merged region
660    * @param mergedRegion {@link HRegionInfo}
661    * @throws IOException
662    */
663   void cleanupMergedRegion(final HRegionInfo mergedRegion) throws IOException {
664     Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
665     if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
666       throw new IOException("Failed delete of " + regionDir);
667     }
668   }
669 
670   /**
671    * Create the region merges directory.
672    * @throws IOException If we fail to delete an existing merges dir or fail to create it.
673    * @see HRegionFileSystem#cleanupMergesDir()
674    */
675   void createMergesDir() throws IOException {
676     Path mergesdir = getMergesDir();
677     if (fs.exists(mergesdir)) {
678       LOG.info("The " + mergesdir
679           + " directory exists.  Hence deleting it to recreate it");
680       if (!fs.delete(mergesdir, true)) {
681         throw new IOException("Failed deletion of " + mergesdir
682             + " before creating them again.");
683       }
684     }
685     if (!fs.mkdirs(mergesdir))
686       throw new IOException("Failed create of " + mergesdir);
687   }
688 
689   /**
690    * Write out a merge reference under the given merges directory. Package local
691    * so it doesn't leak out of regionserver.
692    * @param mergedRegion {@link HRegionInfo} of the merged region
693    * @param familyName Column Family Name
694    * @param f File to create the reference to.
695    * @param mergedDir the merges directory
696    * @return Path to created reference.
697    * @throws IOException
698    */
699   Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName,
700       final StoreFile f, final Path mergedDir)
701       throws IOException {
702     Path referenceDir = new Path(new Path(mergedDir,
703         mergedRegion.getEncodedName()), familyName);
704     // A whole reference to the store file.
705     Reference r = Reference.createTopReference(regionInfoForFs.getStartKey());
706     // Add the referred-to region's name as a dot-separated suffix.
707     // See the REF_NAME_REGEX regex. The referred-to region's name is
708     // up in the path of the passed-in <code>f</code> -- the parent dir is the family,
709     // and the directory above that is the region name.
710     String mergingRegionName = regionInfoForFs.getEncodedName();
711     // Write reference with same file id only with the other region name as
712     // suffix and into the new region location (under same family).
713     Path p = new Path(referenceDir, f.getPath().getName() + "."
714         + mergingRegionName);
715     return r.write(fs, p);
716   }
717 
718   /**
719    * Commit a merged region, moving it from the merges temporary directory to
720    * the proper location in the filesystem.
721    * @param mergedRegionInfo merged region {@link HRegionInfo}
722    * @throws IOException
723    */
724   void commitMergedRegion(final HRegionInfo mergedRegionInfo) throws IOException {
725     Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
726     Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
727     // Move the tmp dir to the expected location
728     if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
729       if (!fs.rename(mergedRegionTmpDir, regionDir)) {
730         throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
731             + regionDir);
732       }
733     }
734   }
735 
736   // ===========================================================================
737   //  Create/Open/Delete Helpers
738   // ===========================================================================
739   /**
740    * Log the current state of the region
741    * @param LOG log to output information
742    * @throws IOException if an unexpected exception occurs
743    */
744   void logFileSystemState(final Log LOG) throws IOException {
745     FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
746   }
747 
748   /**
749    * @param hri
750    * @return Content of the file we write out to the filesystem under a region
751    * @throws IOException
752    */
753   private static byte[] getRegionInfoFileContent(final HRegionInfo hri) throws IOException {
754     return hri.toDelimitedByteArray();
755   }
756 
757   /**
758    * Create a {@link HRegionInfo} from the serialized version on-disk.
759    * @param fs {@link FileSystem} that contains the Region Info file
760    * @param regionDir {@link Path} to the Region Directory that contains the Info file
761    * @return An {@link HRegionInfo} instance read from the Region Info file.
762    * @throws IOException if an error occurred during file open/read operation.
763    */
764   public static HRegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
765       throws IOException {
766     FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
767     try {
768       return HRegionInfo.parseFrom(in);
769     } finally {
770       in.close();
771     }
772   }
773 
774   /**
775    * Write the .regioninfo file on-disk.
776    */
777   private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
778       final Path regionInfoFile, final byte[] content) throws IOException {
779     // First check to get the permissions
780     FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
781     // Write the RegionInfo file content
782     FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null);
783     try {
784       out.write(content);
785     } finally {
786       out.close();
787     }
788   }
789 
790   /**
791    * Write out an info file under the region directory. Useful for recovering mangled regions.
792    * If the .regioninfo file already exists on-disk, then we fast exit.
793    */
794   void checkRegionInfoOnFilesystem() throws IOException {
795     // Compose the content of the file so we can compare it to the length in the filesystem. If not same,
796     // rewrite it (it may have been written in the old format using Writables instead of pb). The
797     // pb version is much shorter -- we write now w/o the toString version -- so checking length
798     // only should be sufficient. We don't want to read the file every time to check if it is pb
799     // serialized.
800     byte[] content = getRegionInfoFileContent(regionInfoForFs);
801 
802     // Verify if the region directory exists before opening a region. We need to do this since if
803     // the region directory doesn't exist we will re-create the region directory and a new HRI
804     // when HRegion.openHRegion() is called.
805     try {
806       FileStatus status = fs.getFileStatus(getRegionDir());
807     } catch (FileNotFoundException e) {
808       LOG.warn(getRegionDir() + " doesn't exist for region: " + regionInfoForFs.getEncodedName() +
809           " on table " + regionInfo.getTable());
810     }
811 
812     try {
813       Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
814       FileStatus status = fs.getFileStatus(regionInfoFile);
815       if (status != null && status.getLen() == content.length) {
816         // Then assume the content is good and move on.
817         // NOTE: the length alone is not sufficient to verify that the content matches.
818         return;
819       }
820 
821       LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
822       if (!fs.delete(regionInfoFile, false)) {
823         throw new IOException("Unable to remove existing " + regionInfoFile);
824       }
825     } catch (FileNotFoundException e) {
826       LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() +
827           " on table " + regionInfo.getTable());
828     }
829 
830     // Write HRI to a file in case we need to recover hbase:meta
831     writeRegionInfoOnFilesystem(content, true);
832   }
833 
834   /**
835    * Write out an info file under the region directory. Useful for recovering mangled regions.
836    * @param useTempDir indicates whether to use the region's .tmp dir for safer file creation.
837    */
838   private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
839     byte[] content = getRegionInfoFileContent(regionInfoForFs);
840     writeRegionInfoOnFilesystem(content, useTempDir);
841   }
842 
843   /**
844    * Write out an info file under the region directory. Useful for recovering mangled regions.
845    * @param regionInfoContent serialized version of the {@link HRegionInfo}
846    * @param useTempDir indicates whether to use the region's .tmp dir for safer file creation.
847    */
848   private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
849       final boolean useTempDir) throws IOException {
850     Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
851     if (useTempDir) {
852       // Create in tmpDir and then move into place in case we crash after
853       // create but before close. If we don't successfully close the file,
854       // subsequent region reopens will fail the below because create is
855       // registered in NN.
856 
857       // And then create the file
858       Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
859 
860       // If a datanode crashes or the RS goes down just before the close is called while trying to
861       // close the created regioninfo file in the .tmp directory, then on the next
862       // creation we will get an AlreadyCreatedException.
863       // Hence delete the file if it exists before creating it again.
864       if (FSUtils.isExists(fs, tmpPath)) {
865         FSUtils.delete(fs, tmpPath, true);
866       }
867 
868       // Write HRI to a file in case we need to recover hbase:meta
869       writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
870 
871       // Move the created file to the original path
872       if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
873         throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
874       }
875     } else {
876       // Write HRI to a file in case we need to recover hbase:meta
877       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
878     }
879   }
880 
881   /**
882    * Create a new Region on file-system.
883    * @param conf the {@link Configuration} to use
884    * @param fs {@link FileSystem} on which to create the region
885    * @param tableDir {@link Path} to where the table is being stored
886    * @param regionInfo {@link HRegionInfo} for region to be added
887    * @throws IOException if the region creation fails due to a FileSystem exception.
888    */
889   public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
890       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
891     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
892     Path regionDir = regionFs.getRegionDir();
893 
894     if (fs.exists(regionDir)) {
895       LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
896       throw new IOException("The specified region already exists on disk: " + regionDir);
897     }
898 
899     // Create the region directory
900     if (!createDirOnFileSystem(fs, conf, regionDir)) {
901       LOG.warn("Unable to create the region directory: " + regionDir);
902       throw new IOException("Unable to create region directory: " + regionDir);
903     }
904 
905     // Write HRI to a file in case we need to recover hbase:meta
906     // Only primary replicas should write region info
907     if (regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
908       regionFs.writeRegionInfoOnFilesystem(false);
909     } else {
910       if (LOG.isDebugEnabled())
911         LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
912     }
913     return regionFs;
914   }
915 
916   /**
917    * Open Region from file-system.
918    * @param conf the {@link Configuration} to use
919    * @param fs {@link FileSystem} from which to open the region
920    * @param tableDir {@link Path} to where the table is being stored
921    * @param regionInfo {@link HRegionInfo} for region to be opened
922    * @param readOnly True if you don't want to edit the region data
923    * @throws IOException if opening the region fails due to a FileSystem exception.
924    */
925   public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
926       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo, boolean readOnly)
927       throws IOException {
928     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
929     Path regionDir = regionFs.getRegionDir();
930 
931     if (!fs.exists(regionDir)) {
932       LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
933       throw new IOException("The specified region do not exists on disk: " + regionDir);
934     }
935 
936     if (!readOnly) {
937       // Cleanup temporary directories
938       regionFs.cleanupTempDir();
939       regionFs.cleanupSplitsDir();
940       regionFs.cleanupMergesDir();
941 
942       // If it doesn't exist, write the HRI to a file, in case we need to recover hbase:meta
943       // Only create HRI if we are the default replica
944       if (regionInfo.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID) {
945         regionFs.checkRegionInfoOnFilesystem();
946       } else {
947         if (LOG.isDebugEnabled()) {
948           LOG.debug("Skipping creation of .regioninfo file for " + regionInfo);
949         }
950       }
951     }
952 
953     return regionFs;
954   }
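
  // Usage sketch (illustrative; conf, fs, tableDir and regionInfo are assumed to be in scope): a
  // region open typically builds the view through this factory, which also cleans up leftover
  // .tmp, .splits and .merges directories unless the region is opened read-only.
  //
  //   HRegionFileSystem regionFs =
  //       HRegionFileSystem.openRegionFromFileSystem(conf, fs, tableDir, regionInfo, false);
  //   Collection<StoreFileInfo> files = regionFs.getStoreFiles("f");  // "f" is a hypothetical family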
955 
956   /**
957    * Remove the region from the table directory, archiving the region's hfiles.
958    * @param conf the {@link Configuration} to use
959    * @param fs {@link FileSystem} from which to remove the region
960    * @param tableDir {@link Path} to where the table is being stored
961    * @param regionInfo {@link HRegionInfo} for region to be deleted
962    * @throws IOException if the request cannot be completed
963    */
964   public static void deleteRegionFromFileSystem(final Configuration conf,
965       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
966     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
967     Path regionDir = regionFs.getRegionDir();
968 
969     if (!fs.exists(regionDir)) {
970       LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
971       return;
972     }
973 
974     if (LOG.isDebugEnabled()) {
975       LOG.debug("DELETING region " + regionDir);
976     }
977 
978     // Archive region
979     Path rootDir = FSUtils.getRootDir(conf);
980     HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
981 
982     // Delete empty region dir
983     if (!fs.delete(regionDir, true)) {
984       LOG.warn("Failed delete of " + regionDir);
985     }
986   }
987 
988   /**
989    * Creates a directory. Assumes the user has already checked for this directory existence.
990    * @param dir
991    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
992    *         whether the directory exists or not, and returns true if it exists.
993    * @throws IOException
994    */
995   boolean createDir(Path dir) throws IOException {
996     int i = 0;
997     IOException lastIOE = null;
998     do {
999       try {
1000         return fs.mkdirs(dir);
1001       } catch (IOException ioe) {
1002         lastIOE = ioe;
1003         if (fs.exists(dir)) return true; // directory is present
1004         try {
1005           sleepBeforeRetry("Create Directory", i+1);
1006         } catch (InterruptedException e) {
1007           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1008         }
1009       }
1010     } while (++i <= hdfsClientRetriesNumber);
1011     throw new IOException("Exception in createDir", lastIOE);
1012   }
1013 
1014   /**
1015    * Renames a directory. Assumes the user has already checked for this directory existence.
1016    * @param srcpath
1017    * @param dstPath
1018    * @return true if rename is successful.
1019    * @throws IOException
1020    */
1021   boolean rename(Path srcpath, Path dstPath) throws IOException {
1022     IOException lastIOE = null;
1023     int i = 0;
1024     do {
1025       try {
1026         return fs.rename(srcpath, dstPath);
1027       } catch (IOException ioe) {
1028         lastIOE = ioe;
1029         if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
1030         // dir is not there, retry after some time.
1031         try {
1032           sleepBeforeRetry("Rename Directory", i+1);
1033         } catch (InterruptedException e) {
1034           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1035         }
1036       }
1037     } while (++i <= hdfsClientRetriesNumber);
1038 
1039     throw new IOException("Exception in rename", lastIOE);
1040   }
1041 
1042   /**
1043    * Deletes a directory. Assumes the user has already checked for this directory existence.
1044    * @param dir
1045    * @return true if the directory is deleted.
1046    * @throws IOException
1047    */
1048   boolean deleteDir(Path dir) throws IOException {
1049     IOException lastIOE = null;
1050     int i = 0;
1051     do {
1052       try {
1053         return fs.delete(dir, true);
1054       } catch (IOException ioe) {
1055         lastIOE = ioe;
1056         if (!fs.exists(dir)) return true;
1057         // dir is there, retry deleting after some time.
1058         try {
1059           sleepBeforeRetry("Delete Directory", i+1);
1060         } catch (InterruptedException e) {
1061           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1062         }
1063       }
1064     } while (++i <= hdfsClientRetriesNumber);
1065 
1066     throw new IOException("Exception in DeleteDir", lastIOE);
1067   }
1068 
1069   /**
1070    * sleeping logic; handles the interrupt exception.
1071    */
1072   private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
1073     sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1074   }
1075 
1076   /**
1077    * Creates a directory for a filesystem and configuration object. Assumes the user has already
1078    * checked for this directory existence.
1079    * @param fs
1080    * @param conf
1081    * @param dir
1082    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1083    *         whether the directory exists or not, and returns true if it exists.
1084    * @throws IOException
1085    */
1086   private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1087       throws IOException {
1088     int i = 0;
1089     IOException lastIOE = null;
1090     int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1091       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1092     int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1093       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1094     do {
1095       try {
1096         return fs.mkdirs(dir);
1097       } catch (IOException ioe) {
1098         lastIOE = ioe;
1099         if (fs.exists(dir)) return true; // directory is present
1100         try {
1101           sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1102         } catch (InterruptedException e) {
1103           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1104         }
1105       }
1106     } while (++i <= hdfsClientRetriesNumber);
1107 
1108     throw new IOException("Exception in createDir", lastIOE);
1109   }
1110 
1111   /**
1112    * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1113    * for this to avoid re-looking up the integer config values.
1114    */
1115   private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1116       int hdfsClientRetriesNumber) throws InterruptedException {
1117     if (sleepMultiplier > hdfsClientRetriesNumber) {
1118       LOG.debug(msg + ", retries exhausted");
1119       return;
1120     }
1121     LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1122     Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1123   }
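
  // Backoff sketch: the sleep above is linear, baseSleepBeforeRetries * sleepMultiplier. With the
  // defaults (1000 ms base, 10 retries) the waits are roughly
  //
  //   1s, 2s, 3s, ..., 10s  ->  about 55 seconds of total sleep before the caller gives up.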
1124 }