
1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.InterruptedIOException;
24  import java.io.IOException;
25  import java.util.ArrayList;
26  import java.util.Collection;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.UUID;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataInputStream;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileStatus;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.FileUtil;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.fs.PathFilter;
42  import org.apache.hadoop.fs.permission.FsPermission;
43  import org.apache.hadoop.hbase.HColumnDescriptor;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HRegionInfo;
46  import org.apache.hadoop.hbase.HTableDescriptor;
47  import org.apache.hadoop.hbase.KeyValue;
48  import org.apache.hadoop.hbase.backup.HFileArchiver;
49  import org.apache.hadoop.hbase.fs.HFileSystem;
50  import org.apache.hadoop.hbase.io.Reference;
51  import org.apache.hadoop.hbase.util.Bytes;
52  import org.apache.hadoop.hbase.util.FSHDFSUtils;
53  import org.apache.hadoop.hbase.util.FSUtils;
54  
55  /**
56   * View to an on-disk Region.
57   * Provides the set of methods necessary to interact with the on-disk region data.
58   */
59  @InterfaceAudience.Private
60  public class HRegionFileSystem {
61    public static final Log LOG = LogFactory.getLog(HRegionFileSystem.class);
62  
63    /** Name of the region info file that resides just under the region directory. */
64    public final static String REGION_INFO_FILE = ".regioninfo";
65  
66    /** Temporary subdirectory of the region directory used for merges. */
67    public static final String REGION_MERGES_DIR = ".merges";
68  
69    /** Temporary subdirectory of the region directory used for splits. */
70    public static final String REGION_SPLITS_DIR = ".splits";
71  
72    /** Temporary subdirectory of the region directory used for compaction output. */
73    private static final String REGION_TEMP_DIR = ".tmp";
74  
75    private final HRegionInfo regionInfo;
76    private final Configuration conf;
77    private final Path tableDir;
78    private final FileSystem fs;
79    
80    /**
81     * In order to handle NN connectivity hiccups, one needs to retry non-idempotent operations at the
82     * client level.
83     */
84    private final int hdfsClientRetriesNumber;
85    private final int baseSleepBeforeRetries;
86    private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
87    private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
88  
89    /**
90     * Create a view to the on-disk region
91     * @param conf the {@link Configuration} to use
92     * @param fs {@link FileSystem} that contains the region
93     * @param tableDir {@link Path} to where the table is being stored
94     * @param regionInfo {@link HRegionInfo} for region
95     */
96    HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
97        final HRegionInfo regionInfo) {
98      this.fs = fs;
99      this.conf = conf;
100     this.tableDir = tableDir;
101     this.regionInfo = regionInfo;
102     this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
103       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
104     this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
105       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
106  }
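      // Illustrative configuration sketch: the retry knobs read in the constructor above can be
      // tuned through the Configuration handed to this class (the values below are arbitrary examples):
      //
      //   Configuration conf = HBaseConfiguration.create();
      //   conf.setInt("hdfs.client.retries.number", 15);          // default 10
      //   conf.setInt("hdfs.client.sleep.before.retries", 2000);  // default 1000 ms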
107 
108   /** @return the underlying {@link FileSystem} */
109   public FileSystem getFileSystem() {
110     return this.fs;
111   }
112 
113   /** @return the {@link HRegionInfo} that describes this on-disk region view */
114   public HRegionInfo getRegionInfo() {
115     return this.regionInfo;
116   }
117 
118   /** @return {@link Path} to the table directory that contains this region. */
119   public Path getTableDir() {
120     return this.tableDir;
121   }
122 
123   /** @return {@link Path} to the region directory. */
124   public Path getRegionDir() {
125     return new Path(this.tableDir, this.regionInfo.getEncodedName());
126   }
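      // Resulting on-disk layout, derived from the constants above (family names vary per table):
      //
      //   <tableDir>/<encodedRegionName>/
      //     .regioninfo      serialized HRegionInfo (REGION_INFO_FILE)
      //     .tmp/            temporary files such as compaction output (REGION_TEMP_DIR)
      //     .splits/         daughter region dirs during a split (REGION_SPLITS_DIR)
      //     .merges/         merged region dir during a merge (REGION_MERGES_DIR)
      //     <family>/        one directory per column family, holding the store files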
127 
128   // ===========================================================================
129   //  Temp Helpers
130   // ===========================================================================
131   /** @return {@link Path} to the region's temp directory, used for file creations */
132   Path getTempDir() {
133     return new Path(getRegionDir(), REGION_TEMP_DIR);
134   }
135 
136   /**
137    * Clean up any temp detritus that may have been left around from previous operation attempts.
138    */
139   void cleanupTempDir() throws IOException {
140     deleteDir(getTempDir());
141   }
142 
143   // ===========================================================================
144   //  Store/StoreFile Helpers
145   // ===========================================================================
146   /**
147    * Returns the directory path of the specified family
148    * @param familyName Column Family Name
149    * @return {@link Path} to the directory of the specified family
150    */
151   Path getStoreDir(final String familyName) {
152     return new Path(this.getRegionDir(), familyName);
153   }
154 
155   /**
156    * Create the store directory for the specified family name
157    * @param familyName Column Family Name
158    * @return {@link Path} to the directory of the specified family
159    * @throws IOException if the directory creation fails.
160    */
161   Path createStoreDir(final String familyName) throws IOException {
162     Path storeDir = getStoreDir(familyName);
163     if(!fs.exists(storeDir) && !createDir(storeDir))
164       throw new IOException("Failed creating "+storeDir);
165     return storeDir;
166   }
167 
168   /**
169    * Returns the store files available for the family.
170    * This method filters out files that are not valid store files.
171    * @param familyName Column Family Name
172    * @return a set of {@link StoreFileInfo} for the specified family.
173    */
174   public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
175     return getStoreFiles(Bytes.toString(familyName));
176   }
177 
178   /**
179    * Returns the store files available for the family.
180    * This method filters out files that are not valid store files.
181    * @param familyName Column Family Name
182    * @return a set of {@link StoreFileInfo} for the specified family.
183    */
184   public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
185     Path familyDir = getStoreDir(familyName);
186     FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
187     if (files == null) return null;
188 
189     ArrayList<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>(files.length);
190     for (FileStatus status: files) {
191       if (!StoreFileInfo.isValid(status)) continue;
192 
193       storeFiles.add(new StoreFileInfo(this.conf, this.fs, status));
194     }
195     return storeFiles;
196   }
197 
198   /**
199    * Returns true if the specified family has reference files
200    * @param familyName Column Family Name
201    * @return true if family contains reference files
202    * @throws IOException
203    */
204   public boolean hasReferences(final String familyName) throws IOException {
205     FileStatus[] files = FSUtils.listStatus(fs, getStoreDir(familyName),
206       new PathFilter () {
207         public boolean accept(Path path) {
208           return StoreFileInfo.isReference(path);
209         }
210       }
211     );
212     return files != null && files.length > 0;
213   }
214 
215   /**
216    * Check whether the region has reference files.
217    * @param htd table descriptor of the region
218    * @return true if the region contains at least one reference file
219    * @throws IOException
220    */
221   public boolean hasReferences(final HTableDescriptor htd) throws IOException {
222     for (HColumnDescriptor family : htd.getFamilies()) {
223       if (hasReferences(family.getNameAsString())) {
224         return true;
225       }
226     }
227     return false;
228   }
229 
230   /**
231    * @return the set of families present on disk
232    * @throws IOException
233    */
234   public Collection<String> getFamilies() throws IOException {
235     FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
236     if (fds == null) return null;
237 
238     ArrayList<String> families = new ArrayList<String>(fds.length);
239     for (FileStatus status: fds) {
240       families.add(status.getPath().getName());
241     }
242 
243     return families;
244   }
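      // Usage sketch (assumes an already-opened HRegionFileSystem named "regionFs"):
      // walk every on-disk family and list its valid store files.
      //
      //   Collection<String> families = regionFs.getFamilies();
      //   if (families != null) {
      //     for (String family : families) {
      //       Collection<StoreFileInfo> files = regionFs.getStoreFiles(family);
      //       if (files == null) continue;   // no store files for this family
      //       for (StoreFileInfo storeFile : files) {
      //         LOG.debug("family=" + family + " storefile=" + storeFile);
      //       }
      //     }
      //   }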
245 
246   /**
247    * Remove the region family from disk, archiving the store files.
248    * @param familyName Column Family Name
249    * @throws IOException if an error occurs during the archiving
250    */
251   public void deleteFamily(final String familyName) throws IOException {
252     // archive family store files
253     HFileArchiver.archiveFamily(fs, conf, regionInfo, tableDir, Bytes.toBytes(familyName));
254 
255     // delete the family folder
256     Path familyDir = getStoreDir(familyName);
257     if(fs.exists(familyDir) && !deleteDir(familyDir))
258       throw new IOException("Could not delete family " + familyName
259           + " from FileSystem for region " + regionInfo.getRegionNameAsString() + "("
260           + regionInfo.getEncodedName() + ")");
261   }
262 
263   /**
264    * Generate a unique file name, used by createTempName() and commitStoreFile()
265    * @param suffix extra information to append to the generated name
266    * @return Unique file name
267    */
268   private static String generateUniqueName(final String suffix) {
269     String name = UUID.randomUUID().toString().replaceAll("-", "");
270     if (suffix != null) name += suffix;
271     return name;
272   }
273 
274   /**
275    * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
276    * to get a safer file creation.
277    * <code>
278    * Path file = fs.createTempName();
279    * ...StoreFile.Writer(file)...
280    * fs.commitStoreFile("family", file);
281    * </code>
282    *
283    * @return Unique {@link Path} of the temporary file
284    */
285   public Path createTempName() {
286     return createTempName(null);
287   }
288 
289   /**
290    * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
291    * to get a safer file creation.
292    * <code>
293    * Path file = fs.createTempName();
294    * ...StoreFile.Writer(file)...
295    * fs.commitStoreFile("family", file);
296    * </code>
297    *
298    * @param suffix extra information to append to the generated name
299    * @return Unique {@link Path} of the temporary file
300    */
301   public Path createTempName(final String suffix) {
302     return new Path(getTempDir(), generateUniqueName(suffix));
303   }
304 
305   /**
306    * Move the file from a build/temp location to the main family store directory.
307    * @param familyName Family that will gain the file
308    * @param buildPath {@link Path} to the file to commit.
309    * @return The new {@link Path} of the committed file
310    * @throws IOException
311    */
312   public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
313     return commitStoreFile(familyName, buildPath, -1, false);
314   }
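      // Expanded sketch of the createTempName()/commitStoreFile() pattern documented above
      // (assumes an HRegionFileSystem instance "regionFs"; "family" and the writer wiring are
      // illustrative, not prescribed by this class):
      //
      //   Path tmp = regionFs.createTempName();
      //   ... open a StoreFile.Writer against tmp, write, close ...
      //   Path committed = regionFs.commitStoreFile("family", tmp);
      //   // the file keeps the unique name from createTempName() and now lives under
      //   // <regionDir>/family/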
315 
316   /**
317    * Move the file from a build/temp location to the main family store directory.
318    * @param familyName Family that will gain the file
319    * @param buildPath {@link Path} to the file to commit.
320    * @param seqNum Sequence Number to append to the file name (less than 0 if no sequence number)
321    * @param generateNewName False if you want to keep the buildPath name
322    * @return The new {@link Path} of the committed file
323    * @throws IOException
324    */
325   private Path commitStoreFile(final String familyName, final Path buildPath,
326       final long seqNum, final boolean generateNewName) throws IOException {
327     Path storeDir = getStoreDir(familyName);
328     if(!fs.exists(storeDir) && !createDir(storeDir))
329       throw new IOException("Failed creating " + storeDir);
330     
331     String name = buildPath.getName();
332     if (generateNewName) {
333       name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
334     }
335     Path dstPath = new Path(storeDir, name);
336     if (!fs.exists(buildPath)) {
337       throw new FileNotFoundException(buildPath.toString());
338     }
339     LOG.debug("Committing store file " + buildPath + " as " + dstPath);
340     // buildPath exists, therefore not doing an exists() check.
341     if (!rename(buildPath, dstPath)) {
342       throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
343     }
344     return dstPath;
345   }
346 
347 
348   /**
349    * Moves multiple store files to their respective family store directories.
350    * @param storeFiles list of store files divided by family
351    * @throws IOException
352    */
353   void commitStoreFiles(final Map<byte[], List<StoreFile>> storeFiles) throws IOException {
354     for (Map.Entry<byte[], List<StoreFile>> es: storeFiles.entrySet()) {
355       String familyName = Bytes.toString(es.getKey());
356       for (StoreFile sf: es.getValue()) {
357         commitStoreFile(familyName, sf.getPath());
358       }
359     }
360   }
361 
362   /**
363    * Archives the specified store file from the specified family.
364    * @param familyName Family that contains the store files
365    * @param filePath {@link Path} to the store file to remove
366    * @throws IOException if the archiving fails
367    */
368   public void removeStoreFile(final String familyName, final Path filePath)
369       throws IOException {
370     HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfo,
371         this.tableDir, Bytes.toBytes(familyName), filePath);
372   }
373 
374   /**
375    * Closes and archives the specified store files from the specified family.
376    * @param familyName Family that contains the store files
377    * @param storeFiles set of store files to remove
378    * @throws IOException if the archiving fails
379    */
380   public void removeStoreFiles(final String familyName, final Collection<StoreFile> storeFiles)
381       throws IOException {
382     HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfo,
383         this.tableDir, Bytes.toBytes(familyName), storeFiles);
384   }
385 
386   /**
387    * Bulk load: Add a specified store file to the specified family.
388    * If the source file is on the same file-system, it is moved from the
389    * source location to the destination location; otherwise it is copied over.
390    *
391    * @param familyName Family that will gain the file
392    * @param srcPath {@link Path} to the file to import
393    * @param seqNum Bulk Load sequence number
394    * @return The destination {@link Path} of the bulk loaded file
395    * @throws IOException
396    */
397   Path bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
398       throws IOException {
399     // Copy the file if it's on another filesystem
400     FileSystem srcFs = srcPath.getFileSystem(conf);
401     FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;
402 
403     // We can't compare FileSystem instances as equals() includes UGI instance
404     // as part of the comparison and won't work when doing SecureBulkLoad
405     // TODO deal with viewFS
406     if (!FSHDFSUtils.isSameHdfs(conf, srcFs, desFs)) {
407       LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
408           "the destination store. Copying file over to destination filesystem.");
409       Path tmpPath = createTempName();
410       FileUtil.copy(srcFs, srcPath, fs, tmpPath, false, conf);
411       LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
412       srcPath = tmpPath;
413     }
414 
415     return commitStoreFile(familyName, srcPath, seqNum, true);
416   }
417 
418   // ===========================================================================
419   //  Splits Helpers
420   // ===========================================================================
421   /** @return {@link Path} to the temp directory used during split operations */
422   Path getSplitsDir() {
423     return new Path(getRegionDir(), REGION_SPLITS_DIR);
424   }
425 
426   Path getSplitsDir(final HRegionInfo hri) {
427     return new Path(getSplitsDir(), hri.getEncodedName());
428   }
429 
430   /**
431    * Clean up any split detritus that may have been left around from previous split attempts.
432    */
433   void cleanupSplitsDir() throws IOException {
434     deleteDir(getSplitsDir());
435   }
436 
437   /**
438    * Clean up any split detritus that may have been left around from previous
439    * split attempts.
440    * Call this method on initial region deploy.
441    * @throws IOException
442    */
443   void cleanupAnySplitDetritus() throws IOException {
444     Path splitdir = this.getSplitsDir();
445     if (!fs.exists(splitdir)) return;
446     // Look at the splitdir.  It could have the encoded names of the daughter
447     // regions we tried to make.  See if the daughter regions actually got made
448     // out under the tabledir.  If here under splitdir still, then the split did
449     // not complete.  Try and do cleanup.  This code WILL NOT catch the case
450     // where we successfully created daughter region A but the regionserver crashed during
451     // the creation of daughter region B.  In this case, there'll be an orphan daughter
452     // dir in the filesystem.  TODO: Fix.
453     FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
454     if (daughters != null) {
455       for (FileStatus daughter: daughters) {
456         Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
457         if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
458           throw new IOException("Failed delete of " + daughterDir);
459         }
460       }
461     }
462     cleanupSplitsDir();
463     LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
464   }
465 
466   /**
467    * Remove daughter region
468    * @param regionInfo daughter {@link HRegionInfo}
469    * @throws IOException
470    */
471   void cleanupDaughterRegion(final HRegionInfo regionInfo) throws IOException {
472     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
473     if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
474       throw new IOException("Failed delete of " + regionDir);
475     }
476   }
477 
478   /**
479    * Commit a daughter region, moving it from the split temporary directory
480    * to the proper location in the filesystem.
481    * @param regionInfo daughter {@link HRegionInfo}
482    * @throws IOException
483    */
484   Path commitDaughterRegion(final HRegionInfo regionInfo) throws IOException {
485     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
486     Path daughterTmpDir = this.getSplitsDir(regionInfo);
487     if (fs.exists(daughterTmpDir)) {
488       // Write HRI to a file in case we need to recover hbase:meta
489       Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
490       byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
491       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
492 
493       // Move the daughter temp dir to the table dir
494       if (!rename(daughterTmpDir, regionDir)) {
495         throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
496       }
497     }
498     return regionDir;
499   }
500 
501   /**
502    * Create the region splits directory.
503    */
504   void createSplitsDir() throws IOException {
505     Path splitdir = getSplitsDir();
506     if (fs.exists(splitdir)) {
507       LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
508       if (!deleteDir(splitdir)) {
509         throw new IOException("Failed deletion of " + splitdir
510             + " before creating them again.");
511       }
512     }
513     // splitDir doesn't exist now. No need to do an exists() call for it.
514     if (!createDir(splitdir)) {
515       throw new IOException("Failed create of " + splitdir);
516     }
517   }
518 
519   /**
520    * Write out a split reference. Package local so it doesn't leak out of
521    * the regionserver.
522    * @param hri {@link HRegionInfo} of the destination
523    * @param familyName Column Family Name
524    * @param f File to split.
525    * @param splitRow Split Row
526    * @param top True if we are referring to the top half of the hfile.
527    * @return Path to created reference.
528    * @throws IOException
529    */
530   Path splitStoreFile(final HRegionInfo hri, final String familyName,
531       final StoreFile f, final byte[] splitRow, final boolean top) throws IOException {
532     
533     // Check whether the split row lies in the range of the store file
534     // If it is outside the range, return directly.
535     if (top) {
536       //check if larger than last key.
537       KeyValue splitKey = KeyValue.createFirstOnRow(splitRow);
538       byte[] lastKey = f.createReader().getLastKey();      
539       // If lastKey is null, the store file is empty.
540       if (lastKey == null) return null;
541       if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(), 
542           splitKey.getKeyOffset(), splitKey.getKeyLength(), lastKey, 0, lastKey.length) > 0) {
543         return null;
544       }
545     } else {
546       //check if smaller than first key
547       KeyValue splitKey = KeyValue.createLastOnRow(splitRow);
548       byte[] firstKey = f.createReader().getFirstKey();
549       // If firstKey is null, the store file is empty.
550       if (firstKey == null) return null;
551       if (f.getReader().getComparator().compareFlatKey(splitKey.getBuffer(), 
552           splitKey.getKeyOffset(), splitKey.getKeyLength(), firstKey, 0, firstKey.length) < 0) {
553         return null;
554       }      
555     }
556  
557     f.getReader().close(true);
558     
559     Path splitDir = new Path(getSplitsDir(hri), familyName);
560     // A reference to the top or bottom half of the store file, depending on the split side.
561     Reference r =
562       top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
563     // Add the referred-to region's name as a dot-separated suffix.
564     // See the REF_NAME_REGEX regex.  The referred-to region's name is
565     // up in the path of the passed in <code>f</code> -- the parent dir is the family,
566     // then the directory above is the region name.
567     String parentRegionName = regionInfo.getEncodedName();
568     // Write reference with same file id only with the other region name as
569     // suffix and into the new region location (under same family).
570     Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
571     return r.write(fs, p);
572   }
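      // Resulting reference location (illustrative store file name "abc123" in family "f"):
      //
      //   <parentRegionDir>/.splits/<daughterEncodedName>/f/abc123.<parentEncodedName>
      //
      // The dot-separated parent encoded name is what identifies the referred-to region when
      // the reference is resolved later.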
573 
574   // ===========================================================================
575   //  Merge Helpers
576   // ===========================================================================
577   /** @return {@link Path} to the temp directory used during merge operations */
578   Path getMergesDir() {
579     return new Path(getRegionDir(), REGION_MERGES_DIR);
580   }
581 
582   Path getMergesDir(final HRegionInfo hri) {
583     return new Path(getMergesDir(), hri.getEncodedName());
584   }
585 
586   /**
587    * Clean up any merge detritus that may have been left around from previous merge attempts.
588    */
589   void cleanupMergesDir() throws IOException {
590     deleteDir(getMergesDir());
591   }
592 
593   /**
594    * Remove merged region
595    * @param mergedRegion {@link HRegionInfo}
596    * @throws IOException
597    */
598   void cleanupMergedRegion(final HRegionInfo mergedRegion) throws IOException {
599     Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
600     if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
601       throw new IOException("Failed delete of " + regionDir);
602     }
603   }
604 
605   /**
606    * Create the region merges directory.
607    * @throws IOException If we fail to delete an existing merges directory or to create a new one.
608    * @see HRegionFileSystem#cleanupMergesDir()
609    */
610   void createMergesDir() throws IOException {
611     Path mergesdir = getMergesDir();
612     if (fs.exists(mergesdir)) {
613       LOG.info("The " + mergesdir
614           + " directory exists.  Hence deleting it to recreate it");
615       if (!fs.delete(mergesdir, true)) {
616         throw new IOException("Failed deletion of " + mergesdir
617             + " before creating them again.");
618       }
619     }
620     if (!fs.mkdirs(mergesdir))
621       throw new IOException("Failed create of " + mergesdir);
622   }
623 
624   /**
625    * Write out a merge reference under the given merges directory. Package local
626    * so it doesn't leak out of the regionserver.
627    * @param mergedRegion {@link HRegionInfo} of the merged region
628    * @param familyName Column Family Name
629    * @param f File to create the reference from.
630    * @param mergedDir merges directory to write the reference under
631    * @return Path to created reference.
632    * @throws IOException
633    */
634   Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName,
635       final StoreFile f, final Path mergedDir)
636       throws IOException {
637     Path referenceDir = new Path(new Path(mergedDir,
638         mergedRegion.getEncodedName()), familyName);
639     // A whole reference to the store file.
640     Reference r = Reference.createTopReference(regionInfo.getStartKey());
641     // Add the referred-to region's name as a dot-separated suffix.
642     // See the REF_NAME_REGEX regex. The referred-to region's name is
643     // up in the path of the passed in <code>f</code> -- the parent dir is the family,
644     // then the directory above is the region name.
645     String mergingRegionName = regionInfo.getEncodedName();
646     // Write reference with same file id only with the other region name as
647     // suffix and into the new region location (under same family).
648     Path p = new Path(referenceDir, f.getPath().getName() + "."
649         + mergingRegionName);
650     return r.write(fs, p);
651   }
652 
653   /**
654    * Commit a merged region, moving it from the merges temporary directory to
655    * the proper location in the filesystem.
656    * @param mergedRegionInfo merged region {@link HRegionInfo}
657    * @throws IOException 
658    */
659   void commitMergedRegion(final HRegionInfo mergedRegionInfo) throws IOException {
660     Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
661     Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
662     // Move the tmp dir to the expected location
663     if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
664       if (!fs.rename(mergedRegionTmpDir, regionDir)) {
665         throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
666             + regionDir);
667       }
668     }
669   }
670 
671   // ===========================================================================
672   //  Create/Open/Delete Helpers
673   // ===========================================================================
674   /**
675    * Log the current state of the region
676    * @param LOG log to output information
677    * @throws IOException if an unexpected exception occurs
678    */
679   void logFileSystemState(final Log LOG) throws IOException {
680     FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
681   }
682 
683   /**
684    * @param hri
685    * @return Content of the file we write out to the filesystem under a region
686    * @throws IOException
687    */
688   private static byte[] getRegionInfoFileContent(final HRegionInfo hri) throws IOException {
689     return hri.toDelimitedByteArray();
690   }
691 
692   /**
693    * Create a {@link HRegionInfo} from the serialized version on-disk.
694    * @param fs {@link FileSystem} that contains the Region Info file
695    * @param regionDir {@link Path} to the Region Directory that contains the Info file
696    * @return An {@link HRegionInfo} instance read from the Region Info file.
697    * @throws IOException if an error occurred during file open/read operation.
698    */
699   public static HRegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
700       throws IOException {
701     FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
702     try {
703       return HRegionInfo.parseFrom(in);
704     } finally {
705       in.close();
706     }
707   }
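      // Usage sketch: recover the HRegionInfo of an arbitrary region directory, for example while
      // repairing hbase:meta (assumes "fs" and "regionDir" are already known to the caller):
      //
      //   HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);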
708 
709   /**
710    * Write the .regioninfo file on-disk.
711    */
712   private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
713       final Path regionInfoFile, final byte[] content) throws IOException {
714     // First check to get the permissions
715     FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
716     // Write the RegionInfo file content
717     FSDataOutputStream out = FSUtils.create(fs, regionInfoFile, perms, null);
718     try {
719       out.write(content);
720     } finally {
721       out.close();
722     }
723   }
724 
725   /**
726    * Write out an info file under the stored region directory. Useful for recovering mangled regions.
727    * If the regionInfo already exists on-disk, we exit early.
728    */
729   void checkRegionInfoOnFilesystem() throws IOException {
730     // Compose the content of the file so we can compare it to the length in the filesystem. If not
731     // the same, rewrite it (it may have been written in the old format using Writables instead of
732     // pb). The pb version is much shorter -- we now write w/o the toString version -- so checking
733     // the length only should be sufficient. I don't want to read the file every time to check if
734     // it is pb serialized.
735     byte[] content = getRegionInfoFileContent(regionInfo);
736     try {
737       Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
738 
739       FileStatus status = fs.getFileStatus(regionInfoFile);
740       if (status != null && status.getLen() == content.length) {
741       // Then assume the content is good and move on.
742       // NOTE: the length alone is not sufficient to guarantee that the content matches.
743         return;
744       }
745 
746       LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
747       if (!fs.delete(regionInfoFile, false)) {
748         throw new IOException("Unable to remove existing " + regionInfoFile);
749       }
750     } catch (FileNotFoundException e) {
751       LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfo.getEncodedName());
752     }
753 
754     // Write HRI to a file in case we need to recover hbase:meta
755     writeRegionInfoOnFilesystem(content, true);
756   }
757 
758   /**
759    * Write out an info file under the region directory. Useful for recovering mangled regions.
760    * @param useTempDir indicates whether or not to use the region .tmp dir for a safer file creation.
761    */
762   private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
763     byte[] content = getRegionInfoFileContent(regionInfo);
764     writeRegionInfoOnFilesystem(content, useTempDir);
765   }
766 
767   /**
768    * Write out an info file under the region directory. Useful for recovering mangled regions.
769    * @param regionInfoContent serialized version of the {@link HRegionInfo}
770    * @param useTempDir indicates whether or not to use the region .tmp dir for a safer file creation.
771    */
772   private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
773       final boolean useTempDir) throws IOException {
774     Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
775     if (useTempDir) {
776       // Create in tmpDir and then move into place in case we crash after
777       // create but before close. If we don't successfully close the file,
778       // subsequent region reopens will fail below because the create is still
779       // registered in the NN.
780 
781       // And then create the file
782       Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);
783 
784       // If the datanode crashes, or if the RS goes down just before the close is called while
785       // trying to close the created regioninfo file in the .tmp directory, then on the next
786       // creation we will get an AlreadyBeingCreatedException.
787       // Hence delete the file if it already exists before creating it again.
788       if (FSUtils.isExists(fs, tmpPath)) {
789         FSUtils.delete(fs, tmpPath, true);
790       }
791 
792       // Write HRI to a file in case we need to recover hbase:meta
793       writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);
794 
795       // Move the created file to the original path
796       if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
797         throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
798       }
799     } else {
800       // Write HRI to a file in case we need to recover hbase:meta
801       writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
802     }
803   }
804 
805   /**
806    * Create a new Region on file-system.
807    * @param conf the {@link Configuration} to use
808    * @param fs {@link FileSystem} on which to create the region
809    * @param tableDir {@link Path} to where the table is being stored
810    * @param regionInfo {@link HRegionInfo} for the region to be created
811    * @throws IOException if the region creation fails due to a FileSystem exception.
812    */
813   public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
814       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
815     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
816     Path regionDir = regionFs.getRegionDir();
817 
818     if (fs.exists(regionDir)) {
819       LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
820       throw new IOException("The specified region already exists on disk: " + regionDir);
821     }
822 
823     // Create the region directory
824     if (!createDirOnFileSystem(fs, conf, regionDir)) {
825       LOG.warn("Unable to create the region directory: " + regionDir);
826       throw new IOException("Unable to create region directory: " + regionDir);
827     }
828 
829     // Write HRI to a file in case we need to recover hbase:meta
830     regionFs.writeRegionInfoOnFilesystem(false);
831     return regionFs;
832   }
833 
834   /**
835    * Open Region from file-system.
836    * @param conf the {@link Configuration} to use
837    * @param fs {@link FileSystem} from which to open the region
838    * @param tableDir {@link Path} to where the table is being stored
839    * @param regionInfo {@link HRegionInfo} for the region to be opened
840    * @param readOnly True if you don't want to edit the region data
841    * @throws IOException if opening the region fails due to a FileSystem exception.
842    */
843   public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
844       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo, boolean readOnly)
845       throws IOException {
846     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
847     Path regionDir = regionFs.getRegionDir();
848 
849     if (!fs.exists(regionDir)) {
850       LOG.warn("Trying to open a region that does not exist on disk: " + regionDir);
851       throw new IOException("The specified region does not exist on disk: " + regionDir);
852     }
853 
854     if (!readOnly) {
855       // Cleanup temporary directories
856       regionFs.cleanupTempDir();
857       regionFs.cleanupSplitsDir();
858       regionFs.cleanupMergesDir();
859 
860       // If it doesn't exist, write the HRI to a file, in case we need to recover hbase:meta
861       regionFs.checkRegionInfoOnFilesystem();
862     }
863 
864     return regionFs;
865   }
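      // Usage sketch (assumes "conf", "fs", "tableDir" and "hri" come from the caller's context):
      //
      //   // first deploy of a brand-new region: creates the region dir and writes .regioninfo
      //   HRegionFileSystem regionFs =
      //       HRegionFileSystem.createRegionOnFileSystem(conf, fs, tableDir, hri);
      //
      //   // later opens: cleans up .tmp/.splits/.merges and re-checks .regioninfo unless read-only
      //   regionFs = HRegionFileSystem.openRegionFromFileSystem(conf, fs, tableDir, hri, false);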
866 
867   /**
868    * Remove the region from the table directory, archiving the region's hfiles.
869    * @param conf the {@link Configuration} to use
870    * @param fs {@link FileSystem} from which to remove the region
871    * @param tableDir {@link Path} to where the table is being stored
872    * @param regionInfo {@link HRegionInfo} for region to be deleted
873    * @throws IOException if the request cannot be completed
874    */
875   public static void deleteRegionFromFileSystem(final Configuration conf,
876       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
877     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
878     Path regionDir = regionFs.getRegionDir();
879 
880     if (!fs.exists(regionDir)) {
881       LOG.warn("Trying to delete a region that does not exist on disk: " + regionDir);
882       return;
883     }
884 
885     if (LOG.isDebugEnabled()) {
886       LOG.debug("DELETING region " + regionDir);
887     }
888 
889     // Archive region
890     Path rootDir = FSUtils.getRootDir(conf);
891     HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
892 
893     // Delete empty region dir
894     if (!fs.delete(regionDir, true)) {
895       LOG.warn("Failed delete of " + regionDir);
896     }
897   }
898 
899   /**
900    * Creates a directory. Assumes the user has already checked for this directory's existence.
901    * @param dir
902    * @return the result of fs.mkdirs(). In case the underlying fs throws an IOException, it checks
903    *         whether the directory exists or not, and returns true if it exists.
904    * @throws IOException
905    */
906   boolean createDir(Path dir) throws IOException {
907     int i = 0;
908     IOException lastIOE = null;
909     do {
910       try {
911         return fs.mkdirs(dir);
912       } catch (IOException ioe) {
913         lastIOE = ioe;
914         if (fs.exists(dir)) return true; // directory is present
915         try {
916           sleepBeforeRetry("Create Directory", i+1);
917         } catch (InterruptedException e) {
918           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
919         }
920       }
921     } while (++i <= hdfsClientRetriesNumber);
922     throw new IOException("Exception in createDir", lastIOE);
923   }
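      // With the defaults above (10 retries, 1000 ms base sleep) this makes up to 11 mkdirs
      // attempts, sleeping baseSleepBeforeRetries * attemptNumber between them (1 s, 2 s, ...,
      // linear backoff). rename() and deleteDir() below follow the same pattern.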
924 
925   /**
926    * Renames a directory. Assumes the user has already checked for this directory's existence.
927    * @param srcpath
928    * @param dstPath
929    * @return true if rename is successful.
930    * @throws IOException
931    */
932   boolean rename(Path srcpath, Path dstPath) throws IOException {
933     IOException lastIOE = null;
934     int i = 0;
935     do {
936       try {
937         return fs.rename(srcpath, dstPath);
938       } catch (IOException ioe) {
939         lastIOE = ioe;
940         if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
941         // dir is not there, retry after some time.
942         try {
943           sleepBeforeRetry("Rename Directory", i+1);
944         } catch (InterruptedException e) {
945           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
946         }
947       }
948     } while (++i <= hdfsClientRetriesNumber);
949 
950     throw new IOException("Exception in rename", lastIOE);
951   }
952 
953   /**
954    * Deletes a directory. Assumes the user has already checked for this directory's existence.
955    * @param dir
956    * @return true if the directory is deleted.
957    * @throws IOException
958    */
959   boolean deleteDir(Path dir) throws IOException {
960     IOException lastIOE = null;
961     int i = 0;
962     do {
963       try {
964         return fs.delete(dir, true);
965       } catch (IOException ioe) {
966         lastIOE = ioe;
967         if (!fs.exists(dir)) return true;
968         // dir is there, retry deleting after some time.
969         try {
970           sleepBeforeRetry("Delete Directory", i+1);
971         } catch (InterruptedException e) {
972           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
973         }
974       }
975     } while (++i <= hdfsClientRetriesNumber);
976 
977     throw new IOException("Exception in DeleteDir", lastIOE);
978   }
979 
980   /**
981    * sleeping logic; handles the interrupt exception.
982    */
983   private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
984     sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
985   }
986 
987   /**
988    * Creates a directory for a filesystem and configuration object. Assumes the user has already
989    * checked for this directory's existence.
990    * @param fs
991    * @param conf
992    * @param dir
993    * @return the result of fs.mkdirs(). In case the underlying fs throws an IOException, it checks
994    *         whether the directory exists or not, and returns true if it exists.
995    * @throws IOException
996    */
997   private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
998       throws IOException {
999     int i = 0;
1000     IOException lastIOE = null;
1001     int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1002       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1003     int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1004       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1005     do {
1006       try {
1007         return fs.mkdirs(dir);
1008       } catch (IOException ioe) {
1009         lastIOE = ioe;
1010         if (fs.exists(dir)) return true; // directory is present
1011         try {
1012           sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1013         } catch (InterruptedException e) {
1014           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1015         }
1016       }
1017     } while (++i <= hdfsClientRetriesNumber);
1018 
1019     throw new IOException("Exception in createDir", lastIOE);
1020   }
1021 
1022   /**
1023    * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1024    * for this to avoid re-looking for the integer values.
1025    */
1026   private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1027       int hdfsClientRetriesNumber) throws InterruptedException {
1028     if (sleepMultiplier > hdfsClientRetriesNumber) {
1029       LOG.debug(msg + ", retries exhausted");
1030       return;
1031     }
1032     LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1033     Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1034   }
1035 }