View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.regionserver;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.io.InterruptedIOException;
25  import java.util.ArrayList;
26  import java.util.Collection;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.UUID;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.classification.InterfaceAudience;
34  import org.apache.hadoop.conf.Configuration;
35  import org.apache.hadoop.fs.FSDataInputStream;
36  import org.apache.hadoop.fs.FSDataOutputStream;
37  import org.apache.hadoop.fs.FileStatus;
38  import org.apache.hadoop.fs.FileSystem;
39  import org.apache.hadoop.fs.FileUtil;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.fs.permission.FsPermission;
42  import org.apache.hadoop.hbase.Cell;
43  import org.apache.hadoop.hbase.HColumnDescriptor;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HRegionInfo;
46  import org.apache.hadoop.hbase.HTableDescriptor;
47  import org.apache.hadoop.hbase.KeyValue;
48  import org.apache.hadoop.hbase.KeyValueUtil;
49  import org.apache.hadoop.hbase.backup.HFileArchiver;
50  import org.apache.hadoop.hbase.fs.HFileSystem;
51  import org.apache.hadoop.hbase.io.Reference;
52  import org.apache.hadoop.hbase.util.Bytes;
53  import org.apache.hadoop.hbase.util.FSHDFSUtils;
54  import org.apache.hadoop.hbase.util.FSUtils;
55  import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
56  
57  /**
58   * View to an on-disk Region.
59   * Provides the set of methods necessary to interact with the on-disk region data.
60   */
61  @InterfaceAudience.Private
62  public class HRegionFileSystem {
63    private static final Log LOG = LogFactory.getLog(HRegionFileSystem.class);
64  
65    /** Name of the region info file that resides just under the region directory. */
66    public final static String REGION_INFO_FILE = ".regioninfo";
67  
68    /** Temporary subdirectory of the region directory used for merges. */
69    public static final String REGION_MERGES_DIR = ".merges";
70  
71    /** Temporary subdirectory of the region directory used for splits. */
72    public static final String REGION_SPLITS_DIR = ".splits";
73  
74    /** Temporary subdirectory of the region directory used for compaction output. */
75    private static final String REGION_TEMP_DIR = ".tmp";
76  
77    private final HRegionInfo regionInfo;
78    //regionInfo for interacting with FS (getting encodedName, etc)
79    private final HRegionInfo regionInfoForFs;
80    private final Configuration conf;
81    private final Path tableDir;
82    private final FileSystem fs;
83  
84    /**
85     * In order to handle NN connectivity hiccups, one need to retry non-idempotent operation at the
86     * client level.
87     */
88    private final int hdfsClientRetriesNumber;
89    private final int baseSleepBeforeRetries;
90    private static final int DEFAULT_HDFS_CLIENT_RETRIES_NUMBER = 10;
91    private static final int DEFAULT_BASE_SLEEP_BEFORE_RETRIES = 1000;
92  
93    /**
94     * Create a view to the on-disk region
95     * @param conf the {@link Configuration} to use
96     * @param fs {@link FileSystem} that contains the region
97     * @param tableDir {@link Path} to where the table is being stored
98     * @param regionInfo {@link HRegionInfo} for region
99     */
100   HRegionFileSystem(final Configuration conf, final FileSystem fs, final Path tableDir,
101       final HRegionInfo regionInfo) {
102     this.fs = fs;
103     this.conf = conf;
104     this.tableDir = tableDir;
105     this.regionInfo = regionInfo;
106     this.regionInfoForFs = ServerRegionReplicaUtil.getRegionInfoForFs(regionInfo);
107     this.hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
108       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
109     this.baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
110       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
111  }
112 
113   /** @return the underlying {@link FileSystem} */
114   public FileSystem getFileSystem() {
115     return this.fs;
116   }
117 
118   /** @return the {@link HRegionInfo} that describe this on-disk region view */
119   public HRegionInfo getRegionInfo() {
120     return this.regionInfo;
121   }
122 
  /**
   * @return the {@link HRegionInfo} used when interacting with the filesystem (e.g. to derive
   *     the encoded name / on-disk paths). Resolved via ServerRegionReplicaUtil in the
   *     constructor, so it may differ from {@link #getRegionInfo()} for region replicas.
   */
  public HRegionInfo getRegionInfoForFS() {
    return this.regionInfoForFs;
  }
126 
127   /** @return {@link Path} to the region's root directory. */
128   public Path getTableDir() {
129     return this.tableDir;
130   }
131 
132   /** @return {@link Path} to the region directory. */
133   public Path getRegionDir() {
134     return new Path(this.tableDir, this.regionInfoForFs.getEncodedName());
135   }
136 
137   // ===========================================================================
138   //  Temp Helpers
139   // ===========================================================================
140   /** @return {@link Path} to the region's temp directory, used for file creations */
141   Path getTempDir() {
142     return new Path(getRegionDir(), REGION_TEMP_DIR);
143   }
144 
  /**
   * Clean up any temp detritus that may have been left around from previous operation attempts.
   * Deletes the whole region ".tmp" directory; a no-op if it does not exist.
   */
  void cleanupTempDir() throws IOException {
    deleteDir(getTempDir());
  }
151 
152   // ===========================================================================
153   //  Store/StoreFile Helpers
154   // ===========================================================================
155   /**
156    * Returns the directory path of the specified family
157    * @param familyName Column Family Name
158    * @return {@link Path} to the directory of the specified family
159    */
160   public Path getStoreDir(final String familyName) {
161     return new Path(this.getRegionDir(), familyName);
162   }
163 
164   /**
165    * Create the store directory for the specified family name
166    * @param familyName Column Family Name
167    * @return {@link Path} to the directory of the specified family
168    * @throws IOException if the directory creation fails.
169    */
170   Path createStoreDir(final String familyName) throws IOException {
171     Path storeDir = getStoreDir(familyName);
172     if(!fs.exists(storeDir) && !createDir(storeDir))
173       throw new IOException("Failed creating "+storeDir);
174     return storeDir;
175   }
176 
  /**
   * Returns the store files available for the family.
   * This method performs the filtering based on the valid store files.
   * Convenience overload that converts the family name to a String.
   * @param familyName Column Family Name
   * @return a set of {@link StoreFileInfo} for the specified family, or null if none found.
   */
  public Collection<StoreFileInfo> getStoreFiles(final byte[] familyName) throws IOException {
    return getStoreFiles(Bytes.toString(familyName));
  }
186 
  /**
   * Returns the store files available for the family, validating each entry.
   * Equivalent to {@code getStoreFiles(familyName, true)}.
   * @param familyName Column Family Name
   * @return a set of {@link StoreFileInfo} for the specified family, or null if none found.
   */
  public Collection<StoreFileInfo> getStoreFiles(final String familyName) throws IOException {
    return getStoreFiles(familyName, true);
  }
190 
  /**
   * Returns the store files available for the family.
   * This method performs the filtering based on the valid store files.
   * @param familyName Column Family Name
   * @param validate if true, skip (with a warning) entries that fail StoreFileInfo.isValid()
   * @return a set of {@link StoreFileInfo} for the specified family, or null if the family
   *     directory is missing or empty (note: null, not an empty collection).
   */
  public Collection<StoreFileInfo> getStoreFiles(final String familyName, final boolean validate)
      throws IOException {
    Path familyDir = getStoreDir(familyName);
    FileStatus[] files = FSUtils.listStatus(this.fs, familyDir);
    if (files == null) {
      if (LOG.isTraceEnabled()) {
        LOG.trace("No StoreFiles for: " + familyDir);
      }
      return null;
    }

    ArrayList<StoreFileInfo> storeFiles = new ArrayList<StoreFileInfo>(files.length);
    for (FileStatus status: files) {
      if (validate && !StoreFileInfo.isValid(status)) {
        LOG.warn("Invalid StoreFile: " + status.getPath());
        continue;
      }
      // Resolve through ServerRegionReplicaUtil so replicas see the primary's files.
      StoreFileInfo info = ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
        regionInfoForFs, familyName, status.getPath());
      storeFiles.add(info);

    }
    return storeFiles;
  }
221
222   /**
223    * Return Qualified Path of the specified family/file
224    *
225    * @param familyName Column Family Name
226    * @param fileName File Name
227    * @return The qualified Path for the specified family/file
228    */
229   Path getStoreFilePath(final String familyName, final String fileName) {
230     Path familyDir = getStoreDir(familyName);
231     return new Path(familyDir, fileName).makeQualified(this.fs);
232   }
233
  /**
   * Return the store file information of the specified family/file.
   *
   * @param familyName Column Family Name
   * @param fileName File Name
   * @return The {@link StoreFileInfo} for the specified family/file
   * @throws IOException if the store file information cannot be resolved
   */
  StoreFileInfo getStoreFileInfo(final String familyName, final String fileName)
      throws IOException {
    Path familyDir = getStoreDir(familyName);
    // Resolve through ServerRegionReplicaUtil so replicas see the primary's file.
    return ServerRegionReplicaUtil.getStoreFileInfo(conf, fs, regionInfo,
      regionInfoForFs, familyName, new Path(familyDir, fileName));
  }
247
248   /**
249    * Returns true if the specified family has reference files
250    * @param familyName Column Family Name
251    * @return true if family contains reference files
252    * @throws IOException
253    */
254   public boolean hasReferences(final String familyName) throws IOException {
255     FileStatus[] files = FSUtils.listStatus(fs, getStoreDir(familyName));
256     if (files != null) {
257       for(FileStatus stat: files) {
258         if(stat.isDirectory()) {
259           continue;
260         }
261         if(StoreFileInfo.isReference(stat.getPath())) {
262           return true;
263         }
264       }
265     }
266     return false;
267   }
268
  /**
   * Check whether region has Reference file
   * @param htd table descriptor of the region
   * @return true if any family of the region has reference files
   * @throws IOException if listing a family directory fails
   */
  public boolean hasReferences(final HTableDescriptor htd) throws IOException {
    for (HColumnDescriptor family : htd.getFamilies()) {
      if (hasReferences(family.getNameAsString())) {
        return true;
      }
    }
    return false;
  }
283
284   /**
285    * @return the set of families present on disk
286    * @throws IOException
287    */
288   public Collection<String> getFamilies() throws IOException {
289     FileStatus[] fds = FSUtils.listStatus(fs, getRegionDir(), new FSUtils.FamilyDirFilter(fs));
290     if (fds == null) return null;
291
292     ArrayList<String> families = new ArrayList<String>(fds.length);
293     for (FileStatus status: fds) {
294       families.add(status.getPath().getName());
295     }
296
297     return families;
298   }
299
300   /**
301    * Remove the region family from disk, archiving the store files.
302    * @param familyName Column Family Name
303    * @throws IOException if an error occours during the archiving
304    */
305   public void deleteFamily(final String familyName) throws IOException {
306     // archive family store files
307     HFileArchiver.archiveFamily(fs, conf, regionInfoForFs, tableDir, Bytes.toBytes(familyName));
308
309     // delete the family folder
310     Path familyDir = getStoreDir(familyName);
311     if(fs.exists(familyDir) && !deleteDir(familyDir))
312       throw new IOException("Could not delete family " + familyName
313           + " from FileSystem for region " + regionInfoForFs.getRegionNameAsString() + "("
314           + regionInfoForFs.getEncodedName() + ")");
315   }
316
317   /**
318    * Generate a unique file name, used by createTempName() and commitStoreFile()
319    * @param suffix extra information to append to the generated name
320    * @return Unique file name
321    */
322   private static String generateUniqueName(final String suffix) {
323     String name = UUID.randomUUID().toString().replaceAll("-", "");
324     if (suffix != null) name += suffix;
325     return name;
326   }
327
  /**
   * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
   * to get a safer file creation.
   * <code>
   * Path file = fs.createTempName();
   * ...StoreFile.Writer(file)...
   * fs.commitStoreFile("family", file);
   * </code>
   *
   * @return Unique {@link Path} of the temporary file
   */
  public Path createTempName() {
    return createTempName(null);
  }
342
  /**
   * Generate a unique temporary Path. Used in conjunction with commitStoreFile()
   * to get a safer file creation.
   * <code>
   * Path file = fs.createTempName();
   * ...StoreFile.Writer(file)...
   * fs.commitStoreFile("family", file);
   * </code>
   *
   * @param suffix extra information to append to the generated name, or null for none
   * @return Unique {@link Path} of the temporary file, under the region's .tmp directory
   */
  public Path createTempName(final String suffix) {
    return new Path(getTempDir(), generateUniqueName(suffix));
  }
358
  /**
   * Move the file from a build/temp location to the main family store directory.
   * Keeps the buildPath file name and attaches no sequence number.
   * @param familyName Family that will gain the file
   * @param buildPath {@link Path} to the file to commit.
   * @return The new {@link Path} of the committed file
   * @throws IOException if the file is missing or the rename fails
   */
  public Path commitStoreFile(final String familyName, final Path buildPath) throws IOException {
    return commitStoreFile(familyName, buildPath, -1, false);
  }
369
370   /**
371    * Move the file from a build/temp location to the main family store directory.
372    * @param familyName Family that will gain the file
373    * @param buildPath {@link Path} to the file to commit.
374    * @param seqNum Sequence Number to append to the file name (less then 0 if no sequence number)
375    * @param generateNewName False if you want to keep the buildPath name
376    * @return The new {@link Path} of the committed file
377    * @throws IOException
378    */
379   private Path commitStoreFile(final String familyName, final Path buildPath,
380       final long seqNum, final boolean generateNewName) throws IOException {
381     Path storeDir = getStoreDir(familyName);
382     if(!fs.exists(storeDir) && !createDir(storeDir))
383       throw new IOException("Failed creating " + storeDir);
384
385     String name = buildPath.getName();
386     if (generateNewName) {
387       name = generateUniqueName((seqNum < 0) ? null : "_SeqId_" + seqNum + "_");
388     }
389     Path dstPath = new Path(storeDir, name);
390     if (!fs.exists(buildPath)) {
391       throw new FileNotFoundException(buildPath.toString());
392     }
393     if (LOG.isDebugEnabled()) {
394       LOG.debug("Committing store file " + buildPath + " as " + dstPath);
395     }
396     // buildPath exists, therefore not doing an exists() check.
397     if (!rename(buildPath, dstPath)) {
398       throw new IOException("Failed rename of " + buildPath + " to " + dstPath);
399     }
400     return dstPath;
401   }
402
403
404   /**
405    * Moves multiple store files to the relative region's family store directory.
406    * @param storeFiles list of store files divided by family
407    * @throws IOException
408    */
409   void commitStoreFiles(final Map<byte[], List<StoreFile>> storeFiles) throws IOException {
410     for (Map.Entry<byte[], List<StoreFile>> es: storeFiles.entrySet()) {
411       String familyName = Bytes.toString(es.getKey());
412       for (StoreFile sf: es.getValue()) {
413         commitStoreFile(familyName, sf.getPath());
414       }
415     }
416   }
417
  /**
   * Archives the specified store file from the specified family.
   * The file is moved to the archive, not deleted outright.
   * @param familyName Family that contains the store files
   * @param filePath {@link Path} to the store file to remove
   * @throws IOException if the archiving fails
   */
  public void removeStoreFile(final String familyName, final Path filePath)
      throws IOException {
    HFileArchiver.archiveStoreFile(this.conf, this.fs, this.regionInfoForFs,
        this.tableDir, Bytes.toBytes(familyName), filePath);
  }
429
  /**
   * Closes and archives the specified store files from the specified family.
   * The files are moved to the archive, not deleted outright.
   * @param familyName Family that contains the store files
   * @param storeFiles set of store files to remove
   * @throws IOException if the archiving fails
   */
  public void removeStoreFiles(final String familyName, final Collection<StoreFile> storeFiles)
      throws IOException {
    HFileArchiver.archiveStoreFiles(this.conf, this.fs, this.regionInfoForFs,
        this.tableDir, Bytes.toBytes(familyName), storeFiles);
  }
441
  /**
   * Bulk load: Add a specified store file to the specified family.
   * If the source file is on a different file-system it is first copied to a temporary
   * location on the destination file-system; the file is then moved (renamed) into the
   * family store directory.
   *
   * @param familyName Family that will gain the file
   * @param srcPath {@link Path} to the file to import
   * @param seqNum Bulk Load sequence number
   * @return The destination {@link Path} of the bulk loaded file
   * @throws IOException if the copy or the commit fails
   */
  Path bulkLoadStoreFile(final String familyName, Path srcPath, long seqNum)
      throws IOException {
    // Copy the file if it's on another filesystem
    FileSystem srcFs = srcPath.getFileSystem(conf);
    // Unwrap HFileSystem to compare against the actual backing filesystem.
    FileSystem desFs = fs instanceof HFileSystem ? ((HFileSystem)fs).getBackingFs() : fs;

    // We can't compare FileSystem instances as equals() includes UGI instance
    // as part of the comparison and won't work when doing SecureBulkLoad
    // TODO deal with viewFS
    if (!FSHDFSUtils.isSameHdfs(conf, srcFs, desFs)) {
      LOG.info("Bulk-load file " + srcPath + " is on different filesystem than " +
          "the destination store. Copying file over to destination filesystem.");
      Path tmpPath = createTempName();
      FileUtil.copy(srcFs, srcPath, fs, tmpPath, false, conf);
      LOG.info("Copied " + srcPath + " to temporary path on destination filesystem: " + tmpPath);
      srcPath = tmpPath;
    }

    // Commit with a generated name carrying the bulk-load sequence number.
    return commitStoreFile(familyName, srcPath, seqNum, true);
  }
473
474   // ===========================================================================
475   //  Splits Helpers
476   // ===========================================================================
  /** @return {@link Path} to the region's ".splits" temp directory used during split operations */
  Path getSplitsDir() {
    return new Path(getRegionDir(), REGION_SPLITS_DIR);
  }
481
  /**
   * @param hri daughter {@link HRegionInfo}
   * @return {@link Path} to the daughter's subdirectory (by encoded name) under the splits dir
   */
  Path getSplitsDir(final HRegionInfo hri) {
    return new Path(getSplitsDir(), hri.getEncodedName());
  }
485
  /**
   * Clean up any split detritus that may have been left around from previous split attempts.
   * Deletes the whole ".splits" directory; a no-op if it does not exist.
   */
  void cleanupSplitsDir() throws IOException {
    deleteDir(getSplitsDir());
  }
492
  /**
   * Clean up any split detritus that may have been left around from previous
   * split attempts.
   * Call this method on initial region deploy.
   * @throws IOException if any daughter-directory delete fails
   */
  void cleanupAnySplitDetritus() throws IOException {
    Path splitdir = this.getSplitsDir();
    if (!fs.exists(splitdir)) return;
    // Look at the splitdir.  It could have the encoded names of the daughter
    // regions we tried to make.  See if the daughter regions actually got made
    // out under the tabledir.  If here under splitdir still, then the split did
    // not complete.  Try and do cleanup.  This code WILL NOT catch the case
    // where we successfully created daughter a but regionserver crashed during
    // the creation of region b.  In this case, there'll be an orphan daughter
    // dir in the filesystem.  TODO: Fix.
    FileStatus[] daughters = FSUtils.listStatus(fs, splitdir, new FSUtils.DirFilter(fs));
    if (daughters != null) {
      for (FileStatus daughter: daughters) {
        // Remove the half-made daughter region dir out under the table dir, if any.
        Path daughterDir = new Path(getTableDir(), daughter.getPath().getName());
        if (fs.exists(daughterDir) && !deleteDir(daughterDir)) {
          throw new IOException("Failed delete of " + daughterDir);
        }
      }
    }
    // Finally remove the splits dir itself.
    cleanupSplitsDir();
    LOG.info("Cleaned up old failed split transaction detritus: " + splitdir);
  }
521
522   /**
523    * Remove daughter region
524    * @param regionInfo daughter {@link HRegionInfo}
525    * @throws IOException
526    */
527   void cleanupDaughterRegion(final HRegionInfo regionInfo) throws IOException {
528     Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
529     if (this.fs.exists(regionDir) && !deleteDir(regionDir)) {
530       throw new IOException("Failed delete of " + regionDir);
531     }
532   }
533
  /**
   * Commit a daughter region, moving it from the split temporary directory
   * to the proper location in the filesystem.
   * If the daughter temp dir does not exist, nothing is moved and the target
   * region dir path is returned as-is.
   *
   * @param regionInfo                 daughter {@link org.apache.hadoop.hbase.HRegionInfo}
   * @return {@link Path} of the committed daughter region directory
   * @throws IOException if writing the region info file or the rename fails
   */
  Path commitDaughterRegion(final HRegionInfo regionInfo)
      throws IOException {
    Path regionDir = new Path(this.tableDir, regionInfo.getEncodedName());
    Path daughterTmpDir = this.getSplitsDir(regionInfo);

    if (fs.exists(daughterTmpDir)) {

      // Write HRI to a file in case we need to recover hbase:meta
      Path regionInfoFile = new Path(daughterTmpDir, REGION_INFO_FILE);
      byte[] regionInfoContent = getRegionInfoFileContent(regionInfo);
      writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);

      // Move the daughter temp dir to the table dir
      if (!rename(daughterTmpDir, regionDir)) {
        throw new IOException("Unable to rename " + daughterTmpDir + " to " + regionDir);
      }
    }

    return regionDir;
  }
561
562   /**
563    * Create the region splits directory.
564    */
565   void createSplitsDir() throws IOException {
566     Path splitdir = getSplitsDir();
567     if (fs.exists(splitdir)) {
568       LOG.info("The " + splitdir + " directory exists.  Hence deleting it to recreate it");
569       if (!deleteDir(splitdir)) {
570         throw new IOException("Failed deletion of " + splitdir
571             + " before creating them again.");
572       }
573     }
574     // splitDir doesn't exists now. No need to do an exists() call for it.
575     if (!createDir(splitdir)) {
576       throw new IOException("Failed create of " + splitdir);
577     }
578   }
579
  /**
   * Write out a split reference. Package local so it doesnt leak out of
   * regionserver.
   * @param hri {@link HRegionInfo} of the destination
   * @param familyName Column Family Name
   * @param f File to split.
   * @param splitRow Split Row
   * @param top True if we are referring to the top half of the hfile.
   * @param splitPolicy if non-null and it skips the store-file range check for this family,
   *          the reference is written unconditionally
   * @return Path to created reference, or null if the split row falls outside the file's
   *         key range (or the file is empty) and no reference is needed.
   * @throws IOException if writing the reference fails
   */
  Path splitStoreFile(final HRegionInfo hri, final String familyName, final StoreFile f,
      final byte[] splitRow, final boolean top, RegionSplitPolicy splitPolicy)
          throws IOException {
    if (splitPolicy == null || !splitPolicy.skipStoreFileRangeCheck(familyName)) {
      // Check whether the split row lies in the range of the store file
      // If it is outside the range, return directly.
      try {
        if (top) {
          //check if larger than last key.
          KeyValue splitKey = KeyValueUtil.createFirstOnRow(splitRow);
          Cell lastKey = f.getLastKey();
          // If lastKey is null means storefile is empty.
          if (lastKey == null) {
            return null;
          }
          if (f.getComparator().compare(splitKey, lastKey) > 0) {
            return null;
          }
        } else {
          //check if smaller than first key
          KeyValue splitKey = KeyValueUtil.createLastOnRow(splitRow);
          Cell firstKey = f.getFirstKey();
          // If firstKey is null means storefile is empty.
          if (firstKey == null) {
            return null;
          }
          if (f.getComparator().compare(splitKey, firstKey) < 0) {
            return null;
          }
        }
      } finally {
        // Always release the reader opened for the range check.
        f.closeReader(f.getCacheConf() != null ? f.getCacheConf().shouldEvictOnClose() : true);
      }
    }

    Path splitDir = new Path(getSplitsDir(hri), familyName);
    // A reference to the bottom half of the hsf store file.
    Reference r =
      top ? Reference.createTopReference(splitRow): Reference.createBottomReference(splitRow);
    // Add the referred-to regions name as a dot separated suffix.
    // See REF_NAME_REGEX regex above.  The referred-to regions name is
    // up in the path of the passed in <code>f</code> -- parentdir is family,
    // then the directory above is the region name.
    String parentRegionName = regionInfoForFs.getEncodedName();
    // Write reference with same file id only with the other region name as
    // suffix and into the new region location (under same family).
    Path p = new Path(splitDir, f.getPath().getName() + "." + parentRegionName);
    return r.write(fs, p);
  }
641
642   // ===========================================================================
643   //  Merge Helpers
644   // ===========================================================================
  /** @return {@link Path} to the region's ".merges" temp directory used during merge operations */
  Path getMergesDir() {
    return new Path(getRegionDir(), REGION_MERGES_DIR);
  }
649
  /**
   * @param hri merged {@link HRegionInfo}
   * @return {@link Path} to the merged region's subdirectory (by encoded name) under the merges dir
   */
  Path getMergesDir(final HRegionInfo hri) {
    return new Path(getMergesDir(), hri.getEncodedName());
  }
653
  /**
   * Clean up any merge detritus that may have been left around from previous merge attempts.
   * Deletes the whole ".merges" directory; a no-op if it does not exist.
   */
  void cleanupMergesDir() throws IOException {
    deleteDir(getMergesDir());
  }
660
661   /**
662    * Remove merged region
663    * @param mergedRegion {@link HRegionInfo}
664    * @throws IOException
665    */
666   void cleanupMergedRegion(final HRegionInfo mergedRegion) throws IOException {
667     Path regionDir = new Path(this.tableDir, mergedRegion.getEncodedName());
668     if (this.fs.exists(regionDir) && !this.fs.delete(regionDir, true)) {
669       throw new IOException("Failed delete of " + regionDir);
670     }
671   }
672
673   /**
674    * Create the region merges directory.
675    * @throws IOException If merges dir already exists or we fail to create it.
676    * @see HRegionFileSystem#cleanupMergesDir()
677    */
678   void createMergesDir() throws IOException {
679     Path mergesdir = getMergesDir();
680     if (fs.exists(mergesdir)) {
681       LOG.info("The " + mergesdir
682           + " directory exists.  Hence deleting it to recreate it");
683       if (!fs.delete(mergesdir, true)) {
684         throw new IOException("Failed deletion of " + mergesdir
685             + " before creating them again.");
686       }
687     }
688     if (!fs.mkdirs(mergesdir))
689       throw new IOException("Failed create of " + mergesdir);
690   }
691
  /**
   * Write out a merge reference under the given merges directory. Package local
   * so it doesnt leak out of regionserver.
   * @param mergedRegion {@link HRegionInfo} of the merged region
   * @param familyName Column Family Name
   * @param f File to create reference.
   * @param mergedDir merges directory under which the reference is written
   * @return Path to created reference.
   * @throws IOException if writing the reference fails
   */
  Path mergeStoreFile(final HRegionInfo mergedRegion, final String familyName,
      final StoreFile f, final Path mergedDir)
      throws IOException {
    Path referenceDir = new Path(new Path(mergedDir,
        mergedRegion.getEncodedName()), familyName);
    // A whole reference to the store file.
    Reference r = Reference.createTopReference(regionInfoForFs.getStartKey());
    // Add the referred-to regions name as a dot separated suffix.
    // See REF_NAME_REGEX regex above. The referred-to regions name is
    // up in the path of the passed in <code>f</code> -- parentdir is family,
    // then the directory above is the region name.
    String mergingRegionName = regionInfoForFs.getEncodedName();
    // Write reference with same file id only with the other region name as
    // suffix and into the new region location (under same family).
    Path p = new Path(referenceDir, f.getPath().getName() + "."
        + mergingRegionName);
    return r.write(fs, p);
  }
720
721   /**
722    * Commit a merged region, moving it from the merges temporary directory to
723    * the proper location in the filesystem.
724    * @param mergedRegionInfo merged region {@link HRegionInfo}
725    * @throws IOException
726    */
727   void commitMergedRegion(final HRegionInfo mergedRegionInfo) throws IOException {
728     Path regionDir = new Path(this.tableDir, mergedRegionInfo.getEncodedName());
729     Path mergedRegionTmpDir = this.getMergesDir(mergedRegionInfo);
730     // Move the tmp dir in the expected location
731     if (mergedRegionTmpDir != null && fs.exists(mergedRegionTmpDir)) {
732       if (!fs.rename(mergedRegionTmpDir, regionDir)) {
733         throw new IOException("Unable to rename " + mergedRegionTmpDir + " to "
734             + regionDir);
735       }
736     }
737   }
738
739   // ===========================================================================
740   //  Create/Open/Delete Helpers
741   // ===========================================================================
742   /**
743    * Log the current state of the region
744    * @param LOG log to output information
745    * @throws IOException if an unexpected exception occurs
746    */
747   void logFileSystemState(final Log LOG) throws IOException {
748     FSUtils.logFileSystemState(fs, this.getRegionDir(), LOG);
749   }
750
  /**
   * @param hri region info to serialize
   * @return Content of the .regioninfo file we write out under a region directory
   *     (protobuf, delimited format)
   * @throws IOException if serialization fails
   */
  private static byte[] getRegionInfoFileContent(final HRegionInfo hri) throws IOException {
    return hri.toDelimitedByteArray();
  }
759
760   /**
761    * Create a {@link HRegionInfo} from the serialized version on-disk.
762    * @param fs {@link FileSystem} that contains the Region Info file
763    * @param regionDir {@link Path} to the Region Directory that contains the Info file
764    * @return An {@link HRegionInfo} instance gotten from the Region Info file.
765    * @throws IOException if an error occurred during file open/read operation.
766    */
767   public static HRegionInfo loadRegionInfoFileContent(final FileSystem fs, final Path regionDir)
768       throws IOException {
769     FSDataInputStream in = fs.open(new Path(regionDir, REGION_INFO_FILE));
770     try {
771       return HRegionInfo.parseFrom(in);
772     } finally {
773       in.close();
774     }
775   }
776
777   /**
778    * Write the .regioninfo file on-disk.
779    */
780   private static void writeRegionInfoFileContent(final Configuration conf, final FileSystem fs,
781       final Path regionInfoFile, final byte[] content) throws IOException {
782     // First check to get the permissions
783     FsPermission perms = FSUtils.getFilePermissions(fs, conf, HConstants.DATA_FILE_UMASK_KEY);
784     // Write the RegionInfo file content
785     FSDataOutputStream out = FSUtils.create(conf, fs, regionInfoFile, perms, null);
786     try {
787       out.write(content);
788     } finally {
789       out.close();
790     }
791   }
792
  /**
   * Write out an info file under the stored region directory. Useful recovering mangled regions.
   * If the regionInfo already exists on-disk (with the expected serialized length), then we
   * fast exit.
   * @throws IOException if a stale file cannot be removed or the new one cannot be written
   */
  void checkRegionInfoOnFilesystem() throws IOException {
    // Compose the content of the file so we can compare to length in filesystem. If not same,
    // rewrite it (it may have been written in the old format using Writables instead of pb). The
    // pb version is much shorter -- we write now w/o the toString version -- so checking length
    // only should be sufficient. I don't want to read the file every time to check if it pb
    // serialized.
    byte[] content = getRegionInfoFileContent(regionInfoForFs);
    try {
      Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);

      // getFileStatus throws FileNotFoundException if the file is absent (handled below).
      FileStatus status = fs.getFileStatus(regionInfoFile);
      if (status != null && status.getLen() == content.length) {
        // Then assume the content good and move on.
        // NOTE: that the length is not sufficient to define that the content matches.
        return;
      }

      // Length mismatch: assume stale/old-format content and delete so we can rewrite below.
      LOG.info("Rewriting .regioninfo file at: " + regionInfoFile);
      if (!fs.delete(regionInfoFile, false)) {
        throw new IOException("Unable to remove existing " + regionInfoFile);
      }
    } catch (FileNotFoundException e) {
      // No .regioninfo on disk at all; log and fall through to write a fresh one.
      LOG.warn(REGION_INFO_FILE + " file not found for region: " + regionInfoForFs.getEncodedName() +
          " on table " + regionInfo.getTable());
    }

    // Write HRI to a file in case we need to recover hbase:meta
    writeRegionInfoOnFilesystem(content, true);
  }
826
827   /**
828    * Write out an info file under the region directory. Useful recovering mangled regions.
829    * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
830    */
831   private void writeRegionInfoOnFilesystem(boolean useTempDir) throws IOException {
832     byte[] content = getRegionInfoFileContent(regionInfoForFs);
833     writeRegionInfoOnFilesystem(content, useTempDir);
834   }
835
  /**
   * Write out an info file under the region directory. Useful recovering mangled regions.
   * @param regionInfoContent serialized version of the {@link HRegionInfo}
   * @param useTempDir indicate whether or not using the region .tmp dir for a safer file creation.
   * @throws IOException if the file cannot be written or moved into place
   */
  private void writeRegionInfoOnFilesystem(final byte[] regionInfoContent,
      final boolean useTempDir) throws IOException {
    Path regionInfoFile = new Path(getRegionDir(), REGION_INFO_FILE);
    if (useTempDir) {
      // Create in tmpDir and then move into place in case we crash after
      // create but before close. If we don't successfully close the file,
      // subsequent region reopens will fail the below because create is
      // registered in NN.

      // And then create the file
      Path tmpPath = new Path(getTempDir(), REGION_INFO_FILE);

      // If datanode crashes or if the RS goes down just before the close is called while trying to
      // close the created regioninfo file in the .tmp directory then on next
      // creation we will be getting AlreadyCreatedException.
      // Hence delete and create the file if exists.
      if (FSUtils.isExists(fs, tmpPath)) {
        FSUtils.delete(fs, tmpPath, true);
      }

      // Write HRI to a file in case we need to recover hbase:meta
      writeRegionInfoFileContent(conf, fs, tmpPath, regionInfoContent);

      // Move the created file to the original path; rename() retries internally,
      // so only throw if the tmp file exists and still could not be moved.
      if (fs.exists(tmpPath) &&  !rename(tmpPath, regionInfoFile)) {
        throw new IOException("Unable to rename " + tmpPath + " to " + regionInfoFile);
      }
    } else {
      // Write HRI to a file in case we need to recover hbase:meta
      writeRegionInfoFileContent(conf, fs, regionInfoFile, regionInfoContent);
    }
  }
873
874   /**
875    * Create a new Region on file-system.
876    * @param conf the {@link Configuration} to use
877    * @param fs {@link FileSystem} from which to add the region
878    * @param tableDir {@link Path} to where the table is being stored
879    * @param regionInfo {@link HRegionInfo} for region to be added
880    * @throws IOException if the region creation fails due to a FileSystem exception.
881    */
882   public static HRegionFileSystem createRegionOnFileSystem(final Configuration conf,
883       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
884     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
885     Path regionDir = regionFs.getRegionDir();
886
887     if (fs.exists(regionDir)) {
888       LOG.warn("Trying to create a region that already exists on disk: " + regionDir);
889       throw new IOException("The specified region already exists on disk: " + regionDir);
890     }
891
892     // Create the region directory
893     if (!createDirOnFileSystem(fs, conf, regionDir)) {
894       LOG.warn("Unable to create the region directory: " + regionDir);
895       throw new IOException("Unable to create region directory: " + regionDir);
896     }
897
898     // Write HRI to a file in case we need to recover hbase:meta
899     regionFs.writeRegionInfoOnFilesystem(false);
900     return regionFs;
901   }
902
903   /**
904    * Open Region from file-system.
905    * @param conf the {@link Configuration} to use
906    * @param fs {@link FileSystem} from which to add the region
907    * @param tableDir {@link Path} to where the table is being stored
908    * @param regionInfo {@link HRegionInfo} for region to be added
909    * @param readOnly True if you don't want to edit the region data
910    * @throws IOException if the region creation fails due to a FileSystem exception.
911    */
912   public static HRegionFileSystem openRegionFromFileSystem(final Configuration conf,
913       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo, boolean readOnly)
914       throws IOException {
915     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
916     Path regionDir = regionFs.getRegionDir();
917
918     if (!fs.exists(regionDir)) {
919       LOG.warn("Trying to open a region that do not exists on disk: " + regionDir);
920       throw new IOException("The specified region do not exists on disk: " + regionDir);
921     }
922
923     if (!readOnly) {
924       // Cleanup temporary directories
925       regionFs.cleanupTempDir();
926       regionFs.cleanupSplitsDir();
927       regionFs.cleanupMergesDir();
928
929       // if it doesn't exists, Write HRI to a file, in case we need to recover hbase:meta
930       regionFs.checkRegionInfoOnFilesystem();
931     }
932
933     return regionFs;
934   }
935
936   /**
937    * Remove the region from the table directory, archiving the region's hfiles.
938    * @param conf the {@link Configuration} to use
939    * @param fs {@link FileSystem} from which to remove the region
940    * @param tableDir {@link Path} to where the table is being stored
941    * @param regionInfo {@link HRegionInfo} for region to be deleted
942    * @throws IOException if the request cannot be completed
943    */
944   public static void deleteRegionFromFileSystem(final Configuration conf,
945       final FileSystem fs, final Path tableDir, final HRegionInfo regionInfo) throws IOException {
946     HRegionFileSystem regionFs = new HRegionFileSystem(conf, fs, tableDir, regionInfo);
947     Path regionDir = regionFs.getRegionDir();
948 
949     if (!fs.exists(regionDir)) {
950       LOG.warn("Trying to delete a region that do not exists on disk: " + regionDir);
951       return;
952     }
953
954     if (LOG.isDebugEnabled()) {
955       LOG.debug("DELETING region " + regionDir);
956     }
957
958     // Archive region
959     Path rootDir = FSUtils.getRootDir(conf);
960     HFileArchiver.archiveRegion(fs, rootDir, tableDir, regionDir);
961
962     // Delete empty region dir
963     if (!fs.delete(regionDir, true)) {
964       LOG.warn("Failed delete of " + regionDir);
965     }
966   }
967
968   /**
969    * Creates a directory. Assumes the user has already checked for this directory existence.
970    * @param dir
971    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
972    *         whether the directory exists or not, and returns true if it exists.
973    * @throws IOException
974    */
975   boolean createDir(Path dir) throws IOException {
976     int i = 0;
977     IOException lastIOE = null;
978     do {
979       try {
980         return fs.mkdirs(dir);
981       } catch (IOException ioe) {
982         lastIOE = ioe;
983         if (fs.exists(dir)) return true; // directory is present
984         try {
985           sleepBeforeRetry("Create Directory", i+1);
986         } catch (InterruptedException e) {
987           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
988         }
989       }
990     } while (++i <= hdfsClientRetriesNumber);
991     throw new IOException("Exception in createDir", lastIOE);
992   }
993
994   /**
995    * Renames a directory. Assumes the user has already checked for this directory existence.
996    * @param srcpath
997    * @param dstPath
998    * @return true if rename is successful.
999    * @throws IOException
1000    */
1001   boolean rename(Path srcpath, Path dstPath) throws IOException {
1002     IOException lastIOE = null;
1003     int i = 0;
1004     do {
1005       try {
1006         return fs.rename(srcpath, dstPath);
1007       } catch (IOException ioe) {
1008         lastIOE = ioe;
1009         if (!fs.exists(srcpath) && fs.exists(dstPath)) return true; // successful move
1010         // dir is not there, retry after some time.
1011         try {
1012           sleepBeforeRetry("Rename Directory", i+1);
1013         } catch (InterruptedException e) {
1014           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1015         }
1016       }
1017     } while (++i <= hdfsClientRetriesNumber);
1018
1019     throw new IOException("Exception in rename", lastIOE);
1020   }
1021
1022   /**
1023    * Deletes a directory. Assumes the user has already checked for this directory existence.
1024    * @param dir
1025    * @return true if the directory is deleted.
1026    * @throws IOException
1027    */
1028   boolean deleteDir(Path dir) throws IOException {
1029     IOException lastIOE = null;
1030     int i = 0;
1031     do {
1032       try {
1033         return fs.delete(dir, true);
1034       } catch (IOException ioe) {
1035         lastIOE = ioe;
1036         if (!fs.exists(dir)) return true;
1037         // dir is there, retry deleting after some time.
1038         try {
1039           sleepBeforeRetry("Delete Directory", i+1);
1040         } catch (InterruptedException e) {
1041           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1042         }
1043       }
1044     } while (++i <= hdfsClientRetriesNumber);
1045
1046     throw new IOException("Exception in DeleteDir", lastIOE);
1047   }
1048
  /**
   * Sleeping logic before retrying an HDFS operation; handles the interrupt exception.
   * Delegates to the static overload using this instance's configured base sleep time
   * and retry count.
   * @param msg description of the retried operation, used in debug logging
   * @param sleepMultiplier multiplier applied to the base sleep time
   * @throws InterruptedException if the sleeping thread is interrupted
   */
  private void sleepBeforeRetry(String msg, int sleepMultiplier) throws InterruptedException {
    sleepBeforeRetry(msg, sleepMultiplier, baseSleepBeforeRetries, hdfsClientRetriesNumber);
  }
1055
1056   /**
1057    * Creates a directory for a filesystem and configuration object. Assumes the user has already
1058    * checked for this directory existence.
1059    * @param fs
1060    * @param conf
1061    * @param dir
1062    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
1063    *         whether the directory exists or not, and returns true if it exists.
1064    * @throws IOException
1065    */
1066   private static boolean createDirOnFileSystem(FileSystem fs, Configuration conf, Path dir)
1067       throws IOException {
1068     int i = 0;
1069     IOException lastIOE = null;
1070     int hdfsClientRetriesNumber = conf.getInt("hdfs.client.retries.number",
1071       DEFAULT_HDFS_CLIENT_RETRIES_NUMBER);
1072     int baseSleepBeforeRetries = conf.getInt("hdfs.client.sleep.before.retries",
1073       DEFAULT_BASE_SLEEP_BEFORE_RETRIES);
1074     do {
1075       try {
1076         return fs.mkdirs(dir);
1077       } catch (IOException ioe) {
1078         lastIOE = ioe;
1079         if (fs.exists(dir)) return true; // directory is present
1080         try {
1081           sleepBeforeRetry("Create Directory", i+1, baseSleepBeforeRetries, hdfsClientRetriesNumber);
1082         } catch (InterruptedException e) {
1083           throw (InterruptedIOException)new InterruptedIOException().initCause(e);
1084         }
1085       }
1086     } while (++i <= hdfsClientRetriesNumber);
1087
1088     throw new IOException("Exception in createDir", lastIOE);
1089   }
1090
1091   /**
1092    * sleeping logic for static methods; handles the interrupt exception. Keeping a static version
1093    * for this to avoid re-looking for the integer values.
1094    */
1095   private static void sleepBeforeRetry(String msg, int sleepMultiplier, int baseSleepBeforeRetries,
1096       int hdfsClientRetriesNumber) throws InterruptedException {
1097     if (sleepMultiplier > hdfsClientRetriesNumber) {
1098       if (LOG.isDebugEnabled()) {
1099         LOG.debug(msg + ", retries exhausted");
1100       }
1101       return;
1102     }
1103     if (LOG.isDebugEnabled()) {
1104       LOG.debug(msg + ", sleeping " + baseSleepBeforeRetries + " times " + sleepMultiplier);
1105     }
1106     Thread.sleep((long)baseSleepBeforeRetries * sleepMultiplier);
1107   }
1108 }