/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableInfoMissingException;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.primitives.Ints;


/**
 * Implementation of {@link TableDescriptors} that reads descriptors from the
 * passed filesystem.  It expects descriptors to be in a file in the
 * {@link #TABLEINFO_DIR} subdir of the table's directory in FS.  Can be
 * read-only -- i.e. does not modify the filesystem -- or read-write.
 *
 * <p>Also has utilities for keeping the table descriptor's tableinfo file up
 * to date.  The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
 * of the table directory in the filesystem.
 * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
 * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>.  This sequenceid
 * is always increasing.  It starts at zero.  The table schema file with the
 * highest sequenceid has the most recent schema edit.  Usually there is only
 * one file, the most recent, but there may be short periods where there is
 * more than one.  Old files are eventually cleaned up.  The presumption is
 * that there will not be lots of concurrent clients making table schema
 * edits.  If there are, the below needs a bit of reworking and perhaps some
 * supporting api in hdfs.
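 *
 * <p>A minimal usage sketch (illustrative only; the table name
 * <code>"example"</code> is an assumption, not something this class defines):
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * FSTableDescriptors fstd = new FSTableDescriptors(conf);  // read-write, cache enabled
 * HTableDescriptor htd = fstd.get(TableName.valueOf("example"));
 * </pre>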
 */
@InterfaceAudience.Private
public class FSTableDescriptors implements TableDescriptors {
  private static final Log LOG = LogFactory.getLog(FSTableDescriptors.class);
  private final FileSystem fs;
  private final Path rootdir;
  private final boolean fsreadonly;
  private volatile boolean usecache;
  private volatile boolean fsvisited;

  @VisibleForTesting long cachehits = 0;
  @VisibleForTesting long invocations = 0;

  /** The file name prefix used to store HTD in HDFS  */
  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
  static final String TABLEINFO_DIR = ".tabledesc";
  static final String TMP_DIR = ".tmp";

  // This cache does not age out old entries.  The thinking is that the amount
  // of data we keep in here is so small that there is no need for an
  // occasional purge.
  // TODO.
  private final Map<TableName, HTableDescriptor> cache =
    new ConcurrentHashMap<TableName, HTableDescriptor>();

  /**
   * Table descriptor for <code>hbase:meta</code> catalog table
   */
  private final HTableDescriptor metaTableDescriptor;

  /**
   * Construct a FSTableDescriptors instance using the hbase root dir of the
   * given conf and the filesystem where that root dir lives.
   * This instance can do write operations (is not read only).
   */
  public FSTableDescriptors(final Configuration conf) throws IOException {
    this(conf, FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf));
  }

  public FSTableDescriptors(final Configuration conf, final FileSystem fs, final Path rootdir)
      throws IOException {
    this(conf, fs, rootdir, false, true);
  }

  /**
   * @param fsreadonly True if we are read-only when it comes to filesystem
   * operations; i.e. on remove, we do not do delete in fs.
   */
  public FSTableDescriptors(final Configuration conf, final FileSystem fs,
    final Path rootdir, final boolean fsreadonly, final boolean usecache) throws IOException {
    this.fs = fs;
    this.rootdir = rootdir;
    this.fsreadonly = fsreadonly;
    this.usecache = usecache;
    this.metaTableDescriptor = HTableDescriptor.metaTableDescriptor(conf);
  }

  @Override
  public void setCacheOn() throws IOException {
    this.cache.clear();
    this.usecache = true;
  }

  @Override
  public void setCacheOff() throws IOException {
    this.usecache = false;
    this.cache.clear();
  }

  @VisibleForTesting
  public boolean isUsecache() {
    return this.usecache;
  }

  /**
   * Get the current table descriptor for the given table, or null if none exists.
   *
   * If the cache is enabled and holds the descriptor, the cached copy is
   * returned without checking the filesystem; otherwise the descriptor is
   * read from the filesystem (and cached if caching is enabled).
   */
  @Override
  public HTableDescriptor get(final TableName tablename)
  throws IOException {
    invocations++;
    if (TableName.META_TABLE_NAME.equals(tablename)) {
      cachehits++;
      return metaTableDescriptor;
    }
    // hbase:meta is already handled. If someone tries to get the descriptor for
    // .logs, .oldlogs or .corrupt, throw an exception.
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename.getNameAsString())) {
      throw new IOException("No descriptor found for non-table = " + tablename);
    }

    if (usecache) {
      // Look in cache of descriptors.
      HTableDescriptor cachedtdm = this.cache.get(tablename);
      if (cachedtdm != null) {
        cachehits++;
        return cachedtdm;
      }
    }
    HTableDescriptor tdmt = null;
    try {
      tdmt = getTableDescriptorFromFs(fs, rootdir, tablename, !fsreadonly);
    } catch (NullPointerException e) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
          + tablename, e);
    } catch (TableInfoMissingException e) {
      // ignore. This is regular operation
    } catch (IOException ioe) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
          + tablename, ioe);
    }
    // last HTD written wins
    if (usecache && tdmt != null) {
      this.cache.put(tablename, tdmt);
    }

    return tdmt;
  }

  /**
   * Returns a map from table name to table descriptor for all tables.
   */
  @Override
  public Map<String, HTableDescriptor> getAll()
  throws IOException {
    Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();

    if (fsvisited && usecache) {
      for (Map.Entry<TableName, HTableDescriptor> entry: this.cache.entrySet()) {
        htds.put(entry.getKey().toString(), entry.getValue());
      }
      // add hbase:meta to the response
      htds.put(metaTableDescriptor.getTableName().getNameAsString(), metaTableDescriptor);
    } else {
      LOG.debug("Fetching table descriptors from the filesystem.");
      boolean allvisited = true;
      for (Path d : FSUtils.getTableDirs(fs, rootdir)) {
        HTableDescriptor htd = null;
        try {
          htd = get(FSUtils.getTableName(d));
        } catch (FileNotFoundException fnfe) {
          // failure to retrieve one HTD should not stop us retrieving the rest
          LOG.warn("Trouble retrieving htd", fnfe);
        }
        if (htd == null) {
          allvisited = false;
          continue;
        } else {
          htds.put(htd.getTableName().getNameAsString(), htd);
        }
      }
      // Only mark the filesystem as fully visited if every table dir yielded a descriptor.
      fsvisited = allvisited;
    }
    return htds;
  }

  /**
   * Returns a map from table name to table descriptor for all tables in the
   * given namespace.
   */
  @Override
  public Map<String, HTableDescriptor> getByNamespace(String name)
  throws IOException {
    Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
    List<Path> tableDirs =
        FSUtils.getLocalTableDirs(fs, FSUtils.getNamespaceDir(rootdir, name));
    for (Path d: tableDirs) {
      HTableDescriptor htd = null;
      try {
        htd = get(FSUtils.getTableName(d));
      } catch (FileNotFoundException fnfe) {
        // failure to retrieve one HTD should not stop us retrieving the rest
        LOG.warn("Trouble retrieving htd", fnfe);
      }
      if (htd == null) continue;
      htds.put(FSUtils.getTableName(d).getNameAsString(), htd);
    }
    return htds;
  }

  /**
   * Adds (or updates) the table descriptor to the FileSystem
   * and updates the local cache with it.
   */
  @Override
  public void add(HTableDescriptor htd) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
    }
    if (TableName.META_TABLE_NAME.equals(htd.getTableName())) {
      throw new NotImplementedException("Cannot add a table descriptor for " +
        TableName.META_TABLE_NAME);
    }
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(htd.getTableName().getNameAsString())) {
      throw new NotImplementedException(
        "Cannot add a table descriptor for a reserved subdirectory name: " + htd.getNameAsString());
    }
    updateTableDescriptor(htd);
  }

  /**
   * Removes the table descriptor from the local cache and returns it.
   * If not in read only mode, it also deletes the entire table directory(!)
   * from the FileSystem.
   */
  @Override
  public HTableDescriptor remove(final TableName tablename)
  throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
    }
    Path tabledir = getTableDir(tablename);
    if (this.fs.exists(tabledir)) {
      if (!this.fs.delete(tabledir, true)) {
        throw new IOException("Failed delete of " + tabledir.toString());
      }
    }
    return this.cache.remove(tablename);
  }

  /**
   * Checks if a current table info file exists for the given table
   *
   * @param tableName name of table
   * @return true if exists
   * @throws IOException
   */
  public boolean isTableInfoExists(TableName tableName) throws IOException {
    return getTableInfoPath(tableName) != null;
  }

  /**
   * Find the most current table info file for the given table in the hbase root directory.
   * @return The file status of the current table info file or null if it does not exist
   */
  private FileStatus getTableInfoPath(final TableName tableName) throws IOException {
    Path tableDir = getTableDir(tableName);
    return getTableInfoPath(tableDir);
  }

  private FileStatus getTableInfoPath(Path tableDir)
  throws IOException {
    return getTableInfoPath(fs, tableDir, !fsreadonly);
  }

  /**
   * Find the most current table info file for the table located in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if present
   * or no sequence number at all if none exist (for backward compatibility from before there
   * were sequence numbers).
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir)
  throws IOException {
    return getTableInfoPath(fs, tableDir, false);
  }

  /**
   * Find the most current table info file for the table in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if
   * present or no sequence number at all if none exist (for backward compatibility from before
   * there were sequence numbers).
   * If there are multiple table info files found and removeOldFiles is true it also deletes the
   * older files.
   *
   * @return The file status of the current table info file or null if none exist
   * @throws IOException
   */
  private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
  throws IOException {
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
  }

  /**
   * Find the most current table info file in the given directory.
   *
   * Looks within the given directory for any table info files
   * and takes the 'current' one - meaning the one with the highest sequence number if present
   * or no sequence number at all if none exist (for backward compatibility from before there
   * were sequence numbers).
   * If there are multiple possible files found and removeOldFiles is true, it
   * also deletes the older files.
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  // only visible for FSTableDescriptorMigrationToSubdir, can be removed with that
  static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir, boolean removeOldFiles)
  throws IOException {
    FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null || status.length < 1) return null;
    FileStatus mostCurrent = null;
    for (FileStatus file : status) {
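      // TABLEINFO_FILESTATUS_COMPARATOR sorts in reverse of the natural ordering,
      // so comparing "less than" mostCurrent means this file has a higher
      // sequenceid, i.e. is newer.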
      if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
        mostCurrent = file;
      }
    }
    if (removeOldFiles && status.length > 1) {
      // Clean away old versions
      for (FileStatus file : status) {
        Path path = file.getPath();
        if (file != mostCurrent) {
          if (!fs.delete(file.getPath(), false)) {
            LOG.warn("Failed cleanup of " + path);
          } else {
            LOG.debug("Cleaned up old tableinfo file " + path);
          }
        }
      }
    }
    return mostCurrent;
  }

  /**
   * Compares {@link FileStatus} instances in reverse of their natural ordering
   * (which, for tableinfo files in the same directory, is by file name), so the
   * newest (highest sequenceid) file sorts first.
   */
  @VisibleForTesting
  static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
  new Comparator<FileStatus>() {
    @Override
    public int compare(FileStatus left, FileStatus right) {
      return right.compareTo(left);
    }};

  /**
   * Return the table directory in HDFS
   */
  @VisibleForTesting Path getTableDir(final TableName tableName) {
    return FSUtils.getTableDir(rootdir, tableName);
  }

  private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      // Accept any file that starts with TABLEINFO_FILE_PREFIX
      return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
    }};

  /**
   * Width of the sequenceid that is a suffix on a tableinfo file.
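   * Ten digits is wide enough for any non-negative int
   * (Integer.MAX_VALUE is 2147483647).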
   */
  @VisibleForTesting static final int WIDTH_OF_SEQUENCE_ID = 10;

  /*
   * @param number Number to use as suffix.
   * @return Zero-prefixed decimal version of the passed number (takes the
   * absolute value in case the number is negative).
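   * For example (illustrative): formatTableInfoSequenceId(3) returns "0000000003".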
   */
  private static String formatTableInfoSequenceId(final int number) {
    byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
    int d = Math.abs(number);
    for (int i = b.length - 1; i >= 0; i--) {
      b[i] = (byte)((d % 10) + '0');
      d /= 10;
    }
    return Bytes.toString(b);
  }

  /**
   * Regex to eat up sequenceid suffix on a .tableinfo file.
   * Use a regex because we may encounter old-style .tableinfo files that have
   * no sequenceid on the end.
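   * For example, it matches both <code>.tableinfo</code> and
   * <code>.tableinfo.0000000003</code>.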
   */
  private static final Pattern TABLEINFO_FILE_REGEX =
    Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");

  /**
   * @param p Path to a <code>.tableinfo</code> file.
   * @return The current editid or 0 if none found.
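   * For example (illustrative): a file named <code>.tableinfo.0000000003</code> yields 3.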
   */
  @VisibleForTesting static int getTableInfoSequenceId(final Path p) {
    if (p == null) return 0;
    Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
    if (!m.matches()) throw new IllegalArgumentException(p.toString());
    String suffix = m.group(2);
    if (suffix == null || suffix.length() <= 0) return 0;
    return Integer.parseInt(suffix);
  }

  /**
   * @param sequenceid Sequenceid to use as the file name suffix.
   * @return Name of tableinfo file.
   */
  @VisibleForTesting static String getTableInfoFileName(final int sequenceid) {
    return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
  }

  /**
   * Returns the latest table descriptor for the given table directly from the file system,
   * bypassing the local cache.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs,
    Path hbaseRootDir, TableName tableName) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir);
  }

  /**
   * Returns the latest table descriptor for the given table directly from the
   * file system if it exists.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs,
    Path hbaseRootDir, TableName tableName, boolean rewritePb) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir, rewritePb);
  }

  /**
   * Returns the latest table descriptor for the table located at the given directory
   * directly from the file system if it exists.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
    throws IOException {
    return getTableDescriptorFromFs(fs, tableDir, false);
  }

  /**
   * Returns the latest table descriptor for the table located at the given directory
   * directly from the file system if it exists.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir,
    boolean rewritePb)
  throws IOException {
    FileStatus status = getTableInfoPath(fs, tableDir, false);
    if (status == null) {
      throw new TableInfoMissingException("No table descriptor file under " + tableDir);
    }
    return readTableDescriptor(fs, status, rewritePb);
  }

  private static HTableDescriptor readTableDescriptor(FileSystem fs, FileStatus status,
      boolean rewritePb) throws IOException {
    int len = Ints.checkedCast(status.getLen());
    byte [] content = new byte[len];
    FSDataInputStream fsDataInputStream = fs.open(status.getPath());
    try {
      fsDataInputStream.readFully(content);
    } finally {
      fsDataInputStream.close();
    }
    HTableDescriptor htd = null;
    try {
      htd = HTableDescriptor.parseFrom(content);
    } catch (DeserializationException e) {
      // we have old HTableDescriptor here
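      // NOTE: this fallback re-parses the same bytes with the same method, so it
      // fails with the same DeserializationException and rethrows as IOException
      // below; it appears to be a leftover from an older, pre-protobuf descriptor
      // format path.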
      try {
        HTableDescriptor ohtd = HTableDescriptor.parseFrom(content);
        LOG.warn("Found old table descriptor, converting to new format for table " +
          ohtd.getTableName());
        htd = new HTableDescriptor(ohtd);
        if (rewritePb) rewriteTableDescriptor(fs, status, htd);
      } catch (DeserializationException e1) {
        throw new IOException("content=" + Bytes.toShort(content), e1);
      }
    }
    if (rewritePb && !ProtobufUtil.isPBMagicPrefix(content)) {
      // Convert the file over to be pb before leaving here.
      rewriteTableDescriptor(fs, status, htd);
    }
    return htd;
  }

  private static void rewriteTableDescriptor(final FileSystem fs, final FileStatus status,
    final HTableDescriptor td)
  throws IOException {
    Path tableInfoDir = status.getPath().getParent();
    Path tableDir = tableInfoDir.getParent();
    writeTableDescriptor(fs, td, tableDir, status);
  }

  /**
   * Update table descriptor on the file system
   * @throws IOException Thrown if failed update.
   * @throws NotImplementedException if in read only mode
   */
  @VisibleForTesting Path updateTableDescriptor(HTableDescriptor htd)
  throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot update a table descriptor - in read only mode");
    }
    Path tableDir = getTableDir(htd.getTableName());
    Path p = writeTableDescriptor(fs, htd, tableDir, getTableInfoPath(tableDir));
    if (p == null) throw new IOException("Failed update");
    LOG.info("Updated tableinfo=" + p);
    if (usecache) {
      this.cache.put(htd.getTableName(), htd);
    }
    return p;
  }

  /**
   * Deletes all the table descriptor files from the file system.
   * Used in unit tests only.
   * @throws NotImplementedException if in read only mode
   */
  public void deleteTableDescriptorIfExists(TableName tableName) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot delete a table descriptor - in read only mode");
    }

    Path tableDir = getTableDir(tableName);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    deleteTableDescriptorFiles(fs, tableInfoDir, Integer.MAX_VALUE);
  }

  /**
   * Deletes files matching the table info file pattern within the given directory
   * whose sequenceId is at most the given max sequenceId.
   */
  private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
  throws IOException {
    FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null) return; // listStatus returns null if the directory is empty or missing
    for (FileStatus file : status) {
      Path path = file.getPath();
      int sequenceId = getTableInfoSequenceId(path);
      if (sequenceId <= maxSequenceId) {
        boolean success = FSUtils.delete(fs, path, false);
        if (success) {
          LOG.debug("Deleted table descriptor at " + path);
        } else {
          LOG.error("Failed to delete descriptor at " + path);
        }
      }
    }
  }

  /**
   * Attempts to write a new table descriptor to the given table's directory.
   * It first writes it to the .tmp dir, then uses an atomic rename to move it
   * into place.  It begins at currentSequenceId + 1 and tries up to 10 times to
   * find a new sequence number not already in use.
   * Removes the current descriptor file if passed in.
   *
   * @return Descriptor file or null if we failed to write.
   */
  private static Path writeTableDescriptor(final FileSystem fs,
    final HTableDescriptor htd, final Path tableDir,
    final FileStatus currentDescriptorFile)
  throws IOException {
    // Get temporary dir into which we'll first write a file to avoid the
    // half-written file phenomenon.  This directory is never removed to avoid
    // removing it out from under a concurrent writer.
    Path tmpTableDir = new Path(tableDir, TMP_DIR);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
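
    // Illustrative example: with a new sequenceid of 4, the descriptor is first
    // written to <tableDir>/.tmp/.tableinfo.0000000004 and then renamed into
    // place at <tableDir>/.tabledesc/.tableinfo.0000000004.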

    // What is the current sequenceid?  We read it from the current file.  After
    // we read it, another thread could come in and compete with us writing out
    // the next version of the file.  The retries below should help some, but
    // it's hard to make guarantees in the face of concurrent schema edits.
    int currentSequenceId = currentDescriptorFile == null ? 0 :
      getTableInfoSequenceId(currentDescriptorFile.getPath());
    int newSequenceId = currentSequenceId;

    // Put an arbitrary upper bound on how many times we retry
    int retries = 10;
    int retrymax = currentSequenceId + retries;
    Path tableInfoDirPath = null;
    do {
      newSequenceId += 1;
      String filename = getTableInfoFileName(newSequenceId);
      Path tempPath = new Path(tmpTableDir, filename);
      if (fs.exists(tempPath)) {
        LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
        continue;
      }
      tableInfoDirPath = new Path(tableInfoDir, filename);
      try {
        writeHTD(fs, tempPath, htd);
        fs.mkdirs(tableInfoDirPath.getParent());
        if (!fs.rename(tempPath, tableInfoDirPath)) {
          throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
        }
        LOG.debug("Wrote descriptor into: " + tableInfoDirPath);
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!FSUtils.deleteDirectory(fs, tempPath)) {
          LOG.warn("Failed cleanup of " + tempPath);
        }
        tableInfoDirPath = null;
        continue;
      }
      break;
    } while (newSequenceId < retrymax);
    if (tableInfoDirPath != null) {
      // if we succeeded, remove old table info files.
      deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
    }
    return tableInfoDirPath;
  }

  private static void writeHTD(final FileSystem fs, final Path p, final HTableDescriptor htd)
  throws IOException {
    FSDataOutputStream out = fs.create(p, false);
    try {
      // We used to write this file out as a serialized HTD Writable followed by two '\n's and then
      // the toString version of HTD.  Now we just write out the pb serialization.
      out.write(htd.toByteArray());
    } finally {
      out.close();
    }
  }

  /**
   * Create a new HTableDescriptor in HDFS.  Happens when we are creating a table.
   * Used by tests.
   * @return True if we successfully created the file.
   */
  public boolean createTableDescriptor(HTableDescriptor htd) throws IOException {
    return createTableDescriptor(htd, false);
  }

  /**
   * Create a new HTableDescriptor in HDFS.  Happens when we are creating a table.
   * If forceCreation is true, the new descriptor is written even if a previous
   * descriptor is already present.
   *
   * @return True if we successfully created the file.
   */
  public boolean createTableDescriptor(HTableDescriptor htd, boolean forceCreation)
  throws IOException {
    Path tableDir = getTableDir(htd.getTableName());
    return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
  }

  /**
   * Create a new HTableDescriptor in HDFS in the specified table directory.  Happens when we
   * create a new table or snapshot a table.
   * @param tableDir table directory under which we should write the file
   * @param htd description of the table to write
   * @param forceCreation if <tt>true</tt>, then even if a previous table descriptor is present
   *          it will be overwritten
   * @return <tt>true</tt> if we successfully created the file, <tt>false</tt> if the file
   *         already exists and we weren't forcing the descriptor creation.
   * @throws IOException if a filesystem error occurs
   */
  public boolean createTableDescriptorForTableDirectory(Path tableDir,
      HTableDescriptor htd, boolean forceCreation) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
    }
    FileStatus status = getTableInfoPath(fs, tableDir);
    if (status != null) {
      LOG.debug("Current tableInfoPath = " + status.getPath());
      if (!forceCreation) {
        if (fs.exists(status.getPath()) && status.getLen() > 0) {
          if (readTableDescriptor(fs, status, false).equals(htd)) {
            LOG.debug("TableInfo already exists.  Skipping creation.");
            return false;
          }
        }
      }
    }
    Path p = writeTableDescriptor(fs, htd, tableDir, status);
    return p != null;
  }

}