View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import javax.annotation.Nullable;
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.Comparator;
24  import java.util.List;
25  import java.util.Map;
26  import java.util.TreeMap;
27  import java.util.concurrent.ConcurrentHashMap;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import com.google.common.annotations.VisibleForTesting;
32  import com.google.common.primitives.Ints;
33  import org.apache.commons.lang.NotImplementedException;
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.hbase.classification.InterfaceAudience;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.FSDataInputStream;
39  import org.apache.hadoop.fs.FSDataOutputStream;
40  import org.apache.hadoop.fs.FileStatus;
41  import org.apache.hadoop.fs.FileSystem;
42  import org.apache.hadoop.fs.Path;
43  import org.apache.hadoop.fs.PathFilter;
44  import org.apache.hadoop.hbase.HConstants;
45  import org.apache.hadoop.hbase.HTableDescriptor;
46  import org.apache.hadoop.hbase.TableDescriptor;
47  import org.apache.hadoop.hbase.TableDescriptors;
48  import org.apache.hadoop.hbase.TableInfoMissingException;
49  import org.apache.hadoop.hbase.TableName;
50  import org.apache.hadoop.hbase.client.TableState;
51  import org.apache.hadoop.hbase.exceptions.DeserializationException;
52  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
53  
54  /**
55   * Implementation of {@link TableDescriptors} that reads descriptors from the
56   * passed filesystem.  It expects descriptors to be in a file in the
57   * {@link #TABLEINFO_DIR} subdir of the table's directory in FS.  Can be read-only
58   *  -- i.e. does not modify the filesystem or can be read and write.
59   *
60   * <p>Also has utility for keeping up the table descriptors tableinfo file.
61   * The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
62   * of the table directory in the filesystem.
63   * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
64   * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>.  This sequenceid
65   * is always increasing.  It starts at zero.  The table schema file with the
66   * highest sequenceid has the most recent schema edit. Usually there is one file
67   * only, the most recent but there may be short periods where there are more
68   * than one file. Old files are eventually cleaned.  Presumption is that there
69   * will not be lots of concurrent clients making table schema edits.  If so,
70   * the below needs a bit of a reworking and perhaps some supporting api in hdfs.
71   */
72  @InterfaceAudience.Private
public class FSTableDescriptors implements TableDescriptors {
  private static final Log LOG = LogFactory.getLog(FSTableDescriptors.class);
  // Filesystem holding the hbase root dir; all tableinfo reads/writes go through it.
  private final FileSystem fs;
  private final Path rootdir;
  // When true, all mutating operations (add/remove/update/create) throw
  // NotImplementedException and stale tableinfo files are never cleaned up.
  private final boolean fsreadonly;
  // Toggled by setCacheOn()/setCacheOff(); volatile because it is read without locking.
  private volatile boolean usecache;
  // Set true once getAllDescriptors() has successfully visited every table dir;
  // thereafter getAllDescriptors() can answer from the cache alone.
  private volatile boolean fsvisited;

  // Best-effort hit/invocation counters exposed for tests; not synchronized.
  @VisibleForTesting long cachehits = 0;
  @VisibleForTesting long invocations = 0;

  /** The file name prefix used to store HTD in HDFS  */
  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
  /** Subdirectory of the table dir that holds the tableinfo files. */
  static final String TABLEINFO_DIR = ".tabledesc";
  /** Scratch subdir new tableinfo files are written to before being renamed into place. */
  static final String TMP_DIR = ".tmp";

  // This cache does not age out the old stuff.  Thinking is that the amount
  // of data we keep up in here is so small, no need to do occasional purge.
  // TODO.
  private final Map<TableName, TableDescriptor> cache =
    new ConcurrentHashMap<TableName, TableDescriptor>();

  /**
   * Table descriptor for <code>hbase:meta</code> catalog table.
   * (NOTE: field name carries a historical typo, "Descritor"; renaming would
   * touch every use site so it is kept as-is.)
   */
  private final HTableDescriptor metaTableDescritor;
99  
  /**
   * Construct a FSTableDescriptors instance using the hbase root dir of the given
   * conf and the filesystem where that root dir lives.
   * This instance can do write operations (is not read only).
   */
  public FSTableDescriptors(final Configuration conf) throws IOException {
    this(conf, FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf));
  }

  /**
   * Construct a read-write, caching instance over the given filesystem and root dir.
   */
  public FSTableDescriptors(final Configuration conf, final FileSystem fs, final Path rootdir)
  throws IOException {
    this(conf, fs, rootdir, false, true);
  }

  /**
   * @param fsreadonly True if we are read-only when it comes to filesystem
   * operations; i.e. on remove, we do not do delete in fs.
   */
  public FSTableDescriptors(final Configuration conf, final FileSystem fs,
    final Path rootdir, final boolean fsreadonly, final boolean usecache) throws IOException {
    super();
    this.fs = fs;
    this.rootdir = rootdir;
    this.fsreadonly = fsreadonly;
    this.usecache = usecache;

    // hbase:meta is special-cased throughout: its descriptor is synthesized from
    // configuration here and never read from (or written to) the filesystem.
    this.metaTableDescritor = TableDescriptor.metaTableDescriptor(conf);
  }
128 
  /**
   * Enables descriptor caching. The cache is cleared first so no stale entries
   * from a previous caching period survive the toggle.
   */
  public void setCacheOn() throws IOException {
    this.cache.clear();
    this.usecache = true;
  }

  /**
   * Disables descriptor caching and drops all cached entries.
   */
  public void setCacheOff() throws IOException {
    this.usecache = false;
    this.cache.clear();
  }

  /** @return whether descriptor caching is currently enabled */
  @VisibleForTesting
  public boolean isUsecache() {
    return this.usecache;
  }
143 
144   /**
145    * Get the current table descriptor for the given table, or null if none exists.
146    *
147    * Uses a local cache of the descriptor but still checks the filesystem on each call
148    * to see if a newer file has been created since the cached one was read.
149    */
150   @Override
151   @Nullable
152   public TableDescriptor getDescriptor(final TableName tablename)
153   throws IOException {
154     invocations++;
155     if (TableName.META_TABLE_NAME.equals(tablename)) {
156       cachehits++;
157       return new TableDescriptor(metaTableDescritor, TableState.State.ENABLED);
158     }
159     // hbase:meta is already handled. If some one tries to get the descriptor for
160     // .logs, .oldlogs or .corrupt throw an exception.
161     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename.getNameAsString())) {
162        throw new IOException("No descriptor found for non table = " + tablename);
163     }
164 
165     if (usecache) {
166       // Look in cache of descriptors.
167       TableDescriptor cachedtdm = this.cache.get(tablename);
168       if (cachedtdm != null) {
169         cachehits++;
170         return cachedtdm;
171       }
172     }
173     TableDescriptor tdmt = null;
174     try {
175       tdmt = getTableDescriptorFromFs(fs, rootdir, tablename, !fsreadonly);
176     } catch (NullPointerException e) {
177       LOG.debug("Exception during readTableDecriptor. Current table name = "
178           + tablename, e);
179     } catch (IOException ioe) {
180       LOG.debug("Exception during readTableDecriptor. Current table name = "
181           + tablename, ioe);
182     }
183     // last HTD written wins
184     if (usecache && tdmt != null) {
185       this.cache.put(tablename, tdmt);
186     }
187 
188     return tdmt;
189   }
190 
191   /**
192    * Get the current table descriptor for the given table, or null if none exists.
193    *
194    * Uses a local cache of the descriptor but still checks the filesystem on each call
195    * to see if a newer file has been created since the cached one was read.
196    */
197   @Override
198   public HTableDescriptor get(TableName tableName) throws IOException {
199     if (TableName.META_TABLE_NAME.equals(tableName)) {
200       cachehits++;
201       return metaTableDescritor;
202     }
203     TableDescriptor descriptor = getDescriptor(tableName);
204     return descriptor == null ? null : descriptor.getHTableDescriptor();
205   }
206 
207   /**
208    * Returns a map from table name to table descriptor for all tables.
209    */
210   @Override
211   public Map<String, TableDescriptor> getAllDescriptors()
212   throws IOException {
213     Map<String, TableDescriptor> tds = new TreeMap<String, TableDescriptor>();
214 
215     if (fsvisited && usecache) {
216       for (Map.Entry<TableName, TableDescriptor> entry: this.cache.entrySet()) {
217         tds.put(entry.getKey().toString(), entry.getValue());
218       }
219       // add hbase:meta to the response
220       tds.put(this.metaTableDescritor.getNameAsString(),
221         new TableDescriptor(metaTableDescritor, TableState.State.ENABLED));
222     } else {
223       LOG.debug("Fetching table descriptors from the filesystem.");
224       boolean allvisited = true;
225       for (Path d : FSUtils.getTableDirs(fs, rootdir)) {
226         TableDescriptor htd = null;
227         try {
228           htd = getDescriptor(FSUtils.getTableName(d));
229         } catch (FileNotFoundException fnfe) {
230           // inability of retrieving one HTD shouldn't stop getting the remaining
231           LOG.warn("Trouble retrieving htd", fnfe);
232         }
233         if (htd == null) {
234           allvisited = false;
235           continue;
236         } else {
237           tds.put(htd.getHTableDescriptor().getTableName().getNameAsString(), htd);
238         }
239         fsvisited = allvisited;
240       }
241     }
242     return tds;
243   }
244 
245   /**
246    * Returns a map from table name to table descriptor for all tables.
247    */
248   @Override
249   public Map<String, HTableDescriptor> getAll() throws IOException {
250     Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
251     Map<String, TableDescriptor> allDescriptors = getAllDescriptors();
252     for (Map.Entry<String, TableDescriptor> entry : allDescriptors
253         .entrySet()) {
254       htds.put(entry.getKey(), entry.getValue().getHTableDescriptor());
255     }
256     return htds;
257   }
258 
259   /**
260     * Find descriptors by namespace.
261     * @see #get(org.apache.hadoop.hbase.TableName)
262     */
263   @Override
264   public Map<String, HTableDescriptor> getByNamespace(String name)
265   throws IOException {
266     Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
267     List<Path> tableDirs =
268         FSUtils.getLocalTableDirs(fs, FSUtils.getNamespaceDir(rootdir, name));
269     for (Path d: tableDirs) {
270       HTableDescriptor htd = null;
271       try {
272         htd = get(FSUtils.getTableName(d));
273       } catch (FileNotFoundException fnfe) {
274         // inability of retrieving one HTD shouldn't stop getting the remaining
275         LOG.warn("Trouble retrieving htd", fnfe);
276       }
277       if (htd == null) continue;
278       htds.put(FSUtils.getTableName(d).getNameAsString(), htd);
279     }
280     return htds;
281   }
282 
283   /**
284    * Adds (or updates) the table descriptor to the FileSystem
285    * and updates the local cache with it.
286    */
287   @Override
288   public void add(TableDescriptor htd) throws IOException {
289     if (fsreadonly) {
290       throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
291     }
292     TableName tableName = htd.getHTableDescriptor().getTableName();
293     if (TableName.META_TABLE_NAME.equals(tableName)) {
294       throw new NotImplementedException();
295     }
296     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
297       throw new NotImplementedException(
298         "Cannot add a table descriptor for a reserved subdirectory name: "
299             + htd.getHTableDescriptor().getNameAsString());
300     }
301     updateTableDescriptor(htd);
302   }
303 
304   /**
305    * Adds (or updates) the table descriptor to the FileSystem
306    * and updates the local cache with it.
307    */
308   @Override
309   public void add(HTableDescriptor htd) throws IOException {
310     if (fsreadonly) {
311       throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
312     }
313     TableName tableName = htd.getTableName();
314     if (TableName.META_TABLE_NAME.equals(tableName)) {
315       throw new NotImplementedException();
316     }
317     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
318       throw new NotImplementedException(
319           "Cannot add a table descriptor for a reserved subdirectory name: "
320               + htd.getNameAsString());
321     }
322     TableDescriptor descriptor = getDescriptor(htd.getTableName());
323     if (descriptor == null)
324       descriptor = new TableDescriptor(htd);
325     else
326       descriptor.setHTableDescriptor(htd);
327     updateTableDescriptor(descriptor);
328   }
329 
330   /**
331    * Removes the table descriptor from the local cache and returns it.
332    * If not in read only mode, it also deletes the entire table directory(!)
333    * from the FileSystem.
334    */
335   @Override
336   public HTableDescriptor remove(final TableName tablename)
337   throws IOException {
338     if (fsreadonly) {
339       throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
340     }
341     Path tabledir = getTableDir(tablename);
342     if (this.fs.exists(tabledir)) {
343       if (!this.fs.delete(tabledir, true)) {
344         throw new IOException("Failed delete of " + tabledir.toString());
345       }
346     }
347     TableDescriptor descriptor = this.cache.remove(tablename);
348     if (descriptor == null) {
349       return null;
350     } else {
351       return descriptor.getHTableDescriptor();
352     }
353   }
354 
  /**
   * Checks if a current table info file exists for the given table
   *
   * @param tableName name of table
   * @return true if exists
   * @throws IOException
   */
  public boolean isTableInfoExists(TableName tableName) throws IOException {
    return getTableInfoPath(tableName) != null;
  }

  /**
   * Find the most current table info file for the given table in the hbase root directory.
   * @return The file status of the current table info file or null if it does not exist
   */
  private FileStatus getTableInfoPath(final TableName tableName) throws IOException {
    Path tableDir = getTableDir(tableName);
    return getTableInfoPath(tableDir);
  }

  /**
   * Find the most current table info file under the given table directory.
   * In read-write mode this lookup also deletes superseded tableinfo files
   * as a side effect.
   */
  private FileStatus getTableInfoPath(Path tableDir)
  throws IOException {
    // Instance flavor: only clean up old files when this instance may write to the fs.
    return getTableInfoPath(fs, tableDir, !fsreadonly);
  }

  /**
   * Find the most current table info file for the table located in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if present
   * or no sequence number at all if none exist (for backward compatibility from before there
   * were sequence numbers).
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir)
  throws IOException {
    // Public static flavor never removes old files: it may serve read-only callers.
    return getTableInfoPath(fs, tableDir, false);
  }

  /**
   * Find the most current table info file for the table in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if
   * present or no sequence number at all if none exist (for backward compatibility from before
   * there were sequence numbers).
   * If there are multiple table info files found and removeOldFiles is true it also deletes the
   * older files.
   *
   * @return The file status of the current table info file or null if none exist
   * @throws IOException
   */
  private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
  throws IOException {
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
  }
414 
415   /**
416    * Find the most current table info file in the given directory
417    *
418    * Looks within the given directory for any table info files
419    * and takes the 'current' one - meaning the one with the highest sequence number if present
420    * or no sequence number at all if none exist (for backward compatibility from before there
421    * were sequence numbers).
422    * If there are multiple possible files found
423    * and the we're not in read only mode it also deletes the older files.
424    *
425    * @return The file status of the current table info file or null if it does not exist
426    * @throws IOException
427    */
428   // only visible for FSTableDescriptorMigrationToSubdir, can be removed with that
429   static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir, boolean removeOldFiles)
430   throws IOException {
431     FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
432     if (status == null || status.length < 1) return null;
433     FileStatus mostCurrent = null;
434     for (FileStatus file : status) {
435       if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
436         mostCurrent = file;
437       }
438     }
439     if (removeOldFiles && status.length > 1) {
440       // Clean away old versions
441       for (FileStatus file : status) {
442         Path path = file.getPath();
443         if (file != mostCurrent) {
444           if (!fs.delete(file.getPath(), false)) {
445             LOG.warn("Failed cleanup of " + path);
446           } else {
447             LOG.debug("Cleaned up old tableinfo file " + path);
448           }
449         }
450       }
451     }
452     return mostCurrent;
453   }
454 
  /**
   * Compare {@link FileStatus} instances by {@link Path#getName()}. Returns in
   * reverse order.
   */
  @VisibleForTesting
  static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
  new Comparator<FileStatus>() {
    @Override
    public int compare(FileStatus left, FileStatus right) {
      // Reversed on purpose: the file with the highest name (highest sequence id,
      // given the fixed-width zero-padded suffix) sorts first.
      return right.compareTo(left);
    }};

  /**
   * Return the table directory in HDFS
   */
  @VisibleForTesting Path getTableDir(final TableName tableName) {
    return FSUtils.getTableDir(rootdir, tableName);
  }

  /** Accepts any file whose name starts with {@link #TABLEINFO_FILE_PREFIX}. */
  private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      // Accept any file that starts with TABLEINFO_NAME
      return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
    }};
480 
481   /**
482    * Width of the sequenceid that is a suffix on a tableinfo file.
483    */
484   @VisibleForTesting static final int WIDTH_OF_SEQUENCE_ID = 10;
485 
486   /*
487    * @param number Number to use as suffix.
488    * @return Returns zero-prefixed decimal version of passed
489    * number (Does absolute in case number is negative).
490    */
491   private static String formatTableInfoSequenceId(final int number) {
492     byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
493     int d = Math.abs(number);
494     for (int i = b.length - 1; i >= 0; i--) {
495       b[i] = (byte)((d % 10) + '0');
496       d /= 10;
497     }
498     return Bytes.toString(b);
499   }
500 
  /**
   * Regex to eat up sequenceid suffix on a .tableinfo file.
   * Use regex because may encounter oldstyle .tableinfos where there is no
   * sequenceid on the end.
   * Group 2 captures just the digits; it is null for old-style suffix-less files.
   */
  private static final Pattern TABLEINFO_FILE_REGEX =
    Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");
508 
509   /**
510    * @param p Path to a <code>.tableinfo</code> file.
511    * @return The current editid or 0 if none found.
512    */
513   @VisibleForTesting static int getTableInfoSequenceId(final Path p) {
514     if (p == null) return 0;
515     Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
516     if (!m.matches()) throw new IllegalArgumentException(p.toString());
517     String suffix = m.group(2);
518     if (suffix == null || suffix.length() <= 0) return 0;
519     return Integer.parseInt(m.group(2));
520   }
521 
  /**
   * Builds a tableinfo file name for the given edit sequence id,
   * e.g. <code>.tableinfo.0000000003</code>.
   *
   * @param sequenceid
   * @return Name of tableinfo file.
   */
  @VisibleForTesting static String getTableInfoFileName(final int sequenceid) {
    return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
  }
529 
  /**
   * Returns the latest table descriptor for the given table directly from the file system
   * if it exists, bypassing the local cache.
   * Returns null if it's not found.
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs,
      Path hbaseRootDir, TableName tableName) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir);
  }

  /**
   * Returns the latest table descriptor for the given table directly from the file system
   * if it exists, bypassing the local cache.
   * Returns null if it's not found.
   *
   * @param rewritePb if true, a descriptor found in the pre-protobuf format is
   *   rewritten to the filesystem in pb form as a side effect of the read
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs,
   Path hbaseRootDir, TableName tableName, boolean rewritePb) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir, rewritePb);
  }

  /**
   * Returns the latest table descriptor for the table located at the given directory
   * directly from the file system if it exists.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
    throws IOException {
    return getTableDescriptorFromFs(fs, tableDir, false);
  }
560 
561   /**
562    * Returns the latest table descriptor for the table located at the given directory
563    * directly from the file system if it exists.
564    * @throws TableInfoMissingException if there is no descriptor
565    */
566   public static TableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir,
567     boolean rewritePb)
568   throws IOException {
569     FileStatus status = getTableInfoPath(fs, tableDir, false);
570     if (status == null) {
571       throw new TableInfoMissingException("No table descriptor file under " + tableDir);
572     }
573     return readTableDescriptor(fs, status, rewritePb);
574   }
575 
576   private static TableDescriptor readTableDescriptor(FileSystem fs, FileStatus status,
577       boolean rewritePb) throws IOException {
578     int len = Ints.checkedCast(status.getLen());
579     byte [] content = new byte[len];
580     FSDataInputStream fsDataInputStream = fs.open(status.getPath());
581     try {
582       fsDataInputStream.readFully(content);
583     } finally {
584       fsDataInputStream.close();
585     }
586     TableDescriptor td = null;
587     try {
588       td = TableDescriptor.parseFrom(content);
589     } catch (DeserializationException e) {
590       // we have old HTableDescriptor here
591       try {
592         HTableDescriptor htd = HTableDescriptor.parseFrom(content);
593         LOG.warn("Found old table descriptor, converting to new format for table " +
594             htd.getTableName() + "; NOTE table will be in ENABLED state!");
595         td = new TableDescriptor(htd, TableState.State.ENABLED);
596         if (rewritePb) rewriteTableDescriptor(fs, status, td);
597       } catch (DeserializationException e1) {
598         throw new IOException("content=" + Bytes.toShort(content), e);
599       }
600     }
601     if (rewritePb && !ProtobufUtil.isPBMagicPrefix(content)) {
602       // Convert the file over to be pb before leaving here.
603       rewriteTableDescriptor(fs, status, td);
604     }
605     return td;
606   }
607 
  /**
   * Rewrites the descriptor alongside (and superseding) the given existing
   * tableinfo file, in the current serialization format.
   * The table dir is recovered by walking two levels up from the tableinfo file:
   * tableDir/.tabledesc/.tableinfo.NNNNNNNNNN.
   */
  private static void rewriteTableDescriptor(final FileSystem fs, final FileStatus status,
      final TableDescriptor td)
  throws IOException {
    Path tableInfoDir = status.getPath().getParent();
    Path tableDir = tableInfoDir.getParent();
    writeTableDescriptor(fs, td, tableDir, status);
  }
615 
616   /**
617    * Update table descriptor on the file system
618    * @throws IOException Thrown if failed update.
619    * @throws NotImplementedException if in read only mode
620    */
621   @VisibleForTesting Path updateTableDescriptor(TableDescriptor td)
622   throws IOException {
623     if (fsreadonly) {
624       throw new NotImplementedException("Cannot update a table descriptor - in read only mode");
625     }
626     TableName tableName = td.getHTableDescriptor().getTableName();
627     Path tableDir = getTableDir(tableName);
628     Path p = writeTableDescriptor(fs, td, tableDir, getTableInfoPath(tableDir));
629     if (p == null) throw new IOException("Failed update");
630     LOG.info("Updated tableinfo=" + p);
631     if (usecache) {
632       this.cache.put(td.getHTableDescriptor().getTableName(), td);
633     }
634     return p;
635   }
636 
  /**
   * Deletes all the table descriptor files from the file system.
   * Used in unit tests only.
   * @throws NotImplementedException if in read only mode
   */
  public void deleteTableDescriptorIfExists(TableName tableName) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot delete a table descriptor - in read only mode");
    }

    Path tableDir = getTableDir(tableName);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    // MAX_VALUE makes every sequence id "old enough" so all tableinfo files match.
    deleteTableDescriptorFiles(fs, tableInfoDir, Integer.MAX_VALUE);
  }
651 
652   /**
653    * Deletes files matching the table info file pattern within the given directory
654    * whose sequenceId is at most the given max sequenceId.
655    */
656   private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
657   throws IOException {
658     FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
659     for (FileStatus file : status) {
660       Path path = file.getPath();
661       int sequenceId = getTableInfoSequenceId(path);
662       if (sequenceId <= maxSequenceId) {
663         boolean success = FSUtils.delete(fs, path, false);
664         if (success) {
665           LOG.debug("Deleted table descriptor at " + path);
666         } else {
667           LOG.error("Failed to delete descriptor at " + path);
668         }
669       }
670     }
671   }
672 
  /**
   * Attempts to write a new table descriptor to the given table's directory.
   * It first writes it to the .tmp dir then uses an atomic rename to move it into place.
   * It begins at the currentSequenceId + 1 and tries 10 times to find a new sequence number
   * not already in use.
   * Removes the current descriptor file if passed in.
   *
   * @return Descriptor file or null if we failed write.
   */
  private static Path writeTableDescriptor(final FileSystem fs,
    final TableDescriptor htd, final Path tableDir,
    final FileStatus currentDescriptorFile)
  throws IOException {
    // Get temporary dir into which we'll first write a file to avoid half-written file phenomenon.
    // This directory is never removed to avoid removing it out from under a concurrent writer.
    Path tmpTableDir = new Path(tableDir, TMP_DIR);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);

    // What is current sequenceid?  We read the current sequenceid from
    // the current file.  After we read it, another thread could come in and
    // compete with us writing out next version of file.  The below retries
    // should help in this case some but its hard to do guarantees in face of
    // concurrent schema edits.
    int currentSequenceId = currentDescriptorFile == null ? 0 :
      getTableInfoSequenceId(currentDescriptorFile.getPath());
    int newSequenceId = currentSequenceId;

    // Put arbitrary upperbound on how often we retry
    int retries = 10;
    int retrymax = currentSequenceId + retries;
    Path tableInfoDirPath = null;
    do {
      newSequenceId += 1;
      String filename = getTableInfoFileName(newSequenceId);
      Path tempPath = new Path(tmpTableDir, filename);
      if (fs.exists(tempPath)) {
        // Another writer is in flight with this sequence id; try the next one.
        LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
        continue;
      }
      tableInfoDirPath = new Path(tableInfoDir, filename);
      try {
        writeTD(fs, tempPath, htd);
        fs.mkdirs(tableInfoDirPath.getParent());
        // The rename is the commit point for the new descriptor file.
        if (!fs.rename(tempPath, tableInfoDirPath)) {
          throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
        }
        LOG.debug("Wrote descriptor into: " + tableInfoDirPath);
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!FSUtils.deleteDirectory(fs, tempPath)) {
          LOG.warn("Failed cleanup of " + tempPath);
        }
        tableInfoDirPath = null;
        continue;
      }
      break;
    } while (newSequenceId < retrymax);
    if (tableInfoDirPath != null) {
      // if we succeeded, remove old table info files.
      deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
    }
    return tableInfoDirPath;
  }
737 
  /**
   * Serializes the descriptor (pb form) into a newly created file at the given path.
   * Fails if the file already exists (overwrite=false) so concurrent writers
   * cannot clobber each other's temp files.
   */
  private static void writeTD(final FileSystem fs, final Path p, final TableDescriptor htd)
  throws IOException {
    FSDataOutputStream out = fs.create(p, false);
    try {
      // We used to write this file out as a serialized HTD Writable followed by two '\n's and then
      // the toString version of HTD.  Now we just write out the pb serialization.
      out.write(htd.toByteArray());
    } finally {
      out.close();
    }
  }
749 
  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table.
   * Used by tests.
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(TableDescriptor htd) throws IOException {
    return createTableDescriptor(htd, false);
  }

  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table.
   * Used by tests.
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(HTableDescriptor htd) throws IOException {
    // Wrapping applies the TableDescriptor default state (typically ENABLED).
    return createTableDescriptor(new TableDescriptor(htd), false);
  }

  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table. If
   * forceCreation is true then even if previous table descriptor is present it
   * will be overwritten
   *
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(TableDescriptor htd, boolean forceCreation)
  throws IOException {
    Path tableDir = getTableDir(htd.getHTableDescriptor().getTableName());
    return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
  }

  /**
   * Create tables descriptor for given HTableDescriptor. Default TableDescriptor state
   * will be used (typically ENABLED).
   */
  public boolean createTableDescriptor(HTableDescriptor htd, boolean forceCreation)
      throws IOException {
    return createTableDescriptor(new TableDescriptor(htd), forceCreation);
  }
789 
  /**
   * Create a new HTableDescriptor in HDFS in the specified table directory. Happens when we create
   * a new table or snapshot a table.
   * @param tableDir table directory under which we should write the file
   * @param htd description of the table to write
   * @param forceCreation if <tt>true</tt>,then even if previous table descriptor is present it will
   *          be overwritten
   * @return <tt>true</tt> if the we successfully created the file, <tt>false</tt> if the file
   *         already exists and we weren't forcing the descriptor creation.
   * @throws IOException if a filesystem error occurs
   */
  public boolean createTableDescriptorForTableDirectory(Path tableDir,
      TableDescriptor htd, boolean forceCreation) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
    }
    FileStatus status = getTableInfoPath(fs, tableDir);
    if (status != null) {
      LOG.debug("Current tableInfoPath = " + status.getPath());
      if (!forceCreation) {
        if (fs.exists(status.getPath()) && status.getLen() > 0) {
          // Skip the write entirely when the on-disk descriptor already equals
          // the requested one.
          if (readTableDescriptor(fs, status, false).equals(htd)) {
            LOG.debug("TableInfo already exists.. Skipping creation");
            return false;
          }
        }
      }
    }
    // Passing the current file (possibly null) lets the writer supersede and clean it up.
    Path p = writeTableDescriptor(fs, htd, tableDir, status);
    return p != null;
  }
821 
822 }
823