View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import javax.annotation.Nullable;
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.Comparator;
24  import java.util.List;
25  import java.util.Map;
26  import java.util.TreeMap;
27  import java.util.concurrent.ConcurrentHashMap;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import com.google.common.annotations.VisibleForTesting;
32  import com.google.common.primitives.Ints;
33  import org.apache.commons.lang.NotImplementedException;
34  import org.apache.commons.logging.Log;
35  import org.apache.commons.logging.LogFactory;
36  import org.apache.hadoop.hbase.classification.InterfaceAudience;
37  import org.apache.hadoop.conf.Configuration;
38  import org.apache.hadoop.fs.FSDataInputStream;
39  import org.apache.hadoop.fs.FSDataOutputStream;
40  import org.apache.hadoop.fs.FileStatus;
41  import org.apache.hadoop.fs.FileSystem;
42  import org.apache.hadoop.fs.Path;
43  import org.apache.hadoop.fs.PathFilter;
44  import org.apache.hadoop.hbase.Coprocessor;
45  import org.apache.hadoop.hbase.HColumnDescriptor;
46  import org.apache.hadoop.hbase.HConstants;
47  import org.apache.hadoop.hbase.HTableDescriptor;
48  import org.apache.hadoop.hbase.TableDescriptor;
49  import org.apache.hadoop.hbase.TableDescriptors;
50  import org.apache.hadoop.hbase.TableInfoMissingException;
51  import org.apache.hadoop.hbase.TableName;
52  import org.apache.hadoop.hbase.client.TableState;
53  import org.apache.hadoop.hbase.exceptions.DeserializationException;
54  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
55  import org.apache.hadoop.hbase.regionserver.BloomType;
56  
57  /**
58   * Implementation of {@link TableDescriptors} that reads descriptors from the
59   * passed filesystem.  It expects descriptors to be in a file in the
60   * {@link #TABLEINFO_DIR} subdir of the table's directory in FS.  Can be read-only
61   *  -- i.e. does not modify the filesystem or can be read and write.
62   * 
63   * <p>Also has utility for keeping up the table descriptors tableinfo file.
64   * The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
65   * of the table directory in the filesystem.
66   * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
67   * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>.  This sequenceid
68   * is always increasing.  It starts at zero.  The table schema file with the
69   * highest sequenceid has the most recent schema edit. Usually there is one file
70   * only, the most recent but there may be short periods where there are more
71   * than one file. Old files are eventually cleaned.  Presumption is that there
72   * will not be lots of concurrent clients making table schema edits.  If so,
73   * the below needs a bit of a reworking and perhaps some supporting api in hdfs.
74   */
75  @InterfaceAudience.Private
76  public class FSTableDescriptors implements TableDescriptors {
  private static final Log LOG = LogFactory.getLog(FSTableDescriptors.class);
  // Filesystem holding the descriptor files and the hbase root dir under it.
  private final FileSystem fs;
  private final Path rootdir;
  // When true, every mutating operation throws NotImplementedException and reads
  // never clean up superseded tableinfo files.
  private final boolean fsreadonly;
  // Simple counters exposed for tests; not thread-safe, best-effort only.
  @VisibleForTesting long cachehits = 0;
  @VisibleForTesting long invocations = 0;

  /** The file name prefix used to store HTD in HDFS  */
  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
  // Subdir of the table dir holding the tableinfo files.
  static final String TABLEINFO_DIR = ".tabledesc";
  // Scratch subdir files are written to before the atomic rename into TABLEINFO_DIR.
  static final String TMP_DIR = ".tmp";

  // This cache does not age out the old stuff.  Thinking is that the amount
  // of data we keep up in here is so small, no need to do occasional purge.
  // TODO.
  private final Map<TableName, TableDescriptorAndModtime> cache =
    new ConcurrentHashMap<TableName, TableDescriptorAndModtime>();

  /**
   * Table descriptor for <code>hbase:meta</code> catalog table
   * NOTE(review): field name misspells "Descriptor"; kept as-is for compatibility.
   */
  private final HTableDescriptor metaTableDescritor;
99  
100   /**
101    * Data structure to hold modification time and table descriptor.
102    */
103   private static class TableDescriptorAndModtime {
104     private final TableDescriptor td;
105     private final long modtime;
106 
107     TableDescriptorAndModtime(final long modtime, final TableDescriptor td) {
108       this.td = td;
109       this.modtime = modtime;
110     }
111 
112     long getModtime() {
113       return this.modtime;
114     }
115 
116     TableDescriptor getTableDescriptor() {
117       return this.td;
118     }
119 
120     HTableDescriptor getHTableDescriptor() {
121       return this.td.getHTableDescriptor();
122     }
123 
124     TableState.State getTableState() {
125       return this.td.getTableState();
126     }
127   }
128 
  /**
   * Construct a FSTableDescriptors instance using the hbase root dir of the given
   * conf and the filesystem where that root dir lives.
   * This instance can do write operations (is not read only).
   * @param conf configuration used to resolve both the filesystem and the hbase root dir
   * @throws IOException if the filesystem cannot be obtained from the configuration
   */
  public FSTableDescriptors(final Configuration conf) throws IOException {
    this(conf, FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf));
  }
137   
  /**
   * Construct a writable (not read-only) instance against the given filesystem
   * and hbase root dir.
   */
  public FSTableDescriptors(final Configuration conf, final FileSystem fs, final Path rootdir)
      throws IOException {
    this(conf, fs, rootdir, false);
  }
142 
143   /**
144    * @param fsreadonly True if we are read-only when it comes to filesystem
145    * operations; i.e. on remove, we do not do delete in fs.
146    */
147   public FSTableDescriptors(final Configuration conf, final FileSystem fs,
148       final Path rootdir, final boolean fsreadonly) throws IOException {
149     super();
150     this.fs = fs;
151     this.rootdir = rootdir;
152     this.fsreadonly = fsreadonly;
153 
154     this.metaTableDescritor = TableDescriptor.metaTableDescriptor(conf);
155   }
156 
157   /**
158    * Get the current table descriptor for the given table, or null if none exists.
159    * 
160    * Uses a local cache of the descriptor but still checks the filesystem on each call
161    * to see if a newer file has been created since the cached one was read.
162    */
163   @Override
164   @Nullable
165   public TableDescriptor getDescriptor(final TableName tablename)
166   throws IOException {
167     invocations++;
168     if (TableName.META_TABLE_NAME.equals(tablename)) {
169       cachehits++;
170       return new TableDescriptor(metaTableDescritor, TableState.State.ENABLED);
171     }
172     // hbase:meta is already handled. If some one tries to get the descriptor for
173     // .logs, .oldlogs or .corrupt throw an exception.
174     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename.getNameAsString())) {
175        throw new IOException("No descriptor found for non table = " + tablename);
176     }
177 
178     // Look in cache of descriptors.
179     TableDescriptorAndModtime cachedtdm = this.cache.get(tablename);
180 
181     if (cachedtdm != null) {
182       // Check mod time has not changed (this is trip to NN).
183       if (getTableInfoModtime(tablename) <= cachedtdm.getModtime()) {
184         cachehits++;
185         return cachedtdm.getTableDescriptor();
186       }
187     }
188     
189     TableDescriptorAndModtime tdmt = null;
190     try {
191       tdmt = getTableDescriptorAndModtime(tablename);
192     } catch (NullPointerException e) {
193       LOG.debug("Exception during readTableDecriptor. Current table name = "
194           + tablename, e);
195     } catch (IOException ioe) {
196       LOG.debug("Exception during readTableDecriptor. Current table name = "
197           + tablename, ioe);
198     }
199     
200     if (tdmt != null) {
201       this.cache.put(tablename, tdmt);
202     }
203     return tdmt == null ? null : tdmt.getTableDescriptor();
204   }
205 
206   /**
207    * Get the current table descriptor for the given table, or null if none exists.
208    *
209    * Uses a local cache of the descriptor but still checks the filesystem on each call
210    * to see if a newer file has been created since the cached one was read.
211    */
212   @Override
213   public HTableDescriptor get(TableName tableName) throws IOException {
214     if (TableName.META_TABLE_NAME.equals(tableName)) {
215       cachehits++;
216       return metaTableDescritor;
217     }
218     TableDescriptor descriptor = getDescriptor(tableName);
219     return descriptor == null ? null : descriptor.getHTableDescriptor();
220   }
221 
222   /**
223    * Returns a map from table name to table descriptor for all tables.
224    */
225   @Override
226   public Map<String, TableDescriptor> getAllDescriptors()
227   throws IOException {
228     Map<String, TableDescriptor> tds = new TreeMap<String, TableDescriptor>();
229     List<Path> tableDirs = FSUtils.getTableDirs(fs, rootdir);
230     for (Path d: tableDirs) {
231       TableDescriptor htd = null;
232       try {
233         htd = getDescriptor(FSUtils.getTableName(d));
234       } catch (FileNotFoundException fnfe) {
235         // inability of retrieving one HTD shouldn't stop getting the remaining
236         LOG.warn("Trouble retrieving htd", fnfe);
237       }
238       if (htd == null) continue;
239       tds.put(htd.getHTableDescriptor().getTableName().getNameAsString(), htd);
240     }
241     return tds;
242   }
243 
244   /**
245    * Returns a map from table name to table descriptor for all tables.
246    */
247   @Override
248   public Map<String, HTableDescriptor> getAll() throws IOException {
249     Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
250     Map<String, TableDescriptor> allDescriptors = getAllDescriptors();
251     for (Map.Entry<String, TableDescriptor> entry : allDescriptors
252         .entrySet()) {
253       htds.put(entry.getKey(), entry.getValue().getHTableDescriptor());
254     }
255     return htds;
256   }
257 
258   /**
259     * Find descriptors by namespace.
260     * @see #get(org.apache.hadoop.hbase.TableName)
261     */
262   @Override
263   public Map<String, HTableDescriptor> getByNamespace(String name)
264   throws IOException {
265     Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
266     List<Path> tableDirs =
267         FSUtils.getLocalTableDirs(fs, FSUtils.getNamespaceDir(rootdir, name));
268     for (Path d: tableDirs) {
269       HTableDescriptor htd = null;
270       try {
271         htd = get(FSUtils.getTableName(d));
272       } catch (FileNotFoundException fnfe) {
273         // inability of retrieving one HTD shouldn't stop getting the remaining
274         LOG.warn("Trouble retrieving htd", fnfe);
275       }
276       if (htd == null) continue;
277       htds.put(FSUtils.getTableName(d).getNameAsString(), htd);
278     }
279     return htds;
280   }
281 
282   /**
283    * Adds (or updates) the table descriptor to the FileSystem
284    * and updates the local cache with it.
285    */
286   @Override
287   public void add(TableDescriptor htd) throws IOException {
288     if (fsreadonly) {
289       throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
290     }
291     TableName tableName = htd.getHTableDescriptor().getTableName();
292     if (TableName.META_TABLE_NAME.equals(tableName)) {
293       throw new NotImplementedException();
294     }
295     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
296       throw new NotImplementedException(
297         "Cannot add a table descriptor for a reserved subdirectory name: "
298             + htd.getHTableDescriptor().getNameAsString());
299     }
300     updateTableDescriptor(htd);
301   }
302 
303   /**
304    * Adds (or updates) the table descriptor to the FileSystem
305    * and updates the local cache with it.
306    */
307   @Override
308   public void add(HTableDescriptor htd) throws IOException {
309     if (fsreadonly) {
310       throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
311     }
312     TableName tableName = htd.getTableName();
313     if (TableName.META_TABLE_NAME.equals(tableName)) {
314       throw new NotImplementedException();
315     }
316     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
317       throw new NotImplementedException(
318           "Cannot add a table descriptor for a reserved subdirectory name: "
319               + htd.getNameAsString());
320     }
321     TableDescriptor descriptor = getDescriptor(htd.getTableName());
322     if (descriptor == null)
323       descriptor = new TableDescriptor(htd);
324     else
325       descriptor.setHTableDescriptor(htd);
326     updateTableDescriptor(descriptor);
327   }
328 
329   /**
330    * Removes the table descriptor from the local cache and returns it.
331    * If not in read only mode, it also deletes the entire table directory(!)
332    * from the FileSystem.
333    */
334   @Override
335   public HTableDescriptor remove(final TableName tablename)
336   throws IOException {
337     if (fsreadonly) {
338       throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
339     }
340     Path tabledir = getTableDir(tablename);
341     if (this.fs.exists(tabledir)) {
342       if (!this.fs.delete(tabledir, true)) {
343         throw new IOException("Failed delete of " + tabledir.toString());
344       }
345     }
346     TableDescriptorAndModtime tdm = this.cache.remove(tablename);
347     return tdm == null ? null : tdm.getHTableDescriptor();
348   }
349 
350   /**
351    * Checks if a current table info file exists for the given table
352    * 
353    * @param tableName name of table
354    * @return true if exists
355    * @throws IOException
356    */
357   public boolean isTableInfoExists(TableName tableName) throws IOException {
358     return getTableInfoPath(tableName) != null;
359   }
360   
361   /**
362    * Find the most current table info file for the given table in the hbase root directory.
363    * @return The file status of the current table info file or null if it does not exist
364    */
365   private FileStatus getTableInfoPath(final TableName tableName) throws IOException {
366     Path tableDir = getTableDir(tableName);
367     return getTableInfoPath(tableDir);
368   }
369 
  // Instance variant: when writable (not read-only), locating the current file
  // also purges superseded tableinfo files as a side effect.
  private FileStatus getTableInfoPath(Path tableDir)
  throws IOException {
    return getTableInfoPath(fs, tableDir, !fsreadonly);
  }
374   
  /**
   * Find the most current table info file for the table located in the given table directory.
   * 
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if present
   * or no sequence number at all if none exist (for backward compatibility from before there
   * were sequence numbers).
   * Never deletes older files (removeOldFiles is hard-coded false here).
   * 
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir)
  throws IOException {
    return getTableInfoPath(fs, tableDir, false);
  }
390   
391   /**
392    * Find the most current table info file for the table in the given table directory.
393    * 
394    * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
395    * files and takes the 'current' one - meaning the one with the highest sequence number if
396    * present or no sequence number at all if none exist (for backward compatibility from before
397    * there were sequence numbers).
398    * If there are multiple table info files found and removeOldFiles is true it also deletes the
399    * older files.
400    * 
401    * @return The file status of the current table info file or null if none exist
402    * @throws IOException
403    */
404   private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
405   throws IOException {
406     Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
407     return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
408   }
409   
410   /**
411    * Find the most current table info file in the given directory
412    * 
413    * Looks within the given directory for any table info files
414    * and takes the 'current' one - meaning the one with the highest sequence number if present
415    * or no sequence number at all if none exist (for backward compatibility from before there
416    * were sequence numbers).
417    * If there are multiple possible files found
418    * and the we're not in read only mode it also deletes the older files.
419    * 
420    * @return The file status of the current table info file or null if it does not exist
421    * @throws IOException
422    */
423   // only visible for FSTableDescriptorMigrationToSubdir, can be removed with that
424   static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir, boolean removeOldFiles)
425   throws IOException {
426     FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
427     if (status == null || status.length < 1) return null;
428     FileStatus mostCurrent = null;
429     for (FileStatus file : status) {
430       if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
431         mostCurrent = file;
432       }
433     }
434     if (removeOldFiles && status.length > 1) {
435       // Clean away old versions
436       for (FileStatus file : status) {
437         Path path = file.getPath();
438         if (file != mostCurrent) {
439           if (!fs.delete(file.getPath(), false)) {
440             LOG.warn("Failed cleanup of " + path);
441           } else {
442             LOG.debug("Cleaned up old tableinfo file " + path);
443           }
444         }
445       }
446     }
447     return mostCurrent;
448   }
449   
  /**
   * Compare {@link FileStatus} instances by {@link Path#getName()}. Returns in
   * reverse order so the file with the highest sequence-id suffix sorts first.
   */
  @VisibleForTesting
  static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
  new Comparator<FileStatus>() {
    @Override
    public int compare(FileStatus left, FileStatus right) {
      // Arguments swapped on purpose: descending (most recent first) order.
      return right.compareTo(left);
    }};
461 
  /**
   * Return the table directory in HDFS
   * @param tableName table whose directory under {@link #rootdir} is wanted
   */
  @VisibleForTesting Path getTableDir(final TableName tableName) {
    return FSUtils.getTableDir(rootdir, tableName);
  }
468 
  /** Accepts any file whose name starts with {@link #TABLEINFO_FILE_PREFIX}. */
  private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      // Accept any file that starts with TABLEINFO_NAME
      return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
    }};

  /**
   * Width of the sequenceid that is a suffix on a tableinfo file.
   */
  @VisibleForTesting static final int WIDTH_OF_SEQUENCE_ID = 10;
480 
481   /*
482    * @param number Number to use as suffix.
483    * @return Returns zero-prefixed decimal version of passed
484    * number (Does absolute in case number is negative).
485    */
486   private static String formatTableInfoSequenceId(final int number) {
487     byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
488     int d = Math.abs(number);
489     for (int i = b.length - 1; i >= 0; i--) {
490       b[i] = (byte)((d % 10) + '0');
491       d /= 10;
492     }
493     return Bytes.toString(b);
494   }
495 
  /**
   * Regex to eat up sequenceid suffix on a .tableinfo file.
   * Use regex because may encounter oldstyle .tableinfos where there is no
   * sequenceid on the end.  Group 2 captures the bare sequence-id digits when present.
   */
  private static final Pattern TABLEINFO_FILE_REGEX =
    Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");
503 
504   /**
505    * @param p Path to a <code>.tableinfo</code> file.
506    * @return The current editid or 0 if none found.
507    */
508   @VisibleForTesting static int getTableInfoSequenceId(final Path p) {
509     if (p == null) return 0;
510     Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
511     if (!m.matches()) throw new IllegalArgumentException(p.toString());
512     String suffix = m.group(2);
513     if (suffix == null || suffix.length() <= 0) return 0;
514     return Integer.parseInt(m.group(2));
515   }
516 
517   /**
518    * @param tabledir
519    * @param sequenceid
520    * @return Name of tableinfo file.
521    */
522   @VisibleForTesting static String getTableInfoFileName(final int sequenceid) {
523     return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
524   }
525 
526   /**
527    * @param fs
528    * @param rootdir
529    * @param tableName
530    * @return Modification time for the table {@link #TABLEINFO_FILE_PREFIX} file
531    * or <code>0</code> if no tableinfo file found.
532    * @throws IOException
533    */
534   private long getTableInfoModtime(final TableName tableName) throws IOException {
535     FileStatus status = getTableInfoPath(tableName);
536     return status == null ? 0 : status.getModificationTime();
537   }
538 
539   /**
540    * Returns the latest table descriptor for the given table directly from the file system
541    * if it exists, bypassing the local cache.
542    * Returns null if it's not found.
543    */
544   public static TableDescriptor getTableDescriptorFromFs(FileSystem fs,
545       Path hbaseRootDir, TableName tableName) throws IOException {
546     Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
547     return getTableDescriptorFromFs(fs, tableDir);
548   }
549 
550   /**
551    * Returns the latest table descriptor for the table located at the given directory
552    * directly from the file system if it exists.
553    * @throws TableInfoMissingException if there is no descriptor
554    */
555   public static TableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
556   throws IOException {
557     FileStatus status = getTableInfoPath(fs, tableDir, false);
558     if (status == null) {
559       throw new TableInfoMissingException("No table descriptor file under " + tableDir);
560     }
561     return readTableDescriptor(fs, status, false);
562   }
563   
564   /**
565    * @param tableName table name
566    * @return TableDescriptorAndModtime or null if no table descriptor was found
567    * @throws IOException
568    */
569   private TableDescriptorAndModtime getTableDescriptorAndModtime(TableName tableName)
570   throws IOException {
571     // ignore both -ROOT- and hbase:meta tables
572     if (tableName.equals(TableName.META_TABLE_NAME)) {
573       return null;
574     }
575     return getTableDescriptorAndModtime(getTableDir(tableName));
576   }
577 
578   /**
579    * @param tableDir path to table directory
580    * @return TableDescriptorAndModtime or null if no table descriptor was found
581    * at the specified path
582    * @throws IOException
583    */
584   private TableDescriptorAndModtime getTableDescriptorAndModtime(Path tableDir)
585   throws IOException {
586     FileStatus status = getTableInfoPath(tableDir);
587     if (status == null) {
588       return null;
589     }
590     TableDescriptor td = readTableDescriptor(fs, status, !fsreadonly);
591     return new TableDescriptorAndModtime(status.getModificationTime(), td);
592   }
593 
594   private static TableDescriptor readTableDescriptor(FileSystem fs, FileStatus status,
595       boolean rewritePb) throws IOException {
596     int len = Ints.checkedCast(status.getLen());
597     byte [] content = new byte[len];
598     FSDataInputStream fsDataInputStream = fs.open(status.getPath());
599     try {
600       fsDataInputStream.readFully(content);
601     } finally {
602       fsDataInputStream.close();
603     }
604     TableDescriptor td = null;
605     try {
606       td = TableDescriptor.parseFrom(content);
607     } catch (DeserializationException e) {
608       // we have old HTableDescriptor here
609       try {
610         HTableDescriptor htd = HTableDescriptor.parseFrom(content);
611         LOG.warn("Found old table descriptor, converting to new format for table " +
612             htd.getTableName() + "; NOTE table will be in ENABLED state!");
613         td = new TableDescriptor(htd, TableState.State.ENABLED);
614         if (rewritePb) rewriteTableDescriptor(fs, status, td);
615       } catch (DeserializationException e1) {
616         throw new IOException("content=" + Bytes.toShort(content), e);
617       }
618     }
619     if (rewritePb && !ProtobufUtil.isPBMagicPrefix(content)) {
620       // Convert the file over to be pb before leaving here.
621       rewriteTableDescriptor(fs, status, td);
622     }
623     return td;
624   }
625 
626   private static void rewriteTableDescriptor(final FileSystem fs, final FileStatus status,
627       final TableDescriptor td)
628   throws IOException {
629     Path tableInfoDir = status.getPath().getParent();
630     Path tableDir = tableInfoDir.getParent();
631     writeTableDescriptor(fs, td, tableDir, status);
632   }
633 
634   /**
635    * Update table descriptor on the file system
636    * @throws IOException Thrown if failed update.
637    * @throws NotImplementedException if in read only mode
638    */
639   @VisibleForTesting Path updateTableDescriptor(TableDescriptor td)
640   throws IOException {
641     if (fsreadonly) {
642       throw new NotImplementedException("Cannot update a table descriptor - in read only mode");
643     }
644     TableName tableName = td.getHTableDescriptor().getTableName();
645     Path tableDir = getTableDir(tableName);
646     Path p = writeTableDescriptor(fs, td, tableDir, getTableInfoPath(tableDir));
647     if (p == null) throw new IOException("Failed update");
648     LOG.info("Updated tableinfo=" + p);
649     long modtime = getTableInfoModtime(tableName);
650     this.cache.put(tableName, new TableDescriptorAndModtime(modtime, td));
651     return p;
652   }
653 
654   /**
655    * Deletes all the table descriptor files from the file system.
656    * Used in unit tests only.
657    * @throws NotImplementedException if in read only mode
658    */
659   public void deleteTableDescriptorIfExists(TableName tableName) throws IOException {
660     if (fsreadonly) {
661       throw new NotImplementedException("Cannot delete a table descriptor - in read only mode");
662     }
663    
664     Path tableDir = getTableDir(tableName);
665     Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
666     deleteTableDescriptorFiles(fs, tableInfoDir, Integer.MAX_VALUE);
667   }
668 
669   /**
670    * Deletes files matching the table info file pattern within the given directory 
671    * whose sequenceId is at most the given max sequenceId.
672    */
673   private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
674   throws IOException {
675     FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
676     for (FileStatus file : status) {
677       Path path = file.getPath();
678       int sequenceId = getTableInfoSequenceId(path);
679       if (sequenceId <= maxSequenceId) {
680         boolean success = FSUtils.delete(fs, path, false);
681         if (success) {
682           LOG.debug("Deleted table descriptor at " + path);
683         } else {
684           LOG.error("Failed to delete descriptor at " + path);
685         }
686       }
687     }
688   }
689   
  /**
   * Attempts to write a new table descriptor to the given table's directory.
   * It first writes it to the .tmp dir then uses an atomic rename to move it into place.
   * It begins at the currentSequenceId + 1 and tries 10 times to find a new sequence number
   * not already in use.
   * Removes the current descriptor file if passed in.
   * 
   * @param currentDescriptorFile the descriptor file being superseded, or null if none
   * @return Descriptor file or null if we failed write.
   */
  private static Path writeTableDescriptor(final FileSystem fs, 
    final TableDescriptor htd, final Path tableDir,
    final FileStatus currentDescriptorFile)
  throws IOException {  
    // Get temporary dir into which we'll first write a file to avoid half-written file phenomenon.
    // This directory is never removed to avoid removing it out from under a concurrent writer.
    Path tmpTableDir = new Path(tableDir, TMP_DIR);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    
    // What is current sequenceid?  We read the current sequenceid from
    // the current file.  After we read it, another thread could come in and
    // compete with us writing out next version of file.  The below retries
    // should help in this case some but its hard to do guarantees in face of
    // concurrent schema edits.
    int currentSequenceId = currentDescriptorFile == null ? 0 :
      getTableInfoSequenceId(currentDescriptorFile.getPath());
    int newSequenceId = currentSequenceId;
    
    // Put arbitrary upperbound on how often we retry
    int retries = 10;
    int retrymax = currentSequenceId + retries;
    Path tableInfoDirPath = null;
    do {
      newSequenceId += 1;
      String filename = getTableInfoFileName(newSequenceId);
      Path tempPath = new Path(tmpTableDir, filename);
      if (fs.exists(tempPath)) {
        // A concurrent writer is mid-flight on this sequence id; try the next one.
        LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
        continue;
      }
      tableInfoDirPath = new Path(tableInfoDir, filename);
      try {
        // Write fully in the tmp dir, then rename into place so readers never see
        // a partially-written descriptor.
        writeTD(fs, tempPath, htd);
        fs.mkdirs(tableInfoDirPath.getParent());
        if (!fs.rename(tempPath, tableInfoDirPath)) {
          throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
        }
        LOG.debug("Wrote descriptor into: " + tableInfoDirPath);
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!FSUtils.deleteDirectory(fs, tempPath)) {
          LOG.warn("Failed cleanup of " + tempPath);
        }
        // Null signals "this attempt failed" both to the loop and to the caller.
        tableInfoDirPath = null;
        continue;
      }
      break;
    } while (newSequenceId < retrymax);
    if (tableInfoDirPath != null) {
      // if we succeeded, remove old table info files.
      deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
    }
    return tableInfoDirPath;
  }
754   
  // Serializes the descriptor to the given path. create(p, false) refuses to
  // overwrite an existing file, which the caller relies on for retry detection.
  private static void writeTD(final FileSystem fs, final Path p, final TableDescriptor htd)
  throws IOException {
    FSDataOutputStream out = fs.create(p, false);
    try {
      // We used to write this file out as a serialized HTD Writable followed by two '\n's and then
      // the toString version of HTD.  Now we just write out the pb serialization.
      out.write(htd.toByteArray());
    } finally {
      out.close();
    }
  }
766 
  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table.
   * Used by tests.  Does not overwrite an existing, different descriptor.
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(TableDescriptor htd) throws IOException {
    return createTableDescriptor(htd, false);
  }
775 
  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table.
   * Used by tests.  Wraps the schema in a TableDescriptor with the default state.
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(HTableDescriptor htd) throws IOException {
    return createTableDescriptor(new TableDescriptor(htd), false);
  }
784 
785   /**
786    * Create new HTableDescriptor in HDFS. Happens when we are creating table. If
787    * forceCreation is true then even if previous table descriptor is present it
788    * will be overwritten
789    * 
790    * @return True if we successfully created file.
791    */
792   public boolean createTableDescriptor(TableDescriptor htd, boolean forceCreation)
793   throws IOException {
794     Path tableDir = getTableDir(htd.getHTableDescriptor().getTableName());
795     return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
796   }
797 
  /**
   * Create tables descriptor for given HTableDescriptor. Default TableDescriptor state
   * will be used (typically ENABLED).
   */
  public boolean createTableDescriptor(HTableDescriptor htd, boolean forceCreation)
      throws IOException {
    return createTableDescriptor(new TableDescriptor(htd), forceCreation);
  }
806 
807   /**
808    * Create a new HTableDescriptor in HDFS in the specified table directory. Happens when we create
809    * a new table or snapshot a table.
810    * @param tableDir table directory under which we should write the file
811    * @param htd description of the table to write
812    * @param forceCreation if <tt>true</tt>,then even if previous table descriptor is present it will
813    *          be overwritten
814    * @return <tt>true</tt> if the we successfully created the file, <tt>false</tt> if the file
815    *         already exists and we weren't forcing the descriptor creation.
816    * @throws IOException if a filesystem error occurs
817    */
818   public boolean createTableDescriptorForTableDirectory(Path tableDir,
819       TableDescriptor htd, boolean forceCreation) throws IOException {
820     if (fsreadonly) {
821       throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
822     }
823     FileStatus status = getTableInfoPath(fs, tableDir);
824     if (status != null) {
825       LOG.debug("Current tableInfoPath = " + status.getPath());
826       if (!forceCreation) {
827         if (fs.exists(status.getPath()) && status.getLen() > 0) {
828           if (readTableDescriptor(fs, status, false).equals(htd)) {
829             LOG.debug("TableInfo already exists.. Skipping creation");
830             return false;
831           }
832         }
833       }
834     }
835     Path p = writeTableDescriptor(fs, htd, tableDir, status);
836     return p != null;
837   }
838 
839 }
840