View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.util;
19  
20  import javax.annotation.Nullable;
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.Comparator;
24  import java.util.List;
25  import java.util.Map;
26  import java.util.TreeMap;
27  import java.util.concurrent.ConcurrentHashMap;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  import org.apache.commons.lang.NotImplementedException;
32  import org.apache.commons.logging.Log;
33  import org.apache.commons.logging.LogFactory;
34  import org.apache.hadoop.classification.InterfaceAudience;
35  import org.apache.hadoop.conf.Configuration;
36  import org.apache.hadoop.fs.FSDataInputStream;
37  import org.apache.hadoop.fs.FSDataOutputStream;
38  import org.apache.hadoop.fs.FileStatus;
39  import org.apache.hadoop.fs.FileSystem;
40  import org.apache.hadoop.fs.Path;
41  import org.apache.hadoop.fs.PathFilter;
42  import org.apache.hadoop.hbase.TableDescriptor;
43  import org.apache.hadoop.hbase.TableName;
44  import org.apache.hadoop.hbase.client.TableState;
45  import org.apache.hadoop.hbase.exceptions.DeserializationException;
46  import org.apache.hadoop.hbase.HConstants;
47  import org.apache.hadoop.hbase.HTableDescriptor;
48  import org.apache.hadoop.hbase.TableDescriptors;
49  import org.apache.hadoop.hbase.TableInfoMissingException;
50  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
51  
52  import com.google.common.annotations.VisibleForTesting;
53  import com.google.common.primitives.Ints;
54  
55  
56  /**
57   * Implementation of {@link TableDescriptors} that reads descriptors from the
58   * passed filesystem.  It expects descriptors to be in a file in the
59   * {@link #TABLEINFO_DIR} subdir of the table's directory in FS.  Can be read-only
60   *  -- i.e. does not modify the filesystem or can be read and write.
61   * 
62   * <p>Also has utility for keeping up the table descriptors tableinfo file.
63   * The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
64   * of the table directory in the filesystem.
65   * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
66   * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>.  This sequenceid
67   * is always increasing.  It starts at zero.  The table schema file with the
68   * highest sequenceid has the most recent schema edit. Usually there is one file
69   * only, the most recent but there may be short periods where there are more
70   * than one file. Old files are eventually cleaned.  Presumption is that there
71   * will not be lots of concurrent clients making table schema edits.  If so,
72   * the below needs a bit of a reworking and perhaps some supporting api in hdfs.
73   */
@InterfaceAudience.Private
public class FSTableDescriptors implements TableDescriptors {
  private static final Log LOG = LogFactory.getLog(FSTableDescriptors.class);
  // Filesystem hosting the hbase root directory.
  private final FileSystem fs;
  // hbase root directory; all table directories are resolved beneath it.
  private final Path rootdir;
  // When true, every mutating operation (add/remove/update) throws NotImplementedException.
  private final boolean fsreadonly;
  // Test-visible counters: cache hits vs. total getDescriptor() invocations.
  @VisibleForTesting long cachehits = 0;
  @VisibleForTesting long invocations = 0;

  /** The file name prefix used to store HTD in HDFS  */
  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
  /** Subdirectory of the table directory holding the tableinfo files. */
  static final String TABLEINFO_DIR = ".tabledesc";
  /** Scratch directory where new tableinfo files are staged before an atomic rename. */
  static final String TMP_DIR = ".tmp";

  // This cache does not age out the old stuff.  Thinking is that the amount
  // of data we keep up in here is so small, no need to do occasional purge.
  // TODO.
  private final Map<TableName, TableDescriptorAndModtime> cache =
    new ConcurrentHashMap<TableName, TableDescriptorAndModtime>();
93  
  /**
   * Data structure to hold modification time and table descriptor.
   * Cached per table so that a cheap modtime comparison against the filesystem
   * tells us whether the cached descriptor is still current.
   */
  private static class TableDescriptorAndModtime {
    private final TableDescriptor td;
    private final long modtime;

    TableDescriptorAndModtime(final long modtime, final TableDescriptor td) {
      this.td = td;
      this.modtime = modtime;
    }

    /** @return modification time of the tableinfo file this descriptor was read from */
    long getModtime() {
      return this.modtime;
    }

    /** @return the cached descriptor (schema plus table state) */
    TableDescriptor getTableDescriptor() {
      return this.td;
    }

    /** @return only the schema portion of the cached descriptor */
    HTableDescriptor getHTableDescriptor() {
      return this.td.getHTableDescriptor();
    }

    /** @return the table state recorded in the cached descriptor */
    TableState.State getTableState() {
      return this.td.getTableState();
    }
  }
122 
  /**
   * Construct a FSTableDescriptors instance using the hbase root dir of the given
   * conf and the filesystem where that root dir lives.
   * This instance can do write operations (is not read only).
   * @throws IOException if the filesystem or root dir cannot be resolved from conf
   */
  public FSTableDescriptors(final Configuration conf) throws IOException {
    this(FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf));
  }
131   
  /**
   * Construct a read-write FSTableDescriptors over the given filesystem and
   * hbase root directory.
   */
  public FSTableDescriptors(final FileSystem fs, final Path rootdir) {
    this(fs, rootdir, false);
  }
135 
136   /**
137    * @param fsreadonly True if we are read-only when it comes to filesystem
138    * operations; i.e. on remove, we do not do delete in fs.
139    */
140   public FSTableDescriptors(final FileSystem fs,
141       final Path rootdir, final boolean fsreadonly) {
142     super();
143     this.fs = fs;
144     this.rootdir = rootdir;
145     this.fsreadonly = fsreadonly;
146   }
147 
  /**
   * Get the current table descriptor for the given table, or null if none exists.
   * 
   * Uses a local cache of the descriptor but still checks the filesystem on each call
   * to see if a newer file has been created since the cached one was read.
   */
  @Override
  @Nullable
  public TableDescriptor getDescriptor(final TableName tablename)
  throws IOException {
    invocations++;
    // hbase:meta's descriptor is a constant; it never touches the filesystem.
    if (HTableDescriptor.META_TABLEDESC.getTableName().equals(tablename)) {
      cachehits++;
      return new TableDescriptor(HTableDescriptor.META_TABLEDESC, TableState.State.ENABLED);
    }
    // hbase:meta is already handled. If some one tries to get the descriptor for
    // .logs, .oldlogs or .corrupt throw an exception.
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename.getNameAsString())) {
       throw new IOException("No descriptor found for non table = " + tablename);
    }

    // Look in cache of descriptors.
    TableDescriptorAndModtime cachedtdm = this.cache.get(tablename);

    if (cachedtdm != null) {
      // Check mod time has not changed (this is trip to NN).
      if (getTableInfoModtime(tablename) <= cachedtdm.getModtime()) {
        cachehits++;
        return cachedtdm.getTableDescriptor();
      }
    }
    
    TableDescriptorAndModtime tdmt = null;
    try {
      tdmt = getTableDescriptorAndModtime(tablename);
    } catch (NullPointerException e) {
      // NOTE(review): swallowing NPE appears deliberate (treats a corrupt/partial
      // tableinfo as "no descriptor") but can also mask real bugs — confirm intent.
      LOG.debug("Exception during readTableDecriptor. Current table name = "
          + tablename, e);
    } catch (IOException ioe) {
      LOG.debug("Exception during readTableDecriptor. Current table name = "
          + tablename, ioe);
    }
    
    // Cache the freshly read descriptor (if any); null means not found.
    if (tdmt != null) {
      this.cache.put(tablename, tdmt);
    }
    return tdmt == null ? null : tdmt.getTableDescriptor();
  }
196 
197   /**
198    * Get the current table descriptor for the given table, or null if none exists.
199    *
200    * Uses a local cache of the descriptor but still checks the filesystem on each call
201    * to see if a newer file has been created since the cached one was read.
202    */
203   @Override
204   public HTableDescriptor get(TableName tableName) throws IOException {
205     if (HTableDescriptor.META_TABLEDESC.getTableName().equals(tableName)) {
206       cachehits++;
207       return HTableDescriptor.META_TABLEDESC;
208     }
209     TableDescriptor descriptor = getDescriptor(tableName);
210     return descriptor == null ? null : descriptor.getHTableDescriptor();
211   }
212 
213   /**
214    * Returns a map from table name to table descriptor for all tables.
215    */
216   @Override
217   public Map<String, TableDescriptor> getAllDescriptors()
218   throws IOException {
219     Map<String, TableDescriptor> tds = new TreeMap<String, TableDescriptor>();
220     List<Path> tableDirs = FSUtils.getTableDirs(fs, rootdir);
221     for (Path d: tableDirs) {
222       TableDescriptor htd = null;
223       try {
224         htd = getDescriptor(FSUtils.getTableName(d));
225       } catch (FileNotFoundException fnfe) {
226         // inability of retrieving one HTD shouldn't stop getting the remaining
227         LOG.warn("Trouble retrieving htd", fnfe);
228       }
229       if (htd == null) continue;
230       tds.put(htd.getHTableDescriptor().getTableName().getNameAsString(), htd);
231     }
232     return tds;
233   }
234 
235   /**
236    * Returns a map from table name to table descriptor for all tables.
237    */
238   @Override
239   public Map<String, HTableDescriptor> getAll() throws IOException {
240     Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
241     Map<String, TableDescriptor> allDescriptors = getAllDescriptors();
242     for (Map.Entry<String, TableDescriptor> entry : allDescriptors
243         .entrySet()) {
244       htds.put(entry.getKey(), entry.getValue().getHTableDescriptor());
245     }
246     return htds;
247   }
248 
249   /**
250     * Find descriptors by namespace.
251     * @see #get(org.apache.hadoop.hbase.TableName)
252     */
253   @Override
254   public Map<String, HTableDescriptor> getByNamespace(String name)
255   throws IOException {
256     Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
257     List<Path> tableDirs =
258         FSUtils.getLocalTableDirs(fs, FSUtils.getNamespaceDir(rootdir, name));
259     for (Path d: tableDirs) {
260       HTableDescriptor htd = null;
261       try {
262         htd = get(FSUtils.getTableName(d));
263       } catch (FileNotFoundException fnfe) {
264         // inability of retrieving one HTD shouldn't stop getting the remaining
265         LOG.warn("Trouble retrieving htd", fnfe);
266       }
267       if (htd == null) continue;
268       htds.put(FSUtils.getTableName(d).getNameAsString(), htd);
269     }
270     return htds;
271   }
272 
273   /**
274    * Adds (or updates) the table descriptor to the FileSystem
275    * and updates the local cache with it.
276    */
277   @Override
278   public void add(TableDescriptor htd) throws IOException {
279     if (fsreadonly) {
280       throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
281     }
282     TableName tableName = htd.getHTableDescriptor().getTableName();
283     if (TableName.META_TABLE_NAME.equals(tableName)) {
284       throw new NotImplementedException();
285     }
286     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
287       throw new NotImplementedException(
288         "Cannot add a table descriptor for a reserved subdirectory name: "
289             + htd.getHTableDescriptor().getNameAsString());
290     }
291     updateTableDescriptor(htd);
292   }
293 
294   /**
295    * Adds (or updates) the table descriptor to the FileSystem
296    * and updates the local cache with it.
297    */
298   @Override
299   public void add(HTableDescriptor htd) throws IOException {
300     if (fsreadonly) {
301       throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
302     }
303     TableName tableName = htd.getTableName();
304     if (TableName.META_TABLE_NAME.equals(tableName)) {
305       throw new NotImplementedException();
306     }
307     if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
308       throw new NotImplementedException(
309           "Cannot add a table descriptor for a reserved subdirectory name: "
310               + htd.getNameAsString());
311     }
312     TableDescriptor descriptor = getDescriptor(htd.getTableName());
313     if (descriptor == null)
314       descriptor = new TableDescriptor(htd);
315     else
316       descriptor.setHTableDescriptor(htd);
317     updateTableDescriptor(descriptor);
318   }
319 
320   /**
321    * Removes the table descriptor from the local cache and returns it.
322    * If not in read only mode, it also deletes the entire table directory(!)
323    * from the FileSystem.
324    */
325   @Override
326   public HTableDescriptor remove(final TableName tablename)
327   throws IOException {
328     if (fsreadonly) {
329       throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
330     }
331     Path tabledir = getTableDir(tablename);
332     if (this.fs.exists(tabledir)) {
333       if (!this.fs.delete(tabledir, true)) {
334         throw new IOException("Failed delete of " + tabledir.toString());
335       }
336     }
337     TableDescriptorAndModtime tdm = this.cache.remove(tablename);
338     return tdm == null ? null : tdm.getHTableDescriptor();
339   }
340 
  /**
   * Checks if a current table info file exists for the given table
   * 
   * @param tableName name of table
   * @return true if exists
   * @throws IOException if listing the table directory fails
   */
  public boolean isTableInfoExists(TableName tableName) throws IOException {
    return getTableInfoPath(tableName) != null;
  }
351   
  /**
   * Find the most current table info file for the given table in the hbase root directory.
   * @return The file status of the current table info file or null if it does not exist
   */
  private FileStatus getTableInfoPath(final TableName tableName) throws IOException {
    Path tableDir = getTableDir(tableName);
    return getTableInfoPath(tableDir);
  }

  /**
   * Find the most current table info file under the given table directory.
   * When this instance is writable, superseded tableinfo files are removed as
   * a side effect of the lookup.
   * @return The file status of the current table info file or null if it does not exist
   */
  private FileStatus getTableInfoPath(Path tableDir)
  throws IOException {
    // Only clean up old files when we are allowed to write to the filesystem.
    return getTableInfoPath(fs, tableDir, !fsreadonly);
  }
365   
  /**
   * Find the most current table info file for the table located in the given table directory.
   * 
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if present
   * or no sequence number at all if none exist (for backward compatibility from before there
   * were sequence numbers).
   * This public variant never deletes older tableinfo files.
   * 
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException if listing the directory fails
   */
  public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir)
  throws IOException {
    return getTableInfoPath(fs, tableDir, false);
  }
381   
  /**
   * Find the most current table info file for the table in the given table directory.
   * 
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if
   * present or no sequence number at all if none exist (for backward compatibility from before
   * there were sequence numbers).
   * If there are multiple table info files found and removeOldFiles is true it also deletes the
   * older files.
   * 
   * @return The file status of the current table info file or null if none exist
   * @throws IOException if listing or deleting files fails
   */
  private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
  throws IOException {
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
  }
400   
401   /**
402    * Find the most current table info file in the given directory
403    * 
404    * Looks within the given directory for any table info files
405    * and takes the 'current' one - meaning the one with the highest sequence number if present
406    * or no sequence number at all if none exist (for backward compatibility from before there
407    * were sequence numbers).
408    * If there are multiple possible files found
409    * and the we're not in read only mode it also deletes the older files.
410    * 
411    * @return The file status of the current table info file or null if it does not exist
412    * @throws IOException
413    */
414   // only visible for FSTableDescriptorMigrationToSubdir, can be removed with that
415   static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir, boolean removeOldFiles)
416   throws IOException {
417     FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
418     if (status == null || status.length < 1) return null;
419     FileStatus mostCurrent = null;
420     for (FileStatus file : status) {
421       if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
422         mostCurrent = file;
423       }
424     }
425     if (removeOldFiles && status.length > 1) {
426       // Clean away old versions
427       for (FileStatus file : status) {
428         Path path = file.getPath();
429         if (file != mostCurrent) {
430           if (!fs.delete(file.getPath(), false)) {
431             LOG.warn("Failed cleanup of " + path);
432           } else {
433             LOG.debug("Cleaned up old tableinfo file " + path);
434           }
435         }
436       }
437     }
438     return mostCurrent;
439   }
440   
  /**
   * Compare {@link FileStatus} instances by {@link Path#getName()}. Returns in
   * reverse order: under this comparator the "smallest" element is the file with
   * the lexicographically greatest name, i.e. the highest sequence id.
   */
  @VisibleForTesting
  static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
  new Comparator<FileStatus>() {
    @Override
    public int compare(FileStatus left, FileStatus right) {
      // Delegate to FileStatus' own ordering, inverted.
      return right.compareTo(left);
    }};
452 
  /**
   * Return the table directory in HDFS
   * @param tableName table whose directory to resolve under the hbase root dir
   */
  @VisibleForTesting Path getTableDir(final TableName tableName) {
    return FSUtils.getTableDir(rootdir, tableName);
  }
459 
  /** Accepts any path whose file name starts with {@link #TABLEINFO_FILE_PREFIX}. */
  private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      // Accept any file that starts with TABLEINFO_NAME
      return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
    }}; 
466 
  /**
   * Width of the sequenceid that is a suffix on a tableinfo file.
   * The suffix is zero-padded to this many decimal digits,
   * e.g. <code>.tableinfo.0000000003</code>.
   */
  @VisibleForTesting static final int WIDTH_OF_SEQUENCE_ID = 10;
471 
472   /*
473    * @param number Number to use as suffix.
474    * @return Returns zero-prefixed decimal version of passed
475    * number (Does absolute in case number is negative).
476    */
477   private static String formatTableInfoSequenceId(final int number) {
478     byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
479     int d = Math.abs(number);
480     for (int i = b.length - 1; i >= 0; i--) {
481       b[i] = (byte)((d % 10) + '0');
482       d /= 10;
483     }
484     return Bytes.toString(b);
485   }
486 
  /**
   * Regex to eat up sequenceid suffix on a .tableinfo file.
   * Use regex because may encounter oldstyle .tableinfos where there is no
   * sequenceid on the end.
   * Group 2 captures the zero-padded sequence id digits when present.
   */
  private static final Pattern TABLEINFO_FILE_REGEX =
    Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");
494 
495   /**
496    * @param p Path to a <code>.tableinfo</code> file.
497    * @return The current editid or 0 if none found.
498    */
499   @VisibleForTesting static int getTableInfoSequenceId(final Path p) {
500     if (p == null) return 0;
501     Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
502     if (!m.matches()) throw new IllegalArgumentException(p.toString());
503     String suffix = m.group(2);
504     if (suffix == null || suffix.length() <= 0) return 0;
505     return Integer.parseInt(m.group(2));
506   }
507 
  /**
   * @param sequenceid edit sequence id to encode in the file name
   * @return Name of tableinfo file, e.g. <code>.tableinfo.0000000003</code>.
   */
  @VisibleForTesting static String getTableInfoFileName(final int sequenceid) {
    return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
  }
516 
  /**
   * @param tableName table to look up
   * @return Modification time for the table {@link #TABLEINFO_FILE_PREFIX} file
   * or <code>0</code> if no tableinfo file found.
   * @throws IOException if listing the table directory fails
   */
  private long getTableInfoModtime(final TableName tableName) throws IOException {
    FileStatus status = getTableInfoPath(tableName);
    return status == null ? 0 : status.getModificationTime();
  }
529 
530   /**
531    * Returns the latest table descriptor for the given table directly from the file system
532    * if it exists, bypassing the local cache.
533    * Returns null if it's not found.
534    */
535   public static TableDescriptor getTableDescriptorFromFs(FileSystem fs,
536       Path hbaseRootDir, TableName tableName) throws IOException {
537     Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
538     return getTableDescriptorFromFs(fs, tableDir);
539   }
540 
541   /**
542    * Returns the latest table descriptor for the table located at the given directory
543    * directly from the file system if it exists.
544    * @throws TableInfoMissingException if there is no descriptor
545    */
546   public static TableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
547   throws IOException {
548     FileStatus status = getTableInfoPath(fs, tableDir, false);
549     if (status == null) {
550       throw new TableInfoMissingException("No table descriptor file under " + tableDir);
551     }
552     return readTableDescriptor(fs, status, false);
553   }
554   
  /**
   * @param tableName table name
   * @return TableDescriptorAndModtime or null if no table descriptor was found
   * @throws IOException if reading the descriptor file fails
   */
  private TableDescriptorAndModtime getTableDescriptorAndModtime(TableName tableName)
  throws IOException {
    // ignore both -ROOT- and hbase:meta tables
    if (tableName.equals(TableName.META_TABLE_NAME)) {
      return null;
    }
    return getTableDescriptorAndModtime(getTableDir(tableName));
  }
568 
  /**
   * @param tableDir path to table directory
   * @return TableDescriptorAndModtime or null if no table descriptor was found
   * at the specified path
   * @throws IOException if reading the descriptor file fails
   */
  private TableDescriptorAndModtime getTableDescriptorAndModtime(Path tableDir)
  throws IOException {
    FileStatus status = getTableInfoPath(tableDir);
    if (status == null) {
      return null;
    }
    // When this instance is writable, reading may also rewrite old-format files as pb.
    TableDescriptor td = readTableDescriptor(fs, status, !fsreadonly);
    return new TableDescriptorAndModtime(status.getModificationTime(), td);
  }
584 
  /**
   * Reads a table descriptor from the file pointed at by the given status.
   * Understands both the current pb-serialized TableDescriptor format and the
   * older HTableDescriptor format; the latter is converted and assumed ENABLED.
   * @param rewritePb if true, old-format files are rewritten in place as pb
   * @throws IOException if the content parses as neither format
   */
  private static TableDescriptor readTableDescriptor(FileSystem fs, FileStatus status,
      boolean rewritePb) throws IOException {
    int len = Ints.checkedCast(status.getLen());
    byte [] content = new byte[len];
    FSDataInputStream fsDataInputStream = fs.open(status.getPath());
    try {
      fsDataInputStream.readFully(content);
    } finally {
      fsDataInputStream.close();
    }
    TableDescriptor td = null;
    try {
      td = TableDescriptor.parseFrom(content);
    } catch (DeserializationException e) {
      // we have old HTableDescriptor here
      try {
        HTableDescriptor htd = HTableDescriptor.parseFrom(content);
        LOG.warn("Found old table descriptor, converting to new format for table " +
            htd.getTableName() + "; NOTE table will be in ENABLED state!");
        td = new TableDescriptor(htd, TableState.State.ENABLED);
        if (rewritePb) rewriteTableDescriptor(fs, status, td);
      } catch (DeserializationException e1) {
        // NOTE(review): Bytes.toShort(content) looks odd for an error message --
        // likely a printable rendering of the content was intended; confirm.
        throw new IOException("content=" + Bytes.toShort(content), e);
      }
    }
    if (rewritePb && !ProtobufUtil.isPBMagicPrefix(content)) {
      // Convert the file over to be pb before leaving here.
      rewriteTableDescriptor(fs, status, td);
    }
    return td;
  }
616 
  /**
   * Rewrites the descriptor as a new sequence-numbered tableinfo file replacing the
   * given one, in the table directory derived from the status path.
   */
  private static void rewriteTableDescriptor(final FileSystem fs, final FileStatus status,
      final TableDescriptor td)
  throws IOException {
    // Layout is <tableDir>/.tabledesc/.tableinfo.<seqid>; walk up two levels.
    Path tableInfoDir = status.getPath().getParent();
    Path tableDir = tableInfoDir.getParent();
    writeTableDescriptor(fs, td, tableDir, status);
  }
624 
625   /**
626    * Update table descriptor on the file system
627    * @throws IOException Thrown if failed update.
628    * @throws NotImplementedException if in read only mode
629    */
630   @VisibleForTesting Path updateTableDescriptor(TableDescriptor td)
631   throws IOException {
632     if (fsreadonly) {
633       throw new NotImplementedException("Cannot update a table descriptor - in read only mode");
634     }
635     TableName tableName = td.getHTableDescriptor().getTableName();
636     Path tableDir = getTableDir(tableName);
637     Path p = writeTableDescriptor(fs, td, tableDir, getTableInfoPath(tableDir));
638     if (p == null) throw new IOException("Failed update");
639     LOG.info("Updated tableinfo=" + p);
640     long modtime = getTableInfoModtime(tableName);
641     this.cache.put(tableName, new TableDescriptorAndModtime(modtime, td));
642     return p;
643   }
644 
645   /**
646    * Deletes all the table descriptor files from the file system.
647    * Used in unit tests only.
648    * @throws NotImplementedException if in read only mode
649    */
650   public void deleteTableDescriptorIfExists(TableName tableName) throws IOException {
651     if (fsreadonly) {
652       throw new NotImplementedException("Cannot delete a table descriptor - in read only mode");
653     }
654    
655     Path tableDir = getTableDir(tableName);
656     Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
657     deleteTableDescriptorFiles(fs, tableInfoDir, Integer.MAX_VALUE);
658   }
659 
660   /**
661    * Deletes files matching the table info file pattern within the given directory 
662    * whose sequenceId is at most the given max sequenceId.
663    */
664   private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
665   throws IOException {
666     FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
667     for (FileStatus file : status) {
668       Path path = file.getPath();
669       int sequenceId = getTableInfoSequenceId(path);
670       if (sequenceId <= maxSequenceId) {
671         boolean success = FSUtils.delete(fs, path, false);
672         if (success) {
673           LOG.debug("Deleted table descriptor at " + path);
674         } else {
675           LOG.error("Failed to delete descriptor at " + path);
676         }
677       }
678     }
679   }
680   
  /**
   * Attempts to write a new table descriptor to the given table's directory.
   * It first writes it to the .tmp dir then uses an atomic rename to move it into place.
   * It begins at the currentSequenceId + 1 and tries 10 times to find a new sequence number
   * not already in use.
   * Removes the current descriptor file if passed in.
   * 
   * @return Descriptor file or null if we failed write.
   */
  private static Path writeTableDescriptor(final FileSystem fs, 
    final TableDescriptor htd, final Path tableDir,
    final FileStatus currentDescriptorFile)
  throws IOException {  
    // Get temporary dir into which we'll first write a file to avoid half-written file phenomenon.
    // This directory is never removed to avoid removing it out from under a concurrent writer.
    Path tmpTableDir = new Path(tableDir, TMP_DIR);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    
    // What is current sequenceid?  We read the current sequenceid from
    // the current file.  After we read it, another thread could come in and
    // compete with us writing out next version of file.  The below retries
    // should help in this case some but its hard to do guarantees in face of
    // concurrent schema edits.
    int currentSequenceId = currentDescriptorFile == null ? 0 :
      getTableInfoSequenceId(currentDescriptorFile.getPath());
    int newSequenceId = currentSequenceId;
    
    // Put arbitrary upperbound on how often we retry
    int retries = 10;
    int retrymax = currentSequenceId + retries;
    Path tableInfoDirPath = null;
    do {
      newSequenceId += 1;
      String filename = getTableInfoFileName(newSequenceId);
      Path tempPath = new Path(tmpTableDir, filename);
      if (fs.exists(tempPath)) {
        // Some other writer grabbed this sequence id; try the next one.
        LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
        continue;
      }
      tableInfoDirPath = new Path(tableInfoDir, filename);
      try {
        writeTD(fs, tempPath, htd);
        fs.mkdirs(tableInfoDirPath.getParent());
        // Rename the staged file into place; readers never see a half-written file.
        if (!fs.rename(tempPath, tableInfoDirPath)) {
          throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
        }
        LOG.debug("Wrote descriptor into: " + tableInfoDirPath);
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!FSUtils.deleteDirectory(fs, tempPath)) {
          LOG.warn("Failed cleanup of " + tempPath);
        }
        // Null marks "no successful write yet" should we exhaust the retries.
        tableInfoDirPath = null;
        continue;
      }
      break;
    } while (newSequenceId < retrymax);
    if (tableInfoDirPath != null) {
      // if we succeeded, remove old table info files.
      deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
    }
    return tableInfoDirPath;
  }
745   
  /**
   * Serializes the given descriptor as pb into a brand-new file at the given path.
   * Fails if the file already exists (create is called with overwrite=false).
   */
  private static void writeTD(final FileSystem fs, final Path p, final TableDescriptor htd)
  throws IOException {
    FSDataOutputStream out = fs.create(p, false);
    try {
      // We used to write this file out as a serialized HTD Writable followed by two '\n's and then
      // the toString version of HTD.  Now we just write out the pb serialization.
      out.write(htd.toByteArray());
    } finally {
      out.close();
    }
  }
757 
  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table.
   * Used by tests.
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(TableDescriptor htd) throws IOException {
    return createTableDescriptor(htd, false);
  }

  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table.
   * Used by tests.  The schema is wrapped in a TableDescriptor carrying the
   * default table state.
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(HTableDescriptor htd) throws IOException {
    return createTableDescriptor(new TableDescriptor(htd), false);
  }
775 
  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table. If
   * forceCreation is true then even if previous table descriptor is present it
   * will be overwritten
   * 
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(TableDescriptor htd, boolean forceCreation)
  throws IOException {
    Path tableDir = getTableDir(htd.getHTableDescriptor().getTableName());
    return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
  }

  /**
   * Create tables descriptor for given HTableDescriptor. Default TableDescriptor state
   * will be used (typically ENABLED).
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(HTableDescriptor htd, boolean forceCreation)
      throws IOException {
    return createTableDescriptor(new TableDescriptor(htd), forceCreation);
  }
797 
798   /**
799    * Create a new HTableDescriptor in HDFS in the specified table directory. Happens when we create
800    * a new table or snapshot a table.
801    * @param tableDir table directory under which we should write the file
802    * @param htd description of the table to write
803    * @param forceCreation if <tt>true</tt>,then even if previous table descriptor is present it will
804    *          be overwritten
805    * @return <tt>true</tt> if the we successfully created the file, <tt>false</tt> if the file
806    *         already exists and we weren't forcing the descriptor creation.
807    * @throws IOException if a filesystem error occurs
808    */
809   public boolean createTableDescriptorForTableDirectory(Path tableDir,
810       TableDescriptor htd, boolean forceCreation) throws IOException {
811     if (fsreadonly) {
812       throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
813     }
814     FileStatus status = getTableInfoPath(fs, tableDir);
815     if (status != null) {
816       LOG.debug("Current tableInfoPath = " + status.getPath());
817       if (!forceCreation) {
818         if (fs.exists(status.getPath()) && status.getLen() > 0) {
819           if (readTableDescriptor(fs, status, false).equals(htd)) {
820             LOG.debug("TableInfo already exists.. Skipping creation");
821             return false;
822           }
823         }
824       }
825     }
826     Path p = writeTableDescriptor(fs, htd, tableDir, status);
827     return p != null;
828   }
829   
830 }
831