/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import javax.annotation.Nullable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.primitives.Ints;
import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableDescriptor;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableInfoMissingException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;

/**
 * Implementation of {@link TableDescriptors} that reads descriptors from the
 * passed filesystem.  It expects descriptors to be in a file in the
 * {@link #TABLEINFO_DIR} subdir of the table's directory in FS.  Can be
 * read-only (it does not modify the filesystem) or read-write.
 *
 * <p>Also has utilities for maintaining the table descriptor's tableinfo file.
 * The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
 * of the table directory in the filesystem.
 * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
 * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>.  This sequenceid
 * is always increasing.  It starts at zero.  The table schema file with the
 * highest sequenceid has the most recent schema edit.  Usually there is one
 * file only, the most recent, but there may be short periods where there are
 * more than one file.  Old files are eventually cleaned.  The presumption is
 * that there will not be lots of concurrent clients making table schema edits.
 * If so, the below needs a bit of reworking and perhaps some supporting api in hdfs.
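 *
 * <p>A minimal usage sketch (the table name here is only illustrative):
 * <pre>
 * Configuration conf = HBaseConfiguration.create();
 * FSTableDescriptors fstd = new FSTableDescriptors(conf);
 * HTableDescriptor htd = fstd.get(TableName.valueOf("example_table"));
 * </pre>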
 */
@InterfaceAudience.Private
public class FSTableDescriptors implements TableDescriptors {
  private static final Log LOG = LogFactory.getLog(FSTableDescriptors.class);
  private final FileSystem fs;
  private final Path rootdir;
  private final boolean fsreadonly;
  private volatile boolean usecache;
  private volatile boolean fsvisited;

  @VisibleForTesting long cachehits = 0;
  @VisibleForTesting long invocations = 0;

  /** The file name prefix used to store HTD in HDFS  */
  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
  static final String TABLEINFO_DIR = ".tabledesc";
  static final String TMP_DIR = ".tmp";
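
  // Resulting on-disk layout (illustrative; the exact prefix depends on the root dir and
  // namespace): a table's descriptor typically lives at
  //   <hbase.rootdir>/data/<namespace>/<table>/.tabledesc/.tableinfo.<sequenceid>
  // and is first staged under the table's .tmp subdirectory before an atomic rename.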

  // This cache does not age out the old stuff.  Thinking is that the amount
  // of data we keep up in here is so small, no need to do occasional purge.
  // TODO.
  private final Map<TableName, TableDescriptor> cache =
    new ConcurrentHashMap<TableName, TableDescriptor>();

  /**
   * Table descriptor for <code>hbase:meta</code> catalog table
   */
  private final HTableDescriptor metaTableDescriptor;

  /**
   * Construct a FSTableDescriptors instance using the hbase root dir of the given
   * conf and the filesystem where that root dir lives.
   * This instance can do write operations (is not read only).
   */
  public FSTableDescriptors(final Configuration conf) throws IOException {
    this(conf, FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf));
  }

  public FSTableDescriptors(final Configuration conf, final FileSystem fs, final Path rootdir)
  throws IOException {
    this(conf, fs, rootdir, false, true);
  }

  /**
   * @param fsreadonly True if we are read-only when it comes to filesystem
   * operations; i.e. on remove, we do not do delete in fs.
   */
  public FSTableDescriptors(final Configuration conf, final FileSystem fs,
    final Path rootdir, final boolean fsreadonly, final boolean usecache) throws IOException {
    this.fs = fs;
    this.rootdir = rootdir;
    this.fsreadonly = fsreadonly;
    this.usecache = usecache;

    this.metaTableDescriptor = TableDescriptor.metaTableDescriptor(conf);
  }

  public void setCacheOn() throws IOException {
    this.cache.clear();
    this.usecache = true;
  }

  public void setCacheOff() throws IOException {
    this.usecache = false;
    this.cache.clear();
  }

  @VisibleForTesting
  public boolean isUsecache() {
    return this.usecache;
  }

  /**
   * Get the current table descriptor for the given table, or null if none exists.
   *
   * Uses a local cache of the descriptor but still checks the filesystem on each call
   * to see if a newer file has been created since the cached one was read.
   */
  @Override
  @Nullable
  public TableDescriptor getDescriptor(final TableName tablename)
  throws IOException {
    invocations++;
    if (TableName.META_TABLE_NAME.equals(tablename)) {
      cachehits++;
      return new TableDescriptor(metaTableDescriptor, TableState.State.ENABLED);
    }
    // hbase:meta is already handled. If someone tries to get the descriptor for
    // .logs, .oldlogs or .corrupt throw an exception.
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename.getNameAsString())) {
      throw new IOException("No descriptor found for non table = " + tablename);
    }

    if (usecache) {
      // Look in cache of descriptors.
      TableDescriptor cachedtdm = this.cache.get(tablename);
      if (cachedtdm != null) {
        cachehits++;
        return cachedtdm;
      }
    }
    TableDescriptor tdmt = null;
    try {
      tdmt = getTableDescriptorFromFs(fs, rootdir, tablename, !fsreadonly);
    } catch (NullPointerException e) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
          + tablename, e);
    } catch (IOException ioe) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
          + tablename, ioe);
    }
    // last HTD written wins
    if (usecache && tdmt != null) {
      this.cache.put(tablename, tdmt);
    }

    return tdmt;
  }

  /**
   * Get the current table descriptor for the given table, or null if none exists.
   *
   * Uses a local cache of the descriptor but still checks the filesystem on each call
   * to see if a newer file has been created since the cached one was read.
   */
  @Override
  public HTableDescriptor get(TableName tableName) throws IOException {
    if (TableName.META_TABLE_NAME.equals(tableName)) {
      cachehits++;
      return metaTableDescriptor;
    }
    TableDescriptor descriptor = getDescriptor(tableName);
    return descriptor == null ? null : descriptor.getHTableDescriptor();
  }

  /**
   * Returns a map from table name to table descriptor for all tables.
   */
  @Override
  public Map<String, TableDescriptor> getAllDescriptors()
  throws IOException {
    Map<String, TableDescriptor> tds = new TreeMap<String, TableDescriptor>();

    if (fsvisited && usecache) {
      for (Map.Entry<TableName, TableDescriptor> entry: this.cache.entrySet()) {
        tds.put(entry.getKey().toString(), entry.getValue());
      }
      // add hbase:meta to the response
      tds.put(this.metaTableDescriptor.getNameAsString(),
        new TableDescriptor(metaTableDescriptor, TableState.State.ENABLED));
    } else {
      LOG.debug("Fetching table descriptors from the filesystem.");
      boolean allvisited = true;
      for (Path d : FSUtils.getTableDirs(fs, rootdir)) {
        TableDescriptor htd = null;
        try {
          htd = getDescriptor(FSUtils.getTableName(d));
        } catch (FileNotFoundException fnfe) {
          // inability of retrieving one HTD shouldn't stop getting the remaining
          LOG.warn("Trouble retrieving htd", fnfe);
        }
        if (htd == null) {
          allvisited = false;
          continue;
        } else {
          tds.put(htd.getHTableDescriptor().getTableName().getNameAsString(), htd);
        }
      }
      // Only mark the filesystem as fully visited after every table dir has been read;
      // setting this inside the loop could leave fsvisited true after a failed read.
      fsvisited = allvisited;
    }
    return tds;
  }

  /**
   * Returns a map from table name to table descriptor for all tables.
   */
  @Override
  public Map<String, HTableDescriptor> getAll() throws IOException {
    Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
    Map<String, TableDescriptor> allDescriptors = getAllDescriptors();
    for (Map.Entry<String, TableDescriptor> entry : allDescriptors.entrySet()) {
      htds.put(entry.getKey(), entry.getValue().getHTableDescriptor());
    }
    return htds;
  }

  /**
   * Find descriptors by namespace.
   * @see #get(org.apache.hadoop.hbase.TableName)
   */
  @Override
  public Map<String, HTableDescriptor> getByNamespace(String name)
  throws IOException {
    Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
    List<Path> tableDirs =
        FSUtils.getLocalTableDirs(fs, FSUtils.getNamespaceDir(rootdir, name));
    for (Path d: tableDirs) {
      HTableDescriptor htd = null;
      try {
        htd = get(FSUtils.getTableName(d));
      } catch (FileNotFoundException fnfe) {
        // inability of retrieving one HTD shouldn't stop getting the remaining
        LOG.warn("Trouble retrieving htd", fnfe);
      }
      if (htd == null) continue;
      htds.put(FSUtils.getTableName(d).getNameAsString(), htd);
    }
    return htds;
  }

  /**
   * Adds (or updates) the table descriptor to the FileSystem
   * and updates the local cache with it.
   */
  @Override
  public void add(TableDescriptor htd) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
    }
    TableName tableName = htd.getHTableDescriptor().getTableName();
    if (TableName.META_TABLE_NAME.equals(tableName)) {
      throw new NotImplementedException("Cannot add a table descriptor for hbase:meta");
    }
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
      throw new NotImplementedException(
          "Cannot add a table descriptor for a reserved subdirectory name: "
              + htd.getHTableDescriptor().getNameAsString());
    }
    updateTableDescriptor(htd);
  }

  /**
   * Adds (or updates) the table descriptor to the FileSystem
   * and updates the local cache with it.
   */
  @Override
  public void add(HTableDescriptor htd) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
    }
    TableName tableName = htd.getTableName();
    if (TableName.META_TABLE_NAME.equals(tableName)) {
      throw new NotImplementedException("Cannot add a table descriptor for hbase:meta");
    }
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
      throw new NotImplementedException(
          "Cannot add a table descriptor for a reserved subdirectory name: "
              + htd.getNameAsString());
    }
    TableDescriptor descriptor = getDescriptor(htd.getTableName());
    if (descriptor == null) {
      descriptor = new TableDescriptor(htd);
    } else {
      descriptor.setHTableDescriptor(htd);
    }
    updateTableDescriptor(descriptor);
  }

  /**
   * Removes the table descriptor from the local cache and returns it.
   * If not in read only mode, it also deletes the entire table directory(!)
   * from the FileSystem.
   */
  @Override
  public HTableDescriptor remove(final TableName tablename)
  throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
    }
    Path tabledir = getTableDir(tablename);
    if (this.fs.exists(tabledir)) {
      if (!this.fs.delete(tabledir, true)) {
        throw new IOException("Failed delete of " + tabledir.toString());
      }
    }
    TableDescriptor descriptor = this.cache.remove(tablename);
    if (descriptor == null) {
      return null;
    } else {
      return descriptor.getHTableDescriptor();
    }
  }

  /**
   * Checks if a current table info file exists for the given table
   *
   * @param tableName name of table
   * @return true if exists
   * @throws IOException
   */
  public boolean isTableInfoExists(TableName tableName) throws IOException {
    return getTableInfoPath(tableName) != null;
  }

  /**
   * Find the most current table info file for the given table in the hbase root directory.
   * @return The file status of the current table info file or null if it does not exist
   */
  private FileStatus getTableInfoPath(final TableName tableName) throws IOException {
    Path tableDir = getTableDir(tableName);
    return getTableInfoPath(tableDir);
  }

  private FileStatus getTableInfoPath(Path tableDir)
  throws IOException {
    return getTableInfoPath(fs, tableDir, !fsreadonly);
  }

  /**
   * Find the most current table info file for the table located in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if present
   * or no sequence number at all if none exist (for backward compatibility from before there
   * were sequence numbers).
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir)
  throws IOException {
    return getTableInfoPath(fs, tableDir, false);
  }

  /**
   * Find the most current table info file for the table in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if
   * present or no sequence number at all if none exist (for backward compatibility from before
   * there were sequence numbers).
   * If there are multiple table info files found and removeOldFiles is true it also deletes the
   * older files.
   *
   * @return The file status of the current table info file or null if none exist
   * @throws IOException
   */
  private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
  throws IOException {
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
  }

  /**
   * Find the most current table info file in the given directory
   *
   * Looks within the given directory for any table info files
   * and takes the 'current' one - meaning the one with the highest sequence number if present
   * or no sequence number at all if none exist (for backward compatibility from before there
   * were sequence numbers).
   * If there are multiple possible files found
   * and we're not in read only mode it also deletes the older files.
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  // only visible for FSTableDescriptorMigrationToSubdir, can be removed with that
  static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir, boolean removeOldFiles)
  throws IOException {
    FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null || status.length < 1) return null;
    FileStatus mostCurrent = null;
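    // TABLEINFO_FILESTATUS_COMPARATOR sorts newest (highest sequenceid) first, so
    // compare(file, mostCurrent) < 0 means "file" is more current than "mostCurrent".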
    for (FileStatus file : status) {
      if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
        mostCurrent = file;
      }
    }
    if (removeOldFiles && status.length > 1) {
      // Clean away old versions
      for (FileStatus file : status) {
        Path path = file.getPath();
        if (file != mostCurrent) {
          if (!fs.delete(file.getPath(), false)) {
            LOG.warn("Failed cleanup of " + path);
          } else {
            LOG.debug("Cleaned up old tableinfo file " + path);
          }
        }
      }
    }
    return mostCurrent;
  }

  /**
   * Compare {@link FileStatus} instances by {@link Path#getName()}. Returns in
   * reverse order.
   */
  @VisibleForTesting
  static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
  new Comparator<FileStatus>() {
    @Override
    public int compare(FileStatus left, FileStatus right) {
      return right.compareTo(left);
    }};

  /**
   * Return the table directory in HDFS
   */
  @VisibleForTesting Path getTableDir(final TableName tableName) {
    return FSUtils.getTableDir(rootdir, tableName);
  }

  private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      // Accept any file that starts with TABLEINFO_FILE_PREFIX
      return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
    }};

  /**
   * Width of the sequenceid that is a suffix on a tableinfo file.
   */
  @VisibleForTesting static final int WIDTH_OF_SEQUENCE_ID = 10;

  /*
   * @param number Number to use as suffix.
   * @return Returns zero-prefixed decimal version of passed
   * number (Does absolute in case number is negative).
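   * For example, formatTableInfoSequenceId(3) returns "0000000003".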
   */
  private static String formatTableInfoSequenceId(final int number) {
    byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
    int d = Math.abs(number);
    for (int i = b.length - 1; i >= 0; i--) {
      b[i] = (byte)((d % 10) + '0');
      d /= 10;
    }
    return Bytes.toString(b);
  }

  /**
   * Regex to eat up sequenceid suffix on a .tableinfo file.
   * Use regex because may encounter oldstyle .tableinfos where there is no
   * sequenceid on the end.
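   * Matches both plain <code>.tableinfo</code> and <code>.tableinfo.0000000003</code>.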
   */
  private static final Pattern TABLEINFO_FILE_REGEX =
    Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");

  /**
   * @param p Path to a <code>.tableinfo</code> file.
   * @return The current editid or 0 if none found.
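   * For example, a file named <code>.tableinfo.0000000042</code> yields 42.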
   */
  @VisibleForTesting static int getTableInfoSequenceId(final Path p) {
    if (p == null) return 0;
    Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
    if (!m.matches()) throw new IllegalArgumentException(p.toString());
    String suffix = m.group(2);
    if (suffix == null || suffix.length() <= 0) return 0;
    return Integer.parseInt(m.group(2));
  }

  /**
   * @param sequenceid
   * @return Name of tableinfo file.
   */
  @VisibleForTesting static String getTableInfoFileName(final int sequenceid) {
    return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
  }

  /**
   * Returns the latest table descriptor for the given table directly from the file system
   * if it exists, bypassing the local cache.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs,
      Path hbaseRootDir, TableName tableName) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir);
  }

  /**
   * Returns the latest table descriptor for the given table directly from the file system
   * if it exists, bypassing the local cache.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs,
      Path hbaseRootDir, TableName tableName, boolean rewritePb) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir, rewritePb);
  }

  /**
   * Returns the latest table descriptor for the table located at the given directory
   * directly from the file system if it exists.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
    throws IOException {
    return getTableDescriptorFromFs(fs, tableDir, false);
  }

  /**
   * Returns the latest table descriptor for the table located at the given directory
   * directly from the file system if it exists.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir,
    boolean rewritePb)
  throws IOException {
    FileStatus status = getTableInfoPath(fs, tableDir, false);
    if (status == null) {
      throw new TableInfoMissingException("No table descriptor file under " + tableDir);
    }
    return readTableDescriptor(fs, status, rewritePb);
  }

  private static TableDescriptor readTableDescriptor(FileSystem fs, FileStatus status,
      boolean rewritePb) throws IOException {
    int len = Ints.checkedCast(status.getLen());
    byte [] content = new byte[len];
    FSDataInputStream fsDataInputStream = fs.open(status.getPath());
    try {
      fsDataInputStream.readFully(content);
    } finally {
      fsDataInputStream.close();
    }
    TableDescriptor td = null;
    try {
      td = TableDescriptor.parseFrom(content);
    } catch (DeserializationException e) {
      // we have old HTableDescriptor here
      try {
        HTableDescriptor htd = HTableDescriptor.parseFrom(content);
        LOG.warn("Found old table descriptor, converting to new format for table " +
            htd.getTableName() + "; NOTE table will be in ENABLED state!");
        td = new TableDescriptor(htd, TableState.State.ENABLED);
        if (rewritePb) rewriteTableDescriptor(fs, status, td);
      } catch (DeserializationException e1) {
        throw new IOException("content=" + Bytes.toStringBinary(content), e);
      }
    }
    if (rewritePb && !ProtobufUtil.isPBMagicPrefix(content)) {
      // Convert the file over to be pb before leaving here.
      rewriteTableDescriptor(fs, status, td);
    }
    return td;
  }

  private static void rewriteTableDescriptor(final FileSystem fs, final FileStatus status,
      final TableDescriptor td)
  throws IOException {
    Path tableInfoDir = status.getPath().getParent();
    Path tableDir = tableInfoDir.getParent();
    writeTableDescriptor(fs, td, tableDir, status);
  }

  /**
   * Update table descriptor on the file system
   * @throws IOException Thrown if failed update.
   * @throws NotImplementedException if in read only mode
   */
  @VisibleForTesting Path updateTableDescriptor(TableDescriptor td)
  throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot update a table descriptor - in read only mode");
    }
    TableName tableName = td.getHTableDescriptor().getTableName();
    Path tableDir = getTableDir(tableName);
    Path p = writeTableDescriptor(fs, td, tableDir, getTableInfoPath(tableDir));
    if (p == null) throw new IOException("Failed update");
    LOG.info("Updated tableinfo=" + p);
    if (usecache) {
      this.cache.put(td.getHTableDescriptor().getTableName(), td);
    }
    return p;
  }

  /**
   * Deletes all the table descriptor files from the file system.
   * Used in unit tests only.
   * @throws NotImplementedException if in read only mode
   */
  public void deleteTableDescriptorIfExists(TableName tableName) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot delete a table descriptor - in read only mode");
    }

    Path tableDir = getTableDir(tableName);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    deleteTableDescriptorFiles(fs, tableInfoDir, Integer.MAX_VALUE);
  }

  /**
   * Deletes files matching the table info file pattern within the given directory
   * whose sequenceId is at most the given max sequenceId.
   */
  private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
  throws IOException {
    FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    // listStatus returns null when the directory is empty or missing
    if (status == null) return;
    for (FileStatus file : status) {
      Path path = file.getPath();
      int sequenceId = getTableInfoSequenceId(path);
      if (sequenceId <= maxSequenceId) {
        boolean success = FSUtils.delete(fs, path, false);
        if (success) {
          LOG.debug("Deleted table descriptor at " + path);
        } else {
          LOG.error("Failed to delete descriptor at " + path);
        }
      }
    }
  }

  /**
   * Attempts to write a new table descriptor to the given table's directory.
   * It first writes it to the .tmp dir then uses an atomic rename to move it into place.
   * It begins at the currentSequenceId + 1 and tries 10 times to find a new sequence number
   * not already in use.
   * Removes the current descriptor file if passed in.
   *
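   * For illustration, with a hypothetical table <code>t1</code> in the default namespace,
   * writing sequenceid 4 stages the file and then renames it:
   * <pre>
   * .../data/default/t1/.tmp/.tableinfo.0000000004        (written first)
   * .../data/default/t1/.tabledesc/.tableinfo.0000000004  (after rename)
   * </pre>
   *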
   * @return Descriptor file or null if we failed write.
   */
  private static Path writeTableDescriptor(final FileSystem fs,
    final TableDescriptor htd, final Path tableDir,
    final FileStatus currentDescriptorFile)
  throws IOException {
    // Get temporary dir into which we'll first write a file to avoid half-written file phenomenon.
    // This directory is never removed to avoid removing it out from under a concurrent writer.
    Path tmpTableDir = new Path(tableDir, TMP_DIR);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);

    // What is the current sequenceid?  We read it from the current file.  After we
    // read it, another thread could come in and compete with us writing out the next
    // version of the file.  The retries below help some, but it is hard to make
    // guarantees in the face of concurrent schema edits.
    int currentSequenceId = currentDescriptorFile == null ? 0 :
      getTableInfoSequenceId(currentDescriptorFile.getPath());
    int newSequenceId = currentSequenceId;

    // Put an arbitrary upperbound on how often we retry
    int retries = 10;
    int retrymax = currentSequenceId + retries;
    Path tableInfoDirPath = null;
    do {
      newSequenceId += 1;
      String filename = getTableInfoFileName(newSequenceId);
      Path tempPath = new Path(tmpTableDir, filename);
      if (fs.exists(tempPath)) {
        LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
        continue;
      }
      tableInfoDirPath = new Path(tableInfoDir, filename);
      try {
        writeTD(fs, tempPath, htd);
        fs.mkdirs(tableInfoDirPath.getParent());
        if (!fs.rename(tempPath, tableInfoDirPath)) {
          throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
        }
        LOG.debug("Wrote descriptor into: " + tableInfoDirPath);
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!FSUtils.deleteDirectory(fs, tempPath)) {
          LOG.warn("Failed cleanup of " + tempPath);
        }
        tableInfoDirPath = null;
        continue;
      }
      break;
    } while (newSequenceId < retrymax);
    if (tableInfoDirPath != null) {
      // if we succeeded, remove old table info files.
      deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
    }
    return tableInfoDirPath;
  }

  private static void writeTD(final FileSystem fs, final Path p, final TableDescriptor htd)
  throws IOException {
    FSDataOutputStream out = fs.create(p, false);
    try {
      // We used to write this file out as a serialized HTD Writable followed by two '\n's and then
      // the toString version of HTD.  Now we just write out the pb serialization.
      out.write(htd.toByteArray());
    } finally {
      out.close();
    }
  }

  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table.
   * Used by tests.
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(TableDescriptor htd) throws IOException {
    return createTableDescriptor(htd, false);
  }

  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table.
   * Used by tests.
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(HTableDescriptor htd) throws IOException {
    return createTableDescriptor(new TableDescriptor(htd), false);
  }

  /**
   * Create new HTableDescriptor in HDFS. Happens when we are creating table. If
   * forceCreation is true then even if a previous table descriptor is present it
   * will be overwritten.
   *
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(TableDescriptor htd, boolean forceCreation)
  throws IOException {
    Path tableDir = getTableDir(htd.getHTableDescriptor().getTableName());
    return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
  }

  /**
   * Create a table descriptor for the given HTableDescriptor. The default
   * TableDescriptor state will be used (typically ENABLED).
   */
  public boolean createTableDescriptor(HTableDescriptor htd, boolean forceCreation)
      throws IOException {
    return createTableDescriptor(new TableDescriptor(htd), forceCreation);
  }

  /**
   * Create a new HTableDescriptor in HDFS in the specified table directory. Happens when we create
   * a new table or snapshot a table.
   * @param tableDir table directory under which we should write the file
   * @param htd description of the table to write
   * @param forceCreation if <tt>true</tt>, then even if a previous table descriptor is present it
   *          will be overwritten
   * @return <tt>true</tt> if we successfully created the file, <tt>false</tt> if the file
   *         already exists and we weren't forcing the descriptor creation.
   * @throws IOException if a filesystem error occurs
   */
  public boolean createTableDescriptorForTableDirectory(Path tableDir,
      TableDescriptor htd, boolean forceCreation) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
    }
    FileStatus status = getTableInfoPath(fs, tableDir);
    if (status != null) {
      LOG.debug("Current tableInfoPath = " + status.getPath());
      if (!forceCreation) {
        if (fs.exists(status.getPath()) && status.getLen() > 0) {
          if (readTableDescriptor(fs, status, false).equals(htd)) {
            LOG.debug("TableInfo already exists; skipping creation");
            return false;
          }
        }
      }
    }
    Path p = writeTableDescriptor(fs, htd, tableDir, status);
    return p != null;
  }

}