/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.primitives.Ints;
import edu.umd.cs.findbugs.annotations.Nullable;
import org.apache.commons.lang.NotImplementedException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableInfoMissingException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.regionserver.BloomType;

/**
 * Implementation of {@link TableDescriptors} that reads descriptors from the
 * passed filesystem.  It expects descriptors to be in a file in the
 * {@link #TABLEINFO_DIR} subdir of the table's directory in FS.  Can be
 * read-only (does not modify the filesystem) or read-write.
 *
 * <p>Also has utility for keeping the table descriptor's tableinfo file up to date.
 * The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
 * of the table directory in the filesystem.
 * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
 * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>.  This sequenceid
 * is always increasing.  It starts at zero.  The table schema file with the
 * highest sequenceid has the most recent schema edit.  Usually there is one file
 * only, the most recent, but there may be short periods where there is more
 * than one file.  Old files are eventually cleaned up.  The presumption is that
 * there will not be lots of concurrent clients making table schema edits; if
 * that changes, the below needs reworking and perhaps some supporting API in HDFS.
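 *
 * <p>For example, with the namespaced layout, the descriptor for a table <code>t1</code>
 * in the <code>default</code> namespace would live at a path like (illustrative, not
 * normative):
 * <code>&lt;hbase.rootdir&gt;/data/default/t1/.tabledesc/.tableinfo.0000000003</code>.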
 */
@InterfaceAudience.Private
public class FSTableDescriptors implements TableDescriptors {
  private static final Log LOG = LogFactory.getLog(FSTableDescriptors.class);
  private final FileSystem fs;
  private final Path rootdir;
  private final boolean fsreadonly;
  private volatile boolean usecache;
  private volatile boolean fsvisited;

  @VisibleForTesting long cachehits = 0;
  @VisibleForTesting long invocations = 0;

  /** The file name prefix used to store HTD in HDFS */
  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
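  /** Subdir of the table directory under which tableinfo files are kept */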
  static final String TABLEINFO_DIR = ".tabledesc";
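  /** Temporary subdir of the table directory used while writing a new tableinfo file */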
  static final String TMP_DIR = ".tmp";

  // This cache does not age out old entries.  The thinking is that the amount
  // of data kept here is so small that an occasional purge is unnecessary.
  // TODO.
  private final Map<TableName, HTableDescriptor> cache =
    new ConcurrentHashMap<TableName, HTableDescriptor>();

  /**
   * Table descriptor for <code>hbase:meta</code> catalog table
   */
  private final HTableDescriptor metaTableDescriptor;

  /**
   * Construct a FSTableDescriptors instance using the hbase root dir of the given
   * conf and the filesystem where that root dir lives.
   * This instance can do write operations (is not read only).
   */
  public FSTableDescriptors(final Configuration conf) throws IOException {
    this(conf, FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf));
  }

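  /**
   * Construct a FSTableDescriptors instance using the given filesystem and hbase root dir.
   * The resulting instance is read-write and has descriptor caching enabled.
   */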
  public FSTableDescriptors(final Configuration conf, final FileSystem fs, final Path rootdir)
  throws IOException {
    this(conf, fs, rootdir, false, true);
  }

  /**
   * @param fsreadonly True if we are read-only when it comes to filesystem
   * operations; i.e. on remove, we do not do delete in fs.
   */
  public FSTableDescriptors(final Configuration conf, final FileSystem fs,
    final Path rootdir, final boolean fsreadonly, final boolean usecache) throws IOException {
    this.fs = fs;
    this.rootdir = rootdir;
    this.fsreadonly = fsreadonly;
    this.usecache = usecache;

    this.metaTableDescriptor = createMetaTableDescriptor(conf);
  }

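  /**
   * Build the {@link HTableDescriptor} for the <code>hbase:meta</code> catalog table,
   * applying the meta-specific tunings (versions, block size, etc.) from the given
   * configuration.
   */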
  @VisibleForTesting
  public static HTableDescriptor createMetaTableDescriptor(final Configuration conf)
      throws IOException {
    HTableDescriptor metaDescriptor = new HTableDescriptor(
        TableName.META_TABLE_NAME,
        new HColumnDescriptor[] {
            new HColumnDescriptor(HConstants.CATALOG_FAMILY)
                .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
                    HConstants.DEFAULT_HBASE_META_VERSIONS))
                .setInMemory(true)
                .setBlocksize(conf.getInt(HConstants.HBASE_META_BLOCK_SIZE,
                    HConstants.DEFAULT_HBASE_META_BLOCK_SIZE))
                .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
                    // Disable blooms for meta.  Needs work.  Seems to mess w/ getClosestOrBefore.
                .setBloomFilterType(BloomType.NONE)
                    // Enable cache of data blocks in L1 if more than one caching tier deployed:
                    // e.g. if using CombinedBlockCache (BucketCache).
                .setCacheDataInL1(true),
            new HColumnDescriptor(HConstants.TABLE_FAMILY)
                // Ten is an arbitrary number.  Keep versions to help debugging.
                .setMaxVersions(10)
                .setInMemory(true)
                .setBlocksize(8 * 1024)
                .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
                    // Disable blooms for meta.  Needs work.  Seems to mess w/ getClosestOrBefore.
                .setBloomFilterType(BloomType.NONE)
                    // Enable cache of data blocks in L1 if more than one caching tier deployed:
                    // e.g. if using CombinedBlockCache (BucketCache).
                .setCacheDataInL1(true)
        }) {
    };
    metaDescriptor.addCoprocessor(
        "org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint",
        null, Coprocessor.PRIORITY_SYSTEM, null);
    return metaDescriptor;
  }

  @Override
  public void setCacheOn() throws IOException {
    this.cache.clear();
    this.usecache = true;
  }

  @Override
  public void setCacheOff() throws IOException {
    this.usecache = false;
    this.cache.clear();
  }

  @VisibleForTesting
  public boolean isUsecache() {
    return this.usecache;
  }

  /**
   * Get the current table descriptor for the given table, or null if none exists.
   *
   * If caching is enabled, a cached descriptor is returned on a hit; the filesystem
   * is only consulted on a cache miss.
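   *
   * <p>A minimal usage sketch (the table name here is hypothetical):
   * <pre>
   * TableDescriptors htds = new FSTableDescriptors(conf);
   * HTableDescriptor htd = htds.get(TableName.valueOf("t1"));
   * </pre>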
   */
  @Override
  @Nullable
  public HTableDescriptor get(final TableName tablename)
  throws IOException {
    invocations++;
    if (TableName.META_TABLE_NAME.equals(tablename)) {
      cachehits++;
      return metaTableDescriptor;
    }
    // hbase:meta is already handled. If someone tries to get the descriptor for
    // .logs, .oldlogs or .corrupt, throw an exception.
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename.getNameAsString())) {
      throw new IOException("No descriptor found for non-table = " + tablename);
    }

    if (usecache) {
      // Look in cache of descriptors.
      HTableDescriptor cachedtdm = this.cache.get(tablename);
      if (cachedtdm != null) {
        cachehits++;
        return cachedtdm;
      }
    }
    HTableDescriptor tdmt = null;
    try {
      tdmt = getTableDescriptorFromFs(fs, rootdir, tablename);
    } catch (NullPointerException e) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
          + tablename, e);
    } catch (TableInfoMissingException e) {
      // Ignore; this is normal during regular operation.
    } catch (IOException ioe) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
          + tablename, ioe);
    }
    // Last HTD written wins.
    if (usecache && tdmt != null) {
      this.cache.put(tablename, tdmt);
    }

    return tdmt;
  }

  /**
   * Returns a map from table name to table descriptor for all tables.
   */
  @Override
  public Map<String, HTableDescriptor> getAllDescriptors()
  throws IOException {
    Map<String, HTableDescriptor> tds = new TreeMap<String, HTableDescriptor>();

    if (fsvisited && usecache) {
      for (Map.Entry<TableName, HTableDescriptor> entry: this.cache.entrySet()) {
        tds.put(entry.getKey().toString(), entry.getValue());
      }
      // Add hbase:meta to the response.
      tds.put(this.metaTableDescriptor.getNameAsString(), metaTableDescriptor);
    } else {
      LOG.debug("Fetching table descriptors from the filesystem.");
      boolean allvisited = true;
      for (Path d : FSUtils.getTableDirs(fs, rootdir)) {
        HTableDescriptor htd = null;
        try {
          htd = get(FSUtils.getTableName(d));
        } catch (FileNotFoundException fnfe) {
          // Failure to retrieve one HTD shouldn't stop us getting the rest.
          LOG.warn("Trouble retrieving htd", fnfe);
        }
        if (htd == null) {
          allvisited = false;
          continue;
        } else {
          tds.put(htd.getTableName().getNameAsString(), htd);
        }
      }
      // Only mark the filesystem as fully visited once the whole scan completes.
      fsvisited = allvisited;
    }
    return tds;
  }

  /**
   * Returns a map from table name to table descriptor for all tables.
   */
  @Override
  public Map<String, HTableDescriptor> getAll() throws IOException {
    Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
    Map<String, HTableDescriptor> allDescriptors = getAllDescriptors();
    for (Map.Entry<String, HTableDescriptor> entry : allDescriptors
        .entrySet()) {
      htds.put(entry.getKey(), entry.getValue());
    }
    return htds;
  }

  /**
   * Find descriptors by namespace.
   * @see #get(org.apache.hadoop.hbase.TableName)
   */
  @Override
  public Map<String, HTableDescriptor> getByNamespace(String name)
  throws IOException {
    Map<String, HTableDescriptor> htds = new TreeMap<String, HTableDescriptor>();
    List<Path> tableDirs =
        FSUtils.getLocalTableDirs(fs, FSUtils.getNamespaceDir(rootdir, name));
    for (Path d: tableDirs) {
      HTableDescriptor htd = null;
      try {
        htd = get(FSUtils.getTableName(d));
      } catch (FileNotFoundException fnfe) {
        // Failure to retrieve one HTD shouldn't stop us getting the rest.
        LOG.warn("Trouble retrieving htd", fnfe);
      }
      if (htd == null) continue;
      htds.put(FSUtils.getTableName(d).getNameAsString(), htd);
    }
    return htds;
  }

  /**
   * Adds (or updates) the table descriptor to the FileSystem
   * and updates the local cache with it.
   */
  @Override
  public void add(HTableDescriptor htd) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
    }
    TableName tableName = htd.getTableName();
    if (TableName.META_TABLE_NAME.equals(tableName)) {
      throw new NotImplementedException("Cannot add a descriptor for hbase:meta");
    }
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
      throw new NotImplementedException(
          "Cannot add a table descriptor for a reserved subdirectory name: "
              + htd.getNameAsString());
    }
    updateTableDescriptor(htd);
  }

  /**
   * Removes the table descriptor from the local cache and returns it.
   * If not in read only mode, it also deletes the entire table directory(!)
   * from the FileSystem.
   */
  @Override
  public HTableDescriptor remove(final TableName tablename)
  throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
    }
    Path tabledir = getTableDir(tablename);
    if (this.fs.exists(tabledir)) {
      if (!this.fs.delete(tabledir, true)) {
        throw new IOException("Failed delete of " + tabledir.toString());
      }
    }
    return this.cache.remove(tablename);
  }

  /**
   * Checks if a current table info file exists for the given table
   *
   * @param tableName name of table
   * @return true if exists
   * @throws IOException
   */
  public boolean isTableInfoExists(TableName tableName) throws IOException {
    return getTableInfoPath(tableName) != null;
  }

  /**
   * Find the most current table info file for the given table in the hbase root directory.
   * @return The file status of the current table info file or null if it does not exist
   */
  private FileStatus getTableInfoPath(final TableName tableName) throws IOException {
    Path tableDir = getTableDir(tableName);
    return getTableInfoPath(tableDir);
  }

  private FileStatus getTableInfoPath(Path tableDir)
  throws IOException {
    return getTableInfoPath(fs, tableDir, !fsreadonly);
  }

  /**
   * Find the most current table info file for the table located in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if
   * present, or the one with no sequence number at all otherwise (for backward compatibility
   * with the time before sequence numbers existed).
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir)
  throws IOException {
    return getTableInfoPath(fs, tableDir, false);
  }

  /**
   * Find the most current table info file for the table in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if
   * present, or the one with no sequence number at all otherwise (for backward compatibility
   * with the time before sequence numbers existed).
   * If multiple table info files are found and removeOldFiles is true, the older files are
   * also deleted.
   *
   * @return The file status of the current table info file or null if none exist
   * @throws IOException
   */
  private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
  throws IOException {
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
  }

  /**
   * Find the most current table info file in the given directory.
   *
   * Looks within the given directory for any table info files and takes the 'current' one -
   * meaning the one with the highest sequence number if present, or the one with no sequence
   * number at all otherwise (for backward compatibility with the time before sequence numbers
   * existed).
   * If multiple possible files are found and we're not in read-only mode, the older files are
   * also deleted.
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  // only visible for FSTableDescriptorMigrationToSubdir, can be removed with that
  static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir, boolean removeOldFiles)
  throws IOException {
    FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null || status.length < 1) return null;
    FileStatus mostCurrent = null;
    for (FileStatus file : status) {
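      // TABLEINFO_FILESTATUS_COMPARATOR sorts newest-first, so a negative comparison
      // means 'file' is more current than the candidate seen so far.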
      if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
        mostCurrent = file;
      }
    }
    if (removeOldFiles && status.length > 1) {
      // Clean away old versions
      for (FileStatus file : status) {
        Path path = file.getPath();
        if (file != mostCurrent) {
          if (!fs.delete(file.getPath(), false)) {
            LOG.warn("Failed cleanup of " + path);
          } else {
            LOG.debug("Cleaned up old tableinfo file " + path);
          }
        }
      }
    }
    return mostCurrent;
  }

  /**
   * Compares {@link FileStatus} instances in reverse of their natural order so that
   * the most recent tableinfo file (the one with the highest sequence number) sorts first.
   */
  @VisibleForTesting
  static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
  new Comparator<FileStatus>() {
    @Override
    public int compare(FileStatus left, FileStatus right) {
      return right.compareTo(left);
    }};

  /**
   * Return the table directory in HDFS
   */
  @VisibleForTesting Path getTableDir(final TableName tableName) {
    return FSUtils.getTableDir(rootdir, tableName);
  }

  private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      // Accept any file that starts with TABLEINFO_FILE_PREFIX
      return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
    }};

  /**
   * Width of the sequenceid that is a suffix on a tableinfo file.
   */
  @VisibleForTesting static final int WIDTH_OF_SEQUENCE_ID = 10;

  /*
   * @param number Number to use as suffix.
   * @return Zero-padded decimal representation of the passed number (uses the
   * absolute value in case the number is negative).
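   * e.g. formatTableInfoSequenceId(3) returns "0000000003"
   * (zero-padded to WIDTH_OF_SEQUENCE_ID digits).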
   */
  private static String formatTableInfoSequenceId(final int number) {
    byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
    int d = Math.abs(number);
    for (int i = b.length - 1; i >= 0; i--) {
      b[i] = (byte)((d % 10) + '0');
      d /= 10;
    }
    return Bytes.toString(b);
  }

  /**
   * Regex to eat up the sequenceid suffix on a .tableinfo file.
   * A regex is used because we may encounter old-style .tableinfo files that
   * have no sequenceid on the end.
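   * Matches both <code>.tableinfo</code> and e.g. <code>.tableinfo.0000000003</code>.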
   */
  private static final Pattern TABLEINFO_FILE_REGEX =
    Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");

  /**
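   * Extract the edit sequence id from the name of a tableinfo file;
   * e.g. <code>.tableinfo.0000000003</code> yields 3.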
   * @param p Path to a <code>.tableinfo</code> file.
   * @return The current edit sequence id, or 0 if none found.
   */
  @VisibleForTesting static int getTableInfoSequenceId(final Path p) {
    if (p == null) return 0;
    Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
    if (!m.matches()) throw new IllegalArgumentException(p.toString());
    String suffix = m.group(2);
    if (suffix == null || suffix.length() <= 0) return 0;
    return Integer.parseInt(suffix);
  }

  /**
   * @param sequenceid The edit sequence id to encode in the file name.
   * @return Name of tableinfo file.
   */
  @VisibleForTesting static String getTableInfoFileName(final int sequenceid) {
    return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
  }

  /**
   * Returns the latest table descriptor for the given table directly from the file
   * system, bypassing the local cache.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs,
      Path hbaseRootDir, TableName tableName) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir);
  }

  /**
   * Returns the latest table descriptor for the table located at the given directory
   * directly from the file system if it exists.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static HTableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
  throws IOException {
    FileStatus status = getTableInfoPath(fs, tableDir, false);
    if (status == null) {
      throw new TableInfoMissingException("No table descriptor file under " + tableDir);
    }
    return readTableDescriptor(fs, status);
  }

  private static HTableDescriptor readTableDescriptor(FileSystem fs, FileStatus status)
      throws IOException {
    int len = Ints.checkedCast(status.getLen());
    byte [] content = new byte[len];
    FSDataInputStream fsDataInputStream = fs.open(status.getPath());
    try {
      fsDataInputStream.readFully(content);
    } finally {
      fsDataInputStream.close();
    }
    HTableDescriptor htd = null;
    try {
      htd = HTableDescriptor.parseFrom(content);
    } catch (DeserializationException e) {
      throw new IOException("content=" + Bytes.toStringBinary(content), e);
    }
    return htd;
  }

  /**
   * Update table descriptor on the file system
   * @throws IOException Thrown if failed update.
   * @throws NotImplementedException if in read only mode
   */
  @VisibleForTesting Path updateTableDescriptor(HTableDescriptor td)
  throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot update a table descriptor - in read only mode");
    }
    TableName tableName = td.getTableName();
    Path tableDir = getTableDir(tableName);
    Path p = writeTableDescriptor(fs, td, tableDir, getTableInfoPath(tableDir));
    if (p == null) throw new IOException("Failed update");
    LOG.info("Updated tableinfo=" + p);
    if (usecache) {
      this.cache.put(td.getTableName(), td);
    }
    return p;
  }

  /**
   * Deletes all the table descriptor files from the file system.
   * Used in unit tests only.
   * @throws NotImplementedException if in read only mode
   */
  public void deleteTableDescriptorIfExists(TableName tableName) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot delete a table descriptor - in read only mode");
    }

    Path tableDir = getTableDir(tableName);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    deleteTableDescriptorFiles(fs, tableInfoDir, Integer.MAX_VALUE);
  }

  /**
   * Deletes files matching the table info file pattern within the given directory
   * whose sequenceId is at most the given max sequenceId.
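   * Passing {@link Integer#MAX_VALUE} removes them all, as
   * {@link #deleteTableDescriptorIfExists(TableName)} does.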
   */
  private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
  throws IOException {
    FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null) return; // Nothing to delete; the directory may not exist.
    for (FileStatus file : status) {
      Path path = file.getPath();
      int sequenceId = getTableInfoSequenceId(path);
      if (sequenceId <= maxSequenceId) {
        boolean success = FSUtils.delete(fs, path, false);
        if (success) {
          LOG.debug("Deleted table descriptor at " + path);
        } else {
          LOG.error("Failed to delete descriptor at " + path);
        }
      }
    }
  }

  /**
   * Attempts to write a new table descriptor to the given table's directory.
   * It first writes the file to the .tmp dir, then uses an atomic rename to move it into place.
   * It begins at currentSequenceId + 1 and tries up to 10 times to find a new sequence number
   * not already in use.
   * If a current descriptor file is passed in, it is removed along with any other older
   * files once the new one is in place.
   *
   * @return Descriptor file or null if we failed to write.
   */
  private static Path writeTableDescriptor(final FileSystem fs,
    final HTableDescriptor htd, final Path tableDir,
    final FileStatus currentDescriptorFile)
  throws IOException {
    // Get temporary dir into which we'll first write a file to avoid half-written file phenomenon.
    // This directory is never removed to avoid removing it out from under a concurrent writer.
    Path tmpTableDir = new Path(tableDir, TMP_DIR);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);

    // What is the current sequenceid?  We read it from the current file.  After we read it,
    // another thread could come in and compete with us writing out the next version of the
    // file.  The retries below should help some, but it's hard to make guarantees in the
    // face of concurrent schema edits.
    int currentSequenceId = currentDescriptorFile == null ? 0 :
      getTableInfoSequenceId(currentDescriptorFile.getPath());
    int newSequenceId = currentSequenceId;

    // Put an arbitrary upper bound on how often we retry
    int retries = 10;
    int retrymax = currentSequenceId + retries;
    Path tableInfoDirPath = null;
    do {
      newSequenceId += 1;
      String filename = getTableInfoFileName(newSequenceId);
      Path tempPath = new Path(tmpTableDir, filename);
      if (fs.exists(tempPath)) {
        LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
        continue;
      }
      tableInfoDirPath = new Path(tableInfoDir, filename);
      try {
        writeTD(fs, tempPath, htd);
        fs.mkdirs(tableInfoDirPath.getParent());
        if (!fs.rename(tempPath, tableInfoDirPath)) {
          throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
        }
        LOG.debug("Wrote descriptor into: " + tableInfoDirPath);
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!FSUtils.deleteDirectory(fs, tempPath)) {
          LOG.warn("Failed cleanup of " + tempPath);
        }
        tableInfoDirPath = null;
        continue;
      }
      break;
    } while (newSequenceId < retrymax);
    if (tableInfoDirPath != null) {
      // If we succeeded, remove old table info files.
      deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
    }
    return tableInfoDirPath;
  }

  private static void writeTD(final FileSystem fs, final Path p, final HTableDescriptor htd)
  throws IOException {
    FSDataOutputStream out = fs.create(p, false);
    try {
      // We used to write this file out as a serialized HTD Writable followed by two '\n's and then
      // the toString version of HTD.  Now we just write out the pb serialization.
      out.write(htd.toByteArray());
    } finally {
      out.close();
    }
  }

  /**
   * Create a new HTableDescriptor in HDFS.  Happens when we are creating a table.
   * Used by tests.
   * @return True if we successfully created the file.
   */
  public boolean createTableDescriptor(HTableDescriptor htd) throws IOException {
    return createTableDescriptor(htd, false);
  }

  /**
   * Create a new HTableDescriptor in HDFS.  Happens when we are creating a table.
   * If forceCreation is true, any existing table descriptor is overwritten.
   *
   * @return True if we successfully created the file.
   */
  public boolean createTableDescriptor(HTableDescriptor htd, boolean forceCreation)
  throws IOException {
    Path tableDir = getTableDir(htd.getTableName());
    return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
  }

  /**
   * Create a new HTableDescriptor in HDFS in the specified table directory. Happens when we create
   * a new table or snapshot a table.
   * @param tableDir table directory under which we should write the file
   * @param htd description of the table to write
   * @param forceCreation if <tt>true</tt>, then even if a previous table descriptor is present it
   *          will be overwritten
   * @return <tt>true</tt> if we successfully created the file, <tt>false</tt> if the file
   *         already exists and we weren't forcing the descriptor creation.
   * @throws IOException if a filesystem error occurs
   */
  public boolean createTableDescriptorForTableDirectory(Path tableDir,
      HTableDescriptor htd, boolean forceCreation) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
    }
    FileStatus status = getTableInfoPath(fs, tableDir);
    if (status != null) {
      LOG.debug("Current tableInfoPath = " + status.getPath());
      if (!forceCreation) {
        if (fs.exists(status.getPath()) && status.getLen() > 0) {
          if (readTableDescriptor(fs, status).equals(htd)) {
            LOG.debug("TableInfo already exists; skipping creation");
            return false;
          }
        }
      }
    }
    Path p = writeTableDescriptor(fs, htd, tableDir, status);
    return p != null;
  }

}