/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import edu.umd.cs.findbugs.annotations.Nullable;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableInfoMissingException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.primitives.Ints;

/**
 * Implementation of {@link TableDescriptors} that reads descriptors from the
 * passed filesystem. It expects descriptors to be in a file in the
 * {@link #TABLEINFO_DIR} subdir of the table's directory in the FS. Can be
 * read-only (i.e. it does not modify the filesystem) or read-write.
 *
 * <p>Also has utility for maintaining the table descriptor's tableinfo file.
 * The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
 * of the table directory in the filesystem.
 * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
 * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>. This sequenceid
 * is always increasing. It starts at zero. The table schema file with the
 * highest sequenceid has the most recent schema edit. Usually there is one file
 * only, the most recent, but there may be short periods where there is more
 * than one file. Old files are eventually cleaned. The presumption is that there
 * will not be lots of concurrent clients making table schema edits. If there
 * are, the below needs a bit of reworking and perhaps some supporting api in hdfs.
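 *
 * <p>As a rough usage sketch (the configuration lookup and the table name here
 * are illustrative, not taken from this class), reading a descriptor through
 * the cache looks like:
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * FSTableDescriptors fstd = new FSTableDescriptors(conf);
 * TableDescriptor htd = fstd.get(TableName.valueOf("example"));
 * }</pre>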
 */
@InterfaceAudience.Private
public class FSTableDescriptors implements TableDescriptors {
  private static final Logger LOG = LoggerFactory.getLogger(FSTableDescriptors.class);
  private final FileSystem fs;
  private final Path rootdir;
  private final boolean fsreadonly;
  private final boolean usecache;
  private volatile boolean fsvisited;

  long cachehits = 0;
  long invocations = 0;

  /**
   * The file name prefix used to store HTD in HDFS
   */
  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
  static final String TABLEINFO_DIR = ".tabledesc";
  static final String TMP_DIR = ".tmp";

  // This cache does not age out the old stuff. Thinking is that the amount
  // of data we keep up in here is so small, no need to do occasional purge.
  // TODO.
  private final Map<TableName, TableDescriptor> cache = new ConcurrentHashMap<>();

  /**
   * Construct a FSTableDescriptors instance using the hbase root dir of the given conf and the
   * filesystem where that root dir lives. This instance can do write operations (is not read only).
   */
  public FSTableDescriptors(final Configuration conf) throws IOException {
    this(CommonFSUtils.getCurrentFileSystem(conf), CommonFSUtils.getRootDir(conf));
  }

  public FSTableDescriptors(final FileSystem fs, final Path rootdir) {
    this(fs, rootdir, false, true);
  }

  public FSTableDescriptors(final FileSystem fs, final Path rootdir, final boolean fsreadonly,
      final boolean usecache) {
    this.fs = fs;
    this.rootdir = rootdir;
    this.fsreadonly = fsreadonly;
    this.usecache = usecache;
  }

  public static void tryUpdateMetaTableDescriptor(Configuration conf) throws IOException {
    tryUpdateAndGetMetaTableDescriptor(conf, CommonFSUtils.getCurrentFileSystem(conf),
      CommonFSUtils.getRootDir(conf));
  }

  public static TableDescriptor tryUpdateAndGetMetaTableDescriptor(Configuration conf,
    FileSystem fs, Path rootdir) throws IOException {
    // See if we already have the meta descriptor on fs. Write one if not.
    try {
      return getTableDescriptorFromFs(fs, rootdir, TableName.META_TABLE_NAME);
    } catch (TableInfoMissingException e) {
      TableDescriptorBuilder builder = createMetaTableDescriptorBuilder(conf);
      TableDescriptor td = builder.build();
      LOG.info("Creating new hbase:meta table descriptor {}", td);
      TableName tableName = td.getTableName();
      Path tableDir = CommonFSUtils.getTableDir(rootdir, tableName);
      Path p = writeTableDescriptor(fs, td, tableDir, getTableInfoPath(fs, tableDir, true));
      if (p == null) {
        throw new IOException("Failed to update hbase:meta table descriptor");
      }
      LOG.info("Updated hbase:meta table descriptor to {}", p);
      return td;
    }
  }

  public static TableDescriptorBuilder createMetaTableDescriptorBuilder(final Configuration conf)
    throws IOException {
    // TODO We used to set CacheDataInL1 for the META table. Now that we have BucketCache in file
    // mode, the META table data goes to file-mode BC only. Test how that affects the system. If
    // the impact is too big, we have to rethink adding back setCacheDataInL1 for the META table
    // CFs.
    return TableDescriptorBuilder.newBuilder(TableName.META_TABLE_NAME)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.CATALOG_FAMILY)
        .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
          HConstants.DEFAULT_HBASE_META_VERSIONS))
        .setInMemory(true)
        .setBlocksize(conf.getInt(HConstants.HBASE_META_BLOCK_SIZE,
          HConstants.DEFAULT_HBASE_META_BLOCK_SIZE))
        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
        // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
        .setBloomFilterType(BloomType.NONE)
        .build())
      .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.TABLE_FAMILY)
        .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
          HConstants.DEFAULT_HBASE_META_VERSIONS))
        .setInMemory(true)
        .setBlocksize(8 * 1024)
        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
        // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
        .setBloomFilterType(BloomType.NONE)
        .build())
      .setColumnFamily(ColumnFamilyDescriptorBuilder
        .newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
        .setMaxVersions(HConstants.ALL_VERSIONS)
        .setInMemory(true)
        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
        // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
        .setBloomFilterType(BloomType.NONE)
        .build())
      .setCoprocessor(CoprocessorDescriptorBuilder.newBuilder(
        MultiRowMutationEndpoint.class.getName())
        .setPriority(Coprocessor.PRIORITY_SYSTEM)
        .build());
  }

  protected boolean isUsecache() {
    return this.usecache;
  }

  /**
   * Get the current table descriptor for the given table, or null if none exists.
   * <p/>
   * Uses a local cache of the descriptor but still checks the filesystem on each call if
   * {@link #fsvisited} is not {@code true}, i.e. we haven't done a full scan yet, to see if a
   * newer file has been created since the cached one was read.
   */
  @Override
  @Nullable
  public TableDescriptor get(TableName tableName) {
    invocations++;
    if (usecache) {
      // Look in cache of descriptors.
      TableDescriptor cachedtdm = this.cache.get(tableName);
      if (cachedtdm != null) {
        cachehits++;
        return cachedtdm;
      }
      // If we have already scanned the whole filesystem, there is no need to go back to it.
      if (fsvisited) {
        return null;
      }
    }
    TableDescriptor tdmt = null;
    try {
      tdmt = getTableDescriptorFromFs(fs, rootdir, tableName);
    } catch (TableInfoMissingException e) {
      // Ignore. This is regular operation.
    } catch (NullPointerException | IOException ioe) {
      LOG.debug("Exception during readTableDescriptor. Current table name = " + tableName, ioe);
    }
    // The last HTD written wins.
    if (usecache && tdmt != null) {
      this.cache.put(tableName, tdmt);
    }

    return tdmt;
  }

  /**
   * Returns a map from table name to table descriptor for all tables.
   */
  @Override
  public Map<String, TableDescriptor> getAll() throws IOException {
    Map<String, TableDescriptor> tds = new TreeMap<>();
    if (fsvisited) {
      for (Map.Entry<TableName, TableDescriptor> entry : this.cache.entrySet()) {
        tds.put(entry.getKey().getNameWithNamespaceInclAsString(), entry.getValue());
      }
    } else {
      LOG.trace("Fetching table descriptors from the filesystem.");
      boolean allvisited = usecache;
      for (Path d : FSUtils.getTableDirs(fs, rootdir)) {
        TableDescriptor htd = get(CommonFSUtils.getTableName(d));
        if (htd == null) {
          allvisited = false;
        } else {
          tds.put(htd.getTableName().getNameWithNamespaceInclAsString(), htd);
        }
      }
      fsvisited = allvisited;
    }
    return tds;
  }

  /**
   * Find descriptors by namespace.
   * @see #get(org.apache.hadoop.hbase.TableName)
   */
  @Override
  public Map<String, TableDescriptor> getByNamespace(String name) throws IOException {
    Map<String, TableDescriptor> htds = new TreeMap<>();
    List<Path> tableDirs =
      FSUtils.getLocalTableDirs(fs, CommonFSUtils.getNamespaceDir(rootdir, name));
    for (Path d : tableDirs) {
      TableDescriptor htd = get(CommonFSUtils.getTableName(d));
      if (htd == null) {
        continue;
      }
      htds.put(CommonFSUtils.getTableName(d).getNameAsString(), htd);
    }
    return htds;
  }

  @Override
  public void update(TableDescriptor td, boolean cacheOnly) throws IOException {
    // TODO: in fact this method will only be called at master side, so fsreadonly will always be
    // false and usecache will always be true. In general, we'd better have a
    // ReadOnlyFSTableDescriptors for HRegionServer, but for now, HMaster extends HRegionServer,
    // so unless we make use of generics, we can not have different implementations for HMaster
    // and HRegionServer. Revisit this when we make HMaster not extend HRegionServer in the
    // future.
    if (fsreadonly) {
      throw new UnsupportedOperationException("Cannot add a table descriptor - in read only mode");
    }
    if (!cacheOnly) {
      updateTableDescriptor(td);
    }
    if (usecache) {
      this.cache.put(td.getTableName(), td);
    }
  }

  Path updateTableDescriptor(TableDescriptor td) throws IOException {
    TableName tableName = td.getTableName();
    Path tableDir = getTableDir(tableName);
    Path p = writeTableDescriptor(fs, td, tableDir, getTableInfoPath(tableDir));
    if (p == null) {
      throw new IOException("Failed update");
    }
    LOG.info("Updated tableinfo=" + p);
    return p;
  }

  /**
   * Removes the table descriptor from the local cache and returns it.
   * If not in read only mode, it also deletes the entire table directory(!)
   * from the FileSystem.
   */
  @Override
  public TableDescriptor remove(final TableName tablename) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
    }
    Path tabledir = getTableDir(tablename);
    if (this.fs.exists(tabledir)) {
      if (!this.fs.delete(tabledir, true)) {
        throw new IOException("Failed delete of " + tabledir.toString());
      }
    }
    TableDescriptor descriptor = this.cache.remove(tablename);
    return descriptor;
  }

  private FileStatus getTableInfoPath(Path tableDir) throws IOException {
    return getTableInfoPath(fs, tableDir, !fsreadonly);
  }

  /**
   * Find the most current table info file for the table located in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table
   * info files and takes the 'current' one - meaning the one with the highest sequence number if
   * present, or the one with no sequence number at all if none exist (for backward compatibility
   * from before there were sequence numbers).
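   *
   * <p>For illustration (the table name is hypothetical), given the listing
   * <pre>
   * /hbase/data/default/example/.tabledesc/.tableinfo.0000000002
   * /hbase/data/default/example/.tabledesc/.tableinfo.0000000003
   * </pre>
   * the returned file status would point at {@code .tableinfo.0000000003}.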
   *
   * @return The file status of the current table info file or null if it does not exist
   */
  public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir) throws IOException {
    return getTableInfoPath(fs, tableDir, false);
  }

  /**
   * Find the most current table info file for the table in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table
   * info files and takes the 'current' one - meaning the one with the highest sequence number if
   * present, or the one with no sequence number at all if none exist (for backward compatibility
   * from before there were sequence numbers).
   * If multiple table info files are found and removeOldFiles is true, this also deletes the
   * older files.
   *
   * @return The file status of the current table info file or null if none exist
   */
  private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
      throws IOException {
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
  }

  /**
   * Find the most current table info file in the given directory.
   * <p/>
   * Looks within the given directory for any table info files and takes the 'current' one -
   * meaning the one with the highest sequence number if present, or the one with no sequence
   * number at all if none exist (for backward compatibility from before there were sequence
   * numbers).
   * <p/>
   * If multiple possible files are found and we're not in read only mode, this also deletes the
   * older files.
   * @return The file status of the current table info file or null if it does not exist
   */
  private static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir,
    boolean removeOldFiles) throws IOException {
    FileStatus[] status = CommonFSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null || status.length < 1) {
      return null;
    }
    FileStatus mostCurrent = null;
    for (FileStatus file : status) {
      if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
        mostCurrent = file;
      }
    }
    if (removeOldFiles && status.length > 1) {
      // Clean away old versions
      for (FileStatus file : status) {
        Path path = file.getPath();
        if (!file.equals(mostCurrent)) {
          if (!fs.delete(file.getPath(), false)) {
            LOG.warn("Failed cleanup of " + path);
          } else {
            LOG.debug("Cleaned up old tableinfo file " + path);
          }
        }
      }
    }
    return mostCurrent;
  }

  /**
   * Compare {@link FileStatus} instances by {@link Path#getName()}, in reverse order so that the
   * file with the highest sequenceid sorts first.
   */
  static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
    new Comparator<FileStatus>() {
      @Override
      public int compare(FileStatus left, FileStatus right) {
        return right.compareTo(left);
      }
    };

  /**
   * Return the table directory in HDFS
   */
  Path getTableDir(final TableName tableName) {
    return CommonFSUtils.getTableDir(rootdir, tableName);
  }

  private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      // Accept any file that starts with TABLEINFO_FILE_PREFIX
      return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
    }};

  /**
   * Width of the sequenceid that is a suffix on a tableinfo file.
   */
  static final int WIDTH_OF_SEQUENCE_ID = 10;

  /**
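   * Formats a sequenceid as the zero-padded, {@value #WIDTH_OF_SEQUENCE_ID}-digit suffix used in
   * tableinfo file names. A sketch of the expected behavior (values follow from the padding
   * logic below):
   * <pre>{@code
   * formatTableInfoSequenceId(3);   // "0000000003"
   * formatTableInfoSequenceId(-3);  // also "0000000003"; the absolute value is used
   * }</pre>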
   * @param number Number to use as suffix.
   * @return Zero-prefixed decimal version of the passed number (takes the absolute value in case
   *         the number is negative).
   */
  private static String formatTableInfoSequenceId(final int number) {
    byte[] b = new byte[WIDTH_OF_SEQUENCE_ID];
    int d = Math.abs(number);
    for (int i = b.length - 1; i >= 0; i--) {
      b[i] = (byte) ((d % 10) + '0');
      d /= 10;
    }
    return Bytes.toString(b);
  }

  /**
   * Regex to eat up the sequenceid suffix on a .tableinfo file.
   * Use a regex because we may encounter old-style .tableinfos where there is no
   * sequenceid on the end.
   */
  private static final Pattern TABLEINFO_FILE_REGEX =
    Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");

  /**
   * @param p Path to a <code>.tableinfo</code> file.
   * @return The current edit sequenceid, or 0 if none found.
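   *         As an illustration of the regex above: {@code .tableinfo.0000000003} yields 3,
   *         while a legacy, suffix-less {@code .tableinfo} yields 0.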
   */
  static int getTableInfoSequenceId(final Path p) {
    if (p == null) {
      return 0;
    }
    Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
    if (!m.matches()) {
      throw new IllegalArgumentException(p.toString());
    }
    String suffix = m.group(2);
    if (suffix == null || suffix.length() <= 0) {
      return 0;
    }
    return Integer.parseInt(suffix);
  }

  /**
   * @param sequenceid The sequenceid to use as the file name suffix.
   * @return Name of the tableinfo file, e.g. {@code .tableinfo.0000000003} for sequenceid 3.
   */
  static String getTableInfoFileName(final int sequenceid) {
    return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
  }

  /**
   * Returns the latest table descriptor for the given table directly from the file system,
   * bypassing the local cache.
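   * <p>
   * A minimal usage sketch (the filesystem/root-dir lookups and the table name are
   * illustrative):
   * <pre>{@code
   * FileSystem fs = FileSystem.get(conf);
   * Path rootDir = CommonFSUtils.getRootDir(conf);
   * TableDescriptor td =
   *     FSTableDescriptors.getTableDescriptorFromFs(fs, rootDir, TableName.valueOf("example"));
   * }</pre>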
   * @throws TableInfoMissingException if no table descriptor file exists on the filesystem
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs,
      Path hbaseRootDir, TableName tableName) throws IOException {
    Path tableDir = CommonFSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir);
  }

  /**
   * Returns the latest table descriptor for the table located at the given directory
   * directly from the file system if it exists.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
    throws IOException {
    FileStatus status = getTableInfoPath(fs, tableDir, false);
    if (status == null) {
      throw new TableInfoMissingException("No table descriptor file under " + tableDir);
    }
    return readTableDescriptor(fs, status);
  }

  private static TableDescriptor readTableDescriptor(FileSystem fs, FileStatus status)
      throws IOException {
    int len = Ints.checkedCast(status.getLen());
    byte[] content = new byte[len];
    try (FSDataInputStream fsDataInputStream = fs.open(status.getPath())) {
      fsDataInputStream.readFully(content);
    }
    TableDescriptor htd = null;
    try {
      htd = TableDescriptorBuilder.parseFrom(content);
    } catch (DeserializationException e) {
      // Dump the undecodable content in a printable form to aid debugging.
      throw new IOException("content=" + Bytes.toStringBinary(content), e);
    }
    return htd;
  }

  /**
   * Deletes files matching the table info file pattern within the given directory
   * whose sequenceId is at most the given max sequenceId.
   */
  private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
      throws IOException {
    FileStatus[] status = CommonFSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null) {
      // listStatus returns null when there is nothing under the given directory; nothing to do.
      return;
    }
    for (FileStatus file : status) {
      Path path = file.getPath();
      int sequenceId = getTableInfoSequenceId(path);
      if (sequenceId <= maxSequenceId) {
        boolean success = CommonFSUtils.delete(fs, path, false);
        if (success) {
          LOG.debug("Deleted " + path);
        } else {
          LOG.error("Failed to delete table descriptor at " + path);
        }
      }
    }
  }

  /**
   * Attempts to write a new table descriptor to the given table's directory. It first writes it
   * to the .tmp dir, then uses an atomic rename to move it into place. It begins at
   * currentSequenceId + 1 and tries up to 10 times to find a sequence number not already in use.
   * <p/>
   * Removes the current descriptor file if passed in.
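   * <p>
   * For illustration (a hypothetical table named {@code example}), a successful write moves
   * <pre>
   * /hbase/data/default/example/.tmp/.tableinfo.0000000004
   * </pre>
   * into
   * <pre>
   * /hbase/data/default/example/.tabledesc/.tableinfo.0000000004
   * </pre>
   * and then deletes {@code .tableinfo.0000000003} and any older files.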
   * @return Descriptor file or null if we failed to write.
   */
  private static Path writeTableDescriptor(final FileSystem fs, final TableDescriptor htd,
    final Path tableDir, final FileStatus currentDescriptorFile) throws IOException {
    // Get temporary dir into which we'll first write a file to avoid the half-written file
    // phenomenon. This directory is never removed to avoid removing it out from under a
    // concurrent writer.
    Path tmpTableDir = new Path(tableDir, TMP_DIR);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);

    // What is the current sequenceid? We read it from the current file. After we read it,
    // another thread could come in and compete with us writing out the next version of the
    // file. The retries below should help some in this case, but it is hard to make guarantees
    // in the face of concurrent schema edits.
    int currentSequenceId = currentDescriptorFile == null ? 0 :
      getTableInfoSequenceId(currentDescriptorFile.getPath());
    int newSequenceId = currentSequenceId;

    // Put an arbitrary upper bound on how often we retry
    int retries = 10;
    int retrymax = currentSequenceId + retries;
    Path tableInfoDirPath = null;
    do {
      newSequenceId += 1;
      String filename = getTableInfoFileName(newSequenceId);
      Path tempPath = new Path(tmpTableDir, filename);
      if (fs.exists(tempPath)) {
        LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
        continue;
      }
      tableInfoDirPath = new Path(tableInfoDir, filename);
      try {
        writeTD(fs, tempPath, htd);
        fs.mkdirs(tableInfoDirPath.getParent());
        if (!fs.rename(tempPath, tableInfoDirPath)) {
          throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
        }
        LOG.debug("Wrote into " + tableInfoDirPath);
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!CommonFSUtils.deleteDirectory(fs, tempPath)) {
          LOG.warn("Failed cleanup of " + tempPath);
        }
        tableInfoDirPath = null;
        continue;
      }
      break;
    } while (newSequenceId < retrymax);
    if (tableInfoDirPath != null) {
      // If we succeeded, remove old table info files.
      deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
    }
    return tableInfoDirPath;
  }

  private static void writeTD(final FileSystem fs, final Path p, final TableDescriptor htd)
      throws IOException {
    try (FSDataOutputStream out = fs.create(p, false)) {
      // We used to write this file out as a serialized HTD Writable followed by two '\n's and
      // then the toString version of HTD. Now we just write out the pb serialization.
      out.write(TableDescriptorBuilder.toByteArray(htd));
    }
  }

  /**
   * Create a new TableDescriptor in HDFS. Happens when we are creating a table.
   * Used by tests.
   * @return True if we successfully created the file.
   */
  public boolean createTableDescriptor(TableDescriptor htd) throws IOException {
    return createTableDescriptor(htd, false);
  }

  /**
   * Create a new TableDescriptor in HDFS. Happens when we are creating a table. If
   * forceCreation is true, then even if a previous table descriptor is present it
   * will be overwritten.
   *
   * @return True if we successfully created the file.
   */
  public boolean createTableDescriptor(TableDescriptor htd, boolean forceCreation)
      throws IOException {
    Path tableDir = getTableDir(htd.getTableName());
    return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
  }

  /**
   * Create a new TableDescriptor in HDFS in the specified table directory. Happens when we
   * create a new table during cluster start or in the Clone and Create Table Procedures. Checks
   * the readOnly flag passed on construction.
   * @param tableDir table directory under which we should write the file
   * @param htd description of the table to write
   * @param forceCreation if <tt>true</tt>, then even if a previous table descriptor is present
   *          it will be overwritten
   * @return <tt>true</tt> if we successfully created the file, <tt>false</tt> if the file
   *         already exists and we weren't forcing the descriptor creation.
   * @throws IOException if a filesystem error occurs
   */
  public boolean createTableDescriptorForTableDirectory(Path tableDir, TableDescriptor htd,
      boolean forceCreation) throws IOException {
    if (this.fsreadonly) {
      throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
    }
    return createTableDescriptorForTableDirectory(this.fs, tableDir, htd, forceCreation);
  }

  /**
   * Create a new TableDescriptor in HDFS in the specified table directory. Happens when we
   * create a new table or snapshot a table. Does not enforce read-only; that is for the caller
   * to determine.
   * @param fs Filesystem to use.
   * @param tableDir table directory under which we should write the file
   * @param htd description of the table to write
   * @param forceCreation if <tt>true</tt>, then even if a previous table descriptor is present
   *          it will be overwritten
   * @return <tt>true</tt> if we successfully created the file, <tt>false</tt> if the file
   *         already exists and we weren't forcing the descriptor creation.
   * @throws IOException if a filesystem error occurs
   */
  public static boolean createTableDescriptorForTableDirectory(FileSystem fs, Path tableDir,
      TableDescriptor htd, boolean forceCreation) throws IOException {
    FileStatus status = getTableInfoPath(fs, tableDir);
    if (status != null) {
      LOG.debug("Current path=" + status.getPath());
      if (!forceCreation) {
        if (fs.exists(status.getPath()) && status.getLen() > 0) {
          if (readTableDescriptor(fs, status).equals(htd)) {
            LOG.trace("TableInfo already exists; skipping creation");
            return false;
          }
        }
      }
    }
    return writeTableDescriptor(fs, htd, tableDir, status) != null;
  }
}