/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import edu.umd.cs.findbugs.annotations.Nullable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableInfoMissingException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.primitives.Ints;

/**
 * Implementation of {@link TableDescriptors} that reads descriptors from the
 * passed filesystem. It expects descriptors to be in a file in the
 * {@link #TABLEINFO_DIR} subdir of the table's directory in FS. Can be
 * read-only -- i.e. it does not modify the filesystem -- or read-write.
 *
 * <p>Also has utility for keeping up the table descriptor's tableinfo file.
 * The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
 * of the table directory in the filesystem.
 * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
 * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>. This sequenceid
 * is always increasing. It starts at zero. The table schema file with the
 * highest sequenceid has the most recent schema edit.
 * Usually there is only one file, the most recent, but there may be short
 * periods where there is more than one file. Old files are eventually cleaned.
 * The presumption is that there will not be lots of concurrent clients making
 * table schema edits. If there are, the below needs a bit of reworking and
 * perhaps some supporting api in hdfs.
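 *
 * <p>A minimal usage sketch ({@code conf} and the table name are illustrative):
 * <pre>{@code
 * FSTableDescriptors fstd = new FSTableDescriptors(conf);
 * TableDescriptor htd = fstd.get(TableName.valueOf("t1")); // null if none on fs
 * }</pre>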
 */
@InterfaceAudience.Private
public class FSTableDescriptors implements TableDescriptors {
  private static final Logger LOG = LoggerFactory.getLogger(FSTableDescriptors.class);
  private final FileSystem fs;
  private final Path rootdir;
  private final boolean fsreadonly;
  private volatile boolean usecache;
  private volatile boolean fsvisited;

  @VisibleForTesting
  long cachehits = 0;
  @VisibleForTesting
  long invocations = 0;

  /**
   * The file name prefix used to store HTD in HDFS.
   */
  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
  static final String TABLEINFO_DIR = ".tabledesc";
  static final String TMP_DIR = ".tmp";
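
  // Layout sketch (paths are illustrative): for a table "default:t1" the current
  // descriptor lives at
  //   <rootdir>/data/default/t1/.tabledesc/.tableinfo.0000000003
  // while a new descriptor is first staged under
  //   <rootdir>/data/default/t1/.tmp/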

  // This cache does not age out the old stuff. Thinking is that the amount
  // of data we keep up in here is so small, no need to do occasional purge.
  // TODO.
  private final Map<TableName, TableDescriptor> cache = new ConcurrentHashMap<>();

  /**
   * Construct a FSTableDescriptors instance using the hbase root dir of the given conf and the
   * filesystem where that root dir lives. This instance can do write operations (is not read only).
   */
  public FSTableDescriptors(final Configuration conf) throws IOException {
    this(CommonFSUtils.getCurrentFileSystem(conf), CommonFSUtils.getRootDir(conf));
  }

  public FSTableDescriptors(final FileSystem fs, final Path rootdir) {
    this(fs, rootdir, false, true);
  }

  public FSTableDescriptors(final FileSystem fs, final Path rootdir, final boolean fsreadonly,
      final boolean usecache) {
    this.fs = fs;
    this.rootdir = rootdir;
    this.fsreadonly = fsreadonly;
    this.usecache = usecache;
  }

  public static void tryUpdateMetaTableDescriptor(Configuration conf) throws IOException {
    tryUpdateMetaTableDescriptor(conf, CommonFSUtils.getCurrentFileSystem(conf),
      CommonFSUtils.getRootDir(conf), null);
  }

  public static void tryUpdateMetaTableDescriptor(Configuration conf, FileSystem fs, Path rootdir,
      Function<TableDescriptorBuilder, TableDescriptorBuilder> metaObserver) throws IOException {
    // See if we already have the meta descriptor on fs. Write one if not.
    try {
      getTableDescriptorFromFs(fs, rootdir, TableName.META_TABLE_NAME);
    } catch (TableInfoMissingException e) {
      TableDescriptorBuilder builder = createMetaTableDescriptorBuilder(conf);
      if (metaObserver != null) {
        builder = metaObserver.apply(builder);
      }
      TableDescriptor td = builder.build();
      LOG.info("Creating new hbase:meta table descriptor {}", td);
      TableName tableName = td.getTableName();
      Path tableDir = CommonFSUtils.getTableDir(rootdir, tableName);
      Path p = writeTableDescriptor(fs, td, tableDir, getTableInfoPath(fs, tableDir, true));
      if (p == null) {
        throw new IOException("Failed to update hbase:meta table descriptor");
      }
      LOG.info("Updated hbase:meta table descriptor to {}", p);
    }
  }

  @VisibleForTesting
  public static TableDescriptorBuilder createMetaTableDescriptorBuilder(final Configuration conf)
      throws IOException {
    // TODO We used to set CacheDataInL1 for the META table. With BucketCache in file mode,
    // the META table data now goes to the file-mode BucketCache only. Test how that affects
    // the system. If too much, we have to rethink adding back setCacheDataInL1 for the META
    // table CFs.
    return TableDescriptorBuilder.newBuilder(TableName.META_TABLE_NAME)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.CATALOG_FAMILY)
        .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
          HConstants.DEFAULT_HBASE_META_VERSIONS))
        .setInMemory(true)
        .setBlocksize(conf.getInt(HConstants.HBASE_META_BLOCK_SIZE,
          HConstants.DEFAULT_HBASE_META_BLOCK_SIZE))
        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
        // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
        .setBloomFilterType(BloomType.NONE)
        .build())
      .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.TABLE_FAMILY)
        .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
          HConstants.DEFAULT_HBASE_META_VERSIONS))
        .setInMemory(true)
        .setBlocksize(8 * 1024)
        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
        // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
        .setBloomFilterType(BloomType.NONE)
        .build())
      .setColumnFamily(ColumnFamilyDescriptorBuilder
        .newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
        .setMaxVersions(HConstants.ALL_VERSIONS)
        .setInMemory(true)
        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
        // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
        .setBloomFilterType(BloomType.NONE)
        .build())
      .setCoprocessor(CoprocessorDescriptorBuilder.newBuilder(
        MultiRowMutationEndpoint.class.getName())
        .setPriority(Coprocessor.PRIORITY_SYSTEM)
        .build());
  }

  @Override
  public void setCacheOn() throws IOException {
    this.cache.clear();
    this.usecache = true;
  }

  @Override
  public void setCacheOff() throws IOException {
    this.usecache = false;
    this.cache.clear();
  }

  @VisibleForTesting
  public boolean isUsecache() {
    return this.usecache;
  }

  /**
   * Get the current table descriptor for the given table, or null if none exists.
   *
   * Returns the cached descriptor if caching is enabled and an entry is present;
   * otherwise reads the latest descriptor from the filesystem (and caches it if
   * caching is enabled).
   */
  @Override
  @Nullable
  public TableDescriptor get(final TableName tablename) throws IOException {
    invocations++;
    if (usecache) {
      // Look in cache of descriptors.
      TableDescriptor cachedtdm = this.cache.get(tablename);
      if (cachedtdm != null) {
        cachehits++;
        return cachedtdm;
      }
    }
    TableDescriptor tdmt = null;
    try {
      tdmt = getTableDescriptorFromFs(fs, rootdir, tablename);
    } catch (NullPointerException e) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
          + tablename, e);
    } catch (TableInfoMissingException e) {
      // Ignore. This is regular operation.
    } catch (IOException ioe) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
          + tablename, ioe);
    }
    // last HTD written wins
    if (usecache && tdmt != null) {
      this.cache.put(tablename, tdmt);
    }

    return tdmt;
  }

  /**
   * Returns a map from table name to table descriptor for all tables.
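   * Keys are the full table names as returned by
   * {@link TableName#getNameWithNamespaceInclAsString()}, e.g. {@code default:t1}
   * (the name here is illustrative).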
   */
  @Override
  public Map<String, TableDescriptor> getAll() throws IOException {
    Map<String, TableDescriptor> tds = new TreeMap<>();
    if (fsvisited && usecache) {
      for (Map.Entry<TableName, TableDescriptor> entry: this.cache.entrySet()) {
        tds.put(entry.getKey().getNameWithNamespaceInclAsString(), entry.getValue());
      }
    } else {
      LOG.trace("Fetching table descriptors from the filesystem.");
      boolean allvisited = true;
      for (Path d : FSUtils.getTableDirs(fs, rootdir)) {
        TableDescriptor htd = null;
        try {
          htd = get(CommonFSUtils.getTableName(d));
        } catch (FileNotFoundException fnfe) {
          // Failure to retrieve one HTD shouldn't stop us getting the remaining ones.
          LOG.warn("Trouble retrieving htd", fnfe);
        }
        if (htd == null) {
          allvisited = false;
          continue;
        } else {
          tds.put(htd.getTableName().getNameWithNamespaceInclAsString(), htd);
        }
      }
      // Only record a full visit if every table directory yielded a descriptor.
      fsvisited = allvisited;
    }
    return tds;
  }

  /**
   * Find descriptors by namespace.
   * @see #get(org.apache.hadoop.hbase.TableName)
   */
  @Override
  public Map<String, TableDescriptor> getByNamespace(String name) throws IOException {
    Map<String, TableDescriptor> htds = new TreeMap<>();
    List<Path> tableDirs =
        FSUtils.getLocalTableDirs(fs, CommonFSUtils.getNamespaceDir(rootdir, name));
    for (Path d: tableDirs) {
      TableDescriptor htd = null;
      try {
        htd = get(CommonFSUtils.getTableName(d));
      } catch (FileNotFoundException fnfe) {
        // Failure to retrieve one HTD shouldn't stop us getting the remaining ones.
        LOG.warn("Trouble retrieving htd", fnfe);
      }
      if (htd == null) continue;
      htds.put(CommonFSUtils.getTableName(d).getNameAsString(), htd);
    }
    return htds;
  }

  /**
   * Adds (or updates) the table descriptor to the FileSystem
   * and updates the local cache with it.
   */
  @Override
  public void update(TableDescriptor htd) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
    }
    updateTableDescriptor(htd);
  }

  /**
   * Removes the table descriptor from the local cache and returns it.
   * If not in read only mode, it also deletes the entire table directory(!)
   * from the FileSystem.
   */
  @Override
  public TableDescriptor remove(final TableName tablename) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
    }
    Path tabledir = getTableDir(tablename);
    if (this.fs.exists(tabledir)) {
      if (!this.fs.delete(tabledir, true)) {
        throw new IOException("Failed delete of " + tabledir.toString());
      }
    }
    TableDescriptor descriptor = this.cache.remove(tablename);
    return descriptor;
  }

  private FileStatus getTableInfoPath(Path tableDir) throws IOException {
    return getTableInfoPath(fs, tableDir, !fsreadonly);
  }

  /**
   * Find the most current table info file for the table located in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if
   * present, or the one with no sequence number at all otherwise (for backward compatibility
   * from before there were sequence numbers).
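   * For example, if the directory holds {@code .tableinfo}, {@code .tableinfo.0000000001}
   * and {@code .tableinfo.0000000002}, the {@code .tableinfo.0000000002} file is returned.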
   *
   * @return The file status of the current table info file or null if it does not exist
   */
  public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir) throws IOException {
    return getTableInfoPath(fs, tableDir, false);
  }

  /**
   * Find the most current table info file for the table in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if
   * present, or the one with no sequence number at all otherwise (for backward compatibility
   * from before there were sequence numbers).
   * If multiple table info files are found and removeOldFiles is true, it also deletes the
   * older files.
   *
   * @return The file status of the current table info file or null if none exist
   */
  private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
      throws IOException {
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
  }

  /**
   * Find the most current table info file in the given directory.
   *
   * Looks within the given directory for any table info files
   * and takes the 'current' one - meaning the one with the highest sequence number if present,
   * or the one with no sequence number at all otherwise (for backward compatibility from before
   * there were sequence numbers).
   * If multiple possible files are found and we're not in read only mode, it also deletes the
   * older files.
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  // only visible for FSTableDescriptorMigrationToSubdir, can be removed with that
  static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir, boolean removeOldFiles)
      throws IOException {
    FileStatus[] status = CommonFSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null || status.length < 1) return null;
    FileStatus mostCurrent = null;
    for (FileStatus file : status) {
      if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
        mostCurrent = file;
      }
    }
    if (removeOldFiles && status.length > 1) {
      // Clean away old versions
      for (FileStatus file : status) {
        Path path = file.getPath();
        if (!file.equals(mostCurrent)) {
          if (!fs.delete(file.getPath(), false)) {
            LOG.warn("Failed cleanup of " + path);
          } else {
            LOG.debug("Cleaned up old tableinfo file " + path);
          }
        }
      }
    }
    return mostCurrent;
  }

  /**
   * Compare {@link FileStatus} instances by {@link Path#getName()}. Returns in
   * reverse order.
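   * For example, {@code .tableinfo.0000000002} sorts before {@code .tableinfo.0000000001},
   * so the most current tableinfo file comes first.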
   */
  @VisibleForTesting
  static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
      new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus left, FileStatus right) {
          return right.compareTo(left);
        }
      };

  /**
   * Return the table directory in HDFS
   */
  @VisibleForTesting
  Path getTableDir(final TableName tableName) {
    return CommonFSUtils.getTableDir(rootdir, tableName);
  }

  private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      // Accept any file that starts with TABLEINFO_FILE_PREFIX
      return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
    }
  };

  /**
   * Width of the sequenceid that is a suffix on a tableinfo file.
   */
  @VisibleForTesting static final int WIDTH_OF_SEQUENCE_ID = 10;

  /**
   * @param number Number to use as suffix.
   * @return Returns zero-prefixed decimal version of passed
   * number (Does absolute in case number is negative).
   */
  private static String formatTableInfoSequenceId(final int number) {
    byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
    int d = Math.abs(number);
    for (int i = b.length - 1; i >= 0; i--) {
      b[i] = (byte)((d % 10) + '0');
      d /= 10;
    }
    return Bytes.toString(b);
  }

  /**
   * Regex to eat up sequenceid suffix on a .tableinfo file.
   * Use a regex because we may encounter old-style .tableinfos where there is no
   * sequenceid on the end.
   */
  private static final Pattern TABLEINFO_FILE_REGEX =
    Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");

  /**
   * @param p Path to a <code>.tableinfo</code> file.
   * @return The current editid or 0 if none found.
   */
  @VisibleForTesting static int getTableInfoSequenceId(final Path p) {
    if (p == null) return 0;
    Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
    if (!m.matches()) throw new IllegalArgumentException(p.toString());
    String suffix = m.group(2);
    if (suffix == null || suffix.length() <= 0) return 0;
    return Integer.parseInt(m.group(2));
  }

  /**
   * @param sequenceid
   * @return Name of tableinfo file.
   */
  @VisibleForTesting static String getTableInfoFileName(final int sequenceid) {
    return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
  }
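
  // For example: getTableInfoFileName(3) yields ".tableinfo.0000000003", and
  // getTableInfoSequenceId on that name yields 3 again; an old-style bare
  // ".tableinfo" (no sequence id suffix) yields 0.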

  /**
   * Returns the latest table descriptor for the given table directly from the file system
   * if it exists, bypassing the local cache.
   * Returns null if it's not found.
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs,
      Path hbaseRootDir, TableName tableName) throws IOException {
    Path tableDir = CommonFSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir);
  }

  /**
   * Returns the latest table descriptor for the table located at the given directory
   * directly from the file system if it exists.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
      throws IOException {
    FileStatus status = getTableInfoPath(fs, tableDir, false);
    if (status == null) {
      throw new TableInfoMissingException("No table descriptor file under " + tableDir);
    }
    return readTableDescriptor(fs, status);
  }

  private static TableDescriptor readTableDescriptor(FileSystem fs, FileStatus status)
      throws IOException {
    int len = Ints.checkedCast(status.getLen());
    byte [] content = new byte[len];
    try (FSDataInputStream fsDataInputStream = fs.open(status.getPath())) {
      fsDataInputStream.readFully(content);
    }
    try {
      return TableDescriptorBuilder.parseFrom(content);
    } catch (DeserializationException e) {
      throw new IOException("content=" + Bytes.toStringBinary(content), e);
    }
  }

  /**
   * Update table descriptor on the file system
   * @throws IOException Thrown if failed update.
   * @throws NotImplementedException if in read only mode
   */
  @VisibleForTesting
  Path updateTableDescriptor(TableDescriptor td) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot update a table descriptor - in read only mode");
    }
    TableName tableName = td.getTableName();
    Path tableDir = getTableDir(tableName);
    Path p = writeTableDescriptor(fs, td, tableDir, getTableInfoPath(tableDir));
    if (p == null) {
      throw new IOException("Failed update");
    }
    LOG.info("Updated tableinfo=" + p);
    if (usecache) {
      this.cache.put(td.getTableName(), td);
    }
    return p;
  }

  /**
   * Deletes files matching the table info file pattern within the given directory
   * whose sequenceId is at most the given max sequenceId.
   */
  private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
      throws IOException {
    FileStatus [] status = CommonFSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null) {
      return;
    }
    for (FileStatus file : status) {
      Path path = file.getPath();
      int sequenceId = getTableInfoSequenceId(path);
      if (sequenceId <= maxSequenceId) {
        boolean success = CommonFSUtils.delete(fs, path, false);
        if (success) {
          LOG.debug("Deleted " + path);
        } else {
          LOG.error("Failed to delete table descriptor at " + path);
        }
      }
    }
  }

  /**
   * Attempts to write a new table descriptor to the given table's directory.
   * It first writes it to the .tmp dir, then uses an atomic rename to move it into place.
   * It begins at currentSequenceId + 1 and tries 10 times to find a new sequence number
   * not already in use.
   * Removes the current descriptor file if passed in.
   *
   * @return Descriptor file or null if we failed to write.
   */
  private static Path writeTableDescriptor(final FileSystem fs,
      final TableDescriptor htd, final Path tableDir,
      final FileStatus currentDescriptorFile) throws IOException {
    // Get temporary dir into which we'll first write a file to avoid the half-written file
    // phenomenon. This directory is never removed to avoid removing it out from under a
    // concurrent writer.
    Path tmpTableDir = new Path(tableDir, TMP_DIR);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
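
    // One successful round, as a sketch (assuming the current file is
    // .tableinfo.0000000003):
    //   1. read sequence id 3 off the current file name
    //   2. write the new descriptor to .tmp/.tableinfo.0000000004
    //   3. rename .tmp/.tableinfo.0000000004 into .tabledesc/.tableinfo.0000000004
    //   4. delete .tabledesc/.tableinfo.0000000003 and any older files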

    // What is the current sequenceid? We read it from the current file. After
    // we read it, another thread could come in and compete with us writing out
    // the next version of the file. The below retries should help some in this
    // case, but it's hard to make guarantees in the face of concurrent schema
    // edits.
    int currentSequenceId = currentDescriptorFile == null ? 0 :
      getTableInfoSequenceId(currentDescriptorFile.getPath());
    int newSequenceId = currentSequenceId;

    // Put arbitrary upperbound on how often we retry
    int retries = 10;
    int retrymax = currentSequenceId + retries;
    Path tableInfoDirPath = null;
    do {
      newSequenceId += 1;
      String filename = getTableInfoFileName(newSequenceId);
      Path tempPath = new Path(tmpTableDir, filename);
      if (fs.exists(tempPath)) {
        LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
        continue;
      }
      tableInfoDirPath = new Path(tableInfoDir, filename);
      try {
        writeTD(fs, tempPath, htd);
        fs.mkdirs(tableInfoDirPath.getParent());
        if (!fs.rename(tempPath, tableInfoDirPath)) {
          throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
        }
        LOG.debug("Wrote into " + tableInfoDirPath);
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!CommonFSUtils.deleteDirectory(fs, tempPath)) {
          LOG.warn("Failed cleanup of " + tempPath);
        }
        tableInfoDirPath = null;
        continue;
      }
      break;
    } while (newSequenceId < retrymax);
    if (tableInfoDirPath != null) {
      // If we succeeded, remove old table info files.
      deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
    }
    return tableInfoDirPath;
  }

  private static void writeTD(final FileSystem fs, final Path p, final TableDescriptor htd)
      throws IOException {
    try (FSDataOutputStream out = fs.create(p, false)) {
      // We used to write this file out as a serialized HTD Writable followed by two '\n's and then
      // the toString version of HTD. Now we just write out the pb serialization.
      out.write(TableDescriptorBuilder.toByteArray(htd));
    }
  }

  /**
   * Create new TableDescriptor in HDFS. Happens when we are creating a table.
   * Used by tests.
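   * <p>A minimal sketch (table and family names are illustrative):
   * <pre>{@code
   * TableDescriptor htd = TableDescriptorBuilder.newBuilder(TableName.valueOf("t1"))
   *   .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f1"))
   *   .build();
   * fstd.createTableDescriptor(htd); // false if an equal descriptor already exists
   * }</pre>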
   * @return True if we successfully created the file.
   */
  public boolean createTableDescriptor(TableDescriptor htd) throws IOException {
    return createTableDescriptor(htd, false);
  }

  /**
   * Create new TableDescriptor in HDFS. Happens when we are creating a table. If
   * forceCreation is true, then even if a previous table descriptor is present it
   * will be overwritten.
   *
   * @return True if we successfully created the file.
   */
  public boolean createTableDescriptor(TableDescriptor htd, boolean forceCreation)
      throws IOException {
    Path tableDir = getTableDir(htd.getTableName());
    return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
  }

  /**
   * Create a new TableDescriptor in HDFS in the specified table directory. Happens when we create
   * a new table during cluster start or in the Clone and Create Table Procedures. Checks the
   * readOnly flag passed on construction.
   * @param tableDir table directory under which we should write the file
   * @param htd description of the table to write
   * @param forceCreation if <tt>true</tt>, then even if a previous table descriptor is present it
   *          will be overwritten
   * @return <tt>true</tt> if we successfully created the file, <tt>false</tt> if the file
   *         already exists and we weren't forcing the descriptor creation.
   * @throws IOException if a filesystem error occurs
   */
  public boolean createTableDescriptorForTableDirectory(Path tableDir, TableDescriptor htd,
      boolean forceCreation) throws IOException {
    if (this.fsreadonly) {
      throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
    }
    return createTableDescriptorForTableDirectory(this.fs, tableDir, htd, forceCreation);
  }

  /**
   * Create a new TableDescriptor in HDFS in the specified table directory. Happens when we create
   * a new table or snapshot a table. Does not enforce read-only; that is for the caller to
   * determine.
   * @param fs Filesystem to use.
   * @param tableDir table directory under which we should write the file
   * @param htd description of the table to write
   * @param forceCreation if <tt>true</tt>, then even if a previous table descriptor is present it
   *          will be overwritten
   * @return <tt>true</tt> if we successfully created the file, <tt>false</tt> if the file
   *         already exists and we weren't forcing the descriptor creation.
   * @throws IOException if a filesystem error occurs
   */
  public static boolean createTableDescriptorForTableDirectory(FileSystem fs, Path tableDir,
      TableDescriptor htd, boolean forceCreation) throws IOException {
    FileStatus status = getTableInfoPath(fs, tableDir);
    if (status != null) {
      LOG.debug("Current path=" + status.getPath());
      if (!forceCreation) {
        if (fs.exists(status.getPath()) && status.getLen() > 0) {
          if (readTableDescriptor(fs, status).equals(htd)) {
            LOG.trace("TableInfo already exists; skipping creation");
            return false;
          }
        }
      }
    }
    return writeTableDescriptor(fs, htd, tableDir, status) != null;
  }
}