/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.umd.cs.findbugs.annotations.Nullable;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
import org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.primitives.Ints;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableInfoMissingException;
import org.apache.hadoop.hbase.TableName;

/**
 * Implementation of {@link TableDescriptors} that reads descriptors from the
 * passed filesystem. It expects descriptors to be in a file in the
 * {@link #TABLEINFO_DIR} subdir of the table's directory in FS. It can be
 * read-only -- i.e. it does not modify the filesystem -- or read-write.
 *
 * <p>Also has utility for maintaining the table descriptor's tableinfo file.
 * The table schema file is kept in the {@link #TABLEINFO_DIR} subdir
 * of the table directory in the filesystem.
 * It has a {@link #TABLEINFO_FILE_PREFIX} and then a suffix that is the
 * edit sequenceid: e.g. <code>.tableinfo.0000000003</code>. This sequenceid
 * is always increasing. It starts at zero. The table schema file with the
 * highest sequenceid has the most recent schema edit.
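 *
 * <p>For example, the current descriptor for a table <code>t1</code> in the
 * default namespace would live at a path like the following (an illustrative
 * layout; the actual root dir comes from configuration):
 * <pre>
 * /hbase/data/default/t1/.tabledesc/.tableinfo.0000000003
 * </pre>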
 *
 * <p>Usually there is only one file, the most recent, but there may be short
 * periods where there is more than one file. Old files are eventually cleaned.
 * The presumption is that there will not be lots of concurrent clients making
 * table schema edits. If so, the below needs a bit of a reworking and perhaps
 * some supporting api in hdfs.
 */
@InterfaceAudience.Private
public class FSTableDescriptors implements TableDescriptors {
  private static final Logger LOG = LoggerFactory.getLogger(FSTableDescriptors.class);
  private final FileSystem fs;
  private final Path rootdir;
  private final boolean fsreadonly;
  private volatile boolean usecache;
  private volatile boolean fsvisited;

  @VisibleForTesting
  long cachehits = 0;
  @VisibleForTesting
  long invocations = 0;

  /**
   * The file name prefix used to store HTD in HDFS
   */
  static final String TABLEINFO_FILE_PREFIX = ".tableinfo";
  static final String TABLEINFO_DIR = ".tabledesc";
  static final String TMP_DIR = ".tmp";

  // This cache does not age out the old stuff. Thinking is that the amount
  // of data we keep up in here is so small, no need to do occasional purge.
  // TODO.
  private final Map<TableName, TableDescriptor> cache = new ConcurrentHashMap<>();

  /**
   * Table descriptor for <code>hbase:meta</code> catalog table
   */
  private final TableDescriptor metaTableDescriptor;

  /**
   * Construct a FSTableDescriptors instance using the hbase root dir of the given
   * conf and the filesystem where that root dir lives.
   * This instance can do write operations (is not read only).
   */
  public FSTableDescriptors(final Configuration conf) throws IOException {
    this(conf, FSUtils.getCurrentFileSystem(conf), FSUtils.getRootDir(conf));
  }

  public FSTableDescriptors(final Configuration conf, final FileSystem fs, final Path rootdir)
      throws IOException {
    this(conf, fs, rootdir, false, true);
  }

  /**
   * @param fsreadonly True if we are read-only when it comes to filesystem
   *                   operations; i.e. on remove, we do not do delete in fs.
   */
  public FSTableDescriptors(final Configuration conf, final FileSystem fs,
      final Path rootdir, final boolean fsreadonly, final boolean usecache) throws IOException {
    this(conf, fs, rootdir, fsreadonly, usecache, null);
  }

  /**
   * @param fsreadonly True if we are read-only when it comes to filesystem
   *                   operations; i.e. on remove, we do not do delete in fs.
   * @param metaObserver Used by HMaster. It needs to modify the META_REPLICAS_NUM for the meta
   *                     table descriptor; see HMaster#finishActiveMasterInitialization.
   *                     TODO: This is a workaround. Should remove this ugly code...
   */
  public FSTableDescriptors(final Configuration conf, final FileSystem fs,
      final Path rootdir, final boolean fsreadonly, final boolean usecache,
      Function<TableDescriptorBuilder, TableDescriptorBuilder> metaObserver) throws IOException {
    this.fs = fs;
    this.rootdir = rootdir;
    this.fsreadonly = fsreadonly;
    this.usecache = usecache;
    this.metaTableDescriptor = metaObserver == null ? createMetaTableDescriptor(conf)
      : metaObserver.apply(createMetaTableDescriptorBuilder(conf)).build();
  }
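
  // Typical construction (a minimal sketch; the configuration values and the
  // table name "t1" below are illustrative, not anything this class mandates):
  //
  //   Configuration conf = HBaseConfiguration.create();
  //   FileSystem fs = FSUtils.getCurrentFileSystem(conf);
  //   Path rootdir = FSUtils.getRootDir(conf);
  //   // Read-only, no caching: suitable for inspection tooling.
  //   FSTableDescriptors fstd = new FSTableDescriptors(conf, fs, rootdir, true, false);
  //   TableDescriptor htd = fstd.get(TableName.valueOf("t1"));
  //
  // The HMaster-only metaObserver hook described above might look like this
  // (the replica count of 3 is an assumption for illustration):
  //
  //   new FSTableDescriptors(conf, fs, rootdir, false, true,
  //     builder -> builder.setRegionReplication(3));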

  @VisibleForTesting
  public static TableDescriptorBuilder createMetaTableDescriptorBuilder(final Configuration conf)
      throws IOException {
    // TODO We used to set CacheDataInL1 for META table. When we have BucketCache in file mode,
    // the META table data goes to file-mode BC only. Test how that affects the system. If too
    // much, we have to rethink about adding back the setCacheDataInL1 for META table CFs.
    return TableDescriptorBuilder.newBuilder(TableName.META_TABLE_NAME)
      .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.CATALOG_FAMILY)
        .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
          HConstants.DEFAULT_HBASE_META_VERSIONS))
        .setInMemory(true)
        .setBlocksize(conf.getInt(HConstants.HBASE_META_BLOCK_SIZE,
          HConstants.DEFAULT_HBASE_META_BLOCK_SIZE))
        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
        // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
        .setBloomFilterType(BloomType.NONE)
        .build())
      .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(HConstants.TABLE_FAMILY)
        .setMaxVersions(conf.getInt(HConstants.HBASE_META_VERSIONS,
          HConstants.DEFAULT_HBASE_META_VERSIONS))
        .setInMemory(true)
        .setBlocksize(8 * 1024)
        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
        // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
        .setBloomFilterType(BloomType.NONE)
        .build())
      .setColumnFamily(ColumnFamilyDescriptorBuilder
        .newBuilder(HConstants.REPLICATION_BARRIER_FAMILY)
        .setMaxVersions(HConstants.ALL_VERSIONS)
        .setInMemory(true)
        .setScope(HConstants.REPLICATION_SCOPE_LOCAL)
        // Disable blooms for meta. Needs work. Seems to mess w/ getClosestOrBefore.
        .setBloomFilterType(BloomType.NONE)
        .build())
      .setCoprocessor(CoprocessorDescriptorBuilder.newBuilder(
        MultiRowMutationEndpoint.class.getName())
        .setPriority(Coprocessor.PRIORITY_SYSTEM)
        .build());
  }

  @VisibleForTesting
  public static TableDescriptor createMetaTableDescriptor(final Configuration conf)
      throws IOException {
    return createMetaTableDescriptorBuilder(conf).build();
  }

  @Override
  public void setCacheOn() throws IOException {
    this.cache.clear();
    this.usecache = true;
  }

  @Override
  public void setCacheOff() throws IOException {
    this.usecache = false;
    this.cache.clear();
  }

  @VisibleForTesting
  public boolean isUsecache() {
    return this.usecache;
  }
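
  // Cache toggling, in short (an illustrative sketch; fstd is an instance of
  // this class):
  //
  //   fstd.setCacheOff();  // every get() now goes to the filesystem
  //   fstd.setCacheOn();   // cache cleared, then repopulated lazily by get()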

  /**
   * Get the current table descriptor for the given table, or null if none exists.
   *
   * Uses a local cache of the descriptor but still checks the filesystem on each call
   * to see if a newer file has been created since the cached one was read.
   */
  @Override
  @Nullable
  public TableDescriptor get(final TableName tablename)
      throws IOException {
    invocations++;
    if (TableName.META_TABLE_NAME.equals(tablename)) {
      cachehits++;
      return metaTableDescriptor;
    }
    // hbase:meta is already handled. If someone tries to get the descriptor for
    // .logs, .oldlogs or .corrupt, throw an exception.
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tablename.getNameAsString())) {
      throw new IOException("No descriptor found for non table = " + tablename);
    }

    if (usecache) {
      // Look in cache of descriptors.
      TableDescriptor cachedtdm = this.cache.get(tablename);
      if (cachedtdm != null) {
        cachehits++;
        return cachedtdm;
      }
    }
    TableDescriptor tdmt = null;
    try {
      tdmt = getTableDescriptorFromFs(fs, rootdir, tablename);
    } catch (NullPointerException e) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
        + tablename, e);
    } catch (TableInfoMissingException e) {
      // ignore. This is regular operation
    } catch (IOException ioe) {
      LOG.debug("Exception during readTableDescriptor. Current table name = "
        + tablename, ioe);
    }
    // last HTD written wins
    if (usecache && tdmt != null) {
      this.cache.put(tablename, tdmt);
    }

    return tdmt;
  }

  /**
   * Returns a map from table name to table descriptor for all tables.
   */
  @Override
  public Map<String, TableDescriptor> getAll()
      throws IOException {
    Map<String, TableDescriptor> tds = new TreeMap<>();

    if (fsvisited && usecache) {
      for (Map.Entry<TableName, TableDescriptor> entry: this.cache.entrySet()) {
        tds.put(entry.getKey().getNameWithNamespaceInclAsString(), entry.getValue());
      }
      // add hbase:meta to the response
      tds.put(this.metaTableDescriptor.getTableName().getNameAsString(), metaTableDescriptor);
    } else {
      LOG.trace("Fetching table descriptors from the filesystem.");
      boolean allvisited = true;
      for (Path d : FSUtils.getTableDirs(fs, rootdir)) {
        TableDescriptor htd = null;
        try {
          htd = get(FSUtils.getTableName(d));
        } catch (FileNotFoundException fnfe) {
          // inability to retrieve one HTD shouldn't stop getting the remaining ones
          LOG.warn("Trouble retrieving htd", fnfe);
        }
        if (htd == null) {
          allvisited = false;
          continue;
        } else {
          tds.put(htd.getTableName().getNameWithNamespaceInclAsString(), htd);
        }
        fsvisited = allvisited;
      }
    }
    return tds;
  }

  /**
   * Find descriptors by namespace.
   * @see #get(org.apache.hadoop.hbase.TableName)
   */
  @Override
  public Map<String, TableDescriptor> getByNamespace(String name)
      throws IOException {
    Map<String, TableDescriptor> htds = new TreeMap<>();
    List<Path> tableDirs =
      FSUtils.getLocalTableDirs(fs, FSUtils.getNamespaceDir(rootdir, name));
    for (Path d: tableDirs) {
      TableDescriptor htd = null;
      try {
        htd = get(FSUtils.getTableName(d));
      } catch (FileNotFoundException fnfe) {
        // inability to retrieve one HTD shouldn't stop getting the remaining ones
        LOG.warn("Trouble retrieving htd", fnfe);
      }
      if (htd == null) continue;
      htds.put(FSUtils.getTableName(d).getNameAsString(), htd);
    }
    return htds;
  }

  /**
   * Adds (or updates) the table descriptor to the FileSystem
   * and updates the local cache with it.
   */
  @Override
  public void add(TableDescriptor htd) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot add a table descriptor - in read only mode");
    }
    TableName tableName = htd.getTableName();
    if (TableName.META_TABLE_NAME.equals(tableName)) {
      throw new NotImplementedException(HConstants.NOT_IMPLEMENTED);
    }
    if (HConstants.HBASE_NON_USER_TABLE_DIRS.contains(tableName.getNameAsString())) {
      throw new NotImplementedException(
        "Cannot add a table descriptor for a reserved subdirectory name: "
          + htd.getTableName().getNameAsString());
    }
    updateTableDescriptor(htd);
  }
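
  // Round-trip sketch for add()/get() (assumes a writable instance; the table
  // name "t1" and family "f" are illustrative):
  //
  //   TableDescriptor htd = TableDescriptorBuilder
  //     .newBuilder(TableName.valueOf("t1"))
  //     .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f"))
  //     .build();
  //   fstd.add(htd);  // writes e.g. .tableinfo.0000000001 and caches htd
  //   assert htd.equals(fstd.get(htd.getTableName()));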

  /**
   * Removes the table descriptor from the local cache and returns it.
   * If not in read only mode, it also deletes the entire table directory(!)
   * from the FileSystem.
   */
  @Override
  public TableDescriptor remove(final TableName tablename)
      throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot remove a table descriptor - in read only mode");
    }
    Path tabledir = getTableDir(tablename);
    if (this.fs.exists(tabledir)) {
      if (!this.fs.delete(tabledir, true)) {
        throw new IOException("Failed delete of " + tabledir.toString());
      }
    }
    TableDescriptor descriptor = this.cache.remove(tablename);
    return descriptor;
  }

  /**
   * Checks if a current table info file exists for the given table
   *
   * @param tableName name of table
   * @return true if exists
   * @throws IOException
   */
  public boolean isTableInfoExists(TableName tableName) throws IOException {
    return getTableInfoPath(tableName) != null;
  }

  /**
   * Find the most current table info file for the given table in the hbase root directory.
   * @return The file status of the current table info file or null if it does not exist
   */
  private FileStatus getTableInfoPath(final TableName tableName) throws IOException {
    Path tableDir = getTableDir(tableName);
    return getTableInfoPath(tableDir);
  }

  private FileStatus getTableInfoPath(Path tableDir)
      throws IOException {
    return getTableInfoPath(fs, tableDir, !fsreadonly);
  }

  /**
   * Find the most current table info file for the table located in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if present
   * or no sequence number at all if none exist (for backward compatibility from before there
   * were sequence numbers).
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  public static FileStatus getTableInfoPath(FileSystem fs, Path tableDir)
      throws IOException {
    return getTableInfoPath(fs, tableDir, false);
  }

  /**
   * Find the most current table info file for the table in the given table directory.
   *
   * Looks within the {@link #TABLEINFO_DIR} subdirectory of the given directory for any table info
   * files and takes the 'current' one - meaning the one with the highest sequence number if
   * present or no sequence number at all if none exist (for backward compatibility from before
   * there were sequence numbers).
   * If there are multiple table info files found and removeOldFiles is true, it also deletes the
   * older files.
   *
   * @return The file status of the current table info file or null if none exist
   * @throws IOException
   */
  private static FileStatus getTableInfoPath(FileSystem fs, Path tableDir, boolean removeOldFiles)
      throws IOException {
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    return getCurrentTableInfoStatus(fs, tableInfoDir, removeOldFiles);
  }
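
  // For example, a tool resolving the current descriptor file without deleting
  // anything (the table dir below is illustrative):
  //
  //   FileStatus status = FSTableDescriptors.getTableInfoPath(fs,
  //     new Path("/hbase/data/default/t1"));
  //   // status.getPath() -> .../.tabledesc/.tableinfo.0000000003, or null if none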

  /**
   * Find the most current table info file in the given directory
   *
   * Looks within the given directory for any table info files
   * and takes the 'current' one - meaning the one with the highest sequence number if present
   * or no sequence number at all if none exist (for backward compatibility from before there
   * were sequence numbers).
   * If there are multiple possible files found
   * and we're not in read only mode, it also deletes the older files.
   *
   * @return The file status of the current table info file or null if it does not exist
   * @throws IOException
   */
  // only visible for FSTableDescriptorMigrationToSubdir, can be removed with that
  static FileStatus getCurrentTableInfoStatus(FileSystem fs, Path dir, boolean removeOldFiles)
      throws IOException {
    FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null || status.length < 1) return null;
    FileStatus mostCurrent = null;
    for (FileStatus file : status) {
      if (mostCurrent == null || TABLEINFO_FILESTATUS_COMPARATOR.compare(file, mostCurrent) < 0) {
        mostCurrent = file;
      }
    }
    if (removeOldFiles && status.length > 1) {
      // Clean away old versions
      for (FileStatus file : status) {
        Path path = file.getPath();
        if (!file.equals(mostCurrent)) {
          if (!fs.delete(file.getPath(), false)) {
            LOG.warn("Failed cleanup of " + path);
          } else {
            LOG.debug("Cleaned up old tableinfo file " + path);
          }
        }
      }
    }
    return mostCurrent;
  }

  /**
   * Compare {@link FileStatus} instances by {@link Path#getName()}. Returns in
   * reverse order.
   */
  @VisibleForTesting
  static final Comparator<FileStatus> TABLEINFO_FILESTATUS_COMPARATOR =
    new Comparator<FileStatus>() {
      @Override
      public int compare(FileStatus left, FileStatus right) {
        return right.compareTo(left);
      }
    };

  /**
   * Return the table directory in HDFS
   */
  @VisibleForTesting Path getTableDir(final TableName tableName) {
    return FSUtils.getTableDir(rootdir, tableName);
  }

  private static final PathFilter TABLEINFO_PATHFILTER = new PathFilter() {
    @Override
    public boolean accept(Path p) {
      // Accept any file that starts with TABLEINFO_NAME
      return p.getName().startsWith(TABLEINFO_FILE_PREFIX);
    }
  };

  /**
   * Width of the sequenceid that is a suffix on a tableinfo file.
   */
  @VisibleForTesting static final int WIDTH_OF_SEQUENCE_ID = 10;

  /*
   * @param number Number to use as suffix.
   * @return Returns zero-prefixed decimal version of passed
   *   number (takes the absolute value in case the number is negative).
   */
  private static String formatTableInfoSequenceId(final int number) {
    byte [] b = new byte[WIDTH_OF_SEQUENCE_ID];
    int d = Math.abs(number);
    for (int i = b.length - 1; i >= 0; i--) {
      b[i] = (byte)((d % 10) + '0');
      d /= 10;
    }
    return Bytes.toString(b);
  }

  /**
   * Regex to eat up sequenceid suffix on a .tableinfo file.
   * Use regex because may encounter oldstyle .tableinfos where there is no
   * sequenceid on the end.
   */
  private static final Pattern TABLEINFO_FILE_REGEX =
    Pattern.compile(TABLEINFO_FILE_PREFIX + "(\\.([0-9]{" + WIDTH_OF_SEQUENCE_ID + "}))?$");

  /**
   * @param p Path to a <code>.tableinfo</code> file.
   * @return The current editid or 0 if none found.
   */
  @VisibleForTesting static int getTableInfoSequenceId(final Path p) {
    if (p == null) return 0;
    Matcher m = TABLEINFO_FILE_REGEX.matcher(p.getName());
    if (!m.matches()) throw new IllegalArgumentException(p.toString());
    String suffix = m.group(2);
    if (suffix == null || suffix.length() <= 0) return 0;
    return Integer.parseInt(m.group(2));
  }
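
  // For example (illustrative values):
  //   getTableInfoFileName(3)                                   -> ".tableinfo.0000000003"
  //   getTableInfoSequenceId(new Path(".tableinfo.0000000003")) -> 3
  //   getTableInfoSequenceId(new Path(".tableinfo"))            -> 0 (old-style, no suffix)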

  /**
   * @param sequenceid
   * @return Name of tableinfo file.
   */
  @VisibleForTesting static String getTableInfoFileName(final int sequenceid) {
    return TABLEINFO_FILE_PREFIX + "." + formatTableInfoSequenceId(sequenceid);
  }

  /**
   * Returns the latest table descriptor for the given table directly from the file system,
   * bypassing the local cache.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs,
      Path hbaseRootDir, TableName tableName) throws IOException {
    Path tableDir = FSUtils.getTableDir(hbaseRootDir, tableName);
    return getTableDescriptorFromFs(fs, tableDir);
  }

  /**
   * Returns the latest table descriptor for the table located at the given directory
   * directly from the file system.
   * @throws TableInfoMissingException if there is no descriptor
   */
  public static TableDescriptor getTableDescriptorFromFs(FileSystem fs, Path tableDir)
      throws IOException {
    FileStatus status = getTableInfoPath(fs, tableDir, false);
    if (status == null) {
      throw new TableInfoMissingException("No table descriptor file under " + tableDir);
    }
    return readTableDescriptor(fs, status);
  }

  private static TableDescriptor readTableDescriptor(FileSystem fs, FileStatus status)
      throws IOException {
    int len = Ints.checkedCast(status.getLen());
    byte [] content = new byte[len];
    FSDataInputStream fsDataInputStream = fs.open(status.getPath());
    try {
      fsDataInputStream.readFully(content);
    } finally {
      fsDataInputStream.close();
    }
    TableDescriptor htd = null;
    try {
      htd = TableDescriptorBuilder.parseFrom(content);
    } catch (DeserializationException e) {
      throw new IOException("content=" + Bytes.toShort(content), e);
    }
    return htd;
  }

  /**
   * Update table descriptor on the file system
   * @throws IOException Thrown if failed update.
   * @throws NotImplementedException if in read only mode
   */
  @VisibleForTesting Path updateTableDescriptor(TableDescriptor td)
      throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot update a table descriptor - in read only mode");
    }
    TableName tableName = td.getTableName();
    Path tableDir = getTableDir(tableName);
    Path p = writeTableDescriptor(fs, td, tableDir, getTableInfoPath(tableDir));
    if (p == null) throw new IOException("Failed update");
    LOG.info("Updated tableinfo=" + p);
    if (usecache) {
      this.cache.put(td.getTableName(), td);
    }
    return p;
  }

  /**
   * Deletes all the table descriptor files from the file system.
   * Used in unit tests only.
   * @throws NotImplementedException if in read only mode
   */
  public void deleteTableDescriptorIfExists(TableName tableName) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot delete a table descriptor - in read only mode");
    }

    Path tableDir = getTableDir(tableName);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);
    deleteTableDescriptorFiles(fs, tableInfoDir, Integer.MAX_VALUE);
  }

  /**
   * Deletes files matching the table info file pattern within the given directory
   * whose sequenceId is at most the given max sequenceId.
   */
  private static void deleteTableDescriptorFiles(FileSystem fs, Path dir, int maxSequenceId)
      throws IOException {
    FileStatus [] status = FSUtils.listStatus(fs, dir, TABLEINFO_PATHFILTER);
    if (status == null) {
      // listStatus can return null when the dir does not exist yet; nothing to delete then.
      return;
    }
    for (FileStatus file : status) {
      Path path = file.getPath();
      int sequenceId = getTableInfoSequenceId(path);
      if (sequenceId <= maxSequenceId) {
        boolean success = FSUtils.delete(fs, path, false);
        if (success) {
          LOG.debug("Deleted " + path);
        } else {
          LOG.error("Failed to delete table descriptor at " + path);
        }
      }
    }
  }

  /**
   * Attempts to write a new table descriptor to the given table's directory.
   * It first writes it to the .tmp dir, then uses an atomic rename to move it into place.
   * It begins at the currentSequenceId + 1 and tries 10 times to find a new sequence number
   * not already in use.
   * Removes the current descriptor file if passed in.
   *
   * @return Descriptor file or null if we failed write.
   */
  private static Path writeTableDescriptor(final FileSystem fs,
      final TableDescriptor htd, final Path tableDir,
      final FileStatus currentDescriptorFile)
      throws IOException {
    // Get temporary dir into which we'll first write a file to avoid half-written file phenomenon.
    // This directory is never removed to avoid removing it out from under a concurrent writer.
    Path tmpTableDir = new Path(tableDir, TMP_DIR);
    Path tableInfoDir = new Path(tableDir, TABLEINFO_DIR);

    // What is the current sequenceid? We read it from the current file. After we
    // read it, another thread could come in and compete with us writing out the
    // next version of the file. The below retries should help some in this case,
    // but it is hard to do guarantees in the face of concurrent schema edits.
    int currentSequenceId = currentDescriptorFile == null ? 0 :
      getTableInfoSequenceId(currentDescriptorFile.getPath());
    int newSequenceId = currentSequenceId;

    // Put an arbitrary upper bound on how often we retry
    int retries = 10;
    int retrymax = currentSequenceId + retries;
    Path tableInfoDirPath = null;
    do {
      newSequenceId += 1;
      String filename = getTableInfoFileName(newSequenceId);
      Path tempPath = new Path(tmpTableDir, filename);
      if (fs.exists(tempPath)) {
        LOG.debug(tempPath + " exists; retrying up to " + retries + " times");
        continue;
      }
      tableInfoDirPath = new Path(tableInfoDir, filename);
      try {
        writeTD(fs, tempPath, htd);
        fs.mkdirs(tableInfoDirPath.getParent());
        if (!fs.rename(tempPath, tableInfoDirPath)) {
          throw new IOException("Failed rename of " + tempPath + " to " + tableInfoDirPath);
        }
        LOG.debug("Wrote into " + tableInfoDirPath);
      } catch (IOException ioe) {
        // Presume clash of names or something; go around again.
        LOG.debug("Failed write and/or rename; retrying", ioe);
        if (!FSUtils.deleteDirectory(fs, tempPath)) {
          LOG.warn("Failed cleanup of " + tempPath);
        }
        tableInfoDirPath = null;
        continue;
      }
      break;
    } while (newSequenceId < retrymax);
    if (tableInfoDirPath != null) {
      // if we succeeded, remove old table info files.
      deleteTableDescriptorFiles(fs, tableInfoDir, newSequenceId - 1);
    }
    return tableInfoDirPath;
  }
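
  // The write path above, in short (an illustrative sequence, assuming the
  // current sequenceid is 3):
  //   1. write  <tableDir>/.tmp/.tableinfo.0000000004
  //   2. rename <tableDir>/.tmp/.tableinfo.0000000004
  //        to   <tableDir>/.tabledesc/.tableinfo.0000000004   (atomic in HDFS)
  //   3. delete <tableDir>/.tabledesc/.tableinfo.0000000003 and any older files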

  private static void writeTD(final FileSystem fs, final Path p, final TableDescriptor htd)
      throws IOException {
    FSDataOutputStream out = fs.create(p, false);
    try {
      // We used to write this file out as a serialized HTD Writable followed by two '\n's and then
      // the toString version of HTD. Now we just write out the pb serialization.
      out.write(TableDescriptorBuilder.toByteArray(htd));
    } finally {
      out.close();
    }
  }

  /**
   * Create new TableDescriptor in HDFS. Happens when we are creating a table.
   * Used by tests.
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(TableDescriptor htd) throws IOException {
    return createTableDescriptor(htd, false);
  }

  /**
   * Create new TableDescriptor in HDFS. Happens when we are creating a table. If
   * forceCreation is true, then even if a previous table descriptor is present it
   * will be overwritten.
   *
   * @return True if we successfully created file.
   */
  public boolean createTableDescriptor(TableDescriptor htd, boolean forceCreation)
      throws IOException {
    Path tableDir = getTableDir(htd.getTableName());
    return createTableDescriptorForTableDirectory(tableDir, htd, forceCreation);
  }

  /**
   * Create a new TableDescriptor in HDFS in the specified table directory. Happens when we create
   * a new table or snapshot a table.
   * @param tableDir table directory under which we should write the file
   * @param htd description of the table to write
   * @param forceCreation if <tt>true</tt>, then even if a previous table descriptor is present it
   *          will be overwritten
   * @return <tt>true</tt> if we successfully created the file, <tt>false</tt> if the file
   *         already exists and we weren't forcing the descriptor creation.
   * @throws IOException if a filesystem error occurs
   */
  public boolean createTableDescriptorForTableDirectory(Path tableDir,
      TableDescriptor htd, boolean forceCreation) throws IOException {
    if (fsreadonly) {
      throw new NotImplementedException("Cannot create a table descriptor - in read only mode");
    }
    FileStatus status = getTableInfoPath(fs, tableDir);
    if (status != null) {
      LOG.debug("Current path=" + status.getPath());
      if (!forceCreation) {
        if (fs.exists(status.getPath()) && status.getLen() > 0) {
          if (readTableDescriptor(fs, status).equals(htd)) {
            LOG.trace("TableInfo already exists. Skipping creation");
            return false;
          }
        }
      }
    }
    Path p = writeTableDescriptor(fs, htd, tableDir, status);
    return p != null;
  }

}