/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.regionserver;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HDFSBlocksDistribution;
import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.HalfStoreFileReader;
import org.apache.hadoop.hbase.io.Reference;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFileInfo;
import org.apache.hadoop.hbase.io.hfile.ReaderContext;
import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType;
import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Describe a StoreFile (hfile, reference, link).
 * <p>
 * An instance is classified by its {@code reference} and {@code link} fields: a plain hfile has
 * neither, a link has only {@code link}, a reference has {@code reference}, and a reference to
 * a linked file has both.
 */
@InterfaceAudience.Private
public class StoreFileInfo {
  private static final Logger LOG = LoggerFactory.getLogger(StoreFileInfo.class);

  /**
   * A non-capture group, for hfiles, so that this can be embedded.
   * HFile names are lowercase hex strings ([0-9a-f]+). Bulk loaded hfiles have
   * (_SeqId_[0-9]+_) as suffix. The mob del file has (_del) as suffix.
   */
  public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?";

  /** Regex that will work for hfiles */
  private static final Pattern HFILE_NAME_PATTERN =
    Pattern.compile("^(" + HFILE_NAME_REGEX + ")");

  /**
   * A non-capture group, for del files, so that this can be embedded.
   * A del file has (_del) as suffix.
   */
  public static final String DELFILE_NAME_REGEX = "[0-9a-f]+(?:_del)";

  /** Regex that will work for del files */
  private static final Pattern DELFILE_NAME_PATTERN =
    Pattern.compile("^(" + DELFILE_NAME_REGEX + ")");

  /**
   * Regex that will work for straight reference names ({@code <hfile>.<parentEncRegion>})
   * and hfilelink reference names ({@code <table>=<region>-<hfile>.<parentEncRegion>})
   * If reference, then the regex has more than just one group.
   * Group 1, hfile/hfilelink pattern, is this file's id.
   * Group 2 '(.+)' is the reference's parent region name.
   */
  private static final Pattern REF_NAME_PATTERN =
    Pattern.compile(String.format("^(%s|%s)\\.(.+)$",
      HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX));

  public static final String STORE_FILE_READER_NO_READAHEAD = "hbase.store.reader.no-readahead";
  public static final boolean DEFAULT_STORE_FILE_READER_NO_READAHEAD = false;

  // Configuration
  private final Configuration conf;

  // FileSystem handle
  private final FileSystem fs;

  // HDFS blocks distribution information; stays null until initHDFSBlocksDistribution() runs
  private HDFSBlocksDistribution hdfsBlocksDistribution = null;

  // Populated by initHFileInfo(); null before that call
  private HFileInfo hfileInfo;

  // If this storefile references another, this is the reference instance.
  private final Reference reference;

  // If this storefile is a link to another, this is the link instance.
  private final HFileLink link;

  private final Path initialPath;

  private RegionCoprocessorHost coprocessorHost;

  // timestamp on when the file was created, is 0 and ignored for reference or link files
  private long createdTimestamp;

  // Only assigned by the path-based constructor for plain hfiles; otherwise left at 0
  private long size;

  private final boolean primaryReplica;

  private final boolean noReadahead;

  // Counter that is incremented every time a scanner is created on the
  // store file. It is decremented when the scan on the store file is
  // done.
  final AtomicInteger refCount = new AtomicInteger(0);

  /**
   * Create a Store File Info
   * @param conf the {@link Configuration} to use
   * @param fs The current file system to use.
   * @param initialPath The {@link Path} of the file
   * @param primaryReplica true if this is a store file for primary replica, otherwise false.
   * @throws IOException if the path does not look like a store file, or reading the
   *           reference / file status fails
   */
  public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath,
      final boolean primaryReplica) throws IOException {
    this(conf, fs, null, initialPath, primaryReplica);
  }

  /**
   * Shared path-based constructor: classifies {@code initialPath} by its name as a link, a
   * reference (possibly to a link) or a plain hfile and initializes the fields accordingly.
   * @param fileStatus optional pre-fetched status of {@code initialPath}; only consulted for
   *          plain hfiles, and looked up from {@code fs} when null
   */
  private StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
      final Path initialPath, final boolean primaryReplica) throws IOException {
    assert fs != null;
    assert initialPath != null;
    assert conf != null;

    this.fs = fs;
    this.conf = conf;
    this.initialPath = initialPath;
    this.primaryReplica = primaryReplica;
    this.noReadahead = this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD,
      DEFAULT_STORE_FILE_READER_NO_READAHEAD);
    Path p = initialPath;
    if (HFileLink.isHFileLink(p)) {
      // HFileLink
      this.reference = null;
      this.link = HFileLink.buildFromHFileLinkPattern(conf, p);
      LOG.trace("{} is a link", p);
    } else if (isReference(p)) {
      this.reference = Reference.read(fs, p);
      Path referencePath = getReferredToFile(p);
      if (HFileLink.isHFileLink(referencePath)) {
        // HFileLink Reference
        this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath);
      } else {
        // Reference
        this.link = null;
      }
      LOG.trace("{} is a {} reference to {}", p, reference.getFileRegion(), referencePath);
    } else if (isHFile(p)) {
      // HFile: timestamp and size come from the supplied status, or from the filesystem
      FileStatus hfileStatus = (fileStatus != null) ? fileStatus : fs.getFileStatus(initialPath);
      this.createdTimestamp = hfileStatus.getModificationTime();
      this.size = hfileStatus.getLen();
      this.reference = null;
      this.link = null;
    } else {
      throw new IOException("path=" + p + " doesn't look like a valid StoreFile");
    }
  }

  /**
   * Create a Store File Info; the instance is marked as belonging to the primary replica.
   * @param conf the {@link Configuration} to use
   * @param fs The current file system to use.
   * @param fileStatus The {@link FileStatus} of the file
   * @throws IOException if the path does not look like a store file, or reading the
   *           reference fails
   */
  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus)
      throws IOException {
    this(conf, fs, fileStatus, fileStatus.getPath(), true);
  }

  /**
   * Create a Store File Info from an HFileLink
   * @param conf The {@link Configuration} to use
   * @param fs The current file system to use
   * @param fileStatus The {@link FileStatus} of the file
   * @param link The link instance
   */
  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
      final HFileLink link) {
    this(conf, fs, fileStatus, null, link);
  }

  /**
   * Create a Store File Info from a Reference
   * @param conf The {@link Configuration} to use
   * @param fs The current file system to use
   * @param fileStatus The {@link FileStatus} of the file
   * @param reference The reference instance
   */
  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
      final Reference reference) {
    this(conf, fs, fileStatus, reference, null);
  }

  /**
   * Create a Store File Info from an HFileLink and a Reference.
   * <p>
   * The instance is never marked as primary replica, and {@code size} is not populated, so
   * {@link #getSize()} returns 0 for instances built this way.
   * @param conf The {@link Configuration} to use
   * @param fs The current file system to use
   * @param fileStatus The {@link FileStatus} of the file; when null the path is null and the
   *          created timestamp is 0
   * @param reference The reference instance
   * @param link The link instance
   */
  public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus,
      final Reference reference, final HFileLink link) {
    this.fs = fs;
    this.conf = conf;
    this.primaryReplica = false;
    this.initialPath = (fileStatus == null) ? null : fileStatus.getPath();
    this.createdTimestamp = (fileStatus == null) ? 0 : fileStatus.getModificationTime();
    this.reference = reference;
    this.link = link;
    this.noReadahead = this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD,
      DEFAULT_STORE_FILE_READER_NO_READAHEAD);
  }

  /**
   * Size of the Hfile
   * @return size as recorded at construction time; 0 when it was never recorded
   */
  public long getSize() {
    return size;
  }

  /**
   * Sets the region coprocessor env.
   * @param coprocessorHost host whose pre/post store-file-reader-open hooks are invoked by
   *          this instance; may be null, in which case the hooks are skipped
   */
  public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) {
    this.coprocessorHost = coprocessorHost;
  }

  /**
   * @return the Reference object associated to this StoreFileInfo.
   *         null if the StoreFile is not a reference.
   */
  public Reference getReference() {
    return this.reference;
  }

  /** @return True if the store file is a Reference */
  public boolean isReference() {
    return this.reference != null;
  }

  /** @return True if the store file is a top Reference */
  public boolean isTopReference() {
    return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion());
  }

  /** @return True if the store file is a link (and not a reference to a link) */
  public boolean isLink() {
    return this.link != null && this.reference == null;
  }

  /** @return the HDFS block distribution; null until it has been initialized */
  public HDFSBlocksDistribution getHDFSBlockDistribution() {
    return this.hdfsBlocksDistribution;
  }

  /**
   * Creates the reader for this store file: a {@link HalfStoreFileReader} when this is a
   * reference, a plain {@link StoreFileReader} otherwise.
   */
  StoreFileReader createReader(ReaderContext context, CacheConfig cacheConf)
      throws IOException {
    if (this.reference != null) {
      return new HalfStoreFileReader(context, hfileInfo, cacheConf, reference, refCount, conf);
    }
    return new StoreFileReader(context, hfileInfo, cacheConf, refCount, conf);
  }

  /**
   * Opens the underlying file (link target, referred-to file or the file itself) and builds the
   * {@link ReaderContext} describing it.
   * @throws FileNotFoundException for a reference whose referred-to file cannot be opened; the
   *           message is this object's {@link #toString()} so the reference itself is named
   */
  ReaderContext createReaderContext(boolean doDropBehind, long readahead, ReaderType type)
      throws IOException {
    // NOTE(review): in every branch the stream wrapper is created before the file status is
    // fetched; if the status lookup throws, the wrapper is not closed here -- confirm whether
    // that can leak an open stream.
    FSDataInputStreamWrapper in;
    FileStatus status;
    if (this.link != null) {
      // HFileLink
      in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind, readahead);
      status = this.link.getFileStatus(fs);
    } else if (this.reference != null) {
      // HFile Reference
      Path referencePath = getReferredToFile(this.getPath());
      try {
        in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead);
      } catch (FileNotFoundException fnfe) {
        // Intercept the exception so can insert more info about the Reference; otherwise
        // exception just complains about some random file -- operator doesn't realize it
        // other end of a Reference
        FileNotFoundException newFnfe = new FileNotFoundException(toString());
        newFnfe.initCause(fnfe);
        throw newFnfe;
      }
      status = fs.getFileStatus(referencePath);
    } else {
      in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead);
      status = fs.getFileStatus(initialPath);
    }
    long length = status.getLen();
    ReaderContextBuilder contextBuilder = new ReaderContextBuilder()
      .withInputStreamWrapper(in)
      .withFileSize(length)
      .withPrimaryReplicaReader(this.primaryReplica)
      .withReaderType(type)
      .withFileSystem(fs);
    if (this.reference != null) {
      // A reference keeps its own path rather than the path of the file it refers to
      contextBuilder.withFilePath(this.getPath());
    } else {
      contextBuilder.withFilePath(status.getPath());
    }
    return contextBuilder.build();
  }

  /**
   * Compute the HDFS Block Distribution for this StoreFile
   * @param fs The file system to use
   * @return the computed distribution
   * @throws FileNotFoundException if this is a link and every attempt failed to find the file
   */
  public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs)
      throws IOException {
    if (this.link == null) {
      return computeHDFSBlocksDistributionInternal(fs);
    }
    // guard against the case where we get the FileStatus from link, but by the time we
    // call compute the file is moved again: retry once per link location
    FileNotFoundException lastFailure = null;
    final int attempts = this.link.getLocations().length;
    for (int i = 0; i < attempts; i++) {
      try {
        return computeHDFSBlocksDistributionInternal(fs);
      } catch (FileNotFoundException ex) {
        // try the other location
        lastFailure = ex;
      }
    }
    throw notFound(lastFailure);
  }

  private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs)
      throws IOException {
    FileStatus status = getReferencedFileStatus(fs);
    if (this.reference != null) {
      return computeRefFileHDFSBlockDistribution(fs, reference, status);
    }
    return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen());
  }

  /**
   * Get the {@link FileStatus} of the file referenced by this StoreFileInfo
   * @param fs The current file system to use.
   * @return The {@link FileStatus} of the file referenced by this StoreFileInfo
   * @throws FileNotFoundException if this is a link and every attempt failed to find the file
   */
  public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException {
    if (this.link != null) {
      // HFileLink, or Reference to an HFileLink: both resolve through the link
      return getLinkFileStatus(fs);
    }
    if (this.reference != null) {
      // HFile Reference
      return fs.getFileStatus(getReferredToFile(this.getPath()));
    }
    return fs.getFileStatus(initialPath);
  }

  /**
   * Asks the link for its file status, retrying once per link location when the file is
   * reported missing. Must only be called when {@code link} is non-null.
   */
  private FileStatus getLinkFileStatus(final FileSystem fs) throws IOException {
    FileNotFoundException lastFailure = null;
    final int attempts = this.link.getLocations().length;
    for (int i = 0; i < attempts; i++) {
      try {
        return this.link.getFileStatus(fs);
      } catch (FileNotFoundException ex) {
        // try the other location
        lastFailure = ex;
      }
    }
    throw notFound(lastFailure);
  }

  /**
   * Returns the exception to raise after the link retry loop: the last failure seen, or a fresh
   * {@link FileNotFoundException} when the loop never ran because the link has no locations
   * (previously that case ended in {@code throw null}, i.e. a bare NullPointerException).
   */
  private FileNotFoundException notFound(final FileNotFoundException lastFailure) {
    if (lastFailure != null) {
      return lastFailure;
    }
    return new FileNotFoundException("No locations available for link " + this.link);
  }

  /** @return The {@link Path} of the file */
  public Path getPath() {
    return initialPath;
  }

  /** @return The {@link FileStatus} of the file */
  public FileStatus getFileStatus() throws IOException {
    return getReferencedFileStatus(fs);
  }

  /** @return Get the modification time of the file. */
  public long getModificationTime() throws IOException {
    return getFileStatus().getModificationTime();
  }

  @Override
  public String toString() {
    return this.getPath() +
      (isReference() ? "->" + getReferredToFile(this.getPath()) + "-" + reference : "");
  }

  /**
   * @param path Path to check.
   * @return True if the path has format of a HFile.
   */
  public static boolean isHFile(final Path path) {
    return isHFile(path.getName());
  }

  /**
   * @param fileName file name to check.
   * @return True if the file name has format of a HFile.
   */
  public static boolean isHFile(final String fileName) {
    // The group count is a property of the compiled pattern (always >= 1 here), so a full
    // match is the only condition that can vary.
    return HFILE_NAME_PATTERN.matcher(fileName).matches();
  }

  /**
   * @param path Path to check.
   * @return True if the path has format of a del file.
   */
  public static boolean isDelFile(final Path path) {
    return isDelFile(path.getName());
  }

  /**
   * @param fileName String version of path to validate.
   * @return True if the file name has format of a del file.
   */
  public static boolean isDelFile(final String fileName) {
    return DELFILE_NAME_PATTERN.matcher(fileName).matches();
  }

  /**
   * @param path Path to check.
   * @return True if the path has format of a HStoreFile reference.
   */
  public static boolean isReference(final Path path) {
    return isReference(path.getName());
  }

  /**
   * @param name file name to check.
   * @return True if the path has format of a HStoreFile reference.
   */
  public static boolean isReference(final String name) {
    // REF_NAME_PATTERN always declares at least two groups, so matching is sufficient.
    return REF_NAME_PATTERN.matcher(name).matches();
  }

  /**
   * @return timestamp when this file was created (as returned by filesystem)
   */
  public long getCreatedTimestamp() {
    return createdTimestamp;
  }

  /**
   * Return path to the file referred to by a Reference. Presumes a directory
   * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>.
   * @param p Path to a Reference file.
   * @return Calculated path to parent region file.
   * @throws IllegalArgumentException when path regex fails to match.
   */
  public static Path getReferredToFile(final Path p) {
    Matcher m = REF_NAME_PATTERN.matcher(p.getName());
    if (!m.matches()) {
      LOG.warn("Failed match of store file name {}", p.toString());
      throw new IllegalArgumentException("Failed match of store file name " +
        p.toString());
    }

    // Other region name is suffix on the passed Reference file name
    String otherRegion = m.group(2);
    // Tabledir is three levels up from the Reference file: family dir, region dir, table dir.
    Path tableDir = p.getParent().getParent().getParent();
    String nameStrippedOfSuffix = m.group(1);
    LOG.trace("reference {} to region={} hfile={}", p, otherRegion, nameStrippedOfSuffix);

    // Build up new path with the referenced region in place of our current
    // region in the reference path. Also strip regionname suffix from name.
    return new Path(new Path(new Path(tableDir, otherRegion),
      p.getParent().getName()), nameStrippedOfSuffix);
  }

  /**
   * Validate the store file name.
   * @param fileName name of the file to validate
   * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise
   */
  public static boolean validateStoreFileName(final String fileName) {
    if (HFileLink.isHFileLink(fileName) || isReference(fileName)) {
      return true;
    }
    // Any other name is accepted only when it contains no '-'
    return !fileName.contains("-");
  }

  /**
   * Return if the specified file is a valid store file or not.
   * @param fileStatus The {@link FileStatus} of the file
   * @return <tt>true</tt> if the file is valid
   */
  public static boolean isValid(final FileStatus fileStatus)
      throws IOException {
    final Path p = fileStatus.getPath();

    if (fileStatus.isDirectory()) {
      return false;
    }

    // Check for empty hfile. Should never be the case but can happen
    // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646
    // NOTE: that the HFileLink is just a name, so it's an empty file.
    if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) {
      LOG.warn("Skipping {} because it is empty. HBASE-646 DATA LOSS?", p);
      return false;
    }

    return validateStoreFileName(p.getName());
  }

  /**
   * helper function to compute HDFS blocks distribution of a given reference
   * file. For reference file, we don't compute the exact value. We use some
   * estimate instead given it might be good enough. we assume bottom part
   * takes the first half of reference file, top part takes the second half
   * of the reference file. This is just estimate, given
   * midkey of region != midkey of HFile, also the number and size of keys vary.
   * If this estimate isn't good enough, we can improve it later.
   * @param fs The FileSystem
   * @param reference The reference
   * @param status The reference FileStatus
   * @return HDFS blocks distribution, or null when {@code status} is null
   */
  private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(
      final FileSystem fs, final Reference reference, final FileStatus status)
      throws IOException {
    if (status == null) {
      return null;
    }

    final long half = status.getLen() / 2;
    final long start;
    final long length;
    if (Reference.isTopFileRegion(reference.getFileRegion())) {
      // Top half: from the midpoint to the end (takes the extra byte for odd lengths)
      start = half;
      length = status.getLen() - half;
    } else {
      start = 0;
      length = half;
    }
    return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length);
  }

  /**
   * Two instances are equal when their path, reference and link are all equal (null matching
   * only null).
   */
  @Override
  public boolean equals(Object that) {
    if (this == that) {
      return true;
    }
    if (!(that instanceof StoreFileInfo)) {
      return false;
    }
    StoreFileInfo o = (StoreFileInfo) that;
    return Objects.equals(initialPath, o.initialPath)
      && Objects.equals(reference, o.reference)
      && Objects.equals(link, o.link);
  }

  @Override
  public int hashCode() {
    // Objects.hashCode yields 0 for null, matching the fields compared in equals()
    int hash = 17;
    hash = hash * 31 + Objects.hashCode(reference);
    hash = hash * 31 + Objects.hashCode(initialPath);
    hash = hash * 31 + Objects.hashCode(link);
    return hash;
  }

  /**
   * Return the active file name that contains the real data.
   * <p>
   * For referenced hfile, we will return the name of the reference file as it will be used to
   * construct the StoreFileReader. And for linked hfile, we will return the name of the file being
   * linked.
   */
  public String getActiveFileName() {
    if (reference != null || link == null) {
      return initialPath.getName();
    } else {
      return HFileLink.getReferencedHFileName(initialPath.getName());
    }
  }

  FileSystem getFileSystem() {
    return this.fs;
  }

  Configuration getConf() {
    return this.conf;
  }

  boolean isNoReadahead() {
    return this.noReadahead;
  }

  HFileInfo getHFileInfo() {
    return hfileInfo;
  }

  /** Computes the HDFS blocks distribution with this instance's file system and caches it. */
  void initHDFSBlocksDistribution() throws IOException {
    hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs);
  }

  /**
   * Runs the coprocessor pre-open hook, if a coprocessor host is set.
   * @return the reader supplied by the hook, or null when no host is set
   */
  StoreFileReader preStoreFileReaderOpen(ReaderContext context, CacheConfig cacheConf)
      throws IOException {
    StoreFileReader reader = null;
    if (this.coprocessorHost != null) {
      reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(),
        context.getInputStreamWrapper(), context.getFileSize(),
        cacheConf, reference);
    }
    return reader;
  }

  /**
   * Runs the coprocessor post-open hook, if a coprocessor host is set.
   * @return the reader returned by the hook, or {@code reader} unchanged when no host is set
   */
  StoreFileReader postStoreFileReaderOpen(ReaderContext context, CacheConfig cacheConf,
      StoreFileReader reader) throws IOException {
    StoreFileReader res = reader;
    if (this.coprocessorHost != null) {
      res = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(),
        context.getInputStreamWrapper(), context.getFileSize(),
        cacheConf, reference, reader);
    }
    return res;
  }

  /** Builds the {@link HFileInfo} for this store file from the given reader context. */
  public void initHFileInfo(ReaderContext context) throws IOException {
    this.hfileInfo = new HFileInfo(context, conf);
  }
}