001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.FileNotFoundException; 021import java.io.IOException; 022import java.util.OptionalLong; 023import java.util.concurrent.atomic.AtomicInteger; 024import java.util.regex.Matcher; 025import java.util.regex.Pattern; 026import org.apache.hadoop.conf.Configurable; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.fs.FileStatus; 029import org.apache.hadoop.fs.FileSystem; 030import org.apache.hadoop.fs.Path; 031import org.apache.hadoop.hbase.HDFSBlocksDistribution; 032import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 033import org.apache.hadoop.hbase.io.HFileLink; 034import org.apache.hadoop.hbase.io.HalfStoreFileReader; 035import org.apache.hadoop.hbase.io.Reference; 036import org.apache.hadoop.hbase.io.hfile.CacheConfig; 037import org.apache.hadoop.hbase.io.hfile.HFileInfo; 038import org.apache.hadoop.hbase.io.hfile.InvalidHFileException; 039import org.apache.hadoop.hbase.io.hfile.ReaderContext; 040import org.apache.hadoop.hbase.io.hfile.ReaderContext.ReaderType; 041import org.apache.hadoop.hbase.io.hfile.ReaderContextBuilder; 042import org.apache.hadoop.hbase.keymeta.ManagedKeyDataCache; 043import org.apache.hadoop.hbase.keymeta.SystemKeyCache; 044import org.apache.hadoop.hbase.mob.MobUtils; 045import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker; 046import org.apache.hadoop.hbase.util.FSUtils; 047import org.apache.hadoop.hbase.util.Pair; 048import org.apache.yetus.audience.InterfaceAudience; 049import org.slf4j.Logger; 050import org.slf4j.LoggerFactory; 051 052/** 053 * Describe a StoreFile (hfile, reference, link) 054 */ 055@InterfaceAudience.Private 056public class StoreFileInfo implements Configurable { 057 private static final Logger LOG = LoggerFactory.getLogger(StoreFileInfo.class); 058 059 /** 060 * A non-capture group, for hfiles, so that this can be embedded. HFiles are uuid ([0-9a-z]+). 061 * Bulk loaded hfiles have (_SeqId_[0-9]+_) as a suffix. The mob del file has (_del) as a suffix. 062 */ 063 public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?"; 064 065 /** Regex that will work for hfiles */ 066 private static final Pattern HFILE_NAME_PATTERN = Pattern.compile("^(" + HFILE_NAME_REGEX + ")"); 067 068 /** 069 * Regex that will work for straight reference names ({@code <hfile>.<parentEncRegion>}) and 070 * hfilelink reference names ({@code 071 * 072 <table> 073 * =<region>-<hfile>.<parentEncRegion>}). If reference, then the regex has more than just one 074 * group. Group 1, hfile/hfilelink pattern, is this file's id. Group 2 '(.+)' is the reference's 075 * parent region name. 076 */ 077 private static final Pattern REF_NAME_PATTERN = 078 Pattern.compile(String.format("^(%s|%s)\\.(.+)$", HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX)); 079 080 public static final String STORE_FILE_READER_NO_READAHEAD = "hbase.store.reader.no-readahead"; 081 public static final boolean DEFAULT_STORE_FILE_READER_NO_READAHEAD = true; 082 083 // Configuration 084 private Configuration conf; 085 086 // FileSystem handle 087 private final FileSystem fs; 088 089 // HDFS blocks distribution information 090 private HDFSBlocksDistribution hdfsBlocksDistribution = null; 091 092 private HFileInfo hfileInfo; 093 094 // If this storefile references another, this is the reference instance. 095 private final Reference reference; 096 097 // If this storefile is a link to another, this is the link instance. 098 private final HFileLink link; 099 100 private final Path initialPath; 101 102 private RegionCoprocessorHost coprocessorHost; 103 104 // timestamp on when the file was created, is 0 and ignored for reference or link files 105 private long createdTimestamp; 106 107 private long size; 108 109 private final boolean primaryReplica; 110 111 private final boolean noReadahead; 112 113 // Counter that is incremented every time a scanner is created on the 114 // store file. It is decremented when the scan on the store file is 115 // done. 116 private final AtomicInteger refCount = new AtomicInteger(0); 117 118 private StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 119 final Path initialPath, final boolean primaryReplica, final StoreFileTracker sft) 120 throws IOException { 121 assert fs != null; 122 assert initialPath != null; 123 assert conf != null; 124 125 this.fs = fs; 126 this.conf = conf; 127 this.initialPath = fs.makeQualified(initialPath); 128 this.primaryReplica = primaryReplica; 129 this.noReadahead = 130 this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD); 131 Path p = initialPath; 132 if (HFileLink.isHFileLink(p)) { 133 // HFileLink 134 this.reference = null; 135 this.link = HFileLink.buildFromHFileLinkPattern(conf, p); 136 LOG.trace("{} is a link", p); 137 } else if (isReference(p)) { 138 this.reference = sft.readReference(p); 139 Path referencePath = getReferredToFile(p); 140 if (HFileLink.isHFileLink(referencePath)) { 141 // HFileLink Reference 142 this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath); 143 } else { 144 // Reference 145 this.link = null; 146 } 147 LOG.trace("{} is a {} reference to {}", p, reference.getFileRegion(), referencePath); 148 } else if (isHFile(p) || isMobFile(p) || isMobRefFile(p)) { 149 // HFile 150 if (fileStatus != null) { 151 this.createdTimestamp = fileStatus.getModificationTime(); 152 this.size = fileStatus.getLen(); 153 } else { 154 FileStatus fStatus = fs.getFileStatus(initialPath); 155 this.createdTimestamp = fStatus.getModificationTime(); 156 this.size = fStatus.getLen(); 157 } 158 this.reference = null; 159 this.link = null; 160 } else { 161 throw new IOException("path=" + p + " doesn't look like a valid StoreFile"); 162 } 163 } 164 165 /** 166 * Create a Store File Info from an HFileLink 167 * @param conf The {@link Configuration} to use 168 * @param fs The current file system to use 169 * @param fileStatus The {@link FileStatus} of the file 170 */ 171 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 172 final HFileLink link) { 173 this(conf, fs, fileStatus, null, link); 174 } 175 176 /** 177 * Create a Store File Info from an HFileLink 178 * @param conf The {@link Configuration} to use 179 * @param fs The current file system to use 180 * @param fileStatus The {@link FileStatus} of the file 181 */ 182 public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initiaPath, 183 final HFileLink link) { 184 this(conf, fs, initiaPath, null, link); 185 } 186 187 /** 188 * Create a Store File Info from an HFileLink 189 * @param conf The {@link Configuration} to use 190 * @param fs The current file system to use 191 * @param fileStatus The {@link FileStatus} of the file 192 * @param reference The reference instance 193 */ 194 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 195 final Reference reference) { 196 this(conf, fs, fileStatus, reference, null); 197 } 198 199 /** 200 * Create a Store File Info from an HFileLink 201 * @param conf The {@link Configuration} to use 202 * @param fs The current file system to use 203 * @param fileStatus The {@link FileStatus} of the file 204 * @param reference The reference instance 205 */ 206 public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath, 207 final Reference reference) { 208 this(conf, fs, initialPath, reference, null); 209 } 210 211 /** 212 * Create a Store File Info from an HFileLink and a Reference 213 * @param conf The {@link Configuration} to use 214 * @param fs The current file system to use 215 * @param fileStatus The {@link FileStatus} of the file 216 * @param reference The reference instance 217 * @param link The link instance 218 */ 219 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 220 final Reference reference, final HFileLink link) { 221 this.fs = fs; 222 this.conf = conf; 223 this.primaryReplica = false; 224 this.initialPath = (fileStatus == null) ? null : fileStatus.getPath(); 225 this.createdTimestamp = (fileStatus == null) ? 0 : fileStatus.getModificationTime(); 226 this.reference = reference; 227 this.link = link; 228 this.noReadahead = 229 this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD); 230 } 231 232 /** 233 * Create a Store File Info from an HFileLink and a Reference 234 * @param conf The {@link Configuration} to use 235 * @param fs The current file system to use 236 * @param fileStatus The {@link FileStatus} of the file 237 * @param reference The reference instance 238 * @param link The link instance 239 */ 240 public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path path, 241 final Reference reference, final HFileLink link) { 242 this.fs = fs; 243 this.conf = conf; 244 this.primaryReplica = false; 245 this.initialPath = path; 246 this.reference = reference; 247 this.link = link; 248 this.noReadahead = 249 this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD); 250 } 251 252 /** 253 * Create a Store File Info from an HFileLink and a Reference 254 * @param conf The {@link Configuration} to use 255 * @param fs The current file system to use 256 * @param fileStatus The {@link FileStatus} of the file 257 * @param reference The reference instance 258 * @param link The link instance 259 */ 260 public StoreFileInfo(final Configuration conf, final FileSystem fs, final long createdTimestamp, 261 final Path initialPath, final long size, final Reference reference, final HFileLink link, 262 final boolean primaryReplica) { 263 this.fs = fs; 264 this.conf = conf; 265 this.primaryReplica = primaryReplica; 266 this.initialPath = initialPath; 267 this.createdTimestamp = createdTimestamp; 268 this.size = size; 269 this.reference = reference; 270 this.link = link; 271 this.noReadahead = 272 this.conf.getBoolean(STORE_FILE_READER_NO_READAHEAD, DEFAULT_STORE_FILE_READER_NO_READAHEAD); 273 } 274 275 public HFileLink getLink() { 276 return link; 277 } 278 279 @Override 280 public Configuration getConf() { 281 return conf; 282 } 283 284 @Override 285 public void setConf(Configuration conf) { 286 this.conf = conf; 287 } 288 289 /** 290 * Size of the Hfile 291 */ 292 public long getSize() { 293 return size; 294 } 295 296 /** 297 * Sets the region coprocessor env. 298 */ 299 public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) { 300 this.coprocessorHost = coprocessorHost; 301 } 302 303 /** 304 * @return the Reference object associated to this StoreFileInfo. null if the StoreFile is not a 305 * reference. 306 */ 307 public Reference getReference() { 308 return this.reference; 309 } 310 311 /** Returns True if the store file is a Reference */ 312 public boolean isReference() { 313 return this.reference != null; 314 } 315 316 /** Returns True if the store file is a top Reference */ 317 public boolean isTopReference() { 318 return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion()); 319 } 320 321 /** Returns True if the store file is a link */ 322 public boolean isLink() { 323 return this.link != null && this.reference == null; 324 } 325 326 /** Returns the HDFS block distribution */ 327 public HDFSBlocksDistribution getHDFSBlockDistribution() { 328 return this.hdfsBlocksDistribution; 329 } 330 331 public StoreFileReader createReader(ReaderContext context, CacheConfig cacheConf) 332 throws IOException { 333 StoreFileReader reader = null; 334 if (this.reference != null) { 335 reader = new HalfStoreFileReader(context, hfileInfo, cacheConf, reference, this, conf); 336 } else { 337 reader = new StoreFileReader(context, hfileInfo, cacheConf, this, conf); 338 } 339 return reader; 340 } 341 342 ReaderContext createReaderContext(boolean doDropBehind, long readahead, ReaderType type, 343 String keyNamespace, SystemKeyCache systemKeyCache, ManagedKeyDataCache managedKeyDataCache) 344 throws IOException { 345 FSDataInputStreamWrapper in; 346 FileStatus status; 347 if (this.link != null) { 348 // HFileLink 349 in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind, readahead); 350 status = this.link.getFileStatus(fs); 351 } else if (this.reference != null) { 352 // HFile Reference 353 Path referencePath = getReferredToFile(this.getPath()); 354 try { 355 in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead); 356 } catch (FileNotFoundException fnfe) { 357 // Intercept the exception so can insert more info about the Reference; otherwise 358 // exception just complains about some random file -- operator doesn't realize it 359 // other end of a Reference 360 FileNotFoundException newFnfe = new FileNotFoundException(toString()); 361 newFnfe.initCause(fnfe); 362 throw newFnfe; 363 } 364 status = fs.getFileStatus(referencePath); 365 } else { 366 in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead); 367 status = fs.getFileStatus(initialPath); 368 } 369 long length = status.getLen(); 370 ReaderContextBuilder contextBuilder = 371 new ReaderContextBuilder().withInputStreamWrapper(in).withFileSize(length) 372 .withPrimaryReplicaReader(this.primaryReplica).withReaderType(type).withFileSystem(fs) 373 .withSystemKeyCache(systemKeyCache).withManagedKeyDataCache(managedKeyDataCache); 374 if (this.reference != null) { 375 contextBuilder.withFilePath(this.getPath()); 376 } else { 377 contextBuilder.withFilePath(status.getPath()); 378 } 379 return contextBuilder.build(); 380 } 381 382 /** 383 * Compute the HDFS Block Distribution for this StoreFile 384 */ 385 public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs) 386 throws IOException { 387 // guard against the case where we get the FileStatus from link, but by the time we 388 // call compute the file is moved again 389 if (this.link != null) { 390 FileNotFoundException exToThrow = null; 391 for (int i = 0; i < this.link.getLocations().length; i++) { 392 try { 393 return computeHDFSBlocksDistributionInternal(fs); 394 } catch (FileNotFoundException ex) { 395 // try the other location 396 exToThrow = ex; 397 } 398 } 399 throw exToThrow; 400 } else { 401 return computeHDFSBlocksDistributionInternal(fs); 402 } 403 } 404 405 private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs) 406 throws IOException { 407 FileStatus status = getReferencedFileStatus(fs); 408 if (this.reference != null) { 409 return computeRefFileHDFSBlockDistribution(fs, reference, status); 410 } else { 411 return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen()); 412 } 413 } 414 415 /** 416 * Get the {@link FileStatus} of the file referenced by this StoreFileInfo 417 * @param fs The current file system to use. 418 * @return The {@link FileStatus} of the file referenced by this StoreFileInfo 419 */ 420 public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException { 421 FileStatus status; 422 if (this.reference != null) { 423 if (this.link != null) { 424 FileNotFoundException exToThrow = null; 425 for (int i = 0; i < this.link.getLocations().length; i++) { 426 // HFileLink Reference 427 try { 428 return link.getFileStatus(fs); 429 } catch (FileNotFoundException ex) { 430 // try the other location 431 exToThrow = ex; 432 } 433 } 434 throw exToThrow; 435 } else { 436 // HFile Reference 437 Path referencePath = getReferredToFile(this.getPath()); 438 status = fs.getFileStatus(referencePath); 439 } 440 } else { 441 if (this.link != null) { 442 FileNotFoundException exToThrow = null; 443 for (int i = 0; i < this.link.getLocations().length; i++) { 444 // HFileLink 445 try { 446 return link.getFileStatus(fs); 447 } catch (FileNotFoundException ex) { 448 // try the other location 449 exToThrow = ex; 450 } 451 } 452 throw exToThrow; 453 } else { 454 status = fs.getFileStatus(initialPath); 455 } 456 } 457 return status; 458 } 459 460 /** Returns The {@link Path} of the file */ 461 public Path getPath() { 462 return initialPath; 463 } 464 465 /** Returns The {@link FileStatus} of the file */ 466 public FileStatus getFileStatus() throws IOException { 467 return getReferencedFileStatus(fs); 468 } 469 470 /** Returns Get the modification time of the file. */ 471 public long getModificationTime() throws IOException { 472 return getFileStatus().getModificationTime(); 473 } 474 475 @Override 476 public String toString() { 477 return this.getPath() 478 + (isReference() ? "->" + getReferredToFile(this.getPath()) + "-" + reference : ""); 479 } 480 481 /** 482 * Cells in a bulkloaded file don't have a sequenceId since they don't go through memstore. When a 483 * bulkload file is committed, the current memstore ts is stamped onto the file name as the 484 * sequenceId of the file. At read time, the sequenceId is copied onto all of the cells returned 485 * so that they can be properly sorted relative to other cells in other files. Further, when 486 * opening multiple files for scan, the sequence id is used to ensusre that the bulkload file's 487 * scanner is porperly sorted amongst the other scanners. Non-bulkloaded files get their 488 * sequenceId from the MAX_MEMSTORE_TS_KEY since those go through the memstore and have true 489 * sequenceIds. 490 */ 491 private static final String SEQ_ID_MARKER = "_SeqId_"; 492 private static final int SEQ_ID_MARKER_LENGTH = SEQ_ID_MARKER.length(); 493 494 /** 495 * @see #SEQ_ID_MARKER 496 * @return True if the file name looks like a bulkloaded file, based on the presence of the SeqId 497 * marker added to those files. 498 */ 499 public static boolean hasBulkloadSeqId(final Path path) { 500 String fileName = path.getName(); 501 return fileName.contains(SEQ_ID_MARKER); 502 } 503 504 /** 505 * @see #SEQ_ID_MARKER 506 * @return If the path is a properly named bulkloaded file, returns the sequence id stamped at the 507 * end of the file name. 508 */ 509 public static OptionalLong getBulkloadSeqId(final Path path) { 510 String fileName = path.getName(); 511 int startPos = fileName.indexOf(SEQ_ID_MARKER); 512 if (startPos != -1) { 513 String strVal = fileName.substring(startPos + SEQ_ID_MARKER_LENGTH, 514 fileName.indexOf('_', startPos + SEQ_ID_MARKER_LENGTH)); 515 return OptionalLong.of(Long.parseLong(strVal)); 516 } 517 return OptionalLong.empty(); 518 } 519 520 /** 521 * @see #SEQ_ID_MARKER 522 * @return A string value for appending to the end of a bulkloaded file name, containing the 523 * properly formatted SeqId marker. 524 */ 525 public static String formatBulkloadSeqId(long seqId) { 526 return SEQ_ID_MARKER + seqId + "_"; 527 } 528 529 /** 530 * @param path Path to check. 531 * @return True if the path has format of a HFile. 532 */ 533 public static boolean isHFile(final Path path) { 534 return isHFile(path.getName()); 535 } 536 537 public static boolean isHFile(final String fileName) { 538 Matcher m = HFILE_NAME_PATTERN.matcher(fileName); 539 return m.matches() && m.groupCount() > 0; 540 } 541 542 /** 543 * Checks if the file is a MOB file 544 * @param path path to a file 545 * @return true, if - yes, false otherwise 546 */ 547 public static boolean isMobFile(final Path path) { 548 String fileName = path.getName(); 549 String[] parts = fileName.split(MobUtils.SEP); 550 if (parts.length != 2) { 551 return false; 552 } 553 Matcher m = HFILE_NAME_PATTERN.matcher(parts[0]); 554 Matcher mm = HFILE_NAME_PATTERN.matcher(parts[1]); 555 return m.matches() && mm.matches(); 556 } 557 558 /** 559 * Checks if the file is a MOB reference file, created by snapshot 560 * @param path path to a file 561 * @return true, if - yes, false otherwise 562 */ 563 public static boolean isMobRefFile(final Path path) { 564 String fileName = path.getName(); 565 int lastIndex = fileName.lastIndexOf(MobUtils.SEP); 566 if (lastIndex < 0) { 567 return false; 568 } 569 String[] parts = new String[2]; 570 parts[0] = fileName.substring(0, lastIndex); 571 parts[1] = fileName.substring(lastIndex + 1); 572 String name = parts[0] + "." + parts[1]; 573 Matcher m = REF_NAME_PATTERN.matcher(name); 574 return m.matches() && m.groupCount() > 1; 575 } 576 577 /** 578 * @param path Path to check. 579 * @return True if the path has format of a HStoreFile reference. 580 */ 581 public static boolean isReference(final Path path) { 582 return isReference(path.getName()); 583 } 584 585 /** 586 * @param name file name to check. 587 * @return True if the path has format of a HStoreFile reference. 588 */ 589 public static boolean isReference(final String name) { 590 // The REF_NAME_PATTERN regex is not computationally trivial, so see if we can fast-fail 591 // on a simple heuristic first. The regex contains a literal ".", so if that character 592 // isn't in the name, then the regex cannot match. 593 if (!name.contains(".")) { 594 return false; 595 } 596 597 Matcher m = REF_NAME_PATTERN.matcher(name); 598 return m.matches() && m.groupCount() > 1; 599 } 600 601 /** Returns timestamp when this file was created (as returned by filesystem) */ 602 public long getCreatedTimestamp() { 603 return createdTimestamp; 604 } 605 606 /* 607 * Return path to the file referred to by a Reference. Presumes a directory hierarchy of 608 * <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>. 609 * @param p Path to a Reference file. 610 * @return Calculated path to parent region file. 611 * @throws IllegalArgumentException when path regex fails to match. 612 */ 613 public static Path getReferredToFile(final Path p) { 614 Matcher m = REF_NAME_PATTERN.matcher(p.getName()); 615 if (m == null || !m.matches()) { 616 LOG.warn("Failed match of store file name {}", p.toString()); 617 throw new IllegalArgumentException("Failed match of store file name " + p.toString()); 618 } 619 620 // Other region name is suffix on the passed Reference file name 621 String otherRegion = m.group(2); 622 // Tabledir is up two directories from where Reference was written. 623 Path tableDir = p.getParent().getParent().getParent(); 624 String nameStrippedOfSuffix = m.group(1); 625 LOG.trace("reference {} to region={} hfile={}", p, otherRegion, nameStrippedOfSuffix); 626 627 // Build up new path with the referenced region in place of our current 628 // region in the reference path. Also strip regionname suffix from name. 629 return new Path(new Path(new Path(tableDir, otherRegion), p.getParent().getName()), 630 nameStrippedOfSuffix); 631 } 632 633 /* 634 * Return region and file name referred to by a Reference. 635 * @param referenceFile HFile name which is a Reference. 636 * @return Calculated referenced region and file name. 637 * @throws IllegalArgumentException when referenceFile regex fails to match. 638 */ 639 public static Pair<String, String> getReferredToRegionAndFile(final String referenceFile) { 640 Matcher m = REF_NAME_PATTERN.matcher(referenceFile); 641 if (m == null || !m.matches()) { 642 LOG.warn("Failed match of store file name {}", referenceFile); 643 throw new IllegalArgumentException("Failed match of store file name " + referenceFile); 644 } 645 String referencedRegion = m.group(2); 646 String referencedFile = m.group(1); 647 LOG.trace("reference {} to region={} file={}", referenceFile, referencedRegion, referencedFile); 648 return new Pair<>(referencedRegion, referencedFile); 649 } 650 651 /** 652 * Validate the store file name. 653 * @param fileName name of the file to validate 654 * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise 655 */ 656 public static boolean validateStoreFileName(final String fileName) { 657 if (HFileLink.isHFileLink(fileName) || isReference(fileName) || isMobFileLink(fileName)) { 658 return true; 659 } 660 return !fileName.contains("-"); 661 } 662 663 public static boolean isMobFileLink(String fileName) { 664 Matcher m = HFileLink.REF_OR_HFILE_LINK_PATTERN.matcher(fileName); 665 return m.matches() && !isReference(fileName); 666 } 667 668 /** 669 * Return if the specified file is a valid store file or not. 670 * @param fileStatus The {@link FileStatus} of the file 671 * @return <tt>true</tt> if the file is valid 672 */ 673 public static boolean isValid(final FileStatus fileStatus) throws IOException { 674 final Path p = fileStatus.getPath(); 675 676 if (fileStatus.isDirectory()) { 677 return false; 678 } 679 680 // Check for empty hfile. Should never be the case but can happen 681 // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646 682 // NOTE: that the HFileLink is just a name, so it's an empty file. 683 if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0 && !isMobFileLink(p.getName())) { 684 LOG.warn("Skipping {} because it is empty. HBASE-646 DATA LOSS?", p); 685 return false; 686 } 687 688 return validateStoreFileName(p.getName()); 689 } 690 691 /** 692 * helper function to compute HDFS blocks distribution of a given reference file.For reference 693 * file, we don't compute the exact value. We use some estimate instead given it might be good 694 * enough. we assume bottom part takes the first half of reference file, top part takes the second 695 * half of the reference file. This is just estimate, given midkey ofregion != midkey of HFile, 696 * also the number and size of keys vary. If this estimate isn't good enough, we can improve it 697 * later. 698 * @param fs The FileSystem 699 * @param reference The reference 700 * @param status The reference FileStatus 701 * @return HDFS blocks distribution 702 */ 703 private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution(final FileSystem fs, 704 final Reference reference, final FileStatus status) throws IOException { 705 if (status == null) { 706 return null; 707 } 708 709 long start = 0; 710 long length = 0; 711 712 if (Reference.isTopFileRegion(reference.getFileRegion())) { 713 start = status.getLen() / 2; 714 length = status.getLen() - status.getLen() / 2; 715 } else { 716 start = 0; 717 length = status.getLen() / 2; 718 } 719 return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length); 720 } 721 722 @Override 723 public boolean equals(Object that) { 724 if (this == that) { 725 return true; 726 } 727 if (that == null) { 728 return false; 729 } 730 731 if (!(that instanceof StoreFileInfo)) { 732 return false; 733 } 734 735 StoreFileInfo o = (StoreFileInfo) that; 736 if (initialPath != null && o.initialPath == null) { 737 return false; 738 } 739 if (initialPath == null && o.initialPath != null) { 740 return false; 741 } 742 if (initialPath != o.initialPath && initialPath != null && !initialPath.equals(o.initialPath)) { 743 return false; 744 } 745 if (reference != null && o.reference == null) { 746 return false; 747 } 748 if (reference == null && o.reference != null) { 749 return false; 750 } 751 if (reference != o.reference && reference != null && !reference.equals(o.reference)) { 752 return false; 753 } 754 755 if (link != null && o.link == null) { 756 return false; 757 } 758 if (link == null && o.link != null) { 759 return false; 760 } 761 if (link != o.link && link != null && !link.equals(o.link)) { 762 return false; 763 } 764 765 return true; 766 } 767 768 @Override 769 public int hashCode() { 770 int hash = 17; 771 hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode()); 772 hash = hash * 31 + ((initialPath == null) ? 0 : initialPath.hashCode()); 773 hash = hash * 31 + ((link == null) ? 0 : link.hashCode()); 774 return hash; 775 } 776 777 /** 778 * Return the active file name that contains the real data. 779 * <p> 780 * For referenced hfile, we will return the name of the reference file as it will be used to 781 * construct the StoreFileReader. And for linked hfile, we will return the name of the file being 782 * linked. 783 */ 784 public String getActiveFileName() { 785 if (reference != null || link == null) { 786 return initialPath.getName(); 787 } else { 788 return HFileLink.getReferencedHFileName(initialPath.getName()); 789 } 790 } 791 792 public FileSystem getFileSystem() { 793 return this.fs; 794 } 795 796 boolean isNoReadahead() { 797 return this.noReadahead; 798 } 799 800 public HFileInfo getHFileInfo() { 801 return hfileInfo; 802 } 803 804 void initHDFSBlocksDistribution() throws IOException { 805 hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs); 806 } 807 808 StoreFileReader preStoreFileReaderOpen(ReaderContext context, CacheConfig cacheConf) 809 throws IOException { 810 StoreFileReader reader = null; 811 if (this.coprocessorHost != null) { 812 reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), 813 context.getInputStreamWrapper(), context.getFileSize(), cacheConf, reference); 814 } 815 return reader; 816 } 817 818 StoreFileReader postStoreFileReaderOpen(ReaderContext context, CacheConfig cacheConf, 819 StoreFileReader reader) throws IOException { 820 StoreFileReader res = reader; 821 if (this.coprocessorHost != null) { 822 res = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), 823 context.getInputStreamWrapper(), context.getFileSize(), cacheConf, reference, reader); 824 } 825 return res; 826 } 827 828 public void initHFileInfo(ReaderContext context) throws IOException { 829 this.hfileInfo = new HFileInfo(context, conf); 830 } 831 832 int getRefCount() { 833 return this.refCount.get(); 834 } 835 836 int increaseRefCount() { 837 return this.refCount.incrementAndGet(); 838 } 839 840 int decreaseRefCount() { 841 return this.refCount.decrementAndGet(); 842 } 843 844 public static StoreFileInfo createStoreFileInfoForHFile(final Configuration conf, 845 final FileSystem fs, final Path initialPath, final boolean primaryReplica) throws IOException { 846 if (HFileLink.isHFileLink(initialPath) || isReference(initialPath)) { 847 throw new InvalidHFileException("Path " + initialPath + " is a Hfile link or a Regerence"); 848 } 849 StoreFileInfo storeFileInfo = 850 new StoreFileInfo(conf, fs, null, initialPath, primaryReplica, null); 851 return storeFileInfo; 852 } 853 854}