001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020package org.apache.hadoop.hbase.regionserver; 021 022import java.io.FileNotFoundException; 023import java.io.IOException; 024import java.util.concurrent.atomic.AtomicInteger; 025import java.util.regex.Matcher; 026import java.util.regex.Pattern; 027 028import org.apache.hadoop.conf.Configuration; 029import org.apache.hadoop.fs.FileStatus; 030import org.apache.hadoop.fs.FileSystem; 031import org.apache.hadoop.fs.Path; 032import org.apache.hadoop.hbase.HDFSBlocksDistribution; 033import org.apache.yetus.audience.InterfaceAudience; 034import org.slf4j.Logger; 035import org.slf4j.LoggerFactory; 036import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 037import org.apache.hadoop.hbase.io.HFileLink; 038import org.apache.hadoop.hbase.io.HalfStoreFileReader; 039import org.apache.hadoop.hbase.io.Reference; 040import org.apache.hadoop.hbase.io.hfile.CacheConfig; 041import org.apache.hadoop.hbase.util.FSUtils; 042 043/** 044 * Describe a StoreFile (hfile, reference, link) 045 */ 046@InterfaceAudience.Private 047public class StoreFileInfo { 048 private static final Logger LOG = LoggerFactory.getLogger(StoreFileInfo.class); 049 050 /** 051 * A non-capture group, for hfiles, so that this can be embedded. 052 * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix. 053 * The mob del file has (_del) as suffix. 054 */ 055 public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?"; 056 057 /** Regex that will work for hfiles */ 058 private static final Pattern HFILE_NAME_PATTERN = 059 Pattern.compile("^(" + HFILE_NAME_REGEX + ")"); 060 061 /** 062 * A non-capture group, for del files, so that this can be embedded. 063 * A del file has (_del) as suffix. 064 */ 065 public static final String DELFILE_NAME_REGEX = "[0-9a-f]+(?:_del)"; 066 067 /** Regex that will work for del files */ 068 private static final Pattern DELFILE_NAME_PATTERN = 069 Pattern.compile("^(" + DELFILE_NAME_REGEX + ")"); 070 071 /** 072 * Regex that will work for straight reference names ({@code <hfile>.<parentEncRegion>}) 073 * and hfilelink reference names ({@code <table>=<region>-<hfile>.<parentEncRegion>}) 074 * If reference, then the regex has more than just one group. 075 * Group 1, hfile/hfilelink pattern, is this file's id. 076 * Group 2 '(.+)' is the reference's parent region name. 077 */ 078 private static final Pattern REF_NAME_PATTERN = 079 Pattern.compile(String.format("^(%s|%s)\\.(.+)$", 080 HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX)); 081 082 // Configuration 083 private Configuration conf; 084 085 // FileSystem handle 086 private final FileSystem fs; 087 088 // HDFS blocks distribution information 089 private HDFSBlocksDistribution hdfsBlocksDistribution = null; 090 091 // If this storefile references another, this is the reference instance. 092 private final Reference reference; 093 094 // If this storefile is a link to another, this is the link instance. 095 private final HFileLink link; 096 097 private final Path initialPath; 098 099 private RegionCoprocessorHost coprocessorHost; 100 101 // timestamp on when the file was created, is 0 and ignored for reference or link files 102 private long createdTimestamp; 103 104 private long size; 105 106 /** 107 * Create a Store File Info 108 * @param conf the {@link Configuration} to use 109 * @param fs The current file system to use. 110 * @param initialPath The {@link Path} of the file 111 */ 112 public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath) 113 throws IOException { 114 this(conf, fs, null, initialPath); 115 } 116 117 private StoreFileInfo(final Configuration conf, final FileSystem fs, 118 final FileStatus fileStatus, final Path initialPath) throws IOException { 119 assert fs != null; 120 assert initialPath != null; 121 assert conf != null; 122 123 this.fs = fs; 124 this.conf = conf; 125 this.initialPath = initialPath; 126 Path p = initialPath; 127 if (HFileLink.isHFileLink(p)) { 128 // HFileLink 129 this.reference = null; 130 this.link = HFileLink.buildFromHFileLinkPattern(conf, p); 131 if (LOG.isTraceEnabled()) LOG.trace(p + " is a link"); 132 } else if (isReference(p)) { 133 this.reference = Reference.read(fs, p); 134 Path referencePath = getReferredToFile(p); 135 if (HFileLink.isHFileLink(referencePath)) { 136 // HFileLink Reference 137 this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath); 138 } else { 139 // Reference 140 this.link = null; 141 } 142 if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() + 143 " reference to " + referencePath); 144 } else if (isHFile(p)) { 145 // HFile 146 if (fileStatus != null) { 147 this.createdTimestamp = fileStatus.getModificationTime(); 148 this.size = fileStatus.getLen(); 149 } else { 150 FileStatus fStatus = fs.getFileStatus(initialPath); 151 this.createdTimestamp = fStatus.getModificationTime(); 152 this.size = fStatus.getLen(); 153 } 154 this.reference = null; 155 this.link = null; 156 } else { 157 throw new IOException("path=" + p + " doesn't look like a valid StoreFile"); 158 } 159 } 160 161 /** 162 * Create a Store File Info 163 * @param conf the {@link Configuration} to use 164 * @param fs The current file system to use. 165 * @param fileStatus The {@link FileStatus} of the file 166 */ 167 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus) 168 throws IOException { 169 this(conf, fs, fileStatus, fileStatus.getPath()); 170 } 171 172 /** 173 * Create a Store File Info from an HFileLink 174 * @param conf The {@link Configuration} to use 175 * @param fs The current file system to use 176 * @param fileStatus The {@link FileStatus} of the file 177 */ 178 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 179 final HFileLink link) { 180 this.fs = fs; 181 this.conf = conf; 182 // initialPath can be null only if we get a link. 183 this.initialPath = (fileStatus == null) ? null : fileStatus.getPath(); 184 // HFileLink 185 this.reference = null; 186 this.link = link; 187 } 188 189 /** 190 * Create a Store File Info from an HFileLink 191 * @param conf The {@link Configuration} to use 192 * @param fs The current file system to use 193 * @param fileStatus The {@link FileStatus} of the file 194 * @param reference The reference instance 195 */ 196 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 197 final Reference reference) { 198 this.fs = fs; 199 this.conf = conf; 200 this.initialPath = fileStatus.getPath(); 201 this.createdTimestamp = fileStatus.getModificationTime(); 202 this.reference = reference; 203 this.link = null; 204 } 205 206 /** 207 * Create a Store File Info from an HFileLink and a Reference 208 * @param conf The {@link Configuration} to use 209 * @param fs The current file system to use 210 * @param fileStatus The {@link FileStatus} of the file 211 * @param reference The reference instance 212 * @param link The link instance 213 */ 214 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 215 final Reference reference, final HFileLink link) { 216 this.fs = fs; 217 this.conf = conf; 218 this.initialPath = fileStatus.getPath(); 219 this.createdTimestamp = fileStatus.getModificationTime(); 220 this.reference = reference; 221 this.link = link; 222 } 223 224 /** 225 * Size of the Hfile 226 * @return size 227 */ 228 public long getSize() { 229 return size; 230 } 231 232 /** 233 * Sets the region coprocessor env. 234 * @param coprocessorHost 235 */ 236 public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) { 237 this.coprocessorHost = coprocessorHost; 238 } 239 240 /* 241 * @return the Reference object associated to this StoreFileInfo. 242 * null if the StoreFile is not a reference. 243 */ 244 public Reference getReference() { 245 return this.reference; 246 } 247 248 /** @return True if the store file is a Reference */ 249 public boolean isReference() { 250 return this.reference != null; 251 } 252 253 /** @return True if the store file is a top Reference */ 254 public boolean isTopReference() { 255 return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion()); 256 } 257 258 /** @return True if the store file is a link */ 259 public boolean isLink() { 260 return this.link != null && this.reference == null; 261 } 262 263 /** @return the HDFS block distribution */ 264 public HDFSBlocksDistribution getHDFSBlockDistribution() { 265 return this.hdfsBlocksDistribution; 266 } 267 268 /** 269 * Open a Reader for the StoreFile 270 * @param fs The current file system to use. 271 * @param cacheConf The cache configuration and block cache reference. 272 * @return The StoreFile.Reader for the file 273 */ 274 public StoreFileReader open(FileSystem fs, CacheConfig cacheConf, boolean canUseDropBehind, 275 long readahead, boolean isPrimaryReplicaStoreFile, AtomicInteger refCount, boolean shared) 276 throws IOException { 277 FSDataInputStreamWrapper in; 278 FileStatus status; 279 280 final boolean doDropBehind = canUseDropBehind && cacheConf.shouldDropBehindCompaction(); 281 if (this.link != null) { 282 // HFileLink 283 in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind, readahead); 284 status = this.link.getFileStatus(fs); 285 } else if (this.reference != null) { 286 // HFile Reference 287 Path referencePath = getReferredToFile(this.getPath()); 288 try { 289 in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead); 290 } catch (FileNotFoundException fnfe) { 291 // Intercept the exception so can insert more info about the Reference; otherwise 292 // exception just complains about some random file -- operator doesn't realize it 293 // other end of a Reference 294 FileNotFoundException newFnfe = new FileNotFoundException(toString()); 295 newFnfe.initCause(fnfe); 296 throw newFnfe; 297 } 298 status = fs.getFileStatus(referencePath); 299 } else { 300 in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead); 301 status = fs.getFileStatus(initialPath); 302 } 303 long length = status.getLen(); 304 hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs); 305 306 StoreFileReader reader = null; 307 if (this.coprocessorHost != null) { 308 reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length, 309 cacheConf, reference); 310 } 311 if (reader == null) { 312 if (this.reference != null) { 313 reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference, 314 isPrimaryReplicaStoreFile, refCount, shared, conf); 315 } else { 316 reader = new StoreFileReader(fs, status.getPath(), in, length, cacheConf, 317 isPrimaryReplicaStoreFile, refCount, shared, conf); 318 } 319 } 320 if (this.coprocessorHost != null) { 321 reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length, 322 cacheConf, reference, reader); 323 } 324 return reader; 325 } 326 327 /** 328 * Compute the HDFS Block Distribution for this StoreFile 329 */ 330 public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs) 331 throws IOException { 332 // guard against the case where we get the FileStatus from link, but by the time we 333 // call compute the file is moved again 334 if (this.link != null) { 335 FileNotFoundException exToThrow = null; 336 for (int i = 0; i < this.link.getLocations().length; i++) { 337 try { 338 return computeHDFSBlocksDistributionInternal(fs); 339 } catch (FileNotFoundException ex) { 340 // try the other location 341 exToThrow = ex; 342 } 343 } 344 throw exToThrow; 345 } else { 346 return computeHDFSBlocksDistributionInternal(fs); 347 } 348 } 349 350 private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs) 351 throws IOException { 352 FileStatus status = getReferencedFileStatus(fs); 353 if (this.reference != null) { 354 return computeRefFileHDFSBlockDistribution(fs, reference, status); 355 } else { 356 return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen()); 357 } 358 } 359 360 /** 361 * Get the {@link FileStatus} of the file referenced by this StoreFileInfo 362 * @param fs The current file system to use. 363 * @return The {@link FileStatus} of the file referenced by this StoreFileInfo 364 */ 365 public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException { 366 FileStatus status; 367 if (this.reference != null) { 368 if (this.link != null) { 369 FileNotFoundException exToThrow = null; 370 for (int i = 0; i < this.link.getLocations().length; i++) { 371 // HFileLink Reference 372 try { 373 return link.getFileStatus(fs); 374 } catch (FileNotFoundException ex) { 375 // try the other location 376 exToThrow = ex; 377 } 378 } 379 throw exToThrow; 380 } else { 381 // HFile Reference 382 Path referencePath = getReferredToFile(this.getPath()); 383 status = fs.getFileStatus(referencePath); 384 } 385 } else { 386 if (this.link != null) { 387 FileNotFoundException exToThrow = null; 388 for (int i = 0; i < this.link.getLocations().length; i++) { 389 // HFileLink 390 try { 391 return link.getFileStatus(fs); 392 } catch (FileNotFoundException ex) { 393 // try the other location 394 exToThrow = ex; 395 } 396 } 397 throw exToThrow; 398 } else { 399 status = fs.getFileStatus(initialPath); 400 } 401 } 402 return status; 403 } 404 405 /** @return The {@link Path} of the file */ 406 public Path getPath() { 407 return initialPath; 408 } 409 410 /** @return The {@link FileStatus} of the file */ 411 public FileStatus getFileStatus() throws IOException { 412 return getReferencedFileStatus(fs); 413 } 414 415 /** @return Get the modification time of the file. */ 416 public long getModificationTime() throws IOException { 417 return getFileStatus().getModificationTime(); 418 } 419 420 @Override 421 public String toString() { 422 return this.getPath() + 423 (isReference() ? "->" + getReferredToFile(this.getPath()) + "-" + reference : ""); 424 } 425 426 /** 427 * @param path Path to check. 428 * @return True if the path has format of a HFile. 429 */ 430 public static boolean isHFile(final Path path) { 431 return isHFile(path.getName()); 432 } 433 434 public static boolean isHFile(final String fileName) { 435 Matcher m = HFILE_NAME_PATTERN.matcher(fileName); 436 return m.matches() && m.groupCount() > 0; 437 } 438 439 /** 440 * @param path Path to check. 441 * @return True if the path has format of a del file. 442 */ 443 public static boolean isDelFile(final Path path) { 444 return isDelFile(path.getName()); 445 } 446 447 /** 448 * @param fileName Sting version of path to validate. 449 * @return True if the file name has format of a del file. 450 */ 451 public static boolean isDelFile(final String fileName) { 452 Matcher m = DELFILE_NAME_PATTERN.matcher(fileName); 453 return m.matches() && m.groupCount() > 0; 454 } 455 456 /** 457 * @param path Path to check. 458 * @return True if the path has format of a HStoreFile reference. 459 */ 460 public static boolean isReference(final Path path) { 461 return isReference(path.getName()); 462 } 463 464 /** 465 * @param name file name to check. 466 * @return True if the path has format of a HStoreFile reference. 467 */ 468 public static boolean isReference(final String name) { 469 Matcher m = REF_NAME_PATTERN.matcher(name); 470 return m.matches() && m.groupCount() > 1; 471 } 472 473 /** 474 * @return timestamp when this file was created (as returned by filesystem) 475 */ 476 public long getCreatedTimestamp() { 477 return createdTimestamp; 478 } 479 480 /* 481 * Return path to the file referred to by a Reference. Presumes a directory 482 * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>. 483 * @param p Path to a Reference file. 484 * @return Calculated path to parent region file. 485 * @throws IllegalArgumentException when path regex fails to match. 486 */ 487 public static Path getReferredToFile(final Path p) { 488 Matcher m = REF_NAME_PATTERN.matcher(p.getName()); 489 if (m == null || !m.matches()) { 490 LOG.warn("Failed match of store file name " + p.toString()); 491 throw new IllegalArgumentException("Failed match of store file name " + 492 p.toString()); 493 } 494 495 // Other region name is suffix on the passed Reference file name 496 String otherRegion = m.group(2); 497 // Tabledir is up two directories from where Reference was written. 498 Path tableDir = p.getParent().getParent().getParent(); 499 String nameStrippedOfSuffix = m.group(1); 500 if (LOG.isTraceEnabled()) { 501 LOG.trace("reference '" + p + "' to region=" + otherRegion 502 + " hfile=" + nameStrippedOfSuffix); 503 } 504 505 // Build up new path with the referenced region in place of our current 506 // region in the reference path. Also strip regionname suffix from name. 507 return new Path(new Path(new Path(tableDir, otherRegion), 508 p.getParent().getName()), nameStrippedOfSuffix); 509 } 510 511 /** 512 * Validate the store file name. 513 * @param fileName name of the file to validate 514 * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise 515 */ 516 public static boolean validateStoreFileName(final String fileName) { 517 if (HFileLink.isHFileLink(fileName) || isReference(fileName)) 518 return(true); 519 return !fileName.contains("-"); 520 } 521 522 /** 523 * Return if the specified file is a valid store file or not. 524 * @param fileStatus The {@link FileStatus} of the file 525 * @return <tt>true</tt> if the file is valid 526 */ 527 public static boolean isValid(final FileStatus fileStatus) 528 throws IOException { 529 final Path p = fileStatus.getPath(); 530 531 if (fileStatus.isDirectory()) 532 return false; 533 534 // Check for empty hfile. Should never be the case but can happen 535 // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646 536 // NOTE: that the HFileLink is just a name, so it's an empty file. 537 if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) { 538 LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?"); 539 return false; 540 } 541 542 return validateStoreFileName(p.getName()); 543 } 544 545 /** 546 * helper function to compute HDFS blocks distribution of a given reference 547 * file.For reference file, we don't compute the exact value. We use some 548 * estimate instead given it might be good enough. we assume bottom part 549 * takes the first half of reference file, top part takes the second half 550 * of the reference file. This is just estimate, given 551 * midkey ofregion != midkey of HFile, also the number and size of keys vary. 552 * If this estimate isn't good enough, we can improve it later. 553 * @param fs The FileSystem 554 * @param reference The reference 555 * @param status The reference FileStatus 556 * @return HDFS blocks distribution 557 */ 558 private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution( 559 final FileSystem fs, final Reference reference, final FileStatus status) 560 throws IOException { 561 if (status == null) { 562 return null; 563 } 564 565 long start = 0; 566 long length = 0; 567 568 if (Reference.isTopFileRegion(reference.getFileRegion())) { 569 start = status.getLen()/2; 570 length = status.getLen() - status.getLen()/2; 571 } else { 572 start = 0; 573 length = status.getLen()/2; 574 } 575 return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length); 576 } 577 578 @Override 579 public boolean equals(Object that) { 580 if (this == that) return true; 581 if (that == null) return false; 582 583 if (!(that instanceof StoreFileInfo)) return false; 584 585 StoreFileInfo o = (StoreFileInfo)that; 586 if (initialPath != null && o.initialPath == null) return false; 587 if (initialPath == null && o.initialPath != null) return false; 588 if (initialPath != o.initialPath && initialPath != null 589 && !initialPath.equals(o.initialPath)) return false; 590 591 if (reference != null && o.reference == null) return false; 592 if (reference == null && o.reference != null) return false; 593 if (reference != o.reference && reference != null 594 && !reference.equals(o.reference)) return false; 595 596 if (link != null && o.link == null) return false; 597 if (link == null && o.link != null) return false; 598 if (link != o.link && link != null && !link.equals(o.link)) return false; 599 600 return true; 601 }; 602 603 604 @Override 605 public int hashCode() { 606 int hash = 17; 607 hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode()); 608 hash = hash * 31 + ((initialPath == null) ? 0 : initialPath.hashCode()); 609 hash = hash * 31 + ((link == null) ? 0 : link.hashCode()); 610 return hash; 611 } 612 613 /** 614 * Return the active file name that contains the real data. 615 * <p> 616 * For referenced hfile, we will return the name of the reference file as it will be used to 617 * construct the StoreFileReader. And for linked hfile, we will return the name of the file being 618 * linked. 619 */ 620 public String getActiveFileName() { 621 if (reference != null || link == null) { 622 return initialPath.getName(); 623 } else { 624 return HFileLink.getReferencedHFileName(initialPath.getName()); 625 } 626 } 627}