001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019 020package org.apache.hadoop.hbase.regionserver; 021 022import java.io.FileNotFoundException; 023import java.io.IOException; 024import java.util.concurrent.atomic.AtomicInteger; 025import java.util.regex.Matcher; 026import java.util.regex.Pattern; 027 028import org.apache.hadoop.conf.Configuration; 029import org.apache.hadoop.fs.FileStatus; 030import org.apache.hadoop.fs.FileSystem; 031import org.apache.hadoop.fs.Path; 032import org.apache.hadoop.hbase.HDFSBlocksDistribution; 033import org.apache.yetus.audience.InterfaceAudience; 034import org.slf4j.Logger; 035import org.slf4j.LoggerFactory; 036import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 037import org.apache.hadoop.hbase.io.HFileLink; 038import org.apache.hadoop.hbase.io.HalfStoreFileReader; 039import org.apache.hadoop.hbase.io.Reference; 040import org.apache.hadoop.hbase.io.hfile.CacheConfig; 041import org.apache.hadoop.hbase.util.FSUtils; 042 043/** 044 * Describe a StoreFile (hfile, reference, link) 045 */ 046@InterfaceAudience.Private 047public class StoreFileInfo { 048 private static final Logger LOG = LoggerFactory.getLogger(StoreFileInfo.class); 049 050 /** 051 * A non-capture group, for hfiles, so that this can be embedded. 052 * HFiles are uuid ([0-9a-z]+). Bulk loaded hfiles has (_SeqId_[0-9]+_) has suffix. 053 * The mob del file has (_del) as suffix. 054 */ 055 public static final String HFILE_NAME_REGEX = "[0-9a-f]+(?:(?:_SeqId_[0-9]+_)|(?:_del))?"; 056 057 /** Regex that will work for hfiles */ 058 private static final Pattern HFILE_NAME_PATTERN = 059 Pattern.compile("^(" + HFILE_NAME_REGEX + ")"); 060 061 /** 062 * A non-capture group, for del files, so that this can be embedded. 063 * A del file has (_del) as suffix. 064 */ 065 public static final String DELFILE_NAME_REGEX = "[0-9a-f]+(?:_del)"; 066 067 /** Regex that will work for del files */ 068 private static final Pattern DELFILE_NAME_PATTERN = 069 Pattern.compile("^(" + DELFILE_NAME_REGEX + ")"); 070 071 /** 072 * Regex that will work for straight reference names ({@code <hfile>.<parentEncRegion>}) 073 * and hfilelink reference names ({@code <table>=<region>-<hfile>.<parentEncRegion>}) 074 * If reference, then the regex has more than just one group. 075 * Group 1, hfile/hfilelink pattern, is this file's id. 076 * Group 2 '(.+)' is the reference's parent region name. 077 */ 078 private static final Pattern REF_NAME_PATTERN = 079 Pattern.compile(String.format("^(%s|%s)\\.(.+)$", 080 HFILE_NAME_REGEX, HFileLink.LINK_NAME_REGEX)); 081 082 // Configuration 083 private Configuration conf; 084 085 // FileSystem handle 086 private final FileSystem fs; 087 088 // HDFS blocks distribution information 089 private HDFSBlocksDistribution hdfsBlocksDistribution = null; 090 091 // If this storefile references another, this is the reference instance. 092 private final Reference reference; 093 094 // If this storefile is a link to another, this is the link instance. 095 private final HFileLink link; 096 097 private final Path initialPath; 098 099 private RegionCoprocessorHost coprocessorHost; 100 101 // timestamp on when the file was created, is 0 and ignored for reference or link files 102 private long createdTimestamp; 103 104 /** 105 * Create a Store File Info 106 * @param conf the {@link Configuration} to use 107 * @param fs The current file system to use. 108 * @param initialPath The {@link Path} of the file 109 */ 110 public StoreFileInfo(final Configuration conf, final FileSystem fs, final Path initialPath) 111 throws IOException { 112 assert fs != null; 113 assert initialPath != null; 114 assert conf != null; 115 116 this.fs = fs; 117 this.conf = conf; 118 this.initialPath = initialPath; 119 Path p = initialPath; 120 if (HFileLink.isHFileLink(p)) { 121 // HFileLink 122 this.reference = null; 123 this.link = HFileLink.buildFromHFileLinkPattern(conf, p); 124 if (LOG.isTraceEnabled()) LOG.trace(p + " is a link"); 125 } else if (isReference(p)) { 126 this.reference = Reference.read(fs, p); 127 Path referencePath = getReferredToFile(p); 128 if (HFileLink.isHFileLink(referencePath)) { 129 // HFileLink Reference 130 this.link = HFileLink.buildFromHFileLinkPattern(conf, referencePath); 131 } else { 132 // Reference 133 this.link = null; 134 } 135 if (LOG.isTraceEnabled()) LOG.trace(p + " is a " + reference.getFileRegion() + 136 " reference to " + referencePath); 137 } else if (isHFile(p)) { 138 // HFile 139 this.createdTimestamp = fs.getFileStatus(initialPath).getModificationTime(); 140 this.reference = null; 141 this.link = null; 142 } else { 143 throw new IOException("path=" + p + " doesn't look like a valid StoreFile"); 144 } 145 } 146 147 /** 148 * Create a Store File Info 149 * @param conf the {@link Configuration} to use 150 * @param fs The current file system to use. 151 * @param fileStatus The {@link FileStatus} of the file 152 */ 153 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus) 154 throws IOException { 155 this(conf, fs, fileStatus.getPath()); 156 } 157 158 /** 159 * Create a Store File Info from an HFileLink 160 * @param conf the {@link Configuration} to use 161 * @param fs The current file system to use. 162 * @param fileStatus The {@link FileStatus} of the file 163 */ 164 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 165 final HFileLink link) 166 throws IOException { 167 this.fs = fs; 168 this.conf = conf; 169 // initialPath can be null only if we get a link. 170 this.initialPath = (fileStatus == null) ? null : fileStatus.getPath(); 171 // HFileLink 172 this.reference = null; 173 this.link = link; 174 } 175 176 /** 177 * Create a Store File Info from an HFileLink 178 * @param conf 179 * @param fs 180 * @param fileStatus 181 * @param reference 182 * @throws IOException 183 */ 184 public StoreFileInfo(final Configuration conf, final FileSystem fs, final FileStatus fileStatus, 185 final Reference reference) 186 throws IOException { 187 this.fs = fs; 188 this.conf = conf; 189 this.initialPath = fileStatus.getPath(); 190 this.createdTimestamp = fileStatus.getModificationTime(); 191 this.reference = reference; 192 this.link = null; 193 } 194 195 /** 196 * Sets the region coprocessor env. 197 * @param coprocessorHost 198 */ 199 public void setRegionCoprocessorHost(RegionCoprocessorHost coprocessorHost) { 200 this.coprocessorHost = coprocessorHost; 201 } 202 203 /* 204 * @return the Reference object associated to this StoreFileInfo. 205 * null if the StoreFile is not a reference. 206 */ 207 public Reference getReference() { 208 return this.reference; 209 } 210 211 /** @return True if the store file is a Reference */ 212 public boolean isReference() { 213 return this.reference != null; 214 } 215 216 /** @return True if the store file is a top Reference */ 217 public boolean isTopReference() { 218 return this.reference != null && Reference.isTopFileRegion(this.reference.getFileRegion()); 219 } 220 221 /** @return True if the store file is a link */ 222 public boolean isLink() { 223 return this.link != null && this.reference == null; 224 } 225 226 /** @return the HDFS block distribution */ 227 public HDFSBlocksDistribution getHDFSBlockDistribution() { 228 return this.hdfsBlocksDistribution; 229 } 230 231 /** 232 * Open a Reader for the StoreFile 233 * @param fs The current file system to use. 234 * @param cacheConf The cache configuration and block cache reference. 235 * @return The StoreFile.Reader for the file 236 */ 237 public StoreFileReader open(FileSystem fs, CacheConfig cacheConf, boolean canUseDropBehind, 238 long readahead, boolean isPrimaryReplicaStoreFile, AtomicInteger refCount, boolean shared) 239 throws IOException { 240 FSDataInputStreamWrapper in; 241 FileStatus status; 242 243 final boolean doDropBehind = canUseDropBehind && cacheConf.shouldDropBehindCompaction(); 244 if (this.link != null) { 245 // HFileLink 246 in = new FSDataInputStreamWrapper(fs, this.link, doDropBehind, readahead); 247 status = this.link.getFileStatus(fs); 248 } else if (this.reference != null) { 249 // HFile Reference 250 Path referencePath = getReferredToFile(this.getPath()); 251 in = new FSDataInputStreamWrapper(fs, referencePath, doDropBehind, readahead); 252 status = fs.getFileStatus(referencePath); 253 } else { 254 in = new FSDataInputStreamWrapper(fs, this.getPath(), doDropBehind, readahead); 255 status = fs.getFileStatus(initialPath); 256 } 257 long length = status.getLen(); 258 hdfsBlocksDistribution = computeHDFSBlocksDistribution(fs); 259 260 StoreFileReader reader = null; 261 if (this.coprocessorHost != null) { 262 reader = this.coprocessorHost.preStoreFileReaderOpen(fs, this.getPath(), in, length, 263 cacheConf, reference); 264 } 265 if (reader == null) { 266 if (this.reference != null) { 267 reader = new HalfStoreFileReader(fs, this.getPath(), in, length, cacheConf, reference, 268 isPrimaryReplicaStoreFile, refCount, shared, conf); 269 } else { 270 reader = new StoreFileReader(fs, status.getPath(), in, length, cacheConf, 271 isPrimaryReplicaStoreFile, refCount, shared, conf); 272 } 273 } 274 if (this.coprocessorHost != null) { 275 reader = this.coprocessorHost.postStoreFileReaderOpen(fs, this.getPath(), in, length, 276 cacheConf, reference, reader); 277 } 278 return reader; 279 } 280 281 /** 282 * Compute the HDFS Block Distribution for this StoreFile 283 */ 284 public HDFSBlocksDistribution computeHDFSBlocksDistribution(final FileSystem fs) 285 throws IOException { 286 // guard against the case where we get the FileStatus from link, but by the time we 287 // call compute the file is moved again 288 if (this.link != null) { 289 FileNotFoundException exToThrow = null; 290 for (int i = 0; i < this.link.getLocations().length; i++) { 291 try { 292 return computeHDFSBlocksDistributionInternal(fs); 293 } catch (FileNotFoundException ex) { 294 // try the other location 295 exToThrow = ex; 296 } 297 } 298 throw exToThrow; 299 } else { 300 return computeHDFSBlocksDistributionInternal(fs); 301 } 302 } 303 304 private HDFSBlocksDistribution computeHDFSBlocksDistributionInternal(final FileSystem fs) 305 throws IOException { 306 FileStatus status = getReferencedFileStatus(fs); 307 if (this.reference != null) { 308 return computeRefFileHDFSBlockDistribution(fs, reference, status); 309 } else { 310 return FSUtils.computeHDFSBlocksDistribution(fs, status, 0, status.getLen()); 311 } 312 } 313 314 /** 315 * Get the {@link FileStatus} of the file referenced by this StoreFileInfo 316 * @param fs The current file system to use. 317 * @return The {@link FileStatus} of the file referenced by this StoreFileInfo 318 */ 319 public FileStatus getReferencedFileStatus(final FileSystem fs) throws IOException { 320 FileStatus status; 321 if (this.reference != null) { 322 if (this.link != null) { 323 FileNotFoundException exToThrow = null; 324 for (int i = 0; i < this.link.getLocations().length; i++) { 325 // HFileLink Reference 326 try { 327 return link.getFileStatus(fs); 328 } catch (FileNotFoundException ex) { 329 // try the other location 330 exToThrow = ex; 331 } 332 } 333 throw exToThrow; 334 } else { 335 // HFile Reference 336 Path referencePath = getReferredToFile(this.getPath()); 337 status = fs.getFileStatus(referencePath); 338 } 339 } else { 340 if (this.link != null) { 341 FileNotFoundException exToThrow = null; 342 for (int i = 0; i < this.link.getLocations().length; i++) { 343 // HFileLink 344 try { 345 return link.getFileStatus(fs); 346 } catch (FileNotFoundException ex) { 347 // try the other location 348 exToThrow = ex; 349 } 350 } 351 throw exToThrow; 352 } else { 353 status = fs.getFileStatus(initialPath); 354 } 355 } 356 return status; 357 } 358 359 /** @return The {@link Path} of the file */ 360 public Path getPath() { 361 return initialPath; 362 } 363 364 /** @return The {@link FileStatus} of the file */ 365 public FileStatus getFileStatus() throws IOException { 366 return getReferencedFileStatus(fs); 367 } 368 369 /** @return Get the modification time of the file. */ 370 public long getModificationTime() throws IOException { 371 return getFileStatus().getModificationTime(); 372 } 373 374 @Override 375 public String toString() { 376 return this.getPath() + 377 (isReference() ? "->" + getReferredToFile(this.getPath()) + "-" + reference : ""); 378 } 379 380 /** 381 * @param path Path to check. 382 * @return True if the path has format of a HFile. 383 */ 384 public static boolean isHFile(final Path path) { 385 return isHFile(path.getName()); 386 } 387 388 public static boolean isHFile(final String fileName) { 389 Matcher m = HFILE_NAME_PATTERN.matcher(fileName); 390 return m.matches() && m.groupCount() > 0; 391 } 392 393 /** 394 * @param path Path to check. 395 * @return True if the path has format of a del file. 396 */ 397 public static boolean isDelFile(final Path path) { 398 return isDelFile(path.getName()); 399 } 400 401 /** 402 * @param fileName Sting version of path to validate. 403 * @return True if the file name has format of a del file. 404 */ 405 public static boolean isDelFile(final String fileName) { 406 Matcher m = DELFILE_NAME_PATTERN.matcher(fileName); 407 return m.matches() && m.groupCount() > 0; 408 } 409 410 /** 411 * @param path Path to check. 412 * @return True if the path has format of a HStoreFile reference. 413 */ 414 public static boolean isReference(final Path path) { 415 return isReference(path.getName()); 416 } 417 418 /** 419 * @param name file name to check. 420 * @return True if the path has format of a HStoreFile reference. 421 */ 422 public static boolean isReference(final String name) { 423 Matcher m = REF_NAME_PATTERN.matcher(name); 424 return m.matches() && m.groupCount() > 1; 425 } 426 427 /** 428 * @return timestamp when this file was created (as returned by filesystem) 429 */ 430 public long getCreatedTimestamp() { 431 return createdTimestamp; 432 } 433 434 /* 435 * Return path to the file referred to by a Reference. Presumes a directory 436 * hierarchy of <code>${hbase.rootdir}/data/${namespace}/tablename/regionname/familyname</code>. 437 * @param p Path to a Reference file. 438 * @return Calculated path to parent region file. 439 * @throws IllegalArgumentException when path regex fails to match. 440 */ 441 public static Path getReferredToFile(final Path p) { 442 Matcher m = REF_NAME_PATTERN.matcher(p.getName()); 443 if (m == null || !m.matches()) { 444 LOG.warn("Failed match of store file name " + p.toString()); 445 throw new IllegalArgumentException("Failed match of store file name " + 446 p.toString()); 447 } 448 449 // Other region name is suffix on the passed Reference file name 450 String otherRegion = m.group(2); 451 // Tabledir is up two directories from where Reference was written. 452 Path tableDir = p.getParent().getParent().getParent(); 453 String nameStrippedOfSuffix = m.group(1); 454 if (LOG.isTraceEnabled()) { 455 LOG.trace("reference '" + p + "' to region=" + otherRegion 456 + " hfile=" + nameStrippedOfSuffix); 457 } 458 459 // Build up new path with the referenced region in place of our current 460 // region in the reference path. Also strip regionname suffix from name. 461 return new Path(new Path(new Path(tableDir, otherRegion), 462 p.getParent().getName()), nameStrippedOfSuffix); 463 } 464 465 /** 466 * Validate the store file name. 467 * @param fileName name of the file to validate 468 * @return <tt>true</tt> if the file could be a valid store file, <tt>false</tt> otherwise 469 */ 470 public static boolean validateStoreFileName(final String fileName) { 471 if (HFileLink.isHFileLink(fileName) || isReference(fileName)) 472 return(true); 473 return !fileName.contains("-"); 474 } 475 476 /** 477 * Return if the specified file is a valid store file or not. 478 * @param fileStatus The {@link FileStatus} of the file 479 * @return <tt>true</tt> if the file is valid 480 */ 481 public static boolean isValid(final FileStatus fileStatus) 482 throws IOException { 483 final Path p = fileStatus.getPath(); 484 485 if (fileStatus.isDirectory()) 486 return false; 487 488 // Check for empty hfile. Should never be the case but can happen 489 // after data loss in hdfs for whatever reason (upgrade, etc.): HBASE-646 490 // NOTE: that the HFileLink is just a name, so it's an empty file. 491 if (!HFileLink.isHFileLink(p) && fileStatus.getLen() <= 0) { 492 LOG.warn("Skipping " + p + " because it is empty. HBASE-646 DATA LOSS?"); 493 return false; 494 } 495 496 return validateStoreFileName(p.getName()); 497 } 498 499 /** 500 * helper function to compute HDFS blocks distribution of a given reference 501 * file.For reference file, we don't compute the exact value. We use some 502 * estimate instead given it might be good enough. we assume bottom part 503 * takes the first half of reference file, top part takes the second half 504 * of the reference file. This is just estimate, given 505 * midkey ofregion != midkey of HFile, also the number and size of keys vary. 506 * If this estimate isn't good enough, we can improve it later. 507 * @param fs The FileSystem 508 * @param reference The reference 509 * @param status The reference FileStatus 510 * @return HDFS blocks distribution 511 */ 512 private static HDFSBlocksDistribution computeRefFileHDFSBlockDistribution( 513 final FileSystem fs, final Reference reference, final FileStatus status) 514 throws IOException { 515 if (status == null) { 516 return null; 517 } 518 519 long start = 0; 520 long length = 0; 521 522 if (Reference.isTopFileRegion(reference.getFileRegion())) { 523 start = status.getLen()/2; 524 length = status.getLen() - status.getLen()/2; 525 } else { 526 start = 0; 527 length = status.getLen()/2; 528 } 529 return FSUtils.computeHDFSBlocksDistribution(fs, status, start, length); 530 } 531 532 @Override 533 public boolean equals(Object that) { 534 if (this == that) return true; 535 if (that == null) return false; 536 537 if (!(that instanceof StoreFileInfo)) return false; 538 539 StoreFileInfo o = (StoreFileInfo)that; 540 if (initialPath != null && o.initialPath == null) return false; 541 if (initialPath == null && o.initialPath != null) return false; 542 if (initialPath != o.initialPath && initialPath != null 543 && !initialPath.equals(o.initialPath)) return false; 544 545 if (reference != null && o.reference == null) return false; 546 if (reference == null && o.reference != null) return false; 547 if (reference != o.reference && reference != null 548 && !reference.equals(o.reference)) return false; 549 550 if (link != null && o.link == null) return false; 551 if (link == null && o.link != null) return false; 552 if (link != o.link && link != null && !link.equals(o.link)) return false; 553 554 return true; 555 }; 556 557 558 @Override 559 public int hashCode() { 560 int hash = 17; 561 hash = hash * 31 + ((reference == null) ? 0 : reference.hashCode()); 562 hash = hash * 31 + ((initialPath == null) ? 0 : initialPath.hashCode()); 563 hash = hash * 31 + ((link == null) ? 0 : link.hashCode()); 564 return hash; 565 } 566 567 /** 568 * Return the active file name that contains the real data. 569 * <p> 570 * For referenced hfile, we will return the name of the reference file as it will be used to 571 * construct the StoreFileReader. And for linked hfile, we will return the name of the file being 572 * linked. 573 */ 574 public String getActiveFileName() { 575 if (reference != null || link == null) { 576 return initialPath.getName(); 577 } else { 578 return HFileLink.getReferencedHFileName(initialPath.getName()); 579 } 580 } 581}