/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.hadoop.fs.CanSetDropBehind;
import org.apache.hadoop.fs.CanSetReadahead;
import org.apache.hadoop.fs.CanUnbuffer;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.security.AccessControlException;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The FileLink is a sort of hardlink, that allows access to a file given a set of locations.
 * <p>
 * <b>The Problem:</b>
 * <ul>
 * <li>HDFS doesn't have support for hardlinks, and this makes it impossible to reference the same
 * data blocks using different names.</li>
 * <li>HBase stores files in one location (e.g. table/region/family/) and when the file is not
 * needed anymore (e.g. compaction, region deletion, ...) moves it to an archive directory.</li>
 * </ul>
 * If we want to create a reference to a file, we need to remember that it can be in its original
 * location or in the archive folder. The FileLink class tries to abstract this concept and given a
 * set of locations it is able to switch between them making this operation transparent for the
 * user. {@link HFileLink} is a more concrete implementation of the {@code FileLink}.
 * <p>
 * <b>Back-references:</b> To help the {@link org.apache.hadoop.hbase.master.cleaner.CleanerChore}
 * to keep track of the links to a particular file, during the {@code FileLink} creation, a new file
 * is placed inside a back-reference directory. There's one back-reference directory for each file
 * that has links, and in the directory there's one file per link.
 * <p>
 * HFileLink Example
 * <ul>
 * <li>/hbase/table/region-x/cf/file-k (Original File)</li>
 * <li>/hbase/table-cloned/region-y/cf/file-k.region-x.table (HFileLink to the original file)</li>
 * <li>/hbase/table-2nd-cloned/region-z/cf/file-k.region-x.table (HFileLink to the original file)
 * </li>
 * <li>/hbase/.archive/table/region-x/.links-file-k/region-y.table-cloned (Back-reference to the
 * link in table-cloned)</li>
 * <li>/hbase/.archive/table/region-x/.links-file-k/region-z.table-2nd-cloned (Back-reference to the
 * link in table-2nd-cloned)</li>
 * </ul>
 */
@InterfaceAudience.Private
public class FileLink {
  private static final Logger LOG = LoggerFactory.getLogger(FileLink.class);

  /** Define the Back-reference directory name prefix: {@code .links-<hfile>/} */
  public static final String BACK_REFERENCES_DIRECTORY_PREFIX = ".links-";

  /**
   * FileLink InputStream that handles the switch between the original path and the alternative
   * locations, when the file is moved.
   * <p>
   * Every delegating operation follows the same pattern: call the current underlying stream and,
   * if that raises {@link FileNotFoundException}, reopen the link at another location via
   * {@link #tryOpen()} and repeat the call once on the new stream.
   */
  protected static class FileLinkInputStream extends InputStream
    implements Seekable, PositionedReadable, CanSetDropBehind, CanSetReadahead, CanUnbuffer {
    /** Stream over the location currently in use. */
    private FSDataInputStream in = null;
    /** Location backing {@link #in}; {@code null} until the first successful open. */
    private Path currentPath = null;
    /** Sequential read position tracked by this wrapper; restored after a location switch. */
    private long pos = 0;

    private final FileLink fileLink;
    private final int bufferSize;
    private final FileSystem fs;

    /**
     * Opens the link using the default buffer size reported by {@link CommonFSUtils} for
     * {@code fs}.
     * @param fs       file system used to open the link locations
     * @param fileLink link whose locations are tried
     * @throws IOException if none of the locations can be opened
     */
    public FileLinkInputStream(final FileSystem fs, final FileLink fileLink) throws IOException {
      this(fs, fileLink, CommonFSUtils.getDefaultBufferSize(fs));
    }

    /**
     * Opens the link with an explicit buffer size.
     * @param fs         file system used to open the link locations
     * @param fileLink   link whose locations are tried
     * @param bufferSize buffer size passed to {@link FileSystem#open(Path, int)}
     * @throws IOException if none of the locations can be opened
     */
    public FileLinkInputStream(final FileSystem fs, final FileLink fileLink, int bufferSize)
      throws IOException {
      this.bufferSize = bufferSize;
      this.fileLink = fileLink;
      this.fs = fs;

      this.in = tryOpen();
    }

    /** Returns the stream over the location currently in use. */
    private FSDataInputStream getUnderlyingInputStream() {
      return in;
    }

    @Override
    public int read() throws IOException {
      int res;
      try {
        res = in.read();
      } catch (FileNotFoundException e) {
        res = tryOpen().read();
      }
      // InputStream.read() returns the byte as 0..255 and -1 only at end of stream, so a zero
      // byte is real data and must advance the tracked position as well.
      if (res >= 0) {
        pos += 1;
      }
      return res;
    }

    @Override
    public int read(byte[] b) throws IOException {
      return read(b, 0, b.length);
    }

    @Override
    public int read(byte[] b, int off, int len) throws IOException {
      int n;
      try {
        n = in.read(b, off, len);
      } catch (FileNotFoundException e) {
        n = tryOpen().read(b, off, len);
      }
      if (n > 0) {
        pos += n;
      }
      assert (in.getPos() == pos);
      return n;
    }

    /** Positional read; the tracked sequential position is not modified. */
    @Override
    public int read(long position, byte[] buffer, int offset, int length) throws IOException {
      int n;
      try {
        n = in.read(position, buffer, offset, length);
      } catch (FileNotFoundException e) {
        n = tryOpen().read(position, buffer, offset, length);
      }
      return n;
    }

    @Override
    public void readFully(long position, byte[] buffer) throws IOException {
      readFully(position, buffer, 0, buffer.length);
    }

    /** Positional read-fully; the tracked sequential position is not modified. */
    @Override
    public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
      try {
        in.readFully(position, buffer, offset, length);
      } catch (FileNotFoundException e) {
        tryOpen().readFully(position, buffer, offset, length);
      }
    }

    @Override
    public long skip(long n) throws IOException {
      long skipped;

      try {
        skipped = in.skip(n);
      } catch (FileNotFoundException e) {
        skipped = tryOpen().skip(n);
      }

      if (skipped > 0) {
        pos += skipped;
      }
      return skipped;
    }

    @Override
    public int available() throws IOException {
      try {
        return in.available();
      } catch (FileNotFoundException e) {
        return tryOpen().available();
      }
    }

    @Override
    public void seek(long pos) throws IOException {
      try {
        in.seek(pos);
      } catch (FileNotFoundException e) {
        tryOpen().seek(pos);
      }
      this.pos = pos;
    }

    /** Returns the position tracked by this wrapper (the underlying stream is not queried). */
    @Override
    public long getPos() throws IOException {
      return pos;
    }

    @Override
    public boolean seekToNewSource(long targetPos) throws IOException {
      boolean res;
      try {
        res = in.seekToNewSource(targetPos);
      } catch (FileNotFoundException e) {
        res = tryOpen().seekToNewSource(targetPos);
      }
      if (res) {
        pos = targetPos;
      }
      return res;
    }

    @Override
    public void close() throws IOException {
      in.close();
    }

    /** No-op: mark/reset is not supported, see {@link #markSupported()}. */
    @Override
    public synchronized void mark(int readlimit) {
    }

    @Override
    public synchronized void reset() throws IOException {
      throw new IOException("mark/reset not supported");
    }

    @Override
    public boolean markSupported() {
      return false;
    }

    @Override
    public void unbuffer() {
      if (in == null) {
        return;
      }
      in.unbuffer();
    }

    /**
     * Try to open the file from one of the available locations.
     * <p>
     * The location currently in use is skipped, so after a {@link FileNotFoundException} on the
     * current stream only the other locations are tried. The stream field and the current path
     * are updated only once a location has been opened and positioned successfully.
     * @return FSDataInputStream stream of the opened file link
     * @throws IOException on unexpected error, or file not found.
     */
    private FSDataInputStream tryOpen() throws IOException {
      IOException exception = null;
      for (Path path : fileLink.getLocations()) {
        if (path.equals(currentPath)) {
          continue;
        }
        try {
          FSDataInputStream candidate = openAndPosition(path);
          if (LOG.isTraceEnabled()) {
            if (currentPath == null) {
              LOG.debug("link open path={}", path);
            } else {
              LOG.trace("link switch from path={} to path={}", currentPath, path);
            }
          }
          // NOTE(review): the stream being replaced is not closed here; confirm whether that is
          // intentional (e.g. because other callers may still hold it) before adding a close.
          in = candidate;
          currentPath = path;
          return in;
        } catch (FileNotFoundException | AccessControlException | RemoteException e) {
          exception = FileLink.handleAccessLocationException(fileLink, e, exception);
        }
      }
      if (exception == null) {
        // No location was even attempted (no locations, or the only one is the current path).
        // Throwing the null reference would surface as a NullPointerException instead.
        exception = new FileNotFoundException(fileLink.toString());
      }
      throw exception;
    }

    /**
     * Opens {@code path} and moves the new stream to the tracked position.
     * <p>
     * If positioning fails the freshly opened stream is closed before the failure propagates, so
     * it does not leak.
     * @param path location to open
     * @return the opened stream, positioned at {@link #pos}
     * @throws IOException if the open or the seek fails
     */
    private FSDataInputStream openAndPosition(Path path) throws IOException {
      FSDataInputStream candidate = fs.open(path, bufferSize);
      boolean positioned = false;
      try {
        if (pos != 0) {
          candidate.seek(pos);
        }
        assert (candidate.getPos() == pos) : "Link unable to seek to the right position=" + pos;
        positioned = true;
        return candidate;
      } finally {
        if (!positioned) {
          try {
            candidate.close();
          } catch (IOException closeFailure) {
            // Best effort only: the failure that brought us here is the one worth reporting.
            LOG.debug("Failed to close stream for path={}", path, closeFailure);
          }
        }
      }
    }

    @Override
    public void setReadahead(Long readahead) throws IOException, UnsupportedOperationException {
      in.setReadahead(readahead);
    }

    @Override
    public void setDropBehind(Boolean dropCache) throws IOException, UnsupportedOperationException {
      in.setDropBehind(dropCache);
    }

    /** Returns the location backing the stream currently in use. */
    public Path getCurrentPath() {
      return currentPath;
    }
  }

  /** Candidate locations of the linked file, in lookup order. */
  private Path[] locations = null;

  /**
   * Creates a link without locations; subclasses are expected to call
   * {@link #setLocations(Path, Path...)} from their constructor.
   */
  protected FileLink() {
    this.locations = null;
  }

  /**
   * @param originPath       Original location of the file to link
   * @param alternativePaths Alternative locations to look for the linked file
   */
  public FileLink(Path originPath, Path... alternativePaths) {
    setLocations(originPath, alternativePaths);
  }

  /**
   * @param locations locations to look for the linked file
   */
  public FileLink(final Collection<Path> locations) {
    this.locations = locations.toArray(new Path[0]);
  }

  /** Returns the locations to look for the linked file. */
  public Path[] getLocations() {
    return locations;
  }

  /**
   * Returns the simple class name followed by the list of locations, e.g.
   * {@code FileLink locations=[a, b]}. Safe to call even when the locations have not been set.
   */
  @Override
  public String toString() {
    return getClass().getSimpleName() + " locations=" + Arrays.toString(locations);
  }

  /** Returns true if the file pointed by the link exists */
  public boolean exists(final FileSystem fs) throws IOException {
    for (Path location : locations) {
      if (fs.exists(location)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns the path of the first available link.
   * @throws FileNotFoundException if none of the locations exists
   */
  public Path getAvailablePath(FileSystem fs) throws IOException {
    for (Path location : locations) {
      if (fs.exists(location)) {
        return location;
      }
    }
    throw new FileNotFoundException(toString());
  }

  /**
   * Get the FileStatus of the referenced file.
   * <p>
   * The locations are queried in order and the first status obtained is returned.
   * @param fs {@link FileSystem} on which to get the file status
   * @return FileStatus of the first location that can be accessed.
   * @throws IOException on unexpected error, or if no location can be accessed.
   */
  public FileStatus getFileStatus(FileSystem fs) throws IOException {
    IOException exception = null;
    for (Path location : locations) {
      try {
        return fs.getFileStatus(location);
      } catch (FileNotFoundException | AccessControlException e) {
        exception = handleAccessLocationException(this, e, exception);
      }
    }
    if (exception == null) {
      // Empty location list: nothing was tried, so report "not found" rather than throwing null.
      exception = new FileNotFoundException(toString());
    }
    throw exception;
  }

  /**
   * Handle exceptions which are thrown when accessing the locations of a file link
   * @param fileLink          the file link
   * @param newException      the exception caught by access the current location
   * @param previousException the previous exception caught by access the other locations
   * @return return AccessControlException if access one of the locations caught, otherwise return
   *         FileNotFoundException. The AccessControlException is thrown if user scan snapshot
   *         feature is enabled, see
   *         {@link org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclController}.
   * @throws IOException if the exception is neither AccessControlException nor
   *                     FileNotFoundException
   */
  private static IOException handleAccessLocationException(FileLink fileLink,
    IOException newException, IOException previousException) throws IOException {
    if (newException instanceof RemoteException) {
      newException = ((RemoteException) newException)
        .unwrapRemoteException(FileNotFoundException.class, AccessControlException.class);
    }
    if (newException instanceof FileNotFoundException) {
      // Try another file location
      if (previousException == null) {
        previousException = new FileNotFoundException(fileLink.toString());
      }
    } else if (newException instanceof AccessControlException) {
      // Try another file location
      previousException = newException;
    } else {
      throw newException;
    }
    return previousException;
  }

  /**
   * Open the FileLink for read.
   * <p>
   * It uses a wrapper of FSDataInputStream that is agnostic to the location of the file, even if
   * the file switches between locations.
   * @param fs {@link FileSystem} on which to open the FileLink
   * @return InputStream for reading the file link.
   * @throws IOException on unexpected error.
   */
  public FSDataInputStream open(final FileSystem fs) throws IOException {
    return new FSDataInputStream(new FileLinkInputStream(fs, this));
  }

  /**
   * Open the FileLink for read.
   * <p>
   * It uses a wrapper of FSDataInputStream that is agnostic to the location of the file, even if
   * the file switches between locations.
   * @param fs         {@link FileSystem} on which to open the FileLink
   * @param bufferSize the size of the buffer to be used.
   * @return InputStream for reading the file link.
   * @throws IOException on unexpected error.
   */
  public FSDataInputStream open(final FileSystem fs, int bufferSize) throws IOException {
    return new FSDataInputStream(new FileLinkInputStream(fs, this, bufferSize));
  }

  /**
   * If the passed FSDataInputStream is backed by a FileLink, returns the underlying InputStream for
   * the resolved link target. Otherwise, returns null.
   */
  public static FSDataInputStream getUnderlyingFileLinkInputStream(FSDataInputStream stream) {
    if (stream.getWrappedStream() instanceof FileLinkInputStream) {
      return ((FileLinkInputStream) stream.getWrappedStream()).getUnderlyingInputStream();
    }
    return null;
  }

  /**
   * NOTE: This method must be used only in the constructor! It creates a List with the specified
   * locations for the link. {@code null} entries are skipped.
   */
  protected void setLocations(Path originPath, Path... alternativePaths) {
    assert this.locations == null : "Link locations already set";

    List<Path> paths = new ArrayList<>(alternativePaths.length + 1);
    if (originPath != null) {
      paths.add(originPath);
    }

    for (Path alternativePath : alternativePaths) {
      if (alternativePath != null) {
        paths.add(alternativePath);
      }
    }
    this.locations = paths.toArray(new Path[0]);
  }

  /**
   * Get the directory to store the link back references
   * <p>
   * To simplify the reference count process, during the FileLink creation a back-reference is added
   * to the back-reference directory of the specified file.
   * @param storeDir Root directory for the link reference folder
   * @param fileName File Name with links
   * @return Path for the link back references.
   */
  public static Path getBackReferencesDir(final Path storeDir, final String fileName) {
    return new Path(storeDir, BACK_REFERENCES_DIRECTORY_PREFIX + fileName);
  }

  /**
   * Get the referenced file name from the reference link directory path.
   * @param dirPath Link references directory path
   * @return Name of the file referenced
   */
  public static String getBackReferenceFileName(final Path dirPath) {
    return dirPath.getName().substring(BACK_REFERENCES_DIRECTORY_PREFIX.length());
  }

  /**
   * Checks if the specified directory path is a back reference links folder.
   * @param dirPath Directory path to verify
   * @return True if the specified directory is a link references folder
   */
  public static boolean isBackReferencesDir(final Path dirPath) {
    if (dirPath == null) {
      return false;
    }
    return dirPath.getName().startsWith(BACK_REFERENCES_DIRECTORY_PREFIX);
  }

  @Override
  public boolean equals(Object obj) {
    if (obj == null) {
      return false;
    }
    // Assumes that the ordering of locations between objects are the same. This is true for the
    // current subclasses already (HFileLink, WALLink). Otherwise, we may have to sort the locations
    // or keep them presorted
    if (this.getClass().equals(obj.getClass())) {
      return Arrays.equals(this.locations, ((FileLink) obj).locations);
    }

    return false;
  }

  @Override
  public int hashCode() {
    return Arrays.hashCode(locations);
  }
}