001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.io;
019
020import java.io.FileNotFoundException;
021import java.io.IOException;
022import java.io.InputStream;
023import java.util.ArrayList;
024import java.util.Arrays;
025import java.util.Collection;
026import java.util.List;
027import org.apache.hadoop.fs.CanSetDropBehind;
028import org.apache.hadoop.fs.CanSetReadahead;
029import org.apache.hadoop.fs.CanUnbuffer;
030import org.apache.hadoop.fs.FSDataInputStream;
031import org.apache.hadoop.fs.FileStatus;
032import org.apache.hadoop.fs.FileSystem;
033import org.apache.hadoop.fs.Path;
034import org.apache.hadoop.fs.PositionedReadable;
035import org.apache.hadoop.fs.Seekable;
036import org.apache.hadoop.hbase.util.CommonFSUtils;
037import org.apache.hadoop.ipc.RemoteException;
038import org.apache.hadoop.security.AccessControlException;
039import org.apache.yetus.audience.InterfaceAudience;
040import org.slf4j.Logger;
041import org.slf4j.LoggerFactory;
042
043/**
044 * The FileLink is a sort of hardlink, that allows access to a file given a set of locations.
045 * <p>
046 * <b>The Problem:</b>
047 * <ul>
048 * <li>HDFS doesn't have support for hardlinks, and this make impossible to referencing the same
049 * data blocks using different names.</li>
050 * <li>HBase store files in one location (e.g. table/region/family/) and when the file is not needed
051 * anymore (e.g. compaction, region deletion, ...) moves it to an archive directory.</li>
052 * </ul>
053 * If we want to create a reference to a file, we need to remember that it can be in its original
054 * location or in the archive folder. The FileLink class tries to abstract this concept and given a
055 * set of locations it is able to switch between them making this operation transparent for the
056 * user. {@link HFileLink} is a more concrete implementation of the {@code FileLink}.
057 * <p>
058 * <b>Back-references:</b> To help the {@link org.apache.hadoop.hbase.master.cleaner.CleanerChore}
059 * to keep track of the links to a particular file, during the {@code FileLink} creation, a new file
060 * is placed inside a back-reference directory. There's one back-reference directory for each file
061 * that has links, and in the directory there's one file per link.
062 * <p>
063 * HFileLink Example
064 * <ul>
065 * <li>/hbase/table/region-x/cf/file-k (Original File)</li>
066 * <li>/hbase/table-cloned/region-y/cf/file-k.region-x.table (HFileLink to the original file)</li>
067 * <li>/hbase/table-2nd-cloned/region-z/cf/file-k.region-x.table (HFileLink to the original file)
068 * </li>
069 * <li>/hbase/.archive/table/region-x/.links-file-k/region-y.table-cloned (Back-reference to the
070 * link in table-cloned)</li>
071 * <li>/hbase/.archive/table/region-x/.links-file-k/region-z.table-2nd-cloned (Back-reference to the
072 * link in table-2nd-cloned)</li>
073 * </ul>
074 */
075@InterfaceAudience.Private
076public class FileLink {
077  private static final Logger LOG = LoggerFactory.getLogger(FileLink.class);
078
079  /** Define the Back-reference directory name prefix: .links-&lt;hfile&gt;/ */
080  public static final String BACK_REFERENCES_DIRECTORY_PREFIX = ".links-";
081
082  /**
083   * FileLink InputStream that handles the switch between the original path and the alternative
084   * locations, when the file is moved.
085   */
086  protected static class FileLinkInputStream extends InputStream
087    implements Seekable, PositionedReadable, CanSetDropBehind, CanSetReadahead, CanUnbuffer {
088    private FSDataInputStream in = null;
089    private Path currentPath = null;
090    private long pos = 0;
091
092    private final FileLink fileLink;
093    private final int bufferSize;
094    private final FileSystem fs;
095
096    public FileLinkInputStream(final FileSystem fs, final FileLink fileLink) throws IOException {
097      this(fs, fileLink, CommonFSUtils.getDefaultBufferSize(fs));
098    }
099
100    public FileLinkInputStream(final FileSystem fs, final FileLink fileLink, int bufferSize)
101      throws IOException {
102      this.bufferSize = bufferSize;
103      this.fileLink = fileLink;
104      this.fs = fs;
105
106      this.in = tryOpen();
107    }
108
109    private FSDataInputStream getUnderlyingInputStream() {
110      return in;
111    }
112
113    @Override
114    public int read() throws IOException {
115      int res;
116      try {
117        res = in.read();
118      } catch (FileNotFoundException e) {
119        res = tryOpen().read();
120      }
121      if (res > 0) {
122        pos += 1;
123      }
124      return res;
125    }
126
127    @Override
128    public int read(byte[] b) throws IOException {
129      return read(b, 0, b.length);
130    }
131
132    @Override
133    public int read(byte[] b, int off, int len) throws IOException {
134      int n;
135      try {
136        n = in.read(b, off, len);
137      } catch (FileNotFoundException e) {
138        n = tryOpen().read(b, off, len);
139      }
140      if (n > 0) {
141        pos += n;
142      }
143      assert (in.getPos() == pos);
144      return n;
145    }
146
147    @Override
148    public int read(long position, byte[] buffer, int offset, int length) throws IOException {
149      int n;
150      try {
151        n = in.read(position, buffer, offset, length);
152      } catch (FileNotFoundException e) {
153        n = tryOpen().read(position, buffer, offset, length);
154      }
155      return n;
156    }
157
158    @Override
159    public void readFully(long position, byte[] buffer) throws IOException {
160      readFully(position, buffer, 0, buffer.length);
161    }
162
163    @Override
164    public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
165      try {
166        in.readFully(position, buffer, offset, length);
167      } catch (FileNotFoundException e) {
168        tryOpen().readFully(position, buffer, offset, length);
169      }
170    }
171
172    @Override
173    public long skip(long n) throws IOException {
174      long skipped;
175
176      try {
177        skipped = in.skip(n);
178      } catch (FileNotFoundException e) {
179        skipped = tryOpen().skip(n);
180      }
181
182      if (skipped > 0) {
183        pos += skipped;
184      }
185      return skipped;
186    }
187
188    @Override
189    public int available() throws IOException {
190      try {
191        return in.available();
192      } catch (FileNotFoundException e) {
193        return tryOpen().available();
194      }
195    }
196
197    @Override
198    public void seek(long pos) throws IOException {
199      try {
200        in.seek(pos);
201      } catch (FileNotFoundException e) {
202        tryOpen().seek(pos);
203      }
204      this.pos = pos;
205    }
206
207    @Override
208    public long getPos() throws IOException {
209      return pos;
210    }
211
212    @Override
213    public boolean seekToNewSource(long targetPos) throws IOException {
214      boolean res;
215      try {
216        res = in.seekToNewSource(targetPos);
217      } catch (FileNotFoundException e) {
218        res = tryOpen().seekToNewSource(targetPos);
219      }
220      if (res) pos = targetPos;
221      return res;
222    }
223
224    @Override
225    public void close() throws IOException {
226      in.close();
227    }
228
229    @Override
230    public synchronized void mark(int readlimit) {
231    }
232
233    @Override
234    public synchronized void reset() throws IOException {
235      throw new IOException("mark/reset not supported");
236    }
237
238    @Override
239    public boolean markSupported() {
240      return false;
241    }
242
243    @Override
244    public void unbuffer() {
245      if (in == null) {
246        return;
247      }
248      in.unbuffer();
249    }
250
251    /**
252     * Try to open the file from one of the available locations.
253     * @return FSDataInputStream stream of the opened file link
254     * @throws IOException on unexpected error, or file not found.
255     */
256    private FSDataInputStream tryOpen() throws IOException {
257      IOException exception = null;
258      for (Path path : fileLink.getLocations()) {
259        if (path.equals(currentPath)) continue;
260        try {
261          in = fs.open(path, bufferSize);
262          if (pos != 0) in.seek(pos);
263          assert (in.getPos() == pos) : "Link unable to seek to the right position=" + pos;
264          if (LOG.isTraceEnabled()) {
265            if (currentPath == null) {
266              LOG.debug("link open path=" + path);
267            } else {
268              LOG.trace("link switch from path=" + currentPath + " to path=" + path);
269            }
270          }
271          currentPath = path;
272          return (in);
273        } catch (FileNotFoundException | AccessControlException | RemoteException e) {
274          exception = FileLink.handleAccessLocationException(fileLink, e, exception);
275        }
276      }
277      throw exception;
278    }
279
280    @Override
281    public void setReadahead(Long readahead) throws IOException, UnsupportedOperationException {
282      in.setReadahead(readahead);
283    }
284
285    @Override
286    public void setDropBehind(Boolean dropCache) throws IOException, UnsupportedOperationException {
287      in.setDropBehind(dropCache);
288    }
289
290    public Path getCurrentPath() {
291      return currentPath;
292    }
293  }
294
295  private Path[] locations = null;
296
297  protected FileLink() {
298    this.locations = null;
299  }
300
301  /**
302   * @param originPath       Original location of the file to link
303   * @param alternativePaths Alternative locations to look for the linked file
304   */
305  public FileLink(Path originPath, Path... alternativePaths) {
306    setLocations(originPath, alternativePaths);
307  }
308
309  /**
310   * @param locations locations to look for the linked file
311   */
312  public FileLink(final Collection<Path> locations) {
313    this.locations = locations.toArray(new Path[locations.size()]);
314  }
315
316  /** Returns the locations to look for the linked file. */
317  public Path[] getLocations() {
318    return locations;
319  }
320
321  @Override
322  public String toString() {
323    StringBuilder str = new StringBuilder(getClass().getSimpleName());
324    str.append(" locations=[");
325    for (int i = 0; i < locations.length; ++i) {
326      if (i > 0) str.append(", ");
327      str.append(locations[i].toString());
328    }
329    str.append("]");
330    return str.toString();
331  }
332
333  /** Returns true if the file pointed by the link exists */
334  public boolean exists(final FileSystem fs) throws IOException {
335    for (int i = 0; i < locations.length; ++i) {
336      if (fs.exists(locations[i])) {
337        return true;
338      }
339    }
340    return false;
341  }
342
343  /** Returns the path of the first available link. */
344  public Path getAvailablePath(FileSystem fs) throws IOException {
345    for (int i = 0; i < locations.length; ++i) {
346      if (fs.exists(locations[i])) {
347        return locations[i];
348      }
349    }
350    throw new FileNotFoundException(toString());
351  }
352
353  /**
354   * Get the FileStatus of the referenced file.
355   * @param fs {@link FileSystem} on which to get the file status
356   * @return InputStream for the hfile link.
357   * @throws IOException on unexpected error.
358   */
359  public FileStatus getFileStatus(FileSystem fs) throws IOException {
360    IOException exception = null;
361    for (int i = 0; i < locations.length; ++i) {
362      try {
363        return fs.getFileStatus(locations[i]);
364      } catch (FileNotFoundException | AccessControlException e) {
365        exception = handleAccessLocationException(this, e, exception);
366      }
367    }
368    throw exception;
369  }
370
371  /**
372   * Handle exceptions which are thrown when access locations of file link
373   * @param fileLink          the file link
374   * @param newException      the exception caught by access the current location
375   * @param previousException the previous exception caught by access the other locations
376   * @return return AccessControlException if access one of the locations caught, otherwise return
377   *         FileNotFoundException. The AccessControlException is threw if user scan snapshot
378   *         feature is enabled, see
379   *         {@link org.apache.hadoop.hbase.security.access.SnapshotScannerHDFSAclController}.
380   * @throws IOException if the exception is neither AccessControlException nor
381   *                     FileNotFoundException
382   */
383  private static IOException handleAccessLocationException(FileLink fileLink,
384    IOException newException, IOException previousException) throws IOException {
385    if (newException instanceof RemoteException) {
386      newException = ((RemoteException) newException)
387        .unwrapRemoteException(FileNotFoundException.class, AccessControlException.class);
388    }
389    if (newException instanceof FileNotFoundException) {
390      // Try another file location
391      if (previousException == null) {
392        previousException = new FileNotFoundException(fileLink.toString());
393      }
394    } else if (newException instanceof AccessControlException) {
395      // Try another file location
396      previousException = newException;
397    } else {
398      throw newException;
399    }
400    return previousException;
401  }
402
403  /**
404   * Open the FileLink for read.
405   * <p>
406   * It uses a wrapper of FSDataInputStream that is agnostic to the location of the file, even if
407   * the file switches between locations.
408   * @param fs {@link FileSystem} on which to open the FileLink
409   * @return InputStream for reading the file link.
410   * @throws IOException on unexpected error.
411   */
412  public FSDataInputStream open(final FileSystem fs) throws IOException {
413    return new FSDataInputStream(new FileLinkInputStream(fs, this));
414  }
415
416  /**
417   * Open the FileLink for read.
418   * <p>
419   * It uses a wrapper of FSDataInputStream that is agnostic to the location of the file, even if
420   * the file switches between locations.
421   * @param fs         {@link FileSystem} on which to open the FileLink
422   * @param bufferSize the size of the buffer to be used.
423   * @return InputStream for reading the file link.
424   * @throws IOException on unexpected error.
425   */
426  public FSDataInputStream open(final FileSystem fs, int bufferSize) throws IOException {
427    return new FSDataInputStream(new FileLinkInputStream(fs, this, bufferSize));
428  }
429
430  /**
431   * If the passed FSDataInputStream is backed by a FileLink, returns the underlying InputStream for
432   * the resolved link target. Otherwise, returns null.
433   */
434  public static FSDataInputStream getUnderlyingFileLinkInputStream(FSDataInputStream stream) {
435    if (stream.getWrappedStream() instanceof FileLinkInputStream) {
436      return ((FileLinkInputStream) stream.getWrappedStream()).getUnderlyingInputStream();
437    }
438    return null;
439  }
440
441  /**
442   * NOTE: This method must be used only in the constructor! It creates a List with the specified
443   * locations for the link.
444   */
445  protected void setLocations(Path originPath, Path... alternativePaths) {
446    assert this.locations == null : "Link locations already set";
447
448    List<Path> paths = new ArrayList<>(alternativePaths.length + 1);
449    if (originPath != null) {
450      paths.add(originPath);
451    }
452
453    for (int i = 0; i < alternativePaths.length; i++) {
454      if (alternativePaths[i] != null) {
455        paths.add(alternativePaths[i]);
456      }
457    }
458    this.locations = paths.toArray(new Path[0]);
459  }
460
461  /**
462   * Get the directory to store the link back references
463   * <p>
464   * To simplify the reference count process, during the FileLink creation a back-reference is added
465   * to the back-reference directory of the specified file.
466   * @param storeDir Root directory for the link reference folder
467   * @param fileName File Name with links
468   * @return Path for the link back references.
469   */
470  public static Path getBackReferencesDir(final Path storeDir, final String fileName) {
471    return new Path(storeDir, BACK_REFERENCES_DIRECTORY_PREFIX + fileName);
472  }
473
474  /**
475   * Get the referenced file name from the reference link directory path.
476   * @param dirPath Link references directory path
477   * @return Name of the file referenced
478   */
479  public static String getBackReferenceFileName(final Path dirPath) {
480    return dirPath.getName().substring(BACK_REFERENCES_DIRECTORY_PREFIX.length());
481  }
482
483  /**
484   * Checks if the specified directory path is a back reference links folder.
485   * @param dirPath Directory path to verify
486   * @return True if the specified directory is a link references folder
487   */
488  public static boolean isBackReferencesDir(final Path dirPath) {
489    if (dirPath == null) {
490      return false;
491    }
492    return dirPath.getName().startsWith(BACK_REFERENCES_DIRECTORY_PREFIX);
493  }
494
495  @Override
496  public boolean equals(Object obj) {
497    if (obj == null) {
498      return false;
499    }
500    // Assumes that the ordering of locations between objects are the same. This is true for the
501    // current subclasses already (HFileLink, WALLink). Otherwise, we may have to sort the locations
502    // or keep them presorted
503    if (this.getClass().equals(obj.getClass())) {
504      return Arrays.equals(this.locations, ((FileLink) obj).locations);
505    }
506
507    return false;
508  }
509
510  @Override
511  public int hashCode() {
512    return Arrays.hashCode(locations);
513  }
514}