View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.io;
20  
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Collection;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PositionedReadable;
import org.apache.hadoop.fs.Seekable;
import org.apache.hadoop.hbase.util.FSUtils;
37  
38  /**
39   * The FileLink is a sort of hardlink, that allows access to a file given a set of locations.
40   *
41   * <p><b>The Problem:</b>
42   * <ul>
43   *  <li>
 *    HDFS doesn't have support for hardlinks, and this makes it impossible to reference
 *    the same data blocks using different names.
46   *  </li>
47   *  <li>
 *    HBase stores files in one location (e.g. table/region/family/) and when a file is not
 *    needed anymore (e.g. compaction, region deletion, ...) moves it to an archive directory.
50   *  </li>
51   * </ul>
52   * If we want to create a reference to a file, we need to remember that it can be in its
53   * original location or in the archive folder.
54   * The FileLink class tries to abstract this concept and given a set of locations
55   * it is able to switch between them making this operation transparent for the user.
56   * {@link HFileLink} is a more concrete implementation of the {@code FileLink}.
57   *
58   * <p><b>Back-references:</b>
59   * To help the {@link org.apache.hadoop.hbase.master.cleaner.CleanerChore} to keep track of
60   * the links to a particular file, during the {@code FileLink} creation, a new file is placed
61   * inside a back-reference directory. There's one back-reference directory for each file that
62   * has links, and in the directory there's one file per link.
63   *
64   * <p>HFileLink Example
65   * <ul>
66   *  <li>
67   *      /hbase/table/region-x/cf/file-k
68   *      (Original File)
69   *  </li>
70   *  <li>
71   *      /hbase/table-cloned/region-y/cf/file-k.region-x.table
72   *     (HFileLink to the original file)
73   *  </li>
74   *  <li>
75   *      /hbase/table-2nd-cloned/region-z/cf/file-k.region-x.table
76   *      (HFileLink to the original file)
77   *  </li>
78   *  <li>
79   *      /hbase/.archive/table/region-x/.links-file-k/region-y.table-cloned
80   *      (Back-reference to the link in table-cloned)
81   *  </li>
82   *  <li>
83   *      /hbase/.archive/table/region-x/.links-file-k/region-z.table-2nd-cloned
84   *      (Back-reference to the link in table-2nd-cloned)
85   *  </li>
86   * </ul>
87   */
88  @InterfaceAudience.Private
89  public class FileLink {
90    private static final Log LOG = LogFactory.getLog(FileLink.class);
91  
  /** Define the Back-reference directory name prefix: {@code .links-<hfile>/} */
93    public static final String BACK_REFERENCES_DIRECTORY_PREFIX = ".links-";
94  
95    /**
96     * FileLink InputStream that handles the switch between the original path
97     * and the alternative locations, when the file is moved.
98     */
99    private static class FileLinkInputStream extends InputStream
100       implements Seekable, PositionedReadable {
101     private FSDataInputStream in = null;
102     private Path currentPath = null;
103     private long pos = 0;
104 
105     private final FileLink fileLink;
106     private final int bufferSize;
107     private final FileSystem fs;
108 
109     public FileLinkInputStream(final FileSystem fs, final FileLink fileLink)
110         throws IOException {
111       this(fs, fileLink, FSUtils.getDefaultBufferSize(fs));
112     }
113 
114     public FileLinkInputStream(final FileSystem fs, final FileLink fileLink, int bufferSize)
115         throws IOException {
116       this.bufferSize = bufferSize;
117       this.fileLink = fileLink;
118       this.fs = fs;
119 
120       this.in = tryOpen();
121     }
122 
123     @Override
124     public int read() throws IOException {
125       int res;
126       try {
127         res = in.read();
128       } catch (FileNotFoundException e) {
129         res = tryOpen().read();
130       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
131         res = tryOpen().read();
132       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
133         res = tryOpen().read();
134       }
135       if (res > 0) pos += 1;
136       return res;
137     }
138 
139     @Override
140     public int read(byte b[]) throws IOException {
141        return read(b, 0, b.length);
142     }
143 
144     @Override
145     public int read(byte b[], int off, int len) throws IOException {
146       int n;
147       try {
148         n = in.read(b, off, len);
149       } catch (FileNotFoundException e) {
150         n = tryOpen().read(b, off, len);
151       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
152         n = tryOpen().read(b, off, len);
153       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
154         n = tryOpen().read(b, off, len);
155       }
156       if (n > 0) pos += n;
157       assert(in.getPos() == pos);
158       return n;
159     }
160 
161     @Override
162     public int read(long position, byte[] buffer, int offset, int length) throws IOException {
163       int n;
164       try {
165         n = in.read(position, buffer, offset, length);
166       } catch (FileNotFoundException e) {
167         n = tryOpen().read(position, buffer, offset, length);
168       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
169         n = tryOpen().read(position, buffer, offset, length);
170       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
171         n = tryOpen().read(position, buffer, offset, length);
172       }
173       return n;
174     }
175 
176     @Override
177     public void readFully(long position, byte[] buffer) throws IOException {
178       readFully(position, buffer, 0, buffer.length);
179     }
180 
181     @Override
182     public void readFully(long position, byte[] buffer, int offset, int length) throws IOException {
183       try {
184         in.readFully(position, buffer, offset, length);
185       } catch (FileNotFoundException e) {
186         tryOpen().readFully(position, buffer, offset, length);
187       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
188         tryOpen().readFully(position, buffer, offset, length);
189       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
190         tryOpen().readFully(position, buffer, offset, length);
191       }
192     }
193 
194     @Override
195     public long skip(long n) throws IOException {
196       long skipped;
197 
198       try {
199         skipped = in.skip(n);
200       } catch (FileNotFoundException e) {
201         skipped = tryOpen().skip(n);
202       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
203         skipped = tryOpen().skip(n);
204       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
205         skipped = tryOpen().skip(n);
206       }
207 
208       if (skipped > 0) pos += skipped;
209       return skipped;
210     }
211 
212     @Override
213     public int available() throws IOException {
214       try {
215         return in.available();
216       } catch (FileNotFoundException e) {
217         return tryOpen().available();
218       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
219         return tryOpen().available();
220       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
221         return tryOpen().available();
222       }
223     }
224 
225     @Override
226     public void seek(long pos) throws IOException {
227       try {
228         in.seek(pos);
229       } catch (FileNotFoundException e) {
230         tryOpen().seek(pos);
231       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
232         tryOpen().seek(pos);
233       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
234         tryOpen().seek(pos);
235       }
236       this.pos = pos;
237     }
238 
239     @Override
240     public long getPos() throws IOException {
241       return pos;
242     }
243 
244     @Override
245     public boolean seekToNewSource(long targetPos) throws IOException {
246       boolean res;
247       try {
248         res = in.seekToNewSource(targetPos);
249       } catch (FileNotFoundException e) {
250         res = tryOpen().seekToNewSource(targetPos);
251       } catch (NullPointerException e) { // HDFS 1.x - DFSInputStream.getBlockAt()
252         res = tryOpen().seekToNewSource(targetPos);
253       } catch (AssertionError e) { // assert in HDFS 1.x - DFSInputStream.getBlockAt()
254         res = tryOpen().seekToNewSource(targetPos);
255       }
256       if (res) pos = targetPos;
257       return res;
258     }
259 
260     @Override
261     public void close() throws IOException {
262       in.close();
263     }
264 
265     @Override
266     public synchronized void mark(int readlimit) {
267     }
268 
269     @Override
270     public synchronized void reset() throws IOException {
271       throw new IOException("mark/reset not supported");
272     }
273 
274     @Override
275     public boolean markSupported() {
276       return false;
277     }
278 
279     /**
280      * Try to open the file from one of the available locations.
281      *
282      * @return FSDataInputStream stream of the opened file link
283      * @throws IOException on unexpected error, or file not found.
284      */
285     private FSDataInputStream tryOpen() throws IOException {
286       for (Path path: fileLink.getLocations()) {
287         if (path.equals(currentPath)) continue;
288         try {
289           in = fs.open(path, bufferSize);
290           if (pos != 0) in.seek(pos);
291           assert(in.getPos() == pos) : "Link unable to seek to the right position=" + pos;
292           if (LOG.isTraceEnabled()) {
293             if (currentPath != null) {
294               LOG.debug("link open path=" + path);
295             } else {
296               LOG.trace("link switch from path=" + currentPath + " to path=" + path);
297             }
298           }
299           currentPath = path;
300           return(in);
301         } catch (FileNotFoundException e) {
302           // Try another file location
303         }
304       }
305       throw new FileNotFoundException("Unable to open link: " + fileLink);
306     }
307   }
308 
309   private Path[] locations = null;
310 
311   protected FileLink() {
312     this.locations = null;
313   }
314 
315   /**
316    * @param originPath Original location of the file to link
317    * @param alternativePaths Alternative locations to look for the linked file
318    */
319   public FileLink(Path originPath, Path... alternativePaths) {
320     setLocations(originPath, alternativePaths);
321   }
322 
323   /**
324    * @param locations locations to look for the linked file
325    */
326   public FileLink(final Collection<Path> locations) {
327     this.locations = locations.toArray(new Path[locations.size()]);
328   }
329 
330   /**
331    * @return the locations to look for the linked file.
332    */
333   public Path[] getLocations() {
334     return locations;
335   }
336 
337   public String toString() {
338     StringBuilder str = new StringBuilder(getClass().getName());
339     str.append(" locations=[");
340     for (int i = 0; i < locations.length; ++i) {
341       if (i > 0) str.append(", ");
342       str.append(locations[i].toString());
343     }
344     str.append("]");
345     return str.toString();
346   }
347 
348   /**
349    * @return true if the file pointed by the link exists
350    */
351   public boolean exists(final FileSystem fs) throws IOException {
352     for (int i = 0; i < locations.length; ++i) {
353       if (fs.exists(locations[i])) {
354         return true;
355       }
356     }
357     return false;
358   }
359 
360   /**
361    * @return the path of the first available link.
362    */
363   public Path getAvailablePath(FileSystem fs) throws IOException {
364     for (int i = 0; i < locations.length; ++i) {
365       if (fs.exists(locations[i])) {
366         return locations[i];
367       }
368     }
369     throw new FileNotFoundException("Unable to open link: " + this);
370   }
371 
372   /**
373    * Get the FileStatus of the referenced file.
374    *
375    * @param fs {@link FileSystem} on which to get the file status
376    * @return InputStream for the hfile link.
377    * @throws IOException on unexpected error.
378    */
379   public FileStatus getFileStatus(FileSystem fs) throws IOException {
380     for (int i = 0; i < locations.length; ++i) {
381       try {
382         return fs.getFileStatus(locations[i]);
383       } catch (FileNotFoundException e) {
384         // Try another file location
385       }
386     }
387     throw new FileNotFoundException("Unable to open link: " + this);
388   }
389 
390   /**
391    * Open the FileLink for read.
392    * <p>
393    * It uses a wrapper of FSDataInputStream that is agnostic to the location
394    * of the file, even if the file switches between locations.
395    *
396    * @param fs {@link FileSystem} on which to open the FileLink
397    * @return InputStream for reading the file link.
398    * @throws IOException on unexpected error.
399    */
400   public FSDataInputStream open(final FileSystem fs) throws IOException {
401     return new FSDataInputStream(new FileLinkInputStream(fs, this));
402   }
403 
404   /**
405    * Open the FileLink for read.
406    * <p>
407    * It uses a wrapper of FSDataInputStream that is agnostic to the location
408    * of the file, even if the file switches between locations.
409    *
410    * @param fs {@link FileSystem} on which to open the FileLink
411    * @param bufferSize the size of the buffer to be used.
412    * @return InputStream for reading the file link.
413    * @throws IOException on unexpected error.
414    */
415   public FSDataInputStream open(final FileSystem fs, int bufferSize) throws IOException {
416     return new FSDataInputStream(new FileLinkInputStream(fs, this, bufferSize));
417   }
418 
419   /**
420    * NOTE: This method must be used only in the constructor!
421    * It creates a List with the specified locations for the link.
422    */
423   protected void setLocations(Path originPath, Path... alternativePaths) {
424     assert this.locations == null : "Link locations already set";
425     this.locations = new Path[1 + alternativePaths.length];
426     this.locations[0] = originPath;
427     System.arraycopy(alternativePaths, 0, this.locations, 1, alternativePaths.length);
428   }
429 
430   /**
431    * Get the directory to store the link back references
432    *
433    * <p>To simplify the reference count process, during the FileLink creation
434    * a back-reference is added to the back-reference directory of the specified file.
435    *
436    * @param storeDir Root directory for the link reference folder
437    * @param fileName File Name with links
438    * @return Path for the link back references.
439    */
440   public static Path getBackReferencesDir(final Path storeDir, final String fileName) {
441     return new Path(storeDir, BACK_REFERENCES_DIRECTORY_PREFIX + fileName);
442   }
443 
444   /**
445    * Get the referenced file name from the reference link directory path.
446    *
447    * @param dirPath Link references directory path
448    * @return Name of the file referenced
449    */
450   public static String getBackReferenceFileName(final Path dirPath) {
451     return dirPath.getName().substring(BACK_REFERENCES_DIRECTORY_PREFIX.length());
452   }
453 
454   /**
455    * Checks if the specified directory path is a back reference links folder.
456    *
457    * @param dirPath Directory path to verify
458    * @return True if the specified directory is a link references folder
459    */
460   public static boolean isBackReferencesDir(final Path dirPath) {
461     if (dirPath == null) return false;
462     return dirPath.getName().startsWith(BACK_REFERENCES_DIRECTORY_PREFIX);
463   }
464 }
465