View Javadoc

/*
 * Copyright The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
20  
21  package org.apache.hadoop.hbase.fs;
22  
23  import java.io.IOException;
24  import java.net.URI;
25  
26  import org.apache.hadoop.conf.Configuration;
27  import org.apache.hadoop.fs.FileSystem;
28  import org.apache.hadoop.fs.FilterFileSystem;
29  import org.apache.hadoop.fs.FSDataInputStream;
30  import org.apache.hadoop.fs.FSDataOutputStream;
31  import org.apache.hadoop.fs.LocalFileSystem;
32  import org.apache.hadoop.fs.Path;
33  import org.apache.hadoop.hbase.util.Methods;
34  import org.apache.hadoop.util.ReflectionUtils;
35  import org.apache.hadoop.util.Progressable;
36  
37  /**
38   * An encapsulation for the FileSystem object that hbase uses to access
39   * data. This class allows the flexibility of using  
40   * separate filesystem objects for reading and writing hfiles and hlogs.
41   * In future, if we want to make hlogs be in a different filesystem,
42   * this is the place to make it happen.
43   */
44  public class HFileSystem extends FilterFileSystem {
45  
46    private final FileSystem noChecksumFs;   // read hfile data from storage
47    private final boolean useHBaseChecksum;
48  
49    /**
50     * Create a FileSystem object for HBase regionservers.
51     * @param conf The configuration to be used for the filesystem
52     * @param useHBaseChecksums if true, then use
53     *        checksum verfication in hbase, otherwise
54     *        delegate checksum verification to the FileSystem.
55     */
56    public HFileSystem(Configuration conf, boolean useHBaseChecksum)
57      throws IOException {
58  
59      // Create the default filesystem with checksum verification switched on.
60      // By default, any operation to this FilterFileSystem occurs on
61      // the underlying filesystem that has checksums switched on.
62      this.fs = FileSystem.get(conf);
63      this.useHBaseChecksum = useHBaseChecksum;
64      
65      fs.initialize(getDefaultUri(conf), conf);
66  
67      // If hbase checksum verification is switched on, then create a new
68      // filesystem object that has cksum verification turned off.
69      // We will avoid verifying checksums in the fs client, instead do it
70      // inside of hbase.
71      // If this is the local file system hadoop has a bug where seeks
72      // do not go to the correct location if setVerifyChecksum(false) is called.
73      // This manifests itself in that incorrect data is read and HFileBlocks won't be able to read
74      // their header magic numbers. See HBASE-5885
75      if (useHBaseChecksum && !(fs instanceof LocalFileSystem)) {
76        conf = new Configuration(conf);
77        conf.setBoolean("dfs.client.read.shortcircuit.skip.checksum", true);
78        this.noChecksumFs = newInstanceFileSystem(conf);
79        this.noChecksumFs.setVerifyChecksum(false);
80      } else {
81        this.noChecksumFs = fs;
82      }
83    }
84  
85    /**
86     * Wrap a FileSystem object within a HFileSystem. The noChecksumFs and
87     * writefs are both set to be the same specified fs. 
88     * Do not verify hbase-checksums while reading data from filesystem.
89     * @param fs Set the noChecksumFs and writeFs to this specified filesystem.
90     */
91    public HFileSystem(FileSystem fs) {
92      this.fs = fs;
93      this.noChecksumFs = fs;
94      this.useHBaseChecksum = false;
95    }
96  
97    /**
98     * Returns the filesystem that is specially setup for 
99     * doing reads from storage. This object avoids doing 
100    * checksum verifications for reads.
101    * @return The FileSystem object that can be used to read data
102    *         from files.
103    */
104   public FileSystem getNoChecksumFs() {
105     return noChecksumFs;
106   }
107 
108   /**
109    * Returns the underlying filesystem
110    * @return The underlying FileSystem for this FilterFileSystem object.
111    */
112   public FileSystem getBackingFs() throws IOException {
113     return fs;
114   }
115 
116   /**
117    * Are we verifying checksums in HBase?
118    * @return True, if hbase is configured to verify checksums,
119    *         otherwise false.
120    */
121   public boolean useHBaseChecksum() {
122     return useHBaseChecksum;
123   }
124 
125   /**
126    * Close this filesystem object
127    */
128   @Override
129   public void close() throws IOException {
130     super.close();
131     if (this.noChecksumFs != fs) {
132       this.noChecksumFs.close();
133     }
134   }
135 
136  /**
137    * Returns a brand new instance of the FileSystem. It does not use
138    * the FileSystem.Cache. In newer versions of HDFS, we can directly
139    * invoke FileSystem.newInstance(Configuration).
140    * 
141    * @param conf Configuration
142    * @return A new instance of the filesystem
143    */
144   private static FileSystem newInstanceFileSystem(Configuration conf)
145     throws IOException {
146     URI uri = FileSystem.getDefaultUri(conf);
147     FileSystem fs = null;
148     Class<?> clazz = conf.getClass("fs." + uri.getScheme() + ".impl", null);
149     if (clazz != null) {
150       // This will be true for Hadoop 1.0, or 0.20.
151       fs = (FileSystem)ReflectionUtils.newInstance(clazz, conf);
152       fs.initialize(uri, conf);
153     } else {
154       // For Hadoop 2.0, we have to go through FileSystem for the filesystem
155       // implementation to be loaded by the service loader in case it has not
156       // been loaded yet.
157       Configuration clone = new Configuration(conf);
158       clone.setBoolean("fs." + uri.getScheme() + ".impl.disable.cache", true);
159       fs = FileSystem.get(uri, clone);
160     }
161     if (fs == null) {
162       throw new IOException("No FileSystem for scheme: " + uri.getScheme());
163     }
164     return fs;
165   }
166 
167   /**
168    * Create a new HFileSystem object, similar to FileSystem.get().
169    * This returns a filesystem object that avoids checksum
170    * verification in the filesystem for hfileblock-reads.
171    * For these blocks, checksum verification is done by HBase.
172    */
173   static public FileSystem get(Configuration conf) throws IOException {
174     return new HFileSystem(conf, true);
175   }
176 
177   /**
178    * Wrap a LocalFileSystem within a HFileSystem.
179    */
180   static public FileSystem getLocalFs(Configuration conf) throws IOException {
181     return new HFileSystem(FileSystem.getLocal(conf));
182   }
183 
184   /**
185    * The org.apache.hadoop.fs.FilterFileSystem does not yet support 
186    * createNonRecursive. This is a hadoop bug and when it is fixed in Hadoop,
187    * this definition will go away.
188    */
189   public FSDataOutputStream createNonRecursive(Path f,
190       boolean overwrite,
191       int bufferSize, short replication, long blockSize,
192       Progressable progress) throws IOException {
193     return fs.createNonRecursive(f, overwrite, bufferSize, replication,
194                                  blockSize, progress);
195   }
196 }