1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.util;
19
20 import java.io.IOException;
21 import java.util.ArrayList;
22 import java.util.Arrays;
23 import java.util.Collections;
24 import java.util.HashSet;
25 import java.util.List;
26 import java.util.Set;
27 import java.util.concurrent.Callable;
28 import java.util.concurrent.ConcurrentHashMap;
29 import java.util.concurrent.ExecutionException;
30 import java.util.concurrent.ExecutorService;
31 import java.util.concurrent.Executors;
32 import java.util.concurrent.Future;
33
34 import org.apache.commons.cli.CommandLine;
35 import org.apache.commons.cli.CommandLineParser;
36 import org.apache.commons.cli.GnuParser;
37 import org.apache.commons.cli.HelpFormatter;
38 import org.apache.commons.cli.Option;
39 import org.apache.commons.cli.Options;
40 import org.apache.commons.cli.ParseException;
41 import org.apache.commons.logging.Log;
42 import org.apache.commons.logging.LogFactory;
43 import org.apache.hadoop.conf.Configured;
44 import org.apache.hadoop.fs.FSDataInputStream;
45 import org.apache.hadoop.fs.FileStatus;
46 import org.apache.hadoop.fs.FileSystem;
47 import org.apache.hadoop.fs.Path;
48 import org.apache.hadoop.hbase.HBaseConfiguration;
49 import org.apache.hadoop.hbase.HConstants;
50 import org.apache.hadoop.hbase.NamespaceDescriptor;
51 import org.apache.hadoop.hbase.io.FileLink;
52 import org.apache.hadoop.hbase.io.HFileLink;
53 import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
54 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
55 import org.apache.hadoop.util.Tool;
56 import org.apache.hadoop.util.ToolRunner;
57
58
59
60
61
62
63
64
65
66
67
68
69
70 public class HFileV1Detector extends Configured implements Tool {
71 private FileSystem fs;
72 private static final Log LOG = LogFactory.getLog(HFileV1Detector.class);
73 private static final int DEFAULT_NUM_OF_THREADS = 10;
74
75
76
77 private static final String PRE_NS_DOT_ARCHIVE = ".archive";
78
79
80
81 private static final String PRE_NS_DOT_TMP = ".tmp";
82 private int numOfThreads;
83
84
85
86 private Path targetDirPath;
87
88
89
90 private ExecutorService exec;
91
92
93
94
95 private final Set<Path> processedTables = new HashSet<Path>();
96
97
98
99 private final Set<Path> corruptedHFiles = Collections
100 .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
101
102
103
104 private final Set<Path> hFileV1Set = Collections
105 .newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
106
107 private Options options = new Options();
108
109
110
111 private Path defaultNamespace;
112
113 public HFileV1Detector() {
114 Option pathOption = new Option("p", "path", true, "Path to a table, or hbase installation");
115 pathOption.setRequired(false);
116 options.addOption(pathOption);
117 Option threadOption = new Option("n", "numberOfThreads", true,
118 "Number of threads to use while processing HFiles.");
119 threadOption.setRequired(false);
120 options.addOption(threadOption);
121 options.addOption("h", "help", false, "Help");
122 }
123
124 private boolean parseOption(String[] args) throws ParseException, IOException {
125 if (args.length == 0) {
126 return true;
127 }
128 CommandLineParser parser = new GnuParser();
129 CommandLine cmd = parser.parse(options, args);
130 if (cmd.hasOption("h")) {
131 HelpFormatter formatter = new HelpFormatter();
132 formatter.printHelp("HFileV1Detector", options, true);
133 System.out
134 .println("In case no option is provided, it processes hbase.rootdir using 10 threads.");
135 System.out.println("Example:");
136 System.out.println(" To detect any HFileV1 in a given hbase installation '/myhbase':");
137 System.out.println(" $ $HBASE_HOME/bin/hbase " + this.getClass().getName() + " -p /myhbase");
138 System.out.println();
139 return false;
140 }
141
142 if (cmd.hasOption("p")) {
143 this.targetDirPath = new Path(FSUtils.getRootDir(getConf()), cmd.getOptionValue("p"));
144 }
145 try {
146 if (cmd.hasOption("n")) {
147 int n = Integer.parseInt(cmd.getOptionValue("n"));
148 if (n < 0 || n > 100) {
149 LOG.warn("Please use a positive number <= 100 for number of threads."
150 + " Continuing with default value " + DEFAULT_NUM_OF_THREADS);
151 return true;
152 }
153 this.numOfThreads = n;
154 }
155 } catch (NumberFormatException nfe) {
156 LOG.error("Please select a valid number for threads");
157 return false;
158 }
159 return true;
160 }
161
162
163
164
165
166
167
168
169 @Override
170 public int run(String args[]) throws IOException, ParseException {
171 FSUtils.setFsDefault(getConf(), new Path(FSUtils.getRootDir(getConf()).toUri()));
172 fs = FileSystem.get(getConf());
173 numOfThreads = DEFAULT_NUM_OF_THREADS;
174 targetDirPath = FSUtils.getRootDir(getConf());
175 if (!parseOption(args)) {
176 System.exit(-1);
177 }
178 this.exec = Executors.newFixedThreadPool(numOfThreads);
179 try {
180 return processResult(checkForV1Files(targetDirPath));
181 } catch (Exception e) {
182 LOG.error(e);
183 } finally {
184 exec.shutdown();
185 fs.close();
186 }
187 return -1;
188 }
189
190 private void setDefaultNamespaceDir() throws IOException {
191 Path dataDir = new Path(FSUtils.getRootDir(getConf()), HConstants.BASE_NAMESPACE_DIR);
192 defaultNamespace = new Path(dataDir, NamespaceDescriptor.DEFAULT_NAMESPACE_NAME_STR);
193 }
194
195 private int processResult(Set<Path> regionsWithHFileV1) {
196 LOG.info("Result: \n");
197 printSet(processedTables, "Tables Processed: ");
198
199 int count = hFileV1Set.size();
200 LOG.info("Count of HFileV1: " + count);
201 if (count > 0) printSet(hFileV1Set, "HFileV1:");
202
203 count = corruptedHFiles.size();
204 LOG.info("Count of corrupted files: " + count);
205 if (count > 0) printSet(corruptedHFiles, "Corrupted Files: ");
206
207 count = regionsWithHFileV1.size();
208 LOG.info("Count of Regions with HFileV1: " + count);
209 if (count > 0) printSet(regionsWithHFileV1, "Regions to Major Compact: ");
210
211 return (hFileV1Set.isEmpty() && corruptedHFiles.isEmpty()) ? 0 : 1;
212 }
213
214 private void printSet(Set<Path> result, String msg) {
215 LOG.info(msg);
216 for (Path p : result) {
217 LOG.info(p);
218 }
219 }
220
221
222
223
224
225
226
227 private Set<Path> checkForV1Files(Path targetDir) throws IOException {
228 LOG.info("Target dir is: " + targetDir);
229 if (!fs.exists(targetDir)) {
230 throw new IOException("The given path does not exist: " + targetDir);
231 }
232 if (isTableDir(fs, targetDir)) {
233 processedTables.add(targetDir);
234 return processTable(targetDir);
235 }
236 Set<Path> regionsWithHFileV1 = new HashSet<Path>();
237 FileStatus[] fsStats = fs.listStatus(targetDir);
238 for (FileStatus fsStat : fsStats) {
239 if (isTableDir(fs, fsStat.getPath()) && !isRootTable(fsStat.getPath())) {
240 processedTables.add(fsStat.getPath());
241
242 regionsWithHFileV1.addAll(processTable(fsStat.getPath()));
243 } else {
244 LOG.info("Ignoring path: " + fsStat.getPath());
245 }
246 }
247 return regionsWithHFileV1;
248 }
249
250
251
252
253
254 private boolean isRootTable(Path path) {
255 if (path != null && path.toString().endsWith("-ROOT-")) return true;
256 return false;
257 }
258
259
260
261
262
263
264
265 private Set<Path> processTable(Path tableDir) throws IOException {
266
267 LOG.debug("processing table: " + tableDir);
268 List<Future<Path>> regionLevelResults = new ArrayList<Future<Path>>();
269 Set<Path> regionsWithHFileV1 = new HashSet<Path>();
270
271 FileStatus[] fsStats = fs.listStatus(tableDir);
272 for (FileStatus fsStat : fsStats) {
273
274 if (isRegionDir(fs, fsStat.getPath())) {
275 regionLevelResults.add(processRegion(fsStat.getPath()));
276 }
277 }
278 for (Future<Path> f : regionLevelResults) {
279 try {
280 if (f.get() != null) {
281 regionsWithHFileV1.add(f.get());
282 }
283 } catch (InterruptedException e) {
284 LOG.error(e);
285 } catch (ExecutionException e) {
286 LOG.error(e);
287 }
288 }
289 return regionsWithHFileV1;
290 }
291
292
293
294
295
296
297
298 private Future<Path> processRegion(final Path regionDir) {
299 LOG.debug("processing region: " + regionDir);
300 Callable<Path> regionCallable = new Callable<Path>() {
301 @Override
302 public Path call() throws Exception {
303 for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
304 FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir);
305 if (storeFiles == null || storeFiles.length == 0) continue;
306 for (FileStatus storeFile : storeFiles) {
307 Path storeFilePath = storeFile.getPath();
308 FSDataInputStream fsdis = null;
309 long lenToRead = 0;
310 try {
311
312 if (StoreFileInfo.isReference(storeFilePath)) continue;
313
314 else if (HFileLink.isHFileLink(storeFilePath)) {
315 FileLink fLink = getFileLinkWithPreNSPath(storeFilePath);
316 fsdis = fLink.open(fs);
317 lenToRead = fLink.getFileStatus(fs).getLen();
318 } else {
319
320 fsdis = fs.open(storeFilePath);
321 lenToRead = storeFile.getLen();
322 }
323 int majorVersion = computeMajorVersion(fsdis, lenToRead);
324 if (majorVersion == 1) {
325 hFileV1Set.add(storeFilePath);
326
327 return regionDir;
328 }
329 if (majorVersion > 2 || majorVersion < 1) throw new IllegalArgumentException(
330 "Incorrect major version: " + majorVersion);
331 } catch (Exception iae) {
332 corruptedHFiles.add(storeFilePath);
333 LOG.error("Got exception while reading trailer for file: "+ storeFilePath, iae);
334 } finally {
335 if (fsdis != null) fsdis.close();
336 }
337 }
338 }
339 return null;
340 }
341
342 private int computeMajorVersion(FSDataInputStream istream, long fileSize)
343 throws IOException {
344
345 long seekPoint = fileSize - Bytes.SIZEOF_INT;
346 if (seekPoint < 0)
347 throw new IllegalArgumentException("File too small, no major version found");
348
349
350 istream.seek(seekPoint);
351 int version = istream.readInt();
352
353 return version & 0x00ffffff;
354 }
355 };
356 Future<Path> f = exec.submit(regionCallable);
357 return f;
358 }
359
360
361
362
363
364
365
366
367 public FileLink getFileLinkWithPreNSPath(Path storeFilePath) throws IOException {
368 HFileLink link = HFileLink.buildFromHFileLinkPattern(getConf(), storeFilePath);
369 List<Path> pathsToProcess = getPreNSPathsForHFileLink(link);
370 pathsToProcess.addAll(Arrays.asList(link.getLocations()));
371 return new FileLink(pathsToProcess);
372 }
373
374 private List<Path> getPreNSPathsForHFileLink(HFileLink fileLink) throws IOException {
375 if (defaultNamespace == null) setDefaultNamespaceDir();
376 List<Path> p = new ArrayList<Path>();
377 String relativeTablePath = removeDefaultNSPath(fileLink.getOriginPath());
378 p.add(getPreNSPath(PRE_NS_DOT_ARCHIVE, relativeTablePath));
379 p.add(getPreNSPath(PRE_NS_DOT_TMP, relativeTablePath));
380 p.add(getPreNSPath(null, relativeTablePath));
381 return p;
382 }
383
384
385
386
387
388 private String removeDefaultNSPath(Path originalPath) {
389 String pathStr = originalPath.toString();
390 if (!pathStr.startsWith(defaultNamespace.toString())) return pathStr;
391 return pathStr.substring(defaultNamespace.toString().length() + 1);
392 }
393
394 private Path getPreNSPath(String prefix, String relativeTablePath) throws IOException {
395 String relativePath = (prefix == null ? relativeTablePath : prefix + Path.SEPARATOR
396 + relativeTablePath);
397 return new Path(FSUtils.getRootDir(getConf()), relativePath);
398 }
399
400 private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
401
402
403 if (fs.isFile(path)) return false;
404 return (FSTableDescriptors.getTableInfoPath(fs, path) != null || FSTableDescriptors
405 .getCurrentTableInfoStatus(fs, path, false) != null) || path.toString().endsWith(".META.");
406 }
407
408 private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
409 if (fs.isFile(path)) return false;
410 Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
411 return fs.exists(regionInfo);
412
413 }
414
415 public static void main(String args[]) throws Exception {
416 System.exit(ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(), args));
417 }
418
419 }