1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.apache.hadoop.hbase.io.hfile;
20
21 import java.io.ByteArrayOutputStream;
22 import java.io.DataInput;
23 import java.io.IOException;
24 import java.io.PrintStream;
25 import java.util.ArrayList;
26 import java.util.List;
27 import java.util.Locale;
28 import java.util.Map;
29 import java.util.SortedMap;
30
31 import org.apache.commons.cli.CommandLine;
32 import org.apache.commons.cli.CommandLineParser;
33 import org.apache.commons.cli.HelpFormatter;
34 import org.apache.commons.cli.Option;
35 import org.apache.commons.cli.OptionGroup;
36 import org.apache.commons.cli.Options;
37 import org.apache.commons.cli.ParseException;
38 import org.apache.commons.cli.PosixParser;
39 import org.apache.commons.logging.Log;
40 import org.apache.commons.logging.LogFactory;
41 import org.apache.hadoop.hbase.classification.InterfaceAudience;
42 import org.apache.hadoop.hbase.classification.InterfaceStability;
43 import org.apache.hadoop.conf.Configuration;
44 import org.apache.hadoop.conf.Configured;
45 import org.apache.hadoop.fs.FileSystem;
46 import org.apache.hadoop.fs.Path;
47 import org.apache.hadoop.hbase.Cell;
48 import org.apache.hadoop.hbase.CellComparator;
49 import org.apache.hadoop.hbase.CellUtil;
50 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
51 import org.apache.hadoop.hbase.HConstants;
52 import org.apache.hadoop.hbase.TableName;
53 import org.apache.hadoop.hbase.HBaseConfiguration;
54 import org.apache.hadoop.hbase.HRegionInfo;
55 import org.apache.hadoop.hbase.KeyValue;
56 import org.apache.hadoop.hbase.KeyValueUtil;
57 import org.apache.hadoop.hbase.Tag;
58 import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
59 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
60 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
61 import org.apache.hadoop.hbase.util.BloomFilter;
62 import org.apache.hadoop.hbase.util.BloomFilterFactory;
63 import org.apache.hadoop.hbase.util.ByteBloomFilter;
64 import org.apache.hadoop.hbase.util.Bytes;
65 import org.apache.hadoop.hbase.util.FSUtils;
66 import org.apache.hadoop.hbase.util.Writables;
67 import org.apache.hadoop.util.Tool;
68 import org.apache.hadoop.util.ToolRunner;
69
70 import com.yammer.metrics.core.Histogram;
71 import com.yammer.metrics.core.Metric;
72 import com.yammer.metrics.core.MetricName;
73 import com.yammer.metrics.core.MetricPredicate;
74 import com.yammer.metrics.core.MetricsRegistry;
75 import com.yammer.metrics.reporting.ConsoleReporter;
76
77
78
79
80 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
81 @InterfaceStability.Evolving
82 public class HFilePrettyPrinter extends Configured implements Tool {
83
84 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
85
86 private Options options = new Options();
87
88 private boolean verbose;
89 private boolean printValue;
90 private boolean printKey;
91 private boolean shouldPrintMeta;
92 private boolean printBlockIndex;
93 private boolean printBlockHeaders;
94 private boolean printStats;
95 private boolean checkRow;
96 private boolean checkFamily;
97 private boolean isSeekToRow = false;
98
99 private PrintStream out = System.out;
100 private PrintStream err = System.err;
101
102
103
104
105 private byte[] row = null;
106
107 private List<Path> files = new ArrayList<Path>();
108 private int count;
109
110 private static final String FOUR_SPACES = " ";
111
/**
 * Creates a printer without an explicit configuration and registers the command-line
 * options. A configuration must be supplied before run() is called, because run()
 * rejects a null configuration.
 */
public HFilePrettyPrinter() {
  // The implicit super() call is all that is needed here.
  init();
}

/**
 * Creates a printer bound to the given configuration and registers the command-line
 * options.
 *
 * @param conf configuration passed on to the Configured superclass
 */
public HFilePrettyPrinter(Configuration conf) {
  super(conf);
  init();
}
121
122 private void init() {
123 options.addOption("v", "verbose", false,
124 "Verbose output; emits file and meta data delimiters");
125 options.addOption("p", "printkv", false, "Print key/value pairs");
126 options.addOption("e", "printkey", false, "Print keys");
127 options.addOption("m", "printmeta", false, "Print meta data of file");
128 options.addOption("b", "printblocks", false, "Print block index meta data");
129 options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
130 options.addOption("k", "checkrow", false,
131 "Enable row order check; looks for out-of-order keys");
132 options.addOption("a", "checkfamily", false, "Enable family check");
133 options.addOption("w", "seekToRow", true,
134 "Seek to this row and print all the kvs for this row only");
135 options.addOption("s", "stats", false, "Print statistics");
136
137 OptionGroup files = new OptionGroup();
138 files.addOption(new Option("f", "file", true,
139 "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
140 files.addOption(new Option("r", "region", true,
141 "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
142 options.addOptionGroup(files);
143 }
144
145 public void setPrintStreams(PrintStream out, PrintStream err) {
146 this.out = out;
147 this.err = err;
148 }
149
150 public boolean parseOptions(String args[]) throws ParseException,
151 IOException {
152 if (args.length == 0) {
153 HelpFormatter formatter = new HelpFormatter();
154 formatter.printHelp("HFile", options, true);
155 return false;
156 }
157 CommandLineParser parser = new PosixParser();
158 CommandLine cmd = parser.parse(options, args);
159
160 verbose = cmd.hasOption("v");
161 printValue = cmd.hasOption("p");
162 printKey = cmd.hasOption("e") || printValue;
163 shouldPrintMeta = cmd.hasOption("m");
164 printBlockIndex = cmd.hasOption("b");
165 printBlockHeaders = cmd.hasOption("h");
166 printStats = cmd.hasOption("s");
167 checkRow = cmd.hasOption("k");
168 checkFamily = cmd.hasOption("a");
169
170 if (cmd.hasOption("f")) {
171 files.add(new Path(cmd.getOptionValue("f")));
172 }
173
174 if (cmd.hasOption("w")) {
175 String key = cmd.getOptionValue("w");
176 if (key != null && key.length() != 0) {
177 row = Bytes.toBytesBinary(key);
178 isSeekToRow = true;
179 } else {
180 err.println("Invalid row is specified.");
181 System.exit(-1);
182 }
183 }
184
185 if (cmd.hasOption("r")) {
186 String regionName = cmd.getOptionValue("r");
187 byte[] rn = Bytes.toBytes(regionName);
188 byte[][] hri = HRegionInfo.parseRegionName(rn);
189 Path rootDir = FSUtils.getRootDir(getConf());
190 Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
191 String enc = HRegionInfo.encodeRegionName(rn);
192 Path regionDir = new Path(tableDir, enc);
193 if (verbose)
194 out.println("region dir -> " + regionDir);
195 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
196 regionDir);
197 if (verbose)
198 out.println("Number of region files found -> "
199 + regionFiles.size());
200 if (verbose) {
201 int i = 1;
202 for (Path p : regionFiles) {
203 if (verbose)
204 out.println("Found file[" + i++ + "] -> " + p);
205 }
206 }
207 files.addAll(regionFiles);
208 }
209
210 return true;
211 }
212
213
214
215
216
217 @Override
218 public int run(String[] args) {
219 if (getConf() == null) {
220 throw new RuntimeException("A Configuration instance must be provided.");
221 }
222 try {
223 FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
224 if (!parseOptions(args))
225 return 1;
226 } catch (IOException ex) {
227 LOG.error("Error parsing command-line options", ex);
228 return 1;
229 } catch (ParseException ex) {
230 LOG.error("Error parsing command-line options", ex);
231 return 1;
232 }
233
234
235 for (Path fileName : files) {
236 try {
237 int exitCode = processFile(fileName);
238 if (exitCode != 0) {
239 return exitCode;
240 }
241 } catch (IOException ex) {
242 LOG.error("Error reading " + fileName, ex);
243 return -2;
244 }
245 }
246
247 if (verbose || printKey) {
248 out.println("Scanned kv count -> " + count);
249 }
250
251 return 0;
252 }
253
254 public int processFile(Path file) throws IOException {
255 if (verbose)
256 out.println("Scanning -> " + file);
257
258 Path rootPath = FSUtils.getRootDir(getConf());
259 String rootString = rootPath + rootPath.SEPARATOR;
260 if (!file.toString().startsWith(rootString)) {
261
262
263
264 FileSystem rootFS = rootPath.getFileSystem(getConf());
265 String qualifiedFile = rootFS.getUri().toString() + file.toString();
266 if (!qualifiedFile.startsWith(rootString)) {
267 err.println("ERROR, file (" + file +
268 ") is not in HBase's root directory (" + rootString + ")");
269 return -2;
270 }
271 }
272
273 FileSystem fs = file.getFileSystem(getConf());
274 if (!fs.exists(file)) {
275 err.println("ERROR, file doesnt exist: " + file);
276 return -2;
277 }
278
279 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(getConf()), getConf());
280
281 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
282
283 KeyValueStatsCollector fileStats = null;
284
285 if (verbose || printKey || checkRow || checkFamily || printStats) {
286
287 HFileScanner scanner = reader.getScanner(false, false, false);
288 fileStats = new KeyValueStatsCollector();
289 boolean shouldScanKeysValues = false;
290 if (this.isSeekToRow) {
291
292 shouldScanKeysValues =
293 (scanner.seekTo(KeyValueUtil.createFirstOnRow(this.row).getKey()) != -1);
294 } else {
295 shouldScanKeysValues = scanner.seekTo();
296 }
297 if (shouldScanKeysValues)
298 scanKeysValues(file, fileStats, scanner, row);
299 }
300
301
302 if (shouldPrintMeta) {
303 printMeta(reader, fileInfo);
304 }
305
306 if (printBlockIndex) {
307 out.println("Block Index:");
308 out.println(reader.getDataBlockIndexReader());
309 }
310
311 if (printBlockHeaders) {
312 out.println("Block Headers:");
313
314
315
316
317 FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
318 long fileSize = fs.getFileStatus(file).getLen();
319 FixedFileTrailer trailer =
320 FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
321 long offset = trailer.getFirstDataBlockOffset(),
322 max = trailer.getLastDataBlockOffset();
323 HFileBlock block;
324 while (offset <= max) {
325 block = reader.readBlock(offset, -1,
326
327 offset += block.getOnDiskSizeWithHeader();
328 out.println(block);
329 }
330 }
331
332 if (printStats) {
333 fileStats.finish();
334 out.println("Stats:\n" + fileStats);
335 }
336
337 reader.close();
338 return 0;
339 }
340
341 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
342 HFileScanner scanner, byte[] row) throws IOException {
343 Cell pCell = null;
344 do {
345 Cell cell = scanner.getKeyValue();
346 if (row != null && row.length != 0) {
347 int result = CellComparator.compareRows(cell.getRowArray(), cell.getRowOffset(),
348 cell.getRowLength(), row, 0, row.length);
349 if (result > 0) {
350 break;
351 } else if (result < 0) {
352 continue;
353 }
354 }
355
356 if (printStats) {
357 fileStats.collect(cell);
358 }
359
360 if (printKey) {
361 out.print("K: " + cell);
362 if (printValue) {
363 out.print(" V: "
364 + Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(),
365 cell.getValueLength()));
366 int i = 0;
367 List<Tag> tags = Tag.asList(cell.getTagsArray(), cell.getTagsOffset(),
368 cell.getTagsLength());
369 for (Tag tag : tags) {
370 out.print(String.format(" T[%d]: %s", i++,
371 Bytes.toStringBinary(tag.getBuffer(), tag.getTagOffset(), tag.getTagLength())));
372 }
373 }
374 out.println();
375 }
376
377 if (checkRow && pCell != null) {
378 if (CellComparator.compareRows(pCell, cell) > 0) {
379 err.println("WARNING, previous row is greater then"
380 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
381 + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent -> "
382 + CellUtil.getCellKeyAsString(cell));
383 }
384 }
385
386 if (checkFamily) {
387 String fam = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(),
388 cell.getFamilyLength());
389 if (!file.toString().contains(fam)) {
390 err.println("WARNING, filename does not match kv family,"
391 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
392 + CellUtil.getCellKeyAsString(cell));
393 }
394 if (pCell != null && CellComparator.compareFamilies(pCell, cell) != 0) {
395 err.println("WARNING, previous kv has different family"
396 + " compared to current key\n\tfilename -> " + file
397 + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell)
398 + "\n\tcurrent -> " + CellUtil.getCellKeyAsString(cell));
399 }
400 }
401 pCell = cell;
402 ++count;
403 } while (scanner.next());
404 }
405
406
407
408
409
410 private static String asSeparateLines(String keyValueStr) {
411 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
412 ",\n" + FOUR_SPACES + "$1");
413 }
414
415 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
416 throws IOException {
417 out.println("Block index size as per heapsize: "
418 + reader.indexSize());
419 out.println(asSeparateLines(reader.toString()));
420 out.println("Trailer:\n "
421 + asSeparateLines(reader.getTrailer().toString()));
422 out.println("Fileinfo:");
423 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
424 out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
425 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
426 long seqid = Bytes.toLong(e.getValue());
427 out.println(seqid);
428 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
429
430 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
431 Writables.copyWritable(e.getValue(), timeRangeTracker);
432 out.println(timeRangeTracker.getMinimumTimestamp() + "...."
433 + timeRangeTracker.getMaximumTimestamp());
434 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
435 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
436 out.println(Bytes.toInt(e.getValue()));
437 } else {
438 out.println(Bytes.toStringBinary(e.getValue()));
439 }
440 }
441
442 try {
443
444 out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
445 } catch (Exception e) {
446 out.println ("Unable to retrieve the midkey");
447 }
448
449
450 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
451 BloomFilter bloomFilter = null;
452 if (bloomMeta != null)
453 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
454
455 out.println("Bloom filter:");
456 if (bloomFilter != null) {
457 out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
458 ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
459 } else {
460 out.println(FOUR_SPACES + "Not present");
461 }
462
463
464 bloomMeta = reader.getDeleteBloomFilterMetadata();
465 bloomFilter = null;
466 if (bloomMeta != null)
467 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
468
469 out.println("Delete Family Bloom filter:");
470 if (bloomFilter != null) {
471 out.println(FOUR_SPACES
472 + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
473 "\n" + FOUR_SPACES));
474 } else {
475 out.println(FOUR_SPACES + "Not present");
476 }
477 }
478
479 private static class KeyValueStatsCollector {
480 private final MetricsRegistry metricsRegistry = new MetricsRegistry();
481 private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
482 private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
483 Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
484 Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
485 Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
486 Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
487
488 long curRowBytes = 0;
489 long curRowCols = 0;
490
491 byte[] biggestRow = null;
492
493 private Cell prevCell = null;
494 private long maxRowBytes = 0;
495 private long curRowKeyLength;
496
497 public void collect(Cell cell) {
498 valLen.update(cell.getValueLength());
499 if (prevCell != null &&
500 KeyValue.COMPARATOR.compareRows(prevCell, cell) != 0) {
501
502 collectRow();
503 }
504 curRowBytes += KeyValueUtil.length(cell);
505 curRowKeyLength = KeyValueUtil.keyLength(cell);
506 curRowCols++;
507 prevCell = cell;
508 }
509
510 private void collectRow() {
511 rowSizeBytes.update(curRowBytes);
512 rowSizeCols.update(curRowCols);
513 keyLen.update(curRowKeyLength);
514
515 if (curRowBytes > maxRowBytes && prevCell != null) {
516 biggestRow = prevCell.getRow();
517 maxRowBytes = curRowBytes;
518 }
519
520 curRowBytes = 0;
521 curRowCols = 0;
522 }
523
524 public void finish() {
525 if (curRowCols > 0) {
526 collectRow();
527 }
528 }
529
530 @Override
531 public String toString() {
532 if (prevCell == null)
533 return "no data available for statistics";
534
535
536 simpleReporter.shutdown();
537 simpleReporter.run();
538 metricsRegistry.shutdown();
539
540 return
541 metricsOutput.toString() +
542 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
543 }
544 }
545
546 private static class SimpleReporter extends ConsoleReporter {
547 private final PrintStream out;
548
549 public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
550 super(metricsRegistry, out, MetricPredicate.ALL);
551 this.out = out;
552 }
553
554 @Override
555 public void run() {
556 for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
557 MetricPredicate.ALL).entrySet()) {
558 try {
559 for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
560 out.print(" " + subEntry.getKey().getName());
561 out.println(':');
562
563 subEntry.getValue().processWith(this, subEntry.getKey(), out);
564 }
565 } catch (Exception e) {
566 e.printStackTrace(out);
567 }
568 }
569 }
570
571 @Override
572 public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
573 super.processHistogram(name, histogram, stream);
574 stream.printf(Locale.getDefault(), " count = %d%n", histogram.count());
575 }
576 }
577
578 public static void main(String[] args) throws Exception {
579 Configuration conf = HBaseConfiguration.create();
580
581 conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
582 int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
583 System.exit(ret);
584 }
585 }