1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 package org.apache.hadoop.hbase.io.hfile;
21
22 import java.io.ByteArrayOutputStream;
23 import java.io.DataInput;
24 import java.io.IOException;
25 import java.io.PrintStream;
26 import java.util.ArrayList;
27 import java.util.List;
28 import java.util.Locale;
29 import java.util.Map;
30 import java.util.SortedMap;
31
32 import org.apache.commons.cli.CommandLine;
33 import org.apache.commons.cli.CommandLineParser;
34 import org.apache.commons.cli.HelpFormatter;
35 import org.apache.commons.cli.Option;
36 import org.apache.commons.cli.OptionGroup;
37 import org.apache.commons.cli.Options;
38 import org.apache.commons.cli.ParseException;
39 import org.apache.commons.cli.PosixParser;
40 import org.apache.commons.logging.Log;
41 import org.apache.commons.logging.LogFactory;
42 import org.apache.hadoop.hbase.classification.InterfaceAudience;
43 import org.apache.hadoop.hbase.classification.InterfaceStability;
44 import org.apache.hadoop.conf.Configuration;
45 import org.apache.hadoop.conf.Configured;
46 import org.apache.hadoop.fs.FileSystem;
47 import org.apache.hadoop.fs.Path;
48 import org.apache.hadoop.hbase.Cell;
49 import org.apache.hadoop.hbase.CellComparator;
50 import org.apache.hadoop.hbase.CellUtil;
51 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
52 import org.apache.hadoop.hbase.HConstants;
53 import org.apache.hadoop.hbase.TableName;
54 import org.apache.hadoop.hbase.HBaseConfiguration;
55 import org.apache.hadoop.hbase.HRegionInfo;
56 import org.apache.hadoop.hbase.KeyValue;
57 import org.apache.hadoop.hbase.KeyValueUtil;
58 import org.apache.hadoop.hbase.Tag;
59 import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper;
60 import org.apache.hadoop.hbase.io.hfile.HFile.FileInfo;
61 import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
62 import org.apache.hadoop.hbase.util.BloomFilter;
63 import org.apache.hadoop.hbase.util.BloomFilterFactory;
64 import org.apache.hadoop.hbase.util.ByteBloomFilter;
65 import org.apache.hadoop.hbase.util.Bytes;
66 import org.apache.hadoop.hbase.util.FSUtils;
67 import org.apache.hadoop.hbase.util.Writables;
68 import org.apache.hadoop.util.Tool;
69 import org.apache.hadoop.util.ToolRunner;
70
71 import com.yammer.metrics.core.Histogram;
72 import com.yammer.metrics.core.Metric;
73 import com.yammer.metrics.core.MetricName;
74 import com.yammer.metrics.core.MetricPredicate;
75 import com.yammer.metrics.core.MetricsRegistry;
76 import com.yammer.metrics.reporting.ConsoleReporter;
77
78
79
80
81 @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
82 @InterfaceStability.Evolving
83 public class HFilePrettyPrinter extends Configured implements Tool {
84
85 private static final Log LOG = LogFactory.getLog(HFilePrettyPrinter.class);
86
87 private Options options = new Options();
88
89 private boolean verbose;
90 private boolean printValue;
91 private boolean printKey;
92 private boolean shouldPrintMeta;
93 private boolean printBlockIndex;
94 private boolean printBlockHeaders;
95 private boolean printStats;
96 private boolean checkRow;
97 private boolean checkFamily;
98 private boolean isSeekToRow = false;
99
100
101
102
103 private byte[] row = null;
104
105 private List<Path> files = new ArrayList<Path>();
106 private int count;
107
108 private static final String FOUR_SPACES = " ";
109
/**
 * Creates a printer without passing a configuration to the superclass and registers the
 * command-line options. Note that {@code run} throws when {@code getConf()} is still null.
 */
public HFilePrettyPrinter() {
  // The no-argument superclass constructor is invoked implicitly.
  init();
}

/**
 * Creates a printer backed by the given configuration and registers the command-line options.
 *
 * @param conf configuration handed to the superclass
 */
public HFilePrettyPrinter(Configuration conf) {
  super(conf);
  init();
}
119
120 private void init() {
121 options.addOption("v", "verbose", false,
122 "Verbose output; emits file and meta data delimiters");
123 options.addOption("p", "printkv", false, "Print key/value pairs");
124 options.addOption("e", "printkey", false, "Print keys");
125 options.addOption("m", "printmeta", false, "Print meta data of file");
126 options.addOption("b", "printblocks", false, "Print block index meta data");
127 options.addOption("h", "printblockheaders", false, "Print block headers for each block.");
128 options.addOption("k", "checkrow", false,
129 "Enable row order check; looks for out-of-order keys");
130 options.addOption("a", "checkfamily", false, "Enable family check");
131 options.addOption("w", "seekToRow", true,
132 "Seek to this row and print all the kvs for this row only");
133 options.addOption("s", "stats", false, "Print statistics");
134
135 OptionGroup files = new OptionGroup();
136 files.addOption(new Option("f", "file", true,
137 "File to scan. Pass full-path; e.g. hdfs://a:9000/hbase/hbase:meta/12/34"));
138 files.addOption(new Option("r", "region", true,
139 "Region to scan. Pass region name; e.g. 'hbase:meta,,1'"));
140 options.addOptionGroup(files);
141 }
142
143 public boolean parseOptions(String args[]) throws ParseException,
144 IOException {
145 if (args.length == 0) {
146 HelpFormatter formatter = new HelpFormatter();
147 formatter.printHelp("HFile", options, true);
148 return false;
149 }
150 CommandLineParser parser = new PosixParser();
151 CommandLine cmd = parser.parse(options, args);
152
153 verbose = cmd.hasOption("v");
154 printValue = cmd.hasOption("p");
155 printKey = cmd.hasOption("e") || printValue;
156 shouldPrintMeta = cmd.hasOption("m");
157 printBlockIndex = cmd.hasOption("b");
158 printBlockHeaders = cmd.hasOption("h");
159 printStats = cmd.hasOption("s");
160 checkRow = cmd.hasOption("k");
161 checkFamily = cmd.hasOption("a");
162
163 if (cmd.hasOption("f")) {
164 files.add(new Path(cmd.getOptionValue("f")));
165 }
166
167 if (cmd.hasOption("w")) {
168 String key = cmd.getOptionValue("w");
169 if (key != null && key.length() != 0) {
170 row = Bytes.toBytesBinary(key);
171 isSeekToRow = true;
172 } else {
173 System.err.println("Invalid row is specified.");
174 System.exit(-1);
175 }
176 }
177
178 if (cmd.hasOption("r")) {
179 String regionName = cmd.getOptionValue("r");
180 byte[] rn = Bytes.toBytes(regionName);
181 byte[][] hri = HRegionInfo.parseRegionName(rn);
182 Path rootDir = FSUtils.getRootDir(getConf());
183 Path tableDir = FSUtils.getTableDir(rootDir, TableName.valueOf(hri[0]));
184 String enc = HRegionInfo.encodeRegionName(rn);
185 Path regionDir = new Path(tableDir, enc);
186 if (verbose)
187 System.out.println("region dir -> " + regionDir);
188 List<Path> regionFiles = HFile.getStoreFiles(FileSystem.get(getConf()),
189 regionDir);
190 if (verbose)
191 System.out.println("Number of region files found -> "
192 + regionFiles.size());
193 if (verbose) {
194 int i = 1;
195 for (Path p : regionFiles) {
196 if (verbose)
197 System.out.println("Found file[" + i++ + "] -> " + p);
198 }
199 }
200 files.addAll(regionFiles);
201 }
202
203 return true;
204 }
205
206
207
208
209
210 @Override
211 public int run(String[] args) {
212 if (getConf() == null) {
213 throw new RuntimeException("A Configuration instance must be provided.");
214 }
215 try {
216 FSUtils.setFsDefault(getConf(), FSUtils.getRootDir(getConf()));
217 if (!parseOptions(args))
218 return 1;
219 } catch (IOException ex) {
220 LOG.error("Error parsing command-line options", ex);
221 return 1;
222 } catch (ParseException ex) {
223 LOG.error("Error parsing command-line options", ex);
224 return 1;
225 }
226
227
228 for (Path fileName : files) {
229 try {
230 processFile(fileName);
231 } catch (IOException ex) {
232 LOG.error("Error reading " + fileName, ex);
233 System.exit(-2);
234 }
235 }
236
237 if (verbose || printKey) {
238 System.out.println("Scanned kv count -> " + count);
239 }
240
241 return 0;
242 }
243
244 private void processFile(Path file) throws IOException {
245 if (verbose)
246 System.out.println("Scanning -> " + file);
247 FileSystem fs = file.getFileSystem(getConf());
248 if (!fs.exists(file)) {
249 System.err.println("ERROR, file doesnt exist: " + file);
250 System.exit(-2);
251 }
252
253 HFile.Reader reader = HFile.createReader(fs, file, new CacheConfig(getConf()), getConf());
254
255 Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
256
257 KeyValueStatsCollector fileStats = null;
258
259 if (verbose || printKey || checkRow || checkFamily || printStats) {
260
261 HFileScanner scanner = reader.getScanner(false, false, false);
262 fileStats = new KeyValueStatsCollector();
263 boolean shouldScanKeysValues = false;
264 if (this.isSeekToRow) {
265
266 shouldScanKeysValues =
267 (scanner.seekTo(KeyValueUtil.createFirstOnRow(this.row).getKey()) != -1);
268 } else {
269 shouldScanKeysValues = scanner.seekTo();
270 }
271 if (shouldScanKeysValues)
272 scanKeysValues(file, fileStats, scanner, row);
273 }
274
275
276 if (shouldPrintMeta) {
277 printMeta(reader, fileInfo);
278 }
279
280 if (printBlockIndex) {
281 System.out.println("Block Index:");
282 System.out.println(reader.getDataBlockIndexReader());
283 }
284
285 if (printBlockHeaders) {
286 System.out.println("Block Headers:");
287
288
289
290
291 FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs, file);
292 long fileSize = fs.getFileStatus(file).getLen();
293 FixedFileTrailer trailer =
294 FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
295 long offset = trailer.getFirstDataBlockOffset(),
296 max = trailer.getLastDataBlockOffset();
297 HFileBlock block;
298 while (offset <= max) {
299 block = reader.readBlock(offset, -1,
300
301 offset += block.getOnDiskSizeWithHeader();
302 System.out.println(block);
303 }
304 }
305
306 if (printStats) {
307 fileStats.finish();
308 System.out.println("Stats:\n" + fileStats);
309 }
310
311 reader.close();
312 }
313
314 private void scanKeysValues(Path file, KeyValueStatsCollector fileStats,
315 HFileScanner scanner, byte[] row) throws IOException {
316 Cell pCell = null;
317 do {
318 Cell cell = scanner.getKeyValue();
319 if (row != null && row.length != 0) {
320 int result = CellComparator.compareRows(cell.getRowArray(), cell.getRowOffset(),
321 cell.getRowLength(), row, 0, row.length);
322 if (result > 0) {
323 break;
324 } else if (result < 0) {
325 continue;
326 }
327 }
328
329 if (printStats) {
330 fileStats.collect(cell);
331 }
332
333 if (printKey) {
334 System.out.print("K: " + cell);
335 if (printValue) {
336 System.out.print(" V: "
337 + Bytes.toStringBinary(cell.getValueArray(), cell.getValueOffset(),
338 cell.getValueLength()));
339 int i = 0;
340 List<Tag> tags = Tag.asList(cell.getTagsArray(), cell.getTagsOffset(),
341 cell.getTagsLength());
342 for (Tag tag : tags) {
343 System.out.print(String.format(" T[%d]: %s", i++,
344 Bytes.toStringBinary(tag.getBuffer(), tag.getTagOffset(), tag.getTagLength())));
345 }
346 }
347 System.out.println();
348 }
349
350 if (checkRow && pCell != null) {
351 if (CellComparator.compareRows(pCell, cell) > 0) {
352 System.err.println("WARNING, previous row is greater then"
353 + " current row\n\tfilename -> " + file + "\n\tprevious -> "
354 + CellUtil.getCellKeyAsString(pCell) + "\n\tcurrent -> "
355 + CellUtil.getCellKeyAsString(cell));
356 }
357 }
358
359 if (checkFamily) {
360 String fam = Bytes.toString(cell.getFamilyArray(), cell.getFamilyOffset(),
361 cell.getFamilyLength());
362 if (!file.toString().contains(fam)) {
363 System.err.println("WARNING, filename does not match kv family,"
364 + "\n\tfilename -> " + file + "\n\tkeyvalue -> "
365 + CellUtil.getCellKeyAsString(cell));
366 }
367 if (pCell != null && CellComparator.compareFamilies(pCell, cell) != 0) {
368 System.err.println("WARNING, previous kv has different family"
369 + " compared to current key\n\tfilename -> " + file
370 + "\n\tprevious -> " + CellUtil.getCellKeyAsString(pCell)
371 + "\n\tcurrent -> " + CellUtil.getCellKeyAsString(cell));
372 }
373 }
374 pCell = cell;
375 ++count;
376 } while (scanner.next());
377 }
378
379
380
381
382
383 private static String asSeparateLines(String keyValueStr) {
384 return keyValueStr.replaceAll(", ([a-zA-Z]+=)",
385 ",\n" + FOUR_SPACES + "$1");
386 }
387
388 private void printMeta(HFile.Reader reader, Map<byte[], byte[]> fileInfo)
389 throws IOException {
390 System.out.println("Block index size as per heapsize: "
391 + reader.indexSize());
392 System.out.println(asSeparateLines(reader.toString()));
393 System.out.println("Trailer:\n "
394 + asSeparateLines(reader.getTrailer().toString()));
395 System.out.println("Fileinfo:");
396 for (Map.Entry<byte[], byte[]> e : fileInfo.entrySet()) {
397 System.out.print(FOUR_SPACES + Bytes.toString(e.getKey()) + " = ");
398 if (Bytes.compareTo(e.getKey(), Bytes.toBytes("MAX_SEQ_ID_KEY")) == 0) {
399 long seqid = Bytes.toLong(e.getValue());
400 System.out.println(seqid);
401 } else if (Bytes.compareTo(e.getKey(), Bytes.toBytes("TIMERANGE")) == 0) {
402 TimeRangeTracker timeRangeTracker = new TimeRangeTracker();
403 Writables.copyWritable(e.getValue(), timeRangeTracker);
404 System.out.println(timeRangeTracker.getMinimumTimestamp() + "...."
405 + timeRangeTracker.getMaximumTimestamp());
406 } else if (Bytes.compareTo(e.getKey(), FileInfo.AVG_KEY_LEN) == 0
407 || Bytes.compareTo(e.getKey(), FileInfo.AVG_VALUE_LEN) == 0) {
408 System.out.println(Bytes.toInt(e.getValue()));
409 } else {
410 System.out.println(Bytes.toStringBinary(e.getValue()));
411 }
412 }
413
414 try {
415 System.out.println("Mid-key: " + Bytes.toStringBinary(reader.midkey()));
416 } catch (Exception e) {
417 System.out.println ("Unable to retrieve the midkey");
418 }
419
420
421 DataInput bloomMeta = reader.getGeneralBloomFilterMetadata();
422 BloomFilter bloomFilter = null;
423 if (bloomMeta != null)
424 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
425
426 System.out.println("Bloom filter:");
427 if (bloomFilter != null) {
428 System.out.println(FOUR_SPACES + bloomFilter.toString().replaceAll(
429 ByteBloomFilter.STATS_RECORD_SEP, "\n" + FOUR_SPACES));
430 } else {
431 System.out.println(FOUR_SPACES + "Not present");
432 }
433
434
435 bloomMeta = reader.getDeleteBloomFilterMetadata();
436 bloomFilter = null;
437 if (bloomMeta != null)
438 bloomFilter = BloomFilterFactory.createFromMeta(bloomMeta, reader);
439
440 System.out.println("Delete Family Bloom filter:");
441 if (bloomFilter != null) {
442 System.out.println(FOUR_SPACES
443 + bloomFilter.toString().replaceAll(ByteBloomFilter.STATS_RECORD_SEP,
444 "\n" + FOUR_SPACES));
445 } else {
446 System.out.println(FOUR_SPACES + "Not present");
447 }
448 }
449
450 private static class KeyValueStatsCollector {
451 private final MetricsRegistry metricsRegistry = new MetricsRegistry();
452 private final ByteArrayOutputStream metricsOutput = new ByteArrayOutputStream();
453 private final SimpleReporter simpleReporter = new SimpleReporter(metricsRegistry, new PrintStream(metricsOutput));
454 Histogram keyLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Key length");
455 Histogram valLen = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Val length");
456 Histogram rowSizeBytes = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (bytes)");
457 Histogram rowSizeCols = metricsRegistry.newHistogram(HFilePrettyPrinter.class, "Row size (columns)");
458
459 long curRowBytes = 0;
460 long curRowCols = 0;
461
462 byte[] biggestRow = null;
463
464 private Cell prevCell = null;
465 private long maxRowBytes = 0;
466 private long curRowKeyLength;
467
468 public void collect(Cell cell) {
469 valLen.update(cell.getValueLength());
470 if (prevCell != null &&
471 KeyValue.COMPARATOR.compareRows(prevCell, cell) != 0) {
472
473 collectRow();
474 }
475 curRowBytes += KeyValueUtil.length(cell);
476 curRowKeyLength = KeyValueUtil.keyLength(cell);
477 curRowCols++;
478 prevCell = cell;
479 }
480
481 private void collectRow() {
482 rowSizeBytes.update(curRowBytes);
483 rowSizeCols.update(curRowCols);
484 keyLen.update(curRowKeyLength);
485
486 if (curRowBytes > maxRowBytes && prevCell != null) {
487 biggestRow = prevCell.getRow();
488 maxRowBytes = curRowBytes;
489 }
490
491 curRowBytes = 0;
492 curRowCols = 0;
493 }
494
495 public void finish() {
496 if (curRowCols > 0) {
497 collectRow();
498 }
499 }
500
501 @Override
502 public String toString() {
503 if (prevCell == null)
504 return "no data available for statistics";
505
506
507 simpleReporter.shutdown();
508 simpleReporter.run();
509 metricsRegistry.shutdown();
510
511 return
512 metricsOutput.toString() +
513 "Key of biggest row: " + Bytes.toStringBinary(biggestRow);
514 }
515 }
516
517 private static class SimpleReporter extends ConsoleReporter {
518 private final PrintStream out;
519
520 public SimpleReporter(MetricsRegistry metricsRegistry, PrintStream out) {
521 super(metricsRegistry, out, MetricPredicate.ALL);
522 this.out = out;
523 }
524
525 @Override
526 public void run() {
527 for (Map.Entry<String, SortedMap<MetricName, Metric>> entry : getMetricsRegistry().groupedMetrics(
528 MetricPredicate.ALL).entrySet()) {
529 try {
530 for (Map.Entry<MetricName, Metric> subEntry : entry.getValue().entrySet()) {
531 out.print(" " + subEntry.getKey().getName());
532 out.println(':');
533
534 subEntry.getValue().processWith(this, subEntry.getKey(), out);
535 }
536 } catch (Exception e) {
537 e.printStackTrace(out);
538 }
539 }
540 }
541
542 @Override
543 public void processHistogram(MetricName name, Histogram histogram, PrintStream stream) {
544 super.processHistogram(name, histogram, stream);
545 stream.printf(Locale.getDefault(), " count = %d%n", histogram.count());
546 }
547 }
548
549 public static void main(String[] args) throws Exception {
550 Configuration conf = HBaseConfiguration.create();
551
552 conf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0);
553 int ret = ToolRunner.run(conf, new HFilePrettyPrinter(), args);
554 System.exit(ret);
555 }
556 }