001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.mob.mapreduce; 020 021import java.io.IOException; 022import java.util.Arrays; 023import java.util.Base64; 024import java.util.HashSet; 025import java.util.Set; 026import java.util.UUID; 027import org.apache.hadoop.conf.Configuration; 028import org.apache.hadoop.conf.Configured; 029import org.apache.hadoop.fs.FileStatus; 030import org.apache.hadoop.fs.FileSystem; 031import org.apache.hadoop.fs.Path; 032import org.apache.hadoop.hbase.Cell; 033import org.apache.hadoop.hbase.HBaseConfiguration; 034import org.apache.hadoop.hbase.HConstants; 035import org.apache.hadoop.hbase.TableName; 036import org.apache.hadoop.hbase.client.Admin; 037import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; 038import org.apache.hadoop.hbase.client.Connection; 039import org.apache.hadoop.hbase.client.ConnectionFactory; 040import org.apache.hadoop.hbase.client.Result; 041import org.apache.hadoop.hbase.client.Scan; 042import org.apache.hadoop.hbase.client.TableDescriptor; 043import org.apache.hadoop.hbase.io.HFileLink; 044import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 045import org.apache.hadoop.hbase.mapreduce.TableInputFormat; 046import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; 047import org.apache.hadoop.hbase.mapreduce.TableMapper; 048import org.apache.hadoop.hbase.mob.MobConstants; 049import org.apache.hadoop.hbase.mob.MobUtils; 050import org.apache.hadoop.hbase.util.Bytes; 051import org.apache.hadoop.hbase.util.CommonFSUtils; 052import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 053import org.apache.hadoop.hbase.util.HFileArchiveUtil; 054import org.apache.hadoop.hbase.util.Pair; 055import org.apache.hadoop.io.Text; 056import org.apache.hadoop.mapreduce.Job; 057import org.apache.hadoop.mapreduce.Reducer; 058import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 059import org.apache.hadoop.security.UserGroupInformation; 060import org.apache.hadoop.util.Tool; 061import org.apache.hadoop.util.ToolRunner; 062import org.apache.yetus.audience.InterfaceAudience; 063import org.slf4j.Logger; 064import org.slf4j.LoggerFactory; 065 066 067/** 068 * Scans a given table + CF for all mob reference cells to get the list of backing mob files. 069 * For each referenced file we attempt to verify that said file is on the FileSystem in a place 070 * that the MOB system will look when attempting to resolve the actual value. 071 * 072 * The job includes counters that can help provide a rough sketch of the mob data. 073 * 074 * <pre> 075 * Map-Reduce Framework 076 * Map input records=10000 077 * ... 078 * Reduce output records=99 079 * ... 080 * CELLS PER ROW 081 * Number of rows with 1s of cells per row=10000 082 * MOB 083 * NUM_CELLS=52364 084 * PROBLEM 085 * Affected rows=338 086 * Problem MOB files=2 087 * ROWS WITH PROBLEMS PER FILE 088 * Number of HFiles with 100s of affected rows=2 089 * SIZES OF CELLS 090 * Number of cells with size in the 10,000s of bytes=627 091 * Number of cells with size in the 100,000s of bytes=51392 092 * Number of cells with size in the 1,000,000s of bytes=345 093 * SIZES OF ROWS 094 * Number of rows with total size in the 100,000s of bytes=6838 095 * Number of rows with total size in the 1,000,000s of bytes=3162 096 * </pre> 097 * 098 * * Map-Reduce Framework:Map input records - the number of rows with mob references 099 * * Map-Reduce Framework:Reduce output records - the number of unique hfiles referenced 100 * * MOB:NUM_CELLS - the total number of mob reference cells 101 * * PROBLEM:Affected rows - the number of rows that reference hfiles with an issue 102 * * PROBLEM:Problem MOB files - the number of unique hfiles that have an issue 103 * * CELLS PER ROW: - this counter group gives a histogram of the order of magnitude of the 104 * number of cells in a given row by grouping by the number of digits used in each count. 105 * This allows us to see more about the distribution of cells than what we can determine 106 * with just the cell count and the row count. In this particular example we can see that 107 * all of our rows have somewhere between 1 - 9 cells. 108 * * ROWS WITH PROBLEMS PER FILE: - this counter group gives a histogram of the order of 109 * magnitude of the number of rows in each of the hfiles with a problem. e.g. in the 110 * example there are 2 hfiles and they each have the same order of magnitude number of rows, 111 * specifically between 100 and 999. 112 * * SIZES OF CELLS: - this counter group gives a histogram of the order of magnitude of 113 * the size of mob values according to our reference cells. e.g. in the example above we 114 * have cell sizes that are all between 10,000 bytes and 9,999,999 bytes. From this 115 * histogram we can also see that _most_ cells are 100,000 - 999,000 bytes and the smaller 116 * and bigger ones are outliers making up less than 2% of mob cells. 117 * * SIZES OF ROWS: - this counter group gives a histogram of the order of magnitude of the 118 * size of mob values across each row according to our reference cells. In the example above 119 * we have rows that are are between 100,000 bytes and 9,999,999 bytes. We can also see that 120 * about 2/3rd of our rows are 100,000 - 999,999 bytes. 121 * 122 * Generates a report that gives one file status per line, with tabs dividing fields. 123 * 124 * <pre> 125 * RESULT OF LOOKUP FILE REF comma seperated, base64 encoded rows when there's a problem 126 * </pre> 127 * 128 * e.g. 129 * 130 * <pre> 131 * MOB DIR 09c576e28a65ed2ead0004d192ffaa382019110184b30a1c7e034573bf8580aef8393402 132 * MISSING FILE 28e252d7f013973174750d483d358fa020191101f73536e7133f4cd3ab1065edf588d509 MmJiMjMyYzBiMTNjNzc0OTY1ZWY4NTU4ZjBmYmQ2MTUtNTIz,MmEzOGE0YTkzMTZjNDllNWE4MzM1MTdjNDVkMzEwNzAtODg= 133 * </pre> 134 * 135 * Possible results are listed; the first three indicate things are working properly. 136 * * MOB DIR - the reference is in the normal MOB area for the given table and CF 137 * * HLINK TO ARCHIVE FOR SAME TABLE - the reference is present in the archive area for this 138 * table and CF 139 * * HLINK TO ARCHIVE FOR OTHER TABLE - the reference is present in a different table and CF, 140 * either in the MOB or archive areas (e.g. from a snapshot restore or clone) 141 * * ARCHIVE WITH HLINK BUT NOT FROM OUR TABLE - the reference is currently present in the archive 142 * area for this table and CF, but it is kept there because a _different_ table has a 143 * reference to it (e.g. from a snapshot clone). If these other tables are removed then 144 * the file will likely be deleted unless there is a snapshot also referencing it. 145 * * ARCHIVE BUT NO HLINKS - the reference is currently present in the archive for this table and 146 * CF, but there are no references present to prevent its removal. Unless it is newer than 147 * the general TTL (default 5 minutes) or referenced in a snapshot it will be subject to 148 * cleaning. 149 * * ARCHIVE BUT FAILURE WHILE CHECKING HLINKS - Check the job logs to see why things failed while 150 * looking for why this file is being kept around. 151 * * MISSING FILE - We couldn't find the reference on the FileSystem. Either there is dataloss due 152 * to a bug in the MOB storage system or the MOB storage is damaged but in an edge case that 153 * allows it to work for now. You can verify which by doing a raw reference scan to get the 154 * referenced hfile and check the underlying filesystem. See the ref guide section on mob 155 * for details. 156 * * HLINK BUT POINT TO MISSING FILE - There is a pointer in our mob area for this table and CF 157 * to a file elsewhere on the FileSystem, however the file it points to no longer exists. 158 * * MISSING FILE BUT FAILURE WHILE CHECKING HLINKS - We could not find the referenced file, 159 * however you should check the job logs to see why we couldn't check to see if there is a 160 * pointer to the referenced file in our archive or another table's archive or mob area. 161 * 162 */ 163@InterfaceAudience.Private 164public class MobRefReporter extends Configured implements Tool { 165 private static Logger LOG = LoggerFactory.getLogger(MobRefReporter.class); 166 public static final String NAME = "mobrefs"; 167 static final String REPORT_JOB_ID = "mob.report.job.id"; 168 static final String REPORT_START_DATETIME = "mob.report.job.start"; 169 170 public static class MobRefMapper extends TableMapper<Text, ImmutableBytesWritable> { 171 @Override 172 public void map(ImmutableBytesWritable r, Result columns, Context context) throws IOException, 173 InterruptedException { 174 if (columns == null) { 175 return; 176 } 177 Cell[] cells = columns.rawCells(); 178 if (cells == null || cells.length == 0) { 179 return; 180 } 181 Set<String> files = new HashSet<>(); 182 long count = 0; 183 long size = 0; 184 for (Cell c : cells) { 185 if (MobUtils.hasValidMobRefCellValue(c)) { 186 // TODO confirm there aren't tags 187 String fileName = MobUtils.getMobFileName(c); 188 if (!files.contains(fileName)) { 189 context.write(new Text(fileName), r); 190 files.add(fileName); 191 } 192 final int cellsize = MobUtils.getMobValueLength(c); 193 context.getCounter("SIZES OF CELLS", "Number of cells with size in the " + 194 log10GroupedString(cellsize) + "s of bytes").increment(1L); 195 size += cellsize; 196 count++; 197 } else { 198 LOG.debug("cell is not a mob ref, even though we asked for only refs. cell={}", c); 199 } 200 } 201 context.getCounter("CELLS PER ROW", "Number of rows with " + log10GroupedString(count) + 202 "s of cells per row").increment(1L); 203 context.getCounter("SIZES OF ROWS", "Number of rows with total size in the " + 204 log10GroupedString(size) + "s of bytes").increment(1L); 205 context.getCounter("MOB","NUM_CELLS").increment(count); 206 } 207 } 208 209 public static class MobRefReducer extends 210 Reducer<Text, ImmutableBytesWritable, Text, Text> { 211 212 TableName table; 213 String mobRegion; 214 Path mob; 215 Path archive; 216 String seperator; 217 218 /* Results that mean things are fine */ 219 final Text OK_MOB_DIR = new Text("MOB DIR"); 220 final Text OK_HLINK_RESTORE = new Text("HLINK TO ARCHIVE FOR SAME TABLE"); 221 final Text OK_HLINK_CLONE = new Text("HLINK TO ARCHIVE FOR OTHER TABLE"); 222 /* Results that mean something is incorrect */ 223 final Text INCONSISTENT_ARCHIVE_BAD_LINK = 224 new Text("ARCHIVE WITH HLINK BUT NOT FROM OUR TABLE"); 225 final Text INCONSISTENT_ARCHIVE_STALE = new Text("ARCHIVE BUT NO HLINKS"); 226 final Text INCONSISTENT_ARCHIVE_IOE = new Text("ARCHIVE BUT FAILURE WHILE CHECKING HLINKS"); 227 /* Results that mean data is probably already gone */ 228 final Text DATALOSS_MISSING = new Text("MISSING FILE"); 229 final Text DATALOSS_HLINK_DANGLING = new Text("HLINK BUT POINTS TO MISSING FILE"); 230 final Text DATALOSS_MISSING_IOE = new Text("MISSING FILE BUT FAILURE WHILE CHECKING HLINKS"); 231 final Base64.Encoder base64 = Base64.getEncoder(); 232 233 @Override 234 public void setup(Context context) throws IOException, InterruptedException { 235 final Configuration conf = context.getConfiguration(); 236 final String tableName = conf.get(TableInputFormat.INPUT_TABLE); 237 if (null == tableName) { 238 throw new IOException("Job configuration did not include table."); 239 } 240 table = TableName.valueOf(tableName); 241 mobRegion = MobUtils.getMobRegionInfo(table).getEncodedName(); 242 final String family = conf.get(TableInputFormat.SCAN_COLUMN_FAMILY); 243 if (null == family) { 244 throw new IOException("Job configuration did not include column family"); 245 } 246 mob = MobUtils.getMobFamilyPath(conf, table, family); 247 LOG.info("Using active mob area '{}'", mob); 248 archive = HFileArchiveUtil.getStoreArchivePath(conf, table, 249 MobUtils.getMobRegionInfo(table).getEncodedName(), family); 250 LOG.info("Using archive mob area '{}'", archive); 251 seperator = conf.get(TextOutputFormat.SEPERATOR, "\t"); 252 } 253 254 @Override 255 public void reduce(Text key, Iterable<ImmutableBytesWritable> rows, Context context) 256 throws IOException, InterruptedException { 257 final Configuration conf = context.getConfiguration(); 258 final String file = key.toString(); 259 // active mob area 260 if (mob.getFileSystem(conf).exists(new Path(mob, file))) { 261 LOG.debug("Found file '{}' in mob area", file); 262 context.write(OK_MOB_DIR, key); 263 // archive area - is there an hlink back reference (from a snapshot from same table) 264 } else if (archive.getFileSystem(conf).exists(new Path(archive, file))) { 265 266 Path backRefDir = HFileLink.getBackReferencesDir(archive, file); 267 try { 268 FileStatus[] backRefs = CommonFSUtils.listStatus(archive.getFileSystem(conf), backRefDir); 269 if (backRefs != null) { 270 boolean found = false; 271 for (FileStatus backRef : backRefs) { 272 Pair<TableName, String> refParts = HFileLink.parseBackReferenceName( 273 backRef.getPath().getName()); 274 if (table.equals(refParts.getFirst()) && mobRegion.equals(refParts.getSecond())) { 275 Path hlinkPath = HFileLink.getHFileFromBackReference(MobUtils.getMobHome(conf), 276 backRef.getPath()); 277 if (hlinkPath.getFileSystem(conf).exists(hlinkPath)) { 278 found = true; 279 } else { 280 LOG.warn("Found file '{}' in archive area with a back reference to the mob area " 281 + "for our table, but the mob area does not have a corresponding hfilelink.", 282 file); 283 } 284 } 285 } 286 if (found) { 287 LOG.debug("Found file '{}' in archive area. has proper hlink back references to " 288 + "suggest it is from a restored snapshot for this table.", file); 289 context.write(OK_HLINK_RESTORE, key); 290 } else { 291 LOG.warn("Found file '{}' in archive area, but the hlink back references do not " 292 + "properly point to the mob area for our table.", file); 293 context.write(INCONSISTENT_ARCHIVE_BAD_LINK, encodeRows(context, key, rows)); 294 } 295 } else { 296 LOG.warn("Found file '{}' in archive area, but there are no hlinks pointing to it. Not " 297 + "yet used snapshot or an error.", file); 298 context.write(INCONSISTENT_ARCHIVE_STALE, encodeRows(context, key, rows)); 299 } 300 } catch (IOException e) { 301 LOG.warn("Found file '{}' in archive area, but got an error while checking " 302 + "on back references.", file, e); 303 context.write(INCONSISTENT_ARCHIVE_IOE, encodeRows(context, key, rows)); 304 } 305 306 } else { 307 // check for an hlink in the active mob area (from a snapshot of a different table) 308 try { 309 /** 310 * we are doing this ourselves instead of using FSUtils.getReferenceFilePaths because 311 * we know the mob region never splits, so we can only have HFileLink references 312 * and looking for just them is cheaper then listing everything. 313 * 314 * This glob should match the naming convention for HFileLinks to our referenced hfile. 315 * As simplified explanation those file names look like "table=region-hfile". For details 316 * see the {@link HFileLink#createHFileLinkName HFileLink implementation}. 317 */ 318 FileStatus[] hlinks = mob.getFileSystem(conf).globStatus(new Path(mob + "/*=*-" + file)); 319 if (hlinks != null && hlinks.length != 0) { 320 if (hlinks.length != 1) { 321 LOG.warn("Found file '{}' as hfilelinks in the mob area, but there are more than " + 322 "one: {}", file, Arrays.deepToString(hlinks)); 323 } 324 HFileLink found = null; 325 for (FileStatus hlink : hlinks) { 326 HFileLink tmp = HFileLink.buildFromHFileLinkPattern(conf, hlink.getPath()); 327 if (tmp.exists(archive.getFileSystem(conf))) { 328 found = tmp; 329 break; 330 } else { 331 LOG.debug("Target file does not exist for ref {}", tmp); 332 } 333 } 334 if (found != null) { 335 LOG.debug("Found file '{}' as a ref in the mob area: {}", file, found); 336 context.write(OK_HLINK_CLONE, key); 337 } else { 338 LOG.warn("Found file '{}' as ref(s) in the mob area but they do not point to an hfile" 339 + " that exists.", file); 340 context.write(DATALOSS_HLINK_DANGLING, encodeRows(context, key, rows)); 341 } 342 } else { 343 LOG.error("Could not find referenced file '{}'. See the docs on this tool.", file); 344 LOG.debug("Note that we don't have the server-side tag from the mob cells that says " 345 + "what table the reference is originally from. So if the HFileLink in this table " 346 + "is missing but the referenced file is still in the table from that tag, then " 347 + "lookups of these impacted rows will work. Do a scan of the reference details " 348 + "of the cell for the hfile name and then check the entire hbase install if this " 349 + "table was made from a snapshot of another table. see the ref guide section on " 350 + "mob for details."); 351 context.write(DATALOSS_MISSING, encodeRows(context, key, rows)); 352 } 353 } catch (IOException e) { 354 LOG.error( 355 "Exception while checking mob area of our table for HFileLinks that point to {}", 356 file, e); 357 context.write(DATALOSS_MISSING_IOE, encodeRows(context, key, rows)); 358 } 359 } 360 } 361 362 /** 363 * reuses the passed Text key. appends the configured seperator and then a comma seperated list 364 * of base64 encoded row keys 365 */ 366 private Text encodeRows(Context context, Text key, Iterable<ImmutableBytesWritable> rows) 367 throws IOException { 368 StringBuilder sb = new StringBuilder(key.toString()); 369 sb.append(seperator); 370 boolean moreThanOne = false; 371 long count = 0; 372 for (ImmutableBytesWritable row : rows) { 373 if (moreThanOne) { 374 sb.append(","); 375 } 376 sb.append(base64.encodeToString(row.copyBytes())); 377 moreThanOne = true; 378 count++; 379 } 380 context.getCounter("PROBLEM", "Problem MOB files").increment(1L); 381 context.getCounter("PROBLEM", "Affected rows").increment(count); 382 context.getCounter("ROWS WITH PROBLEMS PER FILE", "Number of HFiles with " + 383 log10GroupedString(count) + "s of affected rows").increment(1L); 384 key.set(sb.toString()); 385 return key; 386 } 387 } 388 389 /** 390 * Returns the string representation of the given number after grouping it 391 * into log10 buckets. e.g. 0-9 -> 1, 10-99 -> 10, ..., 100,000-999,999 -> 100,000, etc. 392 */ 393 static String log10GroupedString(long number) { 394 return String.format("%,d", (long)(Math.pow(10d, Math.floor(Math.log10(number))))); 395 } 396 397 /** 398 * Main method for the tool. 399 * @return 0 if success, 1 for bad args. 2 if job aborted with an exception, 400 * 3 if mr job was unsuccessful 401 */ 402 public int run(String[] args) throws IOException, InterruptedException { 403 // TODO make family and table optional 404 if (args.length != 3) { 405 printUsage(); 406 return 1; 407 } 408 final String output = args[0]; 409 final String tableName = args[1]; 410 final String familyName = args[2]; 411 final long reportStartTime = EnvironmentEdgeManager.currentTime(); 412 Configuration conf = getConf(); 413 try { 414 FileSystem fs = FileSystem.get(conf); 415 // check whether the current user is the same one with the owner of hbase root 416 String currentUserName = UserGroupInformation.getCurrentUser().getShortUserName(); 417 FileStatus[] hbaseRootFileStat = fs.listStatus(new Path(conf.get(HConstants.HBASE_DIR))); 418 if (hbaseRootFileStat.length > 0) { 419 String owner = hbaseRootFileStat[0].getOwner(); 420 if (!owner.equals(currentUserName)) { 421 String errorMsg = "The current user[" + currentUserName 422 + "] does not have hbase root credentials." 423 + " If this job fails due to an inability to read HBase's internal directories, " 424 + "you will need to rerun as a user with sufficient permissions. The HBase superuser " 425 + "is a safe choice."; 426 LOG.warn(errorMsg); 427 } 428 } else { 429 LOG.error("The passed configs point to an HBase dir does not exist: {}", 430 conf.get(HConstants.HBASE_DIR)); 431 throw new IOException("The target HBase does not exist"); 432 } 433 434 byte[] family; 435 int maxVersions; 436 TableName tn = TableName.valueOf(tableName); 437 try (Connection connection = ConnectionFactory.createConnection(conf); 438 Admin admin = connection.getAdmin()) { 439 TableDescriptor htd = admin.getDescriptor(tn); 440 ColumnFamilyDescriptor hcd = htd.getColumnFamily(Bytes.toBytes(familyName)); 441 if (hcd == null || !hcd.isMobEnabled()) { 442 throw new IOException("Column family " + familyName + " is not a MOB column family"); 443 } 444 family = hcd.getName(); 445 maxVersions = hcd.getMaxVersions(); 446 } 447 448 449 String id = getClass().getSimpleName() + UUID.randomUUID().toString().replace("-", ""); 450 Job job = null; 451 Scan scan = new Scan(); 452 scan.addFamily(family); 453 // Do not retrieve the mob data when scanning 454 scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE)); 455 scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE)); 456 // If a scanner caching value isn't set, pick a smaller default since we know we're doing 457 // a full table scan and don't want to impact other clients badly. 458 scan.setCaching(conf.getInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, 10000)); 459 scan.setCacheBlocks(false); 460 scan.setMaxVersions(maxVersions); 461 conf.set(REPORT_JOB_ID, id); 462 463 job = Job.getInstance(conf); 464 job.setJarByClass(getClass()); 465 TableMapReduceUtil.initTableMapperJob(tn, scan, 466 MobRefMapper.class, Text.class, ImmutableBytesWritable.class, job); 467 468 job.setReducerClass(MobRefReducer.class); 469 job.setOutputFormatClass(TextOutputFormat.class); 470 TextOutputFormat.setOutputPath(job, new Path(output)); 471 472 job.setJobName(getClass().getSimpleName() + "-" + tn + "-" + familyName); 473 // for use in the reducer. easier than re-parsing it out of the scan string. 474 job.getConfiguration().set(TableInputFormat.SCAN_COLUMN_FAMILY, familyName); 475 476 // Use when we start this job as the base point for file "recency". 477 job.getConfiguration().setLong(REPORT_START_DATETIME, reportStartTime); 478 479 if (job.waitForCompletion(true)) { 480 LOG.info("Finished creating report for '{}', family='{}'", tn, familyName); 481 } else { 482 System.err.println("Job was not successful"); 483 return 3; 484 } 485 return 0; 486 487 } catch (ClassNotFoundException | RuntimeException | IOException | InterruptedException e) { 488 System.err.println("Job aborted due to exception " + e); 489 return 2; // job failed 490 } 491 } 492 493 public static void main(String[] args) throws Exception { 494 Configuration conf = HBaseConfiguration.create(); 495 int ret = ToolRunner.run(conf, new MobRefReporter(), args); 496 System.exit(ret); 497 } 498 499 private void printUsage() { 500 System.err.println("Usage:\n" + "--------------------------\n" + MobRefReporter.class.getName() 501 + " output-dir tableName familyName"); 502 System.err.println(" output-dir Where to write output report."); 503 System.err.println(" tableName The table name"); 504 System.err.println(" familyName The column family name"); 505 } 506 507}