/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.backup.util;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
import org.apache.hadoop.hbase.backup.HBackupFileSystem;
import org.apache.hadoop.hbase.backup.RestoreJob;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;

/**
 * A collection of methods used by multiple classes to restore HBase tables.
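 * <p>
 * A minimal usage sketch; the paths, backup id, and table names below are hypothetical:
 *
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * Path backupRoot = new Path("hdfs://nn:8020/backup");
 * String backupId = "backup_1396650096738";
 * RestoreTool tool = new RestoreTool(conf, backupRoot, backupId);
 * try (Connection conn = ConnectionFactory.createConnection(conf)) {
 *   Path tableBackupPath =
 *       HBackupFileSystem.getTableBackupPath(TableName.valueOf("t1"), backupRoot, backupId);
 *   // full restore of t1 into t1_restored; do not truncate an existing target table
 *   tool.fullRestoreTable(conn, tableBackupPath, TableName.valueOf("t1"),
 *     TableName.valueOf("t1_restored"), false, null);
 * }
 * }</pre>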
 */
@InterfaceAudience.Private
public class RestoreTool {
  public static final Logger LOG = LoggerFactory.getLogger(RestoreTool.class);
  private final static long TABLE_AVAILABILITY_WAIT_TIME = 180000;

  private final String[] ignoreDirs = { HConstants.RECOVERED_EDITS_DIR };
  protected Configuration conf;
  protected Path backupRootPath;
  protected String backupId;
  protected FileSystem fs;

  // store table name and snapshot dir mapping
  private final HashMap<TableName, Path> snapshotMap = new HashMap<>();

  public RestoreTool(Configuration conf, final Path backupRootPath, final String backupId)
      throws IOException {
    this.conf = conf;
    this.backupRootPath = backupRootPath;
    this.backupId = backupId;
    this.fs = backupRootPath.getFileSystem(conf);
  }

  /**
   * Returns the path to the table archive, e.g.
   * ".../user/biadmin/backup1/default/t1_dn/backup_1396650096738/archive/data/default/t1_dn",
   * or null if the table has no archive directory (i.e. the table was empty).
   * @param tableName table name
   * @return path to the table archive, or null if it does not exist
   * @throws IOException exception
   */
  Path getTableArchivePath(TableName tableName) throws IOException {
    Path baseDir =
        new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
            HConstants.HFILE_ARCHIVE_DIRECTORY);
    Path dataDir = new Path(baseDir, HConstants.BASE_NAMESPACE_DIR);
    Path archivePath = new Path(dataDir, tableName.getNamespaceAsString());
    Path tableArchivePath = new Path(archivePath, tableName.getQualifierAsString());
    if (!fs.exists(tableArchivePath) || !fs.getFileStatus(tableArchivePath).isDirectory()) {
      LOG.debug("Folder tableArchivePath: " + tableArchivePath.toString() + " does not exist");
      tableArchivePath = null; // empty table has no archive
    }
    return tableArchivePath;
  }

  /**
   * Gets the list of region directories under the table archive.
   * @param tableName table name
   * @return list of region directory paths
   * @throws IOException exception
   */
  ArrayList<Path> getRegionList(TableName tableName) throws IOException {
    Path tableArchivePath = getTableArchivePath(tableName);
    ArrayList<Path> regionDirList = new ArrayList<>();
    FileStatus[] children = fs.listStatus(tableArchivePath);
    for (FileStatus childStatus : children) {
      // each child is a region directory
      Path child = childStatus.getPath();
      regionDirList.add(child);
    }
    return regionDirList;
  }

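  /**
   * Applies the given table descriptor and blocks until the table is available again,
   * polling every 100 ms for up to 60 seconds.
   */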
  void modifyTableSync(Connection conn, TableDescriptor desc) throws IOException {
    try (Admin admin = conn.getAdmin()) {
      admin.modifyTable(desc);
      int attempt = 0;
      int maxAttempts = 600;
      while (!admin.isTableAvailable(desc.getTableName())) {
        Thread.sleep(100);
        attempt++;
        if (attempt > maxAttempts) {
          throw new IOException("Timeout of " + (maxAttempts * 100) + " ms expired waiting for table "
              + desc.getTableName() + " to become available");
        }
      }
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  /**
   * Replays the WALs in an incremental backup image by calling WALPlayer. Currently tableNames
   * and newTableNames each contain only a single table; this will be expanded to multiple tables
   * in the future.
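   * <p>
   * A minimal sketch of a call; the WAL directory and backup id below are hypothetical:
   *
   * <pre>{@code
   * tool.incrementalRestoreTable(conn, tableBackupPath,
   *   new Path[] { new Path("hdfs://nn:8020/backup/WALs") },
   *   new TableName[] { TableName.valueOf("t1") },
   *   new TableName[] { TableName.valueOf("t1") }, "backup_1396650097621");
   * }</pre>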
   * @param conn HBase connection
   * @param tableBackupPath backup path
   * @param logDirs incremental backup folders, which contain the WALs
   * @param tableNames source table names (the tables that were backed up)
   * @param newTableNames target table names (the tables to restore to)
   * @param incrBackupId incremental backup id
   * @throws IOException exception
   */
  public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[] logDirs,
      TableName[] tableNames, TableName[] newTableNames, String incrBackupId) throws IOException {
    try (Admin admin = conn.getAdmin()) {
      if (tableNames.length != newTableNames.length) {
        throw new IOException("Number of source tables and target tables does not match!");
      }
      FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);

      // For an incremental backup image, the tables are expected to exist already, created either
      // by the user or by a previous full backup. Check that all target tables exist.
      for (TableName tableName : newTableNames) {
        if (!admin.tableExists(tableName)) {
          throw new IOException("HBase table " + tableName
              + " does not exist. Create the table first, e.g. by restoring a full backup.");
        }
      }
      // adjust table schema
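      // Reconcile column families between the backup image and the target table: add families
      // that exist only in the image and remove families that exist only in the target, so the
      // schemas match before the WALs are replayed.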
      for (int i = 0; i < tableNames.length; i++) {
        TableName tableName = tableNames[i];
        TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, incrBackupId);
        if (tableDescriptor == null) {
          throw new IOException("Can't find " + tableName + "'s descriptor.");
        }
        LOG.debug("Found descriptor " + tableDescriptor + " through " + incrBackupId);

        TableName newTableName = newTableNames[i];
        TableDescriptor newTableDescriptor = admin.getDescriptor(newTableName);
        List<ColumnFamilyDescriptor> families = Arrays.asList(tableDescriptor.getColumnFamilies());
        List<ColumnFamilyDescriptor> existingFamilies =
            Arrays.asList(newTableDescriptor.getColumnFamilies());
        TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(newTableDescriptor);
        boolean schemaChangeNeeded = false;
        for (ColumnFamilyDescriptor family : families) {
          if (!existingFamilies.contains(family)) {
            builder.setColumnFamily(family);
            schemaChangeNeeded = true;
          }
        }
        for (ColumnFamilyDescriptor family : existingFamilies) {
          if (!families.contains(family)) {
            builder.removeColumnFamily(family.getName());
            schemaChangeNeeded = true;
          }
        }
        if (schemaChangeNeeded) {
          TableDescriptor modifiedDescriptor = builder.build();
          modifyTableSync(conn, modifiedDescriptor);
          LOG.info("Changed " + newTableName + " to: " + modifiedDescriptor);
        }
      }
      RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);

      restoreService.run(logDirs, tableNames, newTableNames, false);
    }
  }

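  /**
   * Restores a full backup image of a table: creates the target table if necessary, pre-split
   * using the region boundaries found in the backup image, and bulk-loads the archived HFiles.
   * @param conn HBase connection
   * @param tableBackupPath backup path
   * @param tableName source table name (the table that was backed up)
   * @param newTableName target table name (the table to restore to)
   * @param truncateIfExists truncate the target table if it already exists
   * @param lastIncrBackupId id of the last incremental backup image, used to locate the most
   *          recent table descriptor; may be null
   * @throws IOException exception
   */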
  public void fullRestoreTable(Connection conn, Path tableBackupPath, TableName tableName,
      TableName newTableName, boolean truncateIfExists, String lastIncrBackupId)
          throws IOException {
    createAndRestoreTable(conn, tableName, newTableName, tableBackupPath, truncateIfExists,
      lastIncrBackupId);
  }

  /**
   * Returns the path to the backup table snapshot directory:
   * "/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot"
   * @param backupRootPath backup root path
   * @param tableName table name
   * @param backupId backup Id
   * @return path for snapshot
   */
  Path getTableSnapshotPath(Path backupRootPath, TableName tableName, String backupId) {
    return new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
        HConstants.SNAPSHOT_DIR_NAME);
  }

  /**
   * Returns the path to the table's snapshot directory, e.g.
   * "/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot/
   *  snapshot_1396650097621_namespace_table".
   * In 0.96 and 0.98 this path contains .snapshotinfo and .tabledesc; on trunk it contains
   * .snapshotinfo and .data.manifest.
   * @param tableName table name
   * @return path to table info
   * @throws IOException exception
   */
  Path getTableInfoPath(TableName tableName) throws IOException {
    Path tableSnapShotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
    Path tableInfoPath = null;

    // can't build the path directly as the timestamp values are different
    FileStatus[] snapshots = fs.listStatus(tableSnapShotPath,
        new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
    for (FileStatus snapshot : snapshots) {
      tableInfoPath = snapshot.getPath();
      // SnapshotManifest.DATA_MANIFEST_NAME = "data.manifest";
      if (tableInfoPath.getName().endsWith("data.manifest")) {
        break;
      }
    }
    return tableInfoPath;
  }

  /**
   * Get the table descriptor.
   * @param tableName is the table backed up
   * @return {@link TableDescriptor} saved in the backup image of the table
   */
  TableDescriptor getTableDesc(TableName tableName) throws IOException {
    Path tableInfoPath = this.getTableInfoPath(tableName);
    SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, tableInfoPath);
    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, tableInfoPath, desc);
    TableDescriptor tableDescriptor = manifest.getTableDescriptor();
    if (!tableDescriptor.getTableName().equals(tableName)) {
      LOG.error("Couldn't find table descriptor for table: " + tableName + " under tableInfoPath: "
              + tableInfoPath.toString());
      LOG.error("tableDescriptor.getNameAsString() = "
              + tableDescriptor.getTableName().getNameAsString());
      throw new FileNotFoundException("Couldn't find table descriptor for table: " + tableName
          + " under tableInfoPath: " + tableInfoPath.toString());
    }
    return tableDescriptor;
  }

  private TableDescriptor getTableDescriptor(FileSystem fileSys, TableName tableName,
      String lastIncrBackupId) throws IOException {
    if (lastIncrBackupId != null) {
      String target =
          BackupUtils.getTableBackupDir(backupRootPath.toString(),
            lastIncrBackupId, tableName);
      return FSTableDescriptors.getTableDescriptorFromFs(fileSys, new Path(target));
    }
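    // No incremental backup id was given; the caller falls back to reading the descriptor from
    // the snapshot manifest in the backup image.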
    return null;
  }

  private void createAndRestoreTable(Connection conn, TableName tableName, TableName newTableName,
      Path tableBackupPath, boolean truncateIfExists, String lastIncrBackupId) throws IOException {
    if (newTableName == null) {
      newTableName = tableName;
    }
    FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);

    // get table descriptor first
    TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, lastIncrBackupId);
    if (tableDescriptor != null) {
      LOG.debug("Retrieved descriptor: " + tableDescriptor + " through " + lastIncrBackupId);
    }

    if (tableDescriptor == null) {
      Path tableSnapshotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
      if (fileSys.exists(tableSnapshotPath)) {
        // The snapshot path exists, which means the backup path is in HDFS.
        // Check whether a snapshot dir was already recorded for the target table.
        if (snapshotMap.get(tableName) != null) {
          SnapshotDescription desc =
              SnapshotDescriptionUtils.readSnapshotInfo(fileSys, tableSnapshotPath);
          SnapshotManifest manifest = SnapshotManifest.open(conf, fileSys, tableSnapshotPath, desc);
          tableDescriptor = manifest.getTableDescriptor();
        } else {
          tableDescriptor = getTableDesc(tableName);
          snapshotMap.put(tableName, getTableInfoPath(tableName));
        }
        if (tableDescriptor == null) {
          LOG.debug("Found no table descriptor in the snapshot dir; the previous schema would be lost");
        }
      } else {
        throw new IOException("Table snapshot directory: " +
            tableSnapshotPath + " does not exist.");
      }
    }

    Path tableArchivePath = getTableArchivePath(tableName);
    if (tableArchivePath == null) {
      if (tableDescriptor != null) {
        // A table descriptor was found but there is no archive dir, which means the table is
        // empty. Create the table and return.
        if (LOG.isDebugEnabled()) {
          LOG.debug("Found a table descriptor but no archive dir for table " + tableName
              + ", will only create the table");
        }
        tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor);
        checkAndCreateTable(conn, tableBackupPath, tableName, newTableName, null, tableDescriptor,
          truncateIfExists);
        return;
      } else {
        throw new IllegalStateException(
            "Cannot restore hbase table because tableArchivePath is null"
                + " and no table descriptor was found.");
      }
    }

    if (tableDescriptor == null) {
      tableDescriptor = TableDescriptorBuilder.newBuilder(newTableName).build();
    } else {
      tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor);
    }

    // record all region dirs:
    // load all files in dir
    try {
      ArrayList<Path> regionPathList = getRegionList(tableName);

      // We should only try to create the table with all region information available, so we can
      // pre-split the regions at a fine grain.
      checkAndCreateTable(conn, tableBackupPath, tableName, newTableName, regionPathList,
        tableDescriptor, truncateIfExists);
      RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
      Path[] paths = new Path[regionPathList.size()];
      regionPathList.toArray(paths);
      restoreService.run(paths, new TableName[] { tableName }, new TableName[] { newTableName },
        true);
    } catch (Exception e) {
      LOG.error(e.toString(), e);
      throw new IllegalStateException("Cannot restore hbase table", e);
    }
  }

  /**
   * Gets the list of region directories under the table archive.
   * @param tableArchivePath table archive path
   * @return list of region directory paths
   * @throws IOException exception
   */
  ArrayList<Path> getRegionList(Path tableArchivePath) throws IOException {
    ArrayList<Path> regionDirList = new ArrayList<>();
    FileStatus[] children = fs.listStatus(tableArchivePath);
    for (FileStatus childStatus : children) {
      // each child is a region directory
      Path child = childStatus.getPath();
      regionDirList.add(child);
    }
    return regionDirList;
  }

  /**
   * Calculate the region split boundaries from the HFiles in the given region directories.
   * @param regionDirList region dir list
   * @return the split boundary keys inferred from the HFiles' first and last row keys
   * @throws IOException exception
   */
  byte[][] generateBoundaryKeys(ArrayList<Path> regionDirList) throws IOException {
    TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
    // Build a set of keys to store the boundaries
    for (Path regionDir : regionDirList) {
      LOG.debug("Parsing region dir: " + regionDir);
      Path hfofDir = regionDir;

      if (!fs.exists(hfofDir)) {
        LOG.warn("HFileOutputFormat dir " + hfofDir + " not found");
      }

      FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
      if (familyDirStatuses == null) {
        throw new IOException("No families found in " + hfofDir);
      }

      for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDirectory()) {
          LOG.warn("Skipping non-directory " + stat.getPath());
          continue;
        }
        boolean isIgnore = false;
        String pathName = stat.getPath().getName();
        for (String ignore : ignoreDirs) {
          if (pathName.contains(ignore)) {
            LOG.warn("Skipping non-family directory " + pathName);
            isIgnore = true;
            break;
          }
        }
        if (isIgnore) {
          continue;
        }
        Path familyDir = stat.getPath();
        LOG.debug("Parsing family dir [" + familyDir.toString() + "] in region [" + regionDir + "]");
        // Skip _logs, etc
        if (familyDir.getName().startsWith("_") || familyDir.getName().startsWith(".")) {
          continue;
        }

        // start to parse hfiles inside one family dir
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
          if (hfile.getName().startsWith("_") || hfile.getName().startsWith(".")
              || StoreFileInfo.isReference(hfile.getName())
              || HFileLink.isHFileLink(hfile.getName())) {
            continue;
          }
          HFile.Reader reader = HFile.createReader(fs, hfile, conf);
          final byte[] first, last;
          try {
            reader.loadFileInfo();
            first = reader.getFirstRowKey().get();
            last = reader.getLastRowKey().get();
            LOG.debug("Trying to figure out region boundaries hfile=" + hfile + " first="
                + Bytes.toStringBinary(first) + " last=" + Bytes.toStringBinary(last));

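            // Counting scheme: each HFile's first row key contributes +1 and its last row key
            // contributes -1. BulkLoadHFilesTool.inferBoundaries then scans the keys in sorted
            // order with a running sum; the keys where the sum returns to zero delimit
            // contiguous runs of overlapping HFiles, whose start keys become split boundaries.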
            // To eventually infer start key-end key boundaries
            Integer value = map.getOrDefault(first, 0);
            map.put(first, value + 1);
            value = map.getOrDefault(last, 0);
            map.put(last, value - 1);
          } finally {
            reader.close();
          }
        }
      }
    }
    return BulkLoadHFilesTool.inferBoundaries(map);
  }

  /**
   * Prepare the table for bulkload; most of the code is copied from the {@code createTable}
   * method in {@code BulkLoadHFilesTool}.
   * @param conn connection
   * @param tableBackupPath path
   * @param tableName table name
   * @param targetTableName target table name
   * @param regionDirList region directory list
   * @param htd table descriptor
   * @param truncateIfExists truncates table if exists
   * @throws IOException exception
   */
  private void checkAndCreateTable(Connection conn, Path tableBackupPath, TableName tableName,
      TableName targetTableName, ArrayList<Path> regionDirList, TableDescriptor htd,
      boolean truncateIfExists) throws IOException {
    try (Admin admin = conn.getAdmin()) {
      boolean createNew = false;
      if (admin.tableExists(targetTableName)) {
        if (truncateIfExists) {
          LOG.info("Truncating existing target table '" + targetTableName
              + "', preserving region splits");
          admin.disableTable(targetTableName);
          admin.truncateTable(targetTableName, true);
        } else {
          LOG.info("Using existing target table '" + targetTableName + "'");
        }
      } else {
        createNew = true;
      }
      if (createNew) {
        LOG.info("Creating target table '" + targetTableName + "'");
        byte[][] keys;
        if (regionDirList == null || regionDirList.size() == 0) {
          admin.createTable(htd);
        } else {
          keys = generateBoundaryKeys(regionDirList);
          // create table using table descriptor and region boundaries
          admin.createTable(htd, keys);
        }
      }
      long startTime = EnvironmentEdgeManager.currentTime();
      while (!admin.isTableAvailable(targetTableName)) {
        try {
          Thread.sleep(100);
        } catch (InterruptedException ie) {
          Thread.currentThread().interrupt();
        }
        if (EnvironmentEdgeManager.currentTime() - startTime > TABLE_AVAILABILITY_WAIT_TIME) {
          throw new IOException("Timeout of " + TABLE_AVAILABILITY_WAIT_TIME + " ms expired, table "
              + targetTableName + " is still not available");
        }
      }
    }
  }
}