001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.backup.util;
019
020import java.io.FileNotFoundException;
021import java.io.IOException;
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.HashMap;
025import java.util.List;
026import java.util.TreeMap;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.fs.FileStatus;
029import org.apache.hadoop.fs.FileSystem;
030import org.apache.hadoop.fs.FileUtil;
031import org.apache.hadoop.fs.Path;
032import org.apache.hadoop.hbase.HConstants;
033import org.apache.hadoop.hbase.NamespaceDescriptor;
034import org.apache.hadoop.hbase.NamespaceNotFoundException;
035import org.apache.hadoop.hbase.TableName;
036import org.apache.hadoop.hbase.backup.BackupRestoreFactory;
037import org.apache.hadoop.hbase.backup.HBackupFileSystem;
038import org.apache.hadoop.hbase.backup.RestoreJob;
039import org.apache.hadoop.hbase.client.Admin;
040import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
041import org.apache.hadoop.hbase.client.Connection;
042import org.apache.hadoop.hbase.client.TableDescriptor;
043import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
044import org.apache.hadoop.hbase.io.HFileLink;
045import org.apache.hadoop.hbase.io.hfile.HFile;
046import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
047import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
048import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
049import org.apache.hadoop.hbase.tool.BulkLoadHFilesTool;
050import org.apache.hadoop.hbase.util.Bytes;
051import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
052import org.apache.hadoop.hbase.util.FSTableDescriptors;
053import org.apache.yetus.audience.InterfaceAudience;
054import org.slf4j.Logger;
055import org.slf4j.LoggerFactory;
056
057import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
058
059/**
 * A collection of methods used by multiple classes to restore HBase tables.
061 */
062@InterfaceAudience.Private
063public class RestoreTool {
064  public static final Logger LOG = LoggerFactory.getLogger(BackupUtils.class);
065  private final static long TABLE_AVAILABILITY_WAIT_TIME = 180000;
066
067  private final String[] ignoreDirs = { HConstants.RECOVERED_EDITS_DIR };
068  protected Configuration conf;
069  protected Path backupRootPath;
070  protected String backupId;
071  protected FileSystem fs;
072
073  // store table name and snapshot dir mapping
074  private final HashMap<TableName, Path> snapshotMap = new HashMap<>();
075
076  public RestoreTool(Configuration conf, final Path backupRootPath, final String backupId)
077    throws IOException {
078    this.conf = conf;
079    this.backupRootPath = backupRootPath;
080    this.backupId = backupId;
081    this.fs = backupRootPath.getFileSystem(conf);
082  }
083
084  /**
085   * return value represent path for:
086   * ".../user/biadmin/backup1/default/t1_dn/backup_1396650096738/archive/data/default/t1_dn"
087   * @param tableName table name
088   * @return path to table archive
089   * @throws IOException exception
090   */
091  Path getTableArchivePath(TableName tableName) throws IOException {
092    Path baseDir =
093      new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
094        HConstants.HFILE_ARCHIVE_DIRECTORY);
095    Path dataDir = new Path(baseDir, HConstants.BASE_NAMESPACE_DIR);
096    Path archivePath = new Path(dataDir, tableName.getNamespaceAsString());
097    Path tableArchivePath = new Path(archivePath, tableName.getQualifierAsString());
098    if (!fs.exists(tableArchivePath) || !fs.getFileStatus(tableArchivePath).isDirectory()) {
099      LOG.debug("Folder tableArchivePath: " + tableArchivePath.toString() + " does not exists");
100      tableArchivePath = null; // empty table has no archive
101    }
102    return tableArchivePath;
103  }
104
105  /**
106   * Gets region list
107   * @param tableName table name
108   * @return RegionList region list
109   * @throws IOException exception
110   */
111  ArrayList<Path> getRegionList(TableName tableName) throws IOException {
112    Path tableArchivePath = getTableArchivePath(tableName);
113    ArrayList<Path> regionDirList = new ArrayList<>();
114    FileStatus[] children = fs.listStatus(tableArchivePath);
115    for (FileStatus childStatus : children) {
116      // here child refer to each region(Name)
117      Path child = childStatus.getPath();
118      regionDirList.add(child);
119    }
120    return regionDirList;
121  }
122
123  void modifyTableSync(Connection conn, TableDescriptor desc) throws IOException {
124    try (Admin admin = conn.getAdmin()) {
125      admin.modifyTable(desc);
126      int attempt = 0;
127      int maxAttempts = 600;
128      while (!admin.isTableAvailable(desc.getTableName())) {
129        Thread.sleep(100);
130        attempt++;
131        if (attempt++ > maxAttempts) {
132          throw new IOException("Timeout expired " + (maxAttempts * 100) + "ms");
133        }
134      }
135    } catch (Exception e) {
136      throw new IOException(e);
137    }
138  }
139
140  /**
141   * During incremental backup operation. Call WalPlayer to replay WAL in backup image Currently
142   * tableNames and newTablesNames only contain single table, will be expanded to multiple tables in
143   * the future
144   * @param conn            HBase connection
145   * @param tableBackupPath backup path
146   * @param logDirs         : incremental backup folders, which contains WAL
147   * @param tableNames      : source tableNames(table names were backuped)
148   * @param newTableNames   : target tableNames(table names to be restored to)
149   * @param incrBackupId    incremental backup Id
150   * @throws IOException exception
151   */
152  public void incrementalRestoreTable(Connection conn, Path tableBackupPath, Path[] logDirs,
153    TableName[] tableNames, TableName[] newTableNames, String incrBackupId) throws IOException {
154    try (Admin admin = conn.getAdmin()) {
155      if (tableNames.length != newTableNames.length) {
156        throw new IOException("Number of source tables and target tables does not match!");
157      }
158      FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);
159
160      // for incremental backup image, expect the table already created either by user or previous
161      // full backup. Here, check that all new tables exists
162      for (TableName tableName : newTableNames) {
163        if (!admin.tableExists(tableName)) {
164          throw new IOException("HBase table " + tableName
165            + " does not exist. Create the table first, e.g. by restoring a full backup.");
166        }
167      }
168      // adjust table schema
169      for (int i = 0; i < tableNames.length; i++) {
170        TableName tableName = tableNames[i];
171        TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, incrBackupId);
172        if (tableDescriptor == null) {
173          throw new IOException("Can't find " + tableName + "'s descriptor.");
174        }
175        LOG.debug("Found descriptor " + tableDescriptor + " through " + incrBackupId);
176
177        TableName newTableName = newTableNames[i];
178        TableDescriptor newTableDescriptor = admin.getDescriptor(newTableName);
179        List<ColumnFamilyDescriptor> families = Arrays.asList(tableDescriptor.getColumnFamilies());
180        List<ColumnFamilyDescriptor> existingFamilies =
181          Arrays.asList(newTableDescriptor.getColumnFamilies());
182        TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(newTableDescriptor);
183        boolean schemaChangeNeeded = false;
184        for (ColumnFamilyDescriptor family : families) {
185          if (!existingFamilies.contains(family)) {
186            builder.setColumnFamily(family);
187            schemaChangeNeeded = true;
188          }
189        }
190        for (ColumnFamilyDescriptor family : existingFamilies) {
191          if (!families.contains(family)) {
192            builder.removeColumnFamily(family.getName());
193            schemaChangeNeeded = true;
194          }
195        }
196        if (schemaChangeNeeded) {
197          modifyTableSync(conn, builder.build());
198          LOG.info("Changed " + newTableDescriptor.getTableName() + " to: " + newTableDescriptor);
199        }
200      }
201      RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
202
203      restoreService.run(logDirs, tableNames, newTableNames, false);
204    }
205  }
206
207  public void fullRestoreTable(Connection conn, Path tableBackupPath, TableName tableName,
208    TableName newTableName, boolean truncateIfExists, String lastIncrBackupId) throws IOException {
209    createAndRestoreTable(conn, tableName, newTableName, tableBackupPath, truncateIfExists,
210      lastIncrBackupId);
211  }
212
213  /**
214   * Returns value represent path for path to backup table snapshot directory:
215   * "/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot"
216   * @param backupRootPath backup root path
217   * @param tableName      table name
218   * @param backupId       backup Id
219   * @return path for snapshot
220   */
221  Path getTableSnapshotPath(Path backupRootPath, TableName tableName, String backupId) {
222    return new Path(HBackupFileSystem.getTableBackupPath(tableName, backupRootPath, backupId),
223      HConstants.SNAPSHOT_DIR_NAME);
224  }
225
226  /**
227   * Returns value represent path for:
228   * ""/$USER/SBACKUP_ROOT/backup_id/namespace/table/.hbase-snapshot/
229   * snapshot_1396650097621_namespace_table" this path contains .snapshotinfo, .tabledesc (0.96 and
230   * 0.98) this path contains .snapshotinfo, .data.manifest (trunk)
231   * @param tableName table name
232   * @return path to table info
233   * @throws IOException exception
234   */
235  Path getTableInfoPath(TableName tableName) throws IOException {
236    Path tableSnapShotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
237    Path tableInfoPath = null;
238
239    // can't build the path directly as the timestamp values are different
240    FileStatus[] snapshots = fs.listStatus(tableSnapShotPath,
241      new SnapshotDescriptionUtils.CompletedSnaphotDirectoriesFilter(fs));
242    for (FileStatus snapshot : snapshots) {
243      tableInfoPath = snapshot.getPath();
244      // SnapshotManifest.DATA_MANIFEST_NAME = "data.manifest";
245      if (tableInfoPath.getName().endsWith("data.manifest")) {
246        break;
247      }
248    }
249    return tableInfoPath;
250  }
251
252  /**
253   * Get table descriptor
254   * @param tableName is the table backed up
255   * @return {@link TableDescriptor} saved in backup image of the table
256   */
257  TableDescriptor getTableDesc(TableName tableName) throws IOException {
258    Path tableInfoPath = this.getTableInfoPath(tableName);
259    SnapshotDescription desc = SnapshotDescriptionUtils.readSnapshotInfo(fs, tableInfoPath);
260    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, tableInfoPath, desc);
261    TableDescriptor tableDescriptor = manifest.getTableDescriptor();
262    if (!tableDescriptor.getTableName().equals(tableName)) {
263      LOG.error("couldn't find Table Desc for table: " + tableName + " under tableInfoPath: "
264        + tableInfoPath.toString());
265      LOG.error(
266        "tableDescriptor.getNameAsString() = " + tableDescriptor.getTableName().getNameAsString());
267      throw new FileNotFoundException("couldn't find Table Desc for table: " + tableName
268        + " under tableInfoPath: " + tableInfoPath.toString());
269    }
270    return tableDescriptor;
271  }
272
273  private TableDescriptor getTableDescriptor(FileSystem fileSys, TableName tableName,
274    String lastIncrBackupId) throws IOException {
275    if (lastIncrBackupId != null) {
276      String target =
277        BackupUtils.getTableBackupDir(backupRootPath.toString(), lastIncrBackupId, tableName);
278      return FSTableDescriptors.getTableDescriptorFromFs(fileSys, new Path(target));
279    }
280    return null;
281  }
282
283  private void createAndRestoreTable(Connection conn, TableName tableName, TableName newTableName,
284    Path tableBackupPath, boolean truncateIfExists, String lastIncrBackupId) throws IOException {
285    if (newTableName == null) {
286      newTableName = tableName;
287    }
288    FileSystem fileSys = tableBackupPath.getFileSystem(this.conf);
289
290    // get table descriptor first
291    TableDescriptor tableDescriptor = getTableDescriptor(fileSys, tableName, lastIncrBackupId);
292    if (tableDescriptor != null) {
293      LOG.debug("Retrieved descriptor: " + tableDescriptor + " thru " + lastIncrBackupId);
294    }
295
296    if (tableDescriptor == null) {
297      Path tableSnapshotPath = getTableSnapshotPath(backupRootPath, tableName, backupId);
298      if (fileSys.exists(tableSnapshotPath)) {
299        // snapshot path exist means the backup path is in HDFS
300        // check whether snapshot dir already recorded for target table
301        if (snapshotMap.get(tableName) != null) {
302          SnapshotDescription desc =
303            SnapshotDescriptionUtils.readSnapshotInfo(fileSys, tableSnapshotPath);
304          SnapshotManifest manifest = SnapshotManifest.open(conf, fileSys, tableSnapshotPath, desc);
305          tableDescriptor = manifest.getTableDescriptor();
306        } else {
307          tableDescriptor = getTableDesc(tableName);
308          snapshotMap.put(tableName, getTableInfoPath(tableName));
309        }
310        if (tableDescriptor == null) {
311          LOG.debug("Found no table descriptor in the snapshot dir, previous schema would be lost");
312        }
313      } else {
314        throw new IOException(
315          "Table snapshot directory: " + tableSnapshotPath + " does not exist.");
316      }
317    }
318
319    Path tableArchivePath = getTableArchivePath(tableName);
320    if (tableArchivePath == null) {
321      if (tableDescriptor != null) {
322        // find table descriptor but no archive dir means the table is empty, create table and exit
323        if (LOG.isDebugEnabled()) {
324          LOG.debug("find table descriptor but no archive dir for table " + tableName
325            + ", will only create table");
326        }
327        tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor);
328        checkAndCreateTable(conn, tableBackupPath, tableName, newTableName, null, tableDescriptor,
329          truncateIfExists);
330        return;
331      } else {
332        throw new IllegalStateException(
333          "Cannot restore hbase table because directory '" + " tableArchivePath is null.");
334      }
335    }
336
337    if (tableDescriptor == null) {
338      tableDescriptor = TableDescriptorBuilder.newBuilder(newTableName).build();
339    } else {
340      tableDescriptor = TableDescriptorBuilder.copy(newTableName, tableDescriptor);
341    }
342
343    // record all region dirs:
344    // load all files in dir
345    try {
346      ArrayList<Path> regionPathList = getRegionList(tableName);
347
348      // should only try to create the table with all region informations, so we could pre-split
349      // the regions in fine grain
350      checkAndCreateTable(conn, tableBackupPath, tableName, newTableName, regionPathList,
351        tableDescriptor, truncateIfExists);
352      RestoreJob restoreService = BackupRestoreFactory.getRestoreJob(conf);
353      Path[] paths = new Path[regionPathList.size()];
354      regionPathList.toArray(paths);
355      restoreService.run(paths, new TableName[] { tableName }, new TableName[] { newTableName },
356        true);
357
358    } catch (Exception e) {
359      LOG.error(e.toString(), e);
360      throw new IllegalStateException("Cannot restore hbase table", e);
361    }
362  }
363
364  /**
365   * Gets region list
366   * @param tableArchivePath table archive path
367   * @return RegionList region list
368   * @throws IOException exception
369   */
370  ArrayList<Path> getRegionList(Path tableArchivePath) throws IOException {
371    ArrayList<Path> regionDirList = new ArrayList<>();
372    FileStatus[] children = fs.listStatus(tableArchivePath);
373    for (FileStatus childStatus : children) {
374      // here child refer to each region(Name)
375      Path child = childStatus.getPath();
376      regionDirList.add(child);
377    }
378    return regionDirList;
379  }
380
381  /**
382   * Calculate region boundaries and add all the column families to the table descriptor
383   * @param regionDirList region dir list
384   * @return a set of keys to store the boundaries
385   */
386  byte[][] generateBoundaryKeys(ArrayList<Path> regionDirList) throws IOException {
387    TreeMap<byte[], Integer> map = new TreeMap<>(Bytes.BYTES_COMPARATOR);
388    // Build a set of keys to store the boundaries
389    // calculate region boundaries and add all the column families to the table descriptor
390    for (Path regionDir : regionDirList) {
391      LOG.debug("Parsing region dir: " + regionDir);
392      Path hfofDir = regionDir;
393
394      if (!fs.exists(hfofDir)) {
395        LOG.warn("HFileOutputFormat dir " + hfofDir + " not found");
396      }
397
398      FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
399      if (familyDirStatuses == null) {
400        throw new IOException("No families found in " + hfofDir);
401      }
402
403      for (FileStatus stat : familyDirStatuses) {
404        if (!stat.isDirectory()) {
405          LOG.warn("Skipping non-directory " + stat.getPath());
406          continue;
407        }
408        boolean isIgnore = false;
409        String pathName = stat.getPath().getName();
410        for (String ignore : ignoreDirs) {
411          if (pathName.contains(ignore)) {
412            LOG.warn("Skipping non-family directory" + pathName);
413            isIgnore = true;
414            break;
415          }
416        }
417        if (isIgnore) {
418          continue;
419        }
420        Path familyDir = stat.getPath();
421        LOG.debug("Parsing family dir [" + familyDir.toString() + " in region [" + regionDir + "]");
422        // Skip _logs, etc
423        if (familyDir.getName().startsWith("_") || familyDir.getName().startsWith(".")) {
424          continue;
425        }
426
427        // start to parse hfile inside one family dir
428        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
429        for (Path hfile : hfiles) {
430          if (
431            hfile.getName().startsWith("_") || hfile.getName().startsWith(".")
432              || StoreFileInfo.isReference(hfile.getName())
433              || HFileLink.isHFileLink(hfile.getName())
434          ) {
435            continue;
436          }
437          HFile.Reader reader = HFile.createReader(fs, hfile, conf);
438          final byte[] first, last;
439          try {
440            first = reader.getFirstRowKey().get();
441            last = reader.getLastRowKey().get();
442            LOG.debug("Trying to figure out region boundaries hfile=" + hfile + " first="
443              + Bytes.toStringBinary(first) + " last=" + Bytes.toStringBinary(last));
444
445            // To eventually infer start key-end key boundaries
446            Integer value = map.containsKey(first) ? (Integer) map.get(first) : 0;
447            map.put(first, value + 1);
448            value = map.containsKey(last) ? (Integer) map.get(last) : 0;
449            map.put(last, value - 1);
450          } finally {
451            reader.close();
452          }
453        }
454      }
455    }
456    return BulkLoadHFilesTool.inferBoundaries(map);
457  }
458
459  /**
460   * Prepare the table for bulkload, most codes copied from {@code createTable} method in
461   * {@code BulkLoadHFilesTool}.
462   * @param conn             connection
463   * @param tableBackupPath  path
464   * @param tableName        table name
465   * @param targetTableName  target table name
466   * @param regionDirList    region directory list
467   * @param htd              table descriptor
468   * @param truncateIfExists truncates table if exists
469   * @throws IOException exception
470   */
471  private void checkAndCreateTable(Connection conn, Path tableBackupPath, TableName tableName,
472    TableName targetTableName, ArrayList<Path> regionDirList, TableDescriptor htd,
473    boolean truncateIfExists) throws IOException {
474    try (Admin admin = conn.getAdmin()) {
475      boolean createNew = false;
476      if (admin.tableExists(targetTableName)) {
477        if (truncateIfExists) {
478          LOG.info(
479            "Truncating exising target table '" + targetTableName + "', preserving region splits");
480          admin.disableTable(targetTableName);
481          admin.truncateTable(targetTableName, true);
482        } else {
483          LOG.info("Using exising target table '" + targetTableName + "'");
484        }
485      } else {
486        createNew = true;
487      }
488      if (createNew) {
489        LOG.info("Creating target table '" + targetTableName + "'");
490        byte[][] keys = null;
491        try {
492          if (regionDirList == null || regionDirList.size() == 0) {
493            admin.createTable(htd);
494          } else {
495            keys = generateBoundaryKeys(regionDirList);
496            // create table using table descriptor and region boundaries
497            admin.createTable(htd, keys);
498          }
499        } catch (NamespaceNotFoundException e) {
500          LOG.warn("There was no namespace and the same will be created");
501          String namespaceAsString = targetTableName.getNamespaceAsString();
502          LOG.info("Creating target namespace '" + namespaceAsString + "'");
503          admin.createNamespace(NamespaceDescriptor.create(namespaceAsString).build());
504          if (null == keys) {
505            admin.createTable(htd);
506          } else {
507            admin.createTable(htd, keys);
508          }
509        }
510
511      }
512      long startTime = EnvironmentEdgeManager.currentTime();
513      while (!admin.isTableAvailable(targetTableName)) {
514        try {
515          Thread.sleep(100);
516        } catch (InterruptedException ie) {
517          Thread.currentThread().interrupt();
518        }
519        if (EnvironmentEdgeManager.currentTime() - startTime > TABLE_AVAILABILITY_WAIT_TIME) {
520          throw new IOException("Time out " + TABLE_AVAILABILITY_WAIT_TIME + "ms expired, table "
521            + targetTableName + " is still not available");
522        }
523      }
524    }
525  }
526}