001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.backup.master;
019
020import static org.junit.Assert.assertEquals;
021import static org.junit.Assert.assertFalse;
022import static org.junit.Assert.assertTrue;
023
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.TestBackupBase;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
import org.apache.hadoop.hbase.backup.util.BackupBoundaries;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
056
057@Category(LargeTests.class)
058public class TestBackupLogCleaner extends TestBackupBase {
059
060  @ClassRule
061  public static final HBaseClassTestRule CLASS_RULE =
062    HBaseClassTestRule.forClass(TestBackupLogCleaner.class);
063
064  private static final Logger LOG = LoggerFactory.getLogger(TestBackupLogCleaner.class);
065
  // All backup scenarios are implemented in a single test method because each full and
  // incremental backup depends on the state left behind by the backups that precede it.
068
069  @BeforeClass
070  public static void before() {
071    TEST_UTIL.getConfiguration().setLong(BackupLogCleaner.TS_BUFFER_KEY, 0);
072  }
073
074  @Test
075  public void testBackupLogCleaner() throws Exception {
076    Path backupRoot1 = new Path(BACKUP_ROOT_DIR, "root1");
077    Path backupRoot2 = new Path(BACKUP_ROOT_DIR, "root2");
078
079    List<TableName> tableSetFull = List.of(table1, table2, table3, table4);
080    List<TableName> tableSet14 = List.of(table1, table4);
081    List<TableName> tableSet23 = List.of(table2, table3);
082
083    try (BackupSystemTable systemTable = new BackupSystemTable(TEST_UTIL.getConnection())) {
084      // Verify that we have no backup sessions yet
085      assertFalse(systemTable.hasBackupSessions());
086
087      BackupLogCleaner cleaner = new BackupLogCleaner();
088      cleaner.setConf(TEST_UTIL.getConfiguration());
089      cleaner.init(Map.of(HMaster.MASTER, TEST_UTIL.getHBaseCluster().getMaster()));
090
091      // All WAL files can be deleted because we do not have backups
092      List<FileStatus> walFilesBeforeBackup = getListOfWALFiles(TEST_UTIL.getConfiguration());
093      Iterable<FileStatus> deletable = cleaner.getDeletableFiles(walFilesBeforeBackup);
094      assertEquals(walFilesBeforeBackup, deletable);
095
096      // Create a FULL backup B1 in backupRoot R1, containing all tables
097      String backupIdB1 = backupTables(BackupType.FULL, tableSetFull, backupRoot1.toString());
098      assertTrue(checkSucceeded(backupIdB1));
099
100      // As part of a backup, WALs are rolled, so we expect a new WAL file
101      Set<FileStatus> walFilesAfterB1 =
102        mergeAsSet(walFilesBeforeBackup, getListOfWALFiles(TEST_UTIL.getConfiguration()));
103      assertTrue(walFilesBeforeBackup.size() < walFilesAfterB1.size());
104
105      // Currently, we only have backup B1, so we can delete any WAL preceding B1
106      deletable = cleaner.getDeletableFiles(walFilesAfterB1);
107      assertEquals(toSet(walFilesBeforeBackup), toSet(deletable));
108
109      // Insert some data
110      Connection conn = TEST_UTIL.getConnection();
111      try (Table t1 = conn.getTable(table1)) {
112        Put p1;
113        for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
114          p1 = new Put(Bytes.toBytes("row-t1" + i));
115          p1.addColumn(famName, qualName, Bytes.toBytes("val" + i));
116          t1.put(p1);
117        }
118      }
119
120      try (Table t2 = conn.getTable(table2)) {
121        Put p2;
122        for (int i = 0; i < 5; i++) {
123          p2 = new Put(Bytes.toBytes("row-t2" + i));
124          p2.addColumn(famName, qualName, Bytes.toBytes("val" + i));
125          t2.put(p2);
126        }
127      }
128
129      // Create an INCREMENTAL backup B2 in backupRoot R1, requesting tables 1 & 4.
130      // Note that incremental tables always include all tables already included in the backup root,
131      // i.e. the backup will contain all tables (1, 2, 3, 4), ignoring what we specify here.
132      LOG.debug("Creating B2");
133      String backupIdB2 = backupTables(BackupType.INCREMENTAL, tableSet14, backupRoot1.toString());
134      assertTrue(checkSucceeded(backupIdB2));
135
136      // As part of a backup, WALs are rolled, so we expect a new WAL file
137      Set<FileStatus> walFilesAfterB2 =
138        mergeAsSet(walFilesAfterB1, getListOfWALFiles(TEST_UTIL.getConfiguration()));
139      assertTrue(walFilesAfterB1.size() < walFilesAfterB2.size());
140
141      // At this point, we have backups in root R1: B1 and B2.
142      // We only consider the most recent backup (B2) to determine which WALs can be deleted:
143      // all WALs preceding B2
144      deletable = cleaner.getDeletableFiles(walFilesAfterB2);
145      assertEquals(toSet(walFilesAfterB1), toSet(deletable));
146
147      // Create a FULL backup B3 in backupRoot R2, containing tables 1 & 4
148      LOG.debug("Creating B3");
149      String backupIdB3 = backupTables(BackupType.FULL, tableSetFull, backupRoot2.toString());
150      assertTrue(checkSucceeded(backupIdB3));
151
152      // As part of a backup, WALs are rolled, so we expect a new WAL file
153      Set<FileStatus> walFilesAfterB3 =
154        mergeAsSet(walFilesAfterB2, getListOfWALFiles(TEST_UTIL.getConfiguration()));
155      assertTrue(walFilesAfterB2.size() < walFilesAfterB3.size());
156
157      // At this point, we have backups in:
158      // root R1: B1 (timestamp=0, all tables), B2 (TS=1, all tables)
159      // root R2: B3 (TS=2, [T1, T4])
160      //
161      // To determine the WAL-deletion boundary, we only consider the most recent backup per root,
162      // so [B2, B3]. From these, we take the least recent as WAL-deletion boundary: B2, it contains
163      // all tables, so acts as the deletion boundary. I.e. only WALs preceding B2 are deletable.
164      deletable = cleaner.getDeletableFiles(walFilesAfterB3);
165      assertEquals(toSet(walFilesAfterB1), toSet(deletable));
166
167      // Create a FULL backup B4 in backupRoot R1, with a subset of tables
168      LOG.debug("Creating B4");
169      String backupIdB4 = backupTables(BackupType.FULL, tableSet14, backupRoot1.toString());
170      assertTrue(checkSucceeded(backupIdB4));
171
172      // As part of a backup, WALs are rolled, so we expect a new WAL file
173      Set<FileStatus> walFilesAfterB4 =
174        mergeAsSet(walFilesAfterB3, getListOfWALFiles(TEST_UTIL.getConfiguration()));
175      assertTrue(walFilesAfterB3.size() < walFilesAfterB4.size());
176
177      // At this point, we have backups in:
178      // root R1: B1 (timestamp=0, all tables), B2 (TS=1, all tables), B4 (TS=3, [T1, T4])
179      // root R2: B3 (TS=2, [T1, T4])
180      //
181      // To determine the WAL-deletion boundary, we only consider the most recent backup per root,
182      // so [B4, B3]. They contain the following timestamp boundaries per table:
183      // B4: { T1: 3, T2: 1, T3: 1, T4: 3 }
184      // B3: { T1: 2, T4: 2 }
185      // Taking the minimum timestamp (= 1), this means all WALs preceding B2 can be deleted.
186      deletable = cleaner.getDeletableFiles(walFilesAfterB4);
187      assertEquals(toSet(walFilesAfterB1), toSet(deletable));
188
189      // Create a FULL backup B5 in backupRoot R1, for tables 2 & 3
190      String backupIdB5 = backupTables(BackupType.FULL, tableSet23, backupRoot1.toString());
191      assertTrue(checkSucceeded(backupIdB5));
192
193      // As part of a backup, WALs are rolled, so we expect a new WAL file
194      Set<FileStatus> walFilesAfterB5 =
195        mergeAsSet(walFilesAfterB4, getListOfWALFiles(TEST_UTIL.getConfiguration()));
196      assertTrue(walFilesAfterB4.size() < walFilesAfterB5.size());
197
198      // At this point, we have backups in:
199      // root R1: ..., B2 (TS=1, all tables), B4 (TS=3, [T1, T4]), B5 (TS=4, [T2, T3])
200      // root R2: B3 (TS=2, [T1, T4])
201      //
202      // To determine the WAL-deletion boundary, we only consider the most recent backup per root,
203      // so [B5, B3]. They contain the following timestamp boundaries per table:
204      // B4: { T1: 3, T2: 4, T3: 4, T4: 3 }
205      // B3: { T1: 2, T4: 2 }
206      // Taking the minimum timestamp (= 2), this means all WALs preceding B3 can be deleted.
207      deletable = cleaner.getDeletableFiles(walFilesAfterB5);
208      assertEquals(toSet(walFilesAfterB2), toSet(deletable));
209    } finally {
210      TEST_UTIL.truncateTable(BackupSystemTable.getTableName(TEST_UTIL.getConfiguration())).close();
211    }
212  }
213
214  @Test
215  public void testDoesNotDeleteWALsFromNewServers() throws Exception {
216    Path backupRoot1 = new Path(BACKUP_ROOT_DIR, "backup1");
217    List<TableName> tableSetFull = List.of(table1, table2, table3, table4);
218
219    JVMClusterUtil.RegionServerThread rsThread = null;
220    try (BackupSystemTable systemTable = new BackupSystemTable(TEST_UTIL.getConnection())) {
221      LOG.info("Creating initial backup B1");
222      String backupIdB1 = backupTables(BackupType.FULL, tableSetFull, backupRoot1.toString());
223      assertTrue(checkSucceeded(backupIdB1));
224
225      List<FileStatus> walsAfterB1 = getListOfWALFiles(TEST_UTIL.getConfiguration());
226      LOG.info("WALs after B1: {}", walsAfterB1.size());
227
228      String startCodeStr = systemTable.readBackupStartCode(backupRoot1.toString());
229      long b1StartCode = Long.parseLong(startCodeStr);
230      LOG.info("B1 startCode: {}", b1StartCode);
231
232      // Add a new RegionServer to the cluster
233      LOG.info("Adding new RegionServer to cluster");
234      rsThread = TEST_UTIL.getMiniHBaseCluster().startRegionServer();
235      ServerName newServerName = rsThread.getRegionServer().getServerName();
236      LOG.info("New RegionServer started: {}", newServerName);
237
238      // Move a region to the new server to ensure it creates a WAL
239      List<RegionInfo> regions = TEST_UTIL.getAdmin().getRegions(table1);
240      RegionInfo regionToMove = regions.get(0);
241
242      LOG.info("Moving region {} to new server {}", regionToMove.getEncodedName(), newServerName);
243      TEST_UTIL.getAdmin().move(regionToMove.getEncodedNameAsBytes(), newServerName);
244
245      TEST_UTIL.waitFor(30000, () -> {
246        try {
247          HRegionLocation location = TEST_UTIL.getConnection().getRegionLocator(table1)
248            .getRegionLocation(regionToMove.getStartKey());
249          return location.getServerName().equals(newServerName);
250        } catch (IOException e) {
251          return false;
252        }
253      });
254
255      // Write some data to trigger WAL creation on the new server
256      try (Table t1 = TEST_UTIL.getConnection().getTable(table1)) {
257        for (int i = 0; i < 100; i++) {
258          Put p = new Put(Bytes.toBytes("newserver-row-" + i));
259          p.addColumn(famName, qualName, Bytes.toBytes("val" + i));
260          t1.put(p);
261        }
262      }
263      TEST_UTIL.getAdmin().flushRegion(regionToMove.getEncodedNameAsBytes());
264
265      List<FileStatus> walsAfterNewServer = getListOfWALFiles(TEST_UTIL.getConfiguration());
266      LOG.info("WALs after adding new server: {}", walsAfterNewServer.size());
267      assertTrue("Should have more WALs after new server",
268        walsAfterNewServer.size() > walsAfterB1.size());
269
270      List<FileStatus> newServerWALs = new ArrayList<>(walsAfterNewServer);
271      newServerWALs.removeAll(walsAfterB1);
272      assertFalse("Should have WALs from new server", newServerWALs.isEmpty());
273
274      BackupLogCleaner cleaner = new BackupLogCleaner();
275      cleaner.setConf(TEST_UTIL.getConfiguration());
276      cleaner.init(Map.of(HMaster.MASTER, TEST_UTIL.getHBaseCluster().getMaster()));
277
278      Set<FileStatus> deletable = toSet(cleaner.getDeletableFiles(walsAfterNewServer));
279      for (FileStatus newWAL : newServerWALs) {
280        assertFalse("WAL from new server should NOT be deletable: " + newWAL.getPath(),
281          deletable.contains(newWAL));
282      }
283    } finally {
284      TEST_UTIL.truncateTable(BackupSystemTable.getTableName(TEST_UTIL.getConfiguration())).close();
285      // Clean up the RegionServer we added
286      if (rsThread != null) {
287        LOG.info("Stopping the RegionServer added for test");
288        TEST_UTIL.getMiniHBaseCluster()
289          .stopRegionServer(rsThread.getRegionServer().getServerName());
290        TEST_UTIL.getMiniHBaseCluster()
291          .waitForRegionServerToStop(rsThread.getRegionServer().getServerName(), 30000);
292      }
293    }
294  }
295
296  @Test
297  public void testCanDeleteFileWithNewServerWALs() {
298    long backupStartCode = 1000000L;
299    // Old WAL from before the backup
300    Path oldWAL = new Path("/hbase/oldWALs/server1%2C60020%2C12345.500000");
301    String host = BackupUtils.parseHostNameFromLogFile(oldWAL);
302    BackupBoundaries boundaries = BackupBoundaries.builder(0L)
303      .addBackupTimestamps(host, backupStartCode, backupStartCode).build();
304
305    assertTrue("WAL older than backup should be deletable",
306      BackupLogCleaner.canDeleteFile(boundaries, oldWAL));
307
308    // WAL from exactly at the backup boundary
309    Path boundaryWAL = new Path("/hbase/oldWALs/server1%2C60020%2C12345.1000000");
310    assertTrue("WAL at boundary should be deletable",
311      BackupLogCleaner.canDeleteFile(boundaries, boundaryWAL));
312
313    // WAL from a server that joined AFTER the backup
314    Path newServerWAL = new Path("/hbase/oldWALs/newserver%2C60020%2C99999.1500000");
315    assertFalse("WAL from new server (after backup) should NOT be deletable",
316      BackupLogCleaner.canDeleteFile(boundaries, newServerWAL));
317  }
318
319  @Test
320  public void testCleansUpHMasterWal() {
321    Path path = new Path("/hbase/MasterData/WALs/hmaster,60000,1718808578163");
322    assertTrue(BackupLogCleaner.canDeleteFile(BackupBoundaries.builder(0L).build(), path));
323  }
324
325  @Test
326  public void testCleansUpArchivedHMasterWal() {
327    BackupBoundaries empty = BackupBoundaries.builder(0L).build();
328    Path normalPath =
329      new Path("/hbase/oldWALs/hmaster%2C60000%2C1716224062663.1716247552189$masterlocalwal$");
330    assertTrue(BackupLogCleaner.canDeleteFile(empty, normalPath));
331
332    Path masterPath = new Path(
333      "/hbase/MasterData/oldWALs/hmaster%2C60000%2C1716224062663.1716247552189$masterlocalwal$");
334    assertTrue(BackupLogCleaner.canDeleteFile(empty, masterPath));
335  }
336
337  private Set<FileStatus> mergeAsSet(Collection<FileStatus> toCopy, Collection<FileStatus> toAdd) {
338    Set<FileStatus> result = new LinkedHashSet<>(toCopy);
339    result.addAll(toAdd);
340    return result;
341  }
342
343  private <T> Set<T> toSet(Iterable<T> iterable) {
344    Set<T> result = new LinkedHashSet<>();
345    iterable.forEach(result::add);
346    return result;
347  }
348}