/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.master;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupType;
import org.apache.hadoop.hbase.backup.TestBackupBase;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
import org.apache.hadoop.hbase.backup.util.BackupBoundaries;
import org.apache.hadoop.hbase.backup.util.BackupUtils;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.JVMClusterUtil;
import org.junit.BeforeClass;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@Category(LargeTests.class)
public class TestBackupLogCleaner extends TestBackupBase {

  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestBackupLogCleaner.class);

  private static final Logger LOG = LoggerFactory.getLogger(TestBackupLogCleaner.class);

  // Implements all test cases in one test method, since incremental backups
  // depend on preceding full backups.

  @BeforeClass
  public static void before() {
    // Use no timestamp buffer (TS_BUFFER_KEY = 0), so the cleaner's boundary
    // checks in this test are exact.
    TEST_UTIL.getConfiguration().setLong(BackupLogCleaner.TS_BUFFER_KEY, 0);
  }

  @Test
  public void testBackupLogCleaner() throws Exception {
    Path backupRoot1 = new Path(BACKUP_ROOT_DIR, "root1");
    Path backupRoot2 = new Path(BACKUP_ROOT_DIR, "root2");

    List<TableName> tableSetFull = List.of(table1, table2, table3, table4);
    List<TableName> tableSet14 = List.of(table1, table4);
    List<TableName> tableSet23 = List.of(table2, table3);
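
    // Scenario: backups B1..B5 are created across two backup roots, R1 and R2. After each
    // backup we verify which WALs the cleaner reports as deletable. As the step-by-step
    // comments below show, the deletion boundary is the least recent of each root's most
    // recent backup, tracked per table.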

    try (BackupSystemTable systemTable = new BackupSystemTable(TEST_UTIL.getConnection())) {
      // Verify that we have no backup sessions yet
      assertFalse(systemTable.hasBackupSessions());
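
      // The cleaner normally runs as a delegate of the master's log-cleaner chore; here we
      // instantiate it by hand and pass the mini-cluster's HMaster through its init params.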
      BackupLogCleaner cleaner = new BackupLogCleaner();
      cleaner.setConf(TEST_UTIL.getConfiguration());
      cleaner.init(Map.of(HMaster.MASTER, TEST_UTIL.getHBaseCluster().getMaster()));

      // All WAL files can be deleted because we do not have backups
      List<FileStatus> walFilesBeforeBackup = getListOfWALFiles(TEST_UTIL.getConfiguration());
      Iterable<FileStatus> deletable = cleaner.getDeletableFiles(walFilesBeforeBackup);
      assertEquals(walFilesBeforeBackup, deletable);

      // Create a FULL backup B1 in backupRoot R1, containing all tables
      String backupIdB1 = backupTables(BackupType.FULL, tableSetFull, backupRoot1.toString());
      assertTrue(checkSucceeded(backupIdB1));

      // As part of a backup, WALs are rolled, so we expect a new WAL file
      Set<FileStatus> walFilesAfterB1 =
        mergeAsSet(walFilesBeforeBackup, getListOfWALFiles(TEST_UTIL.getConfiguration()));
      assertTrue(walFilesBeforeBackup.size() < walFilesAfterB1.size());

      // Currently, we only have backup B1, so we can delete any WAL preceding B1
      deletable = cleaner.getDeletableFiles(walFilesAfterB1);
      assertEquals(toSet(walFilesBeforeBackup), toSet(deletable));

      // Insert some data
      Connection conn = TEST_UTIL.getConnection();
      try (Table t1 = conn.getTable(table1)) {
        Put p1;
        for (int i = 0; i < NB_ROWS_IN_BATCH; i++) {
          p1 = new Put(Bytes.toBytes("row-t1" + i));
          p1.addColumn(famName, qualName, Bytes.toBytes("val" + i));
          t1.put(p1);
        }
      }

      try (Table t2 = conn.getTable(table2)) {
        Put p2;
        for (int i = 0; i < 5; i++) {
          p2 = new Put(Bytes.toBytes("row-t2" + i));
          p2.addColumn(famName, qualName, Bytes.toBytes("val" + i));
          t2.put(p2);
        }
      }

      // Create an INCREMENTAL backup B2 in backupRoot R1, requesting tables 1 & 4.
      // Note that an incremental backup always includes all tables already present in the
      // backup root, i.e. the backup will contain all tables (1, 2, 3, 4), ignoring what we
      // specify here.
      LOG.debug("Creating B2");
      String backupIdB2 = backupTables(BackupType.INCREMENTAL, tableSet14, backupRoot1.toString());
      assertTrue(checkSucceeded(backupIdB2));

      // As part of a backup, WALs are rolled, so we expect a new WAL file
      Set<FileStatus> walFilesAfterB2 =
        mergeAsSet(walFilesAfterB1, getListOfWALFiles(TEST_UTIL.getConfiguration()));
      assertTrue(walFilesAfterB1.size() < walFilesAfterB2.size());

      // At this point, we have backups in root R1: B1 and B2.
      // We only consider the most recent backup (B2) to determine which WALs can be deleted:
      // all WALs preceding B2.
      deletable = cleaner.getDeletableFiles(walFilesAfterB2);
      assertEquals(toSet(walFilesAfterB1), toSet(deletable));

      // Create a FULL backup B3 in backupRoot R2, containing tables 1 & 4
      LOG.debug("Creating B3");
      String backupIdB3 = backupTables(BackupType.FULL, tableSet14, backupRoot2.toString());
      assertTrue(checkSucceeded(backupIdB3));

      // As part of a backup, WALs are rolled, so we expect a new WAL file
      Set<FileStatus> walFilesAfterB3 =
        mergeAsSet(walFilesAfterB2, getListOfWALFiles(TEST_UTIL.getConfiguration()));
      assertTrue(walFilesAfterB2.size() < walFilesAfterB3.size());

      // At this point, we have backups in:
      // root R1: B1 (TS=0, all tables), B2 (TS=1, all tables)
      // root R2: B3 (TS=2, [T1, T4])
      //
      // To determine the WAL-deletion boundary, we only consider the most recent backup per
      // root, so [B2, B3]. From these, we take the least recent as WAL-deletion boundary: B2.
      // It contains all tables, so it acts as the deletion boundary, i.e. only WALs preceding
      // B2 are deletable.
      deletable = cleaner.getDeletableFiles(walFilesAfterB3);
      assertEquals(toSet(walFilesAfterB1), toSet(deletable));

      // Create a FULL backup B4 in backupRoot R1, with a subset of tables
      LOG.debug("Creating B4");
      String backupIdB4 = backupTables(BackupType.FULL, tableSet14, backupRoot1.toString());
      assertTrue(checkSucceeded(backupIdB4));

      // As part of a backup, WALs are rolled, so we expect a new WAL file
      Set<FileStatus> walFilesAfterB4 =
        mergeAsSet(walFilesAfterB3, getListOfWALFiles(TEST_UTIL.getConfiguration()));
      assertTrue(walFilesAfterB3.size() < walFilesAfterB4.size());

      // At this point, we have backups in:
      // root R1: B1 (TS=0, all tables), B2 (TS=1, all tables), B4 (TS=3, [T1, T4])
      // root R2: B3 (TS=2, [T1, T4])
      //
      // To determine the WAL-deletion boundary, we only consider the most recent backup per
      // root, so [B4, B3]. They contain the following timestamp boundaries per table:
      // B4: { T1: 3, T2: 1, T3: 1, T4: 3 }
      // B3: { T1: 2, T4: 2 }
      // Taking the minimum timestamp (= 1), this means all WALs preceding B2 can be deleted.
      deletable = cleaner.getDeletableFiles(walFilesAfterB4);
      assertEquals(toSet(walFilesAfterB1), toSet(deletable));
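
      // B5 will back up the remaining tables (2 & 3) in R1; afterwards every table in R1 has
      // a backup more recent than B2, which finally lets the deletion boundary advance past B2.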

      // Create a FULL backup B5 in backupRoot R1, for tables 2 & 3
      String backupIdB5 = backupTables(BackupType.FULL, tableSet23, backupRoot1.toString());
      assertTrue(checkSucceeded(backupIdB5));

      // As part of a backup, WALs are rolled, so we expect a new WAL file
      Set<FileStatus> walFilesAfterB5 =
        mergeAsSet(walFilesAfterB4, getListOfWALFiles(TEST_UTIL.getConfiguration()));
      assertTrue(walFilesAfterB4.size() < walFilesAfterB5.size());

      // At this point, we have backups in:
      // root R1: ..., B2 (TS=1, all tables), B4 (TS=3, [T1, T4]), B5 (TS=4, [T2, T3])
      // root R2: B3 (TS=2, [T1, T4])
      //
      // To determine the WAL-deletion boundary, we only consider the most recent backup per
      // root, so [B5, B3]. They contain the following timestamp boundaries per table:
      // B5: { T1: 3, T2: 4, T3: 4, T4: 3 }
      // B3: { T1: 2, T4: 2 }
      // Taking the minimum timestamp (= 2), this means all WALs preceding B3 can be deleted.
      deletable = cleaner.getDeletableFiles(walFilesAfterB5);
      assertEquals(toSet(walFilesAfterB2), toSet(deletable));
    } finally {
      TEST_UTIL.truncateTable(BackupSystemTable.getTableName(TEST_UTIL.getConfiguration())).close();
    }
  }

  @Test
  public void testDoesNotDeleteWALsFromNewServers() throws Exception {
    Path backupRoot1 = new Path(BACKUP_ROOT_DIR, "backup1");
    List<TableName> tableSetFull = List.of(table1, table2, table3, table4);

    JVMClusterUtil.RegionServerThread rsThread = null;
    try (BackupSystemTable systemTable = new BackupSystemTable(TEST_UTIL.getConnection())) {
      LOG.info("Creating initial backup B1");
      String backupIdB1 = backupTables(BackupType.FULL, tableSetFull, backupRoot1.toString());
      assertTrue(checkSucceeded(backupIdB1));

      List<FileStatus> walsAfterB1 = getListOfWALFiles(TEST_UTIL.getConfiguration());
      LOG.info("WALs after B1: {}", walsAfterB1.size());

      String startCodeStr = systemTable.readBackupStartCode(backupRoot1.toString());
      long b1StartCode = Long.parseLong(startCodeStr);
      LOG.info("B1 startCode: {}", b1StartCode);

      // Add a new RegionServer to the cluster
      LOG.info("Adding new RegionServer to cluster");
      rsThread = TEST_UTIL.getMiniHBaseCluster().startRegionServer();
      ServerName newServerName = rsThread.getRegionServer().getServerName();
      LOG.info("New RegionServer started: {}", newServerName);

      // Move a region to the new server to ensure it creates a WAL
      List<RegionInfo> regions = TEST_UTIL.getAdmin().getRegions(table1);
      RegionInfo regionToMove = regions.get(0);

      LOG.info("Moving region {} to new server {}", regionToMove.getEncodedName(), newServerName);
      TEST_UTIL.getAdmin().move(regionToMove.getEncodedNameAsBytes(), newServerName);

      TEST_UTIL.waitFor(30000, () -> {
        try {
          HRegionLocation location = TEST_UTIL.getConnection().getRegionLocator(table1)
            .getRegionLocation(regionToMove.getStartKey());
          return location.getServerName().equals(newServerName);
        } catch (IOException e) {
          return false;
        }
      });

      // Write some data to trigger WAL creation on the new server
      try (Table t1 = TEST_UTIL.getConnection().getTable(table1)) {
        for (int i = 0; i < 100; i++) {
          Put p = new Put(Bytes.toBytes("newserver-row-" + i));
          p.addColumn(famName, qualName, Bytes.toBytes("val" + i));
          t1.put(p);
        }
      }
      TEST_UTIL.getAdmin().flushRegion(regionToMove.getEncodedNameAsBytes());

      List<FileStatus> walsAfterNewServer = getListOfWALFiles(TEST_UTIL.getConfiguration());
      LOG.info("WALs after adding new server: {}", walsAfterNewServer.size());
      assertTrue("Should have more WALs after new server",
        walsAfterNewServer.size() > walsAfterB1.size());

      List<FileStatus> newServerWALs = new ArrayList<>(walsAfterNewServer);
      newServerWALs.removeAll(walsAfterB1);
      assertFalse("Should have WALs from new server", newServerWALs.isEmpty());
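
      // A WAL from a server that joined after the last backup is not covered by any backup,
      // so the cleaner must not consider it deletable.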
      BackupLogCleaner cleaner = new BackupLogCleaner();
      cleaner.setConf(TEST_UTIL.getConfiguration());
      cleaner.init(Map.of(HMaster.MASTER, TEST_UTIL.getHBaseCluster().getMaster()));

      Set<FileStatus> deletable = toSet(cleaner.getDeletableFiles(walsAfterNewServer));
      for (FileStatus newWAL : newServerWALs) {
        assertFalse("WAL from new server should NOT be deletable: " + newWAL.getPath(),
          deletable.contains(newWAL));
      }
    } finally {
      TEST_UTIL.truncateTable(BackupSystemTable.getTableName(TEST_UTIL.getConfiguration())).close();
      // Clean up the RegionServer we added
      if (rsThread != null) {
        LOG.info("Stopping the RegionServer added for test");
        TEST_UTIL.getMiniHBaseCluster()
          .stopRegionServer(rsThread.getRegionServer().getServerName());
        TEST_UTIL.getMiniHBaseCluster()
          .waitForRegionServerToStop(rsThread.getRegionServer().getServerName(), 30000);
      }
    }
  }

  @Test
  public void testCanDeleteFileWithNewServerWALs() {
    long backupStartCode = 1000000L;
    // Old WAL from before the backup
    Path oldWAL = new Path("/hbase/oldWALs/server1%2C60020%2C12345.500000");
    String host = BackupUtils.parseHostNameFromLogFile(oldWAL);
    BackupBoundaries boundaries = BackupBoundaries.builder(0L)
      .addBackupTimestamps(host, backupStartCode, backupStartCode).build();

    assertTrue("WAL older than backup should be deletable",
      BackupLogCleaner.canDeleteFile(boundaries, oldWAL));

    // WAL from exactly at the backup boundary
    Path boundaryWAL = new Path("/hbase/oldWALs/server1%2C60020%2C12345.1000000");
    assertTrue("WAL at boundary should be deletable",
      BackupLogCleaner.canDeleteFile(boundaries, boundaryWAL));

    // WAL from a server that joined AFTER the backup
    Path newServerWAL = new Path("/hbase/oldWALs/newserver%2C60020%2C99999.1500000");
    assertFalse("WAL from new server (after backup) should NOT be deletable",
      BackupLogCleaner.canDeleteFile(boundaries, newServerWAL));
  }

  @Test
  public void testCleansUpHMasterWal() {
    Path path = new Path("/hbase/MasterData/WALs/hmaster,60000,1718808578163");
    assertTrue(BackupLogCleaner.canDeleteFile(BackupBoundaries.builder(0L).build(), path));
  }

  @Test
  public void testCleansUpArchivedHMasterWal() {
    BackupBoundaries empty = BackupBoundaries.builder(0L).build();
    Path normalPath =
      new Path("/hbase/oldWALs/hmaster%2C60000%2C1716224062663.1716247552189$masterlocalwal$");
    assertTrue(BackupLogCleaner.canDeleteFile(empty, normalPath));

    Path masterPath = new Path(
      "/hbase/MasterData/oldWALs/hmaster%2C60000%2C1716224062663.1716247552189$masterlocalwal$");
    assertTrue(BackupLogCleaner.canDeleteFile(empty, masterPath));
  }

  private Set<FileStatus> mergeAsSet(Collection<FileStatus> toCopy, Collection<FileStatus> toAdd) {
    Set<FileStatus> result = new LinkedHashSet<>(toCopy);
    result.addAll(toAdd);
    return result;
  }

  private <T> Set<T> toSet(Iterable<T> iterable) {
    Set<T> result = new LinkedHashSet<>();
    iterable.forEach(result::add);
    return result;
  }
}