/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder.NEW_VERSION_BEHAVIOR;
import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseParameterizedTestTemplate;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.KeepDeletedCells;
import org.apache.hadoop.hbase.MemoryCompactionPolicy;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.params.provider.Arguments;

/**
 * The store file writer does not do any compaction. Each cell is written to either the live or
 * the historical file. Regular (i.e., not raw) scans that read the latest put cells scan only
 * live files. To ensure the correctness of the store file writer, we need to verify that live
 * files include all live cells. This test verifies this indirectly as follows. The test creates
 * two tables, each with one region and one store. Dual file writing (live vs. historical) is
 * enabled on only one of the tables. The test generates the exact same set of mutations on both
 * tables. These mutations include all types of cells, and these cells are written to multiple
 * files using multiple memstore flushes. After writing all cells, the test first verifies that
 * both tables return the same set of cells for regular and raw scans. Then the same verification
 * is done after the tables are minor compacted and finally major compacted. The test also
 * verifies that flushes do not generate historical files and that historical files are generated
 * only when historical file generation is enabled (by the config
 * hbase.enable.historical.compaction.files).
 */
@Tag(RegionServerTests.TAG)
@Tag(LargeTests.TAG)
@HBaseParameterizedTestTemplate(
  name = "{index}: keepDeletedCells={0}, maxVersions={1}, newVersionBehavior={2}")
public class TestStoreFileWriter {

  private final int ROW_NUM = 100;
  private final Random RANDOM = new Random(11);
  private final HBaseTestingUtil testUtil = new HBaseTestingUtil();
  private HRegion[] regions = new HRegion[2];
  private final byte[][] qualifiers =
    { Bytes.toBytes("0"), Bytes.toBytes("1"), Bytes.toBytes("2") };
  // This keeps track of all cells. It is a list of rows, each row is a list of columns, and each
  // column is a list of CellInfo objects.
  private ArrayList<ArrayList<ArrayList<CellInfo>>> insertedCells;
  private TableName[] tableName = new TableName[2];
  private final Configuration conf = testUtil.getConfiguration();
  private int flushCount = 0;

  public KeepDeletedCells keepDeletedCells;
  public int maxVersions;
  public boolean newVersionBehavior;

  public TestStoreFileWriter(KeepDeletedCells keepDeletedCells, int maxVersions,
    boolean newVersionBehavior) {
    this.keepDeletedCells = keepDeletedCells;
    this.maxVersions = maxVersions;
    this.newVersionBehavior = newVersionBehavior;
  }

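  // Each Arguments triple supplies (keepDeletedCells, maxVersions, newVersionBehavior), in the
  // order referenced by the name template in the class-level annotation above.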
  public static synchronized Stream<Arguments> parameters() {
    return Stream.of(Arguments.of(KeepDeletedCells.FALSE, 1, true),
      Arguments.of(KeepDeletedCells.FALSE, 2, false),
      Arguments.of(KeepDeletedCells.FALSE, 3, true),
      Arguments.of(KeepDeletedCells.TRUE, 1, false),
      // { KeepDeletedCells.TRUE, 2, true }, see HBASE-28442
      Arguments.of(KeepDeletedCells.TRUE, 3, false));
  }

  // In-memory representation of a cell. We only need the timestamp and the type field for our
  // testing. Please note that the row for the cell is implicit in insertedCells.
  private static class CellInfo {
    long timestamp;
    Cell.Type type;

    CellInfo(long timestamp, Cell.Type type) {
      this.timestamp = timestamp;
      this.type = type;
    }
  }

  private void createTable(int index, boolean enableDualFileWriter) throws IOException {
    tableName[index] = TableName.valueOf(getClass().getSimpleName() + "_" + index);
    ColumnFamilyDescriptor familyDescriptor =
      ColumnFamilyDescriptorBuilder.newBuilder(HBaseTestingUtil.fam1).setMaxVersions(maxVersions)
        .setKeepDeletedCells(keepDeletedCells)
        .setValue(NEW_VERSION_BEHAVIOR, Boolean.toString(newVersionBehavior)).build();
    TableDescriptorBuilder builder =
      TableDescriptorBuilder.newBuilder(tableName[index]).setColumnFamily(familyDescriptor)
        .setValue(ENABLE_HISTORICAL_COMPACTION_FILES, Boolean.toString(enableDualFileWriter));
    testUtil.createTable(builder.build(), null);
    regions[index] = testUtil.getMiniHBaseCluster().getRegions(tableName[index]).get(0);
  }

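  // Starts a mini cluster and creates two otherwise identical tables: table 0 with historical
  // file generation disabled and table 1 with it enabled. In-memory compaction is turned off so
  // that each memstore flush produces one new store file per store, which is what the
  // flushCount-based assertions in testCompactedFiles rely on.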
  @BeforeEach
  public void setUp() throws Exception {
    conf.setInt(CompactionConfiguration.HBASE_HSTORE_COMPACTION_MAX_KEY, 6);
    conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
      String.valueOf(MemoryCompactionPolicy.NONE));
    testUtil.startMiniCluster();
    createTable(0, false);
    createTable(1, true);
    insertedCells = new ArrayList<>(ROW_NUM);
    for (int r = 0; r < ROW_NUM; r++) {
      insertedCells.add(new ArrayList<>(qualifiers.length));
      for (int q = 0; q < qualifiers.length; q++) {
        insertedCells.get(r).add(new ArrayList<>(10));
      }
    }
  }

  @AfterEach
  public void tearDown() throws Exception {
    testUtil.shutdownMiniCluster();
    testUtil.cleanupTestDir();
  }

  @TestTemplate
  public void testCompactedFiles() throws Exception {
    for (int i = 0; i < 10; i++) {
      insertRows(ROW_NUM * maxVersions);
      deleteRows(ROW_NUM / 8);
      deleteRowVersions(ROW_NUM / 8);
      deleteColumns(ROW_NUM / 8);
      deleteColumnVersions(ROW_NUM / 8);
      flushRegion();
    }

    verifyCells();

    HStore[] stores = new HStore[2];

    stores[0] = regions[0].getStore(HBaseTestingUtil.fam1);
    assertEquals(flushCount, stores[0].getStorefilesCount());

    stores[1] = regions[1].getStore(HBaseTestingUtil.fam1);
    assertEquals(flushCount, stores[1].getStorefilesCount());

    regions[0].compact(false);
    assertEquals(flushCount - stores[0].getCompactedFiles().size() + 1,
      stores[0].getStorefilesCount());

    regions[1].compact(false);
    // HBASE-30036 skips redundant delete markers during minor compaction, so the historical
    // file may end up empty and not be created. The count can be +1 or +2.
    int minorCompactedCount = stores[1].getStorefilesCount();
    int expectedMin = flushCount - stores[1].getCompactedFiles().size() + 1;
    int expectedMax = flushCount - stores[1].getCompactedFiles().size() + 2;
    assertTrue(minorCompactedCount >= expectedMin && minorCompactedCount <= expectedMax,
      "Expected store file count between " + expectedMin + " and " + expectedMax + " but was "
        + minorCompactedCount);

    verifyCells();

    regions[0].compact(true);
    assertEquals(1, stores[0].getStorefilesCount());

    regions[1].compact(true);
    assertEquals(keepDeletedCells == KeepDeletedCells.FALSE ? 1 : 2,
      stores[1].getStorefilesCount());

    verifyCells();
  }

  private void verifyCells() throws Exception {
    scanAndCompare(false);
    scanAndCompare(true);
  }

  private void flushRegion() throws Exception {
    regions[0].flush(true);
    regions[1].flush(true);
    flushCount++;
  }

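  // Returns the timestamp of the most recent live (Put) version of the given row, i.e. the
  // largest timestamp among those columns whose latest cell is a Put. Returns null if no
  // column's latest cell is a Put.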
  private Long getRowTimestamp(int row) {
    Long maxTimestamp = null;
    for (int q = 0; q < qualifiers.length; q++) {
      int size = insertedCells.get(row).get(q).size();
      if (size > 0) {
        CellInfo mostRecentCellInfo = insertedCells.get(row).get(q).get(size - 1);
        if (mostRecentCellInfo.type == Cell.Type.Put) {
          if (maxTimestamp == null || maxTimestamp < mostRecentCellInfo.timestamp) {
            maxTimestamp = mostRecentCellInfo.timestamp;
          }
        }
      }
    }
    return maxTimestamp;
  }

  private long getNewTimestamp(long timestamp) throws Exception {
    long newTimestamp = System.currentTimeMillis();
    if (timestamp == newTimestamp) {
      Thread.sleep(1);
      newTimestamp = System.currentTimeMillis();
      assertTrue(timestamp < newTimestamp);
    }
    return newTimestamp;
  }

  private void insertRows(int rowCount) throws Exception {
    int row;
    long timestamp = System.currentTimeMillis();
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Put put = new Put(Bytes.toBytes(String.valueOf(row)), timestamp);
      for (int q = 0; q < qualifiers.length; q++) {
        put.addColumn(HBaseTestingUtil.fam1, qualifiers[q],
          Bytes.toBytes(String.valueOf(timestamp)));
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Put));
      }
      regions[0].put(put);
      regions[1].put(put);
      timestamp = getNewTimestamp(timestamp);
    }
  }

  private void deleteRows(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      long timestamp = System.currentTimeMillis();
      row = RANDOM.nextInt(ROW_NUM);
      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
      regions[0].delete(delete);
      regions[1].delete(delete);
      // For simplicity, the family delete markers are inserted for all columns (instead of
      // allocating a separate column for them) in the in-memory representation of the data
      // stored to HBase.
      for (int q = 0; q < qualifiers.length; q++) {
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamily));
      }
    }
  }

  private void deleteSingleRowVersion(int row, long timestamp) throws IOException {
    Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
    delete.addFamilyVersion(HBaseTestingUtil.fam1, timestamp);
    regions[0].delete(delete);
    regions[1].delete(delete);
    // For simplicity, the family delete version markers are inserted for all columns (instead of
    // allocating a separate column for them) in the in-memory representation of the data stored
    // to HBase.
    for (int q = 0; q < qualifiers.length; q++) {
      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamilyVersion));
    }
  }

  private void deleteRowVersions(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Long timestamp = getRowTimestamp(row);
      if (timestamp != null) {
        deleteSingleRowVersion(row, timestamp);
      }
    }
    // Insert one more delete marker with the current time; it possibly does not delete any row
    // version.
    row = RANDOM.nextInt(ROW_NUM);
    deleteSingleRowVersion(row, System.currentTimeMillis());
  }

  private void deleteColumns(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      long timestamp = System.currentTimeMillis();
      row = RANDOM.nextInt(ROW_NUM);
      int q = RANDOM.nextInt(qualifiers.length);
      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)), timestamp);
      delete.addColumns(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
      regions[0].delete(delete);
      regions[1].delete(delete);
      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteColumn));
    }
  }

  private void deleteColumnVersions(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Long timestamp = getRowTimestamp(row);
      if (timestamp != null) {
        Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
        int q = RANDOM.nextInt(qualifiers.length);
        delete.addColumn(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
        regions[0].delete(delete);
        regions[1].delete(delete);
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Delete));
      }
    }
  }

  private Scan createScan(boolean raw) {
    Scan scan = new Scan();
    scan.readAllVersions();
    scan.setRaw(raw);
    return scan;
  }

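  // Scans both regions in lockstep and asserts that each row yields the same cells: the same
  // cell count, and matching row/column, type, and value for each pair. Raw scans also return
  // delete markers and extra versions, so the raw comparison covers historical cells as well.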
  private void scanAndCompare(boolean raw) throws Exception {
    try (RegionScanner firstRS = regions[0].getScanner(createScan(raw))) {
      try (RegionScanner secondRS = regions[1].getScanner(createScan(raw))) {
        boolean firstHasMore;
        boolean secondHasMore;
        do {
          List<Cell> firstRowList = new ArrayList<>();
          List<Cell> secondRowList = new ArrayList<>();
          firstHasMore = firstRS.nextRaw(firstRowList);
          secondHasMore = secondRS.nextRaw(secondRowList);
          assertEquals(firstRowList.size(), secondRowList.size());
          int size = firstRowList.size();
          for (int i = 0; i < size; i++) {
            Cell firstCell = firstRowList.get(i);
            Cell secondCell = secondRowList.get(i);
            assertTrue(CellUtil.matchingRowColumn(firstCell, secondCell));
            assertEquals(firstCell.getType(), secondCell.getType());
            assertTrue(
              Bytes.equals(CellUtil.cloneValue(firstCell), CellUtil.cloneValue(secondCell)));
          }
        } while (firstHasMore && secondHasMore);
        assertEquals(firstHasMore, secondHasMore);
      }
    }
  }
}