/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder.NEW_VERSION_BEHAVIOR;
import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.KeepDeletedCells;
import org.apache.hadoop.hbase.MemoryCompactionPolicy;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

/**
 * Store file writer does not do any compaction. Each cell is written to either the live or the
 * historical file. Regular (i.e., not-raw) scans that read the latest put cells scan only live
 * files. To ensure the correctness of store file writer, we need to verify that live files include
 * all live cells. This test indirectly verifies this as follows. The test creates two tables, each
 * with one region and one store. The dual file writing (live vs historical) is configured on only
 * one of the tables. The test generates the exact same set of mutations on both tables. These
 * mutations include all types of cells and these cells are written to multiple files using
 * multiple memstore flushes. After writing all cells, the test first verifies that both tables
 * return the same set of cells for regular and raw scans. Then the same verification is done after
 * the tables are minor and finally major compacted. The test also verifies that flushes do not
 * generate historical files and the historical files are generated only when historical file
 * generation is enabled (by the config hbase.enable.historical.compaction.files).
 */
@Category({ RegionServerTests.class, LargeTests.class })
@RunWith(Parameterized.class)
public class TestStoreFileWriter {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestStoreFileWriter.class);

  /** Number of distinct row keys the random mutations are spread over. */
  private static final int ROW_NUM = 100;

  // Deliberately an instance field with a fixed seed: every parameterized run gets a fresh
  // generator and therefore replays the same sequence of mutations.
  private final Random RANDOM = new Random(11);
  private final HBaseTestingUtil testUtil = new HBaseTestingUtil();
  // regions[0] belongs to the table without dual file writing, regions[1] to the one with it
  // (see setUp()).
  private HRegion[] regions = new HRegion[2];
  private final byte[][] qualifiers =
    { Bytes.toBytes("0"), Bytes.toBytes("1"), Bytes.toBytes("2") };
  // This keeps track of all cells. It is a list of rows, each row is a list of columns, each
  // column is a list of CellInfo objects in insertion order.
  private List<List<List<CellInfo>>> insertedCells;
  private TableName[] tableName = new TableName[2];
  private final Configuration conf = testUtil.getConfiguration();
  // Number of flushRegion() calls; each call flushes both regions once.
  private int flushCount = 0;

  @Parameterized.Parameter(0)
  public KeepDeletedCells keepDeletedCells;
  @Parameterized.Parameter(1)
  public int maxVersions;
  @Parameterized.Parameter(2)
  public boolean newVersionBehavior;

  @Parameterized.Parameters(name = "keepDeletedCells={0}, maxVersions={1}, newVersionBehavior={2}")
  public static synchronized Collection<Object[]> data() {
    return Arrays.asList(
      new Object[][] { { KeepDeletedCells.FALSE, 1, true }, { KeepDeletedCells.FALSE, 2, false },
        { KeepDeletedCells.FALSE, 3, true }, { KeepDeletedCells.TRUE, 1, false },
        // { KeepDeletedCells.TRUE, 2, true }, see HBASE-28442
        { KeepDeletedCells.TRUE, 3, false } });
  }

  // In memory representation of a cell. We only need to know timestamp and type field for our
  // testing for cell. Please note the row for the cell is implicit in insertedCells.
  private static class CellInfo {
    final long timestamp;
    final Cell.Type type;

    CellInfo(long timestamp, Cell.Type type) {
      this.timestamp = timestamp;
      this.type = type;
    }
  }

  /**
   * Creates the test table for the given slot with a single column family configured from the test
   * parameters, and remembers the first region of that table in {@link #regions}.
   * @param index                slot (0 or 1) in {@link #tableName} and {@link #regions}
   * @param enableDualFileWriter value stored under ENABLE_HISTORICAL_COMPACTION_FILES on the table
   * @throws IOException if the table cannot be created
   */
  private void createTable(int index, boolean enableDualFileWriter) throws IOException {
    tableName[index] = TableName.valueOf(getClass().getSimpleName() + "_" + index);
    ColumnFamilyDescriptor familyDescriptor =
      ColumnFamilyDescriptorBuilder.newBuilder(HBaseTestingUtil.fam1).setMaxVersions(maxVersions)
        .setKeepDeletedCells(keepDeletedCells)
        .setValue(NEW_VERSION_BEHAVIOR, Boolean.toString(newVersionBehavior)).build();
    TableDescriptorBuilder builder =
      TableDescriptorBuilder.newBuilder(tableName[index]).setColumnFamily(familyDescriptor)
        .setValue(ENABLE_HISTORICAL_COMPACTION_FILES, Boolean.toString(enableDualFileWriter));
    testUtil.createTable(builder.build(), null);
    regions[index] = testUtil.getMiniHBaseCluster().getRegions(tableName[index]).get(0);
  }

  /**
   * Starts the mini cluster, creates the two tables (dual file writing disabled on table 0 and
   * enabled on table 1) and allocates an empty cell list for every row/qualifier pair.
   */
  @Before
  public void setUp() throws Exception {
    conf.setInt(CompactionConfiguration.HBASE_HSTORE_COMPACTION_MAX_KEY, 6);
    conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
      String.valueOf(MemoryCompactionPolicy.NONE));
    testUtil.startMiniCluster();
    createTable(0, false);
    createTable(1, true);
    insertedCells = new ArrayList<>(ROW_NUM);
    for (int r = 0; r < ROW_NUM; r++) {
      insertedCells.add(new ArrayList<>(qualifiers.length));
      for (int q = 0; q < qualifiers.length; q++) {
        insertedCells.get(r).add(new ArrayList<>(10));
      }
    }
  }

  /** Shuts the mini cluster down and removes the test directory. */
  @After
  public void tearDown() throws Exception {
    testUtil.shutdownMiniCluster();
    testUtil.cleanupTestDir();
  }

  /**
   * Applies ten rounds of identical random mutations (each followed by a flush) to both tables and
   * checks that the tables stay scan-equivalent and have the expected store file counts after the
   * flushes, after a minor compaction and after a major compaction.
   */
  @Test
  public void testCompactedFiles() throws Exception {
    for (int i = 0; i < 10; i++) {
      insertRows(ROW_NUM * maxVersions);
      deleteRows(ROW_NUM / 8);
      deleteRowVersions(ROW_NUM / 8);
      deleteColumns(ROW_NUM / 8);
      deleteColumnVersions(ROW_NUM / 8);
      flushRegion();
    }

    verifyCells();

    HStore[] stores = new HStore[2];

    // Every flush must have produced exactly one store file per store, on both tables.
    stores[0] = regions[0].getStore(HBaseTestingUtil.fam1);
    assertEquals(flushCount, stores[0].getStorefilesCount());

    stores[1] = regions[1].getStore(HBaseTestingUtil.fam1);
    assertEquals(flushCount, stores[1].getStorefilesCount());

    regions[0].compact(false);
    assertEquals(flushCount - stores[0].getCompactedFiles().size() + 1,
      stores[0].getStorefilesCount());

    regions[1].compact(false);
    // HBASE-30036 skips redundant delete markers during minor compaction, so the historical
    // file may end up empty and not be created. The count can be +1 or +2.
    int minorCompactedCount = stores[1].getStorefilesCount();
    int expectedMin = flushCount - stores[1].getCompactedFiles().size() + 1;
    int expectedMax = flushCount - stores[1].getCompactedFiles().size() + 2;
    assertTrue(
      "Expected store file count between " + expectedMin + " and " + expectedMax + " but was "
        + minorCompactedCount,
      minorCompactedCount >= expectedMin && minorCompactedCount <= expectedMax);

    verifyCells();

    regions[0].compact(true);
    assertEquals(1, stores[0].getStorefilesCount());

    regions[1].compact(true);
    assertEquals(keepDeletedCells == KeepDeletedCells.FALSE ? 1 : 2,
      stores[1].getStorefilesCount());

    verifyCells();
  }

  /** Compares both tables with a regular scan and then with a raw scan. */
  private void verifyCells() throws Exception {
    scanAndCompare(false);
    scanAndCompare(true);
  }

  /** Flushes both regions and counts the flush. */
  private void flushRegion() throws Exception {
    regions[0].flush(true);
    regions[1].flush(true);
    flushCount++;
  }

  /**
   * Looks at the most recently recorded cell of every column of the row and, among those that are
   * puts, returns the largest timestamp.
   * @param row index of the row in {@link #insertedCells}
   * @return the largest such timestamp, or {@code null} if no column's most recent recorded cell
   *         is a put
   */
  private Long getRowTimestamp(int row) {
    Long maxTimestamp = null;
    for (int q = 0; q < qualifiers.length; q++) {
      int size = insertedCells.get(row).get(q).size();
      if (size > 0) {
        CellInfo mostRecentCellInfo = insertedCells.get(row).get(q).get(size - 1);
        if (mostRecentCellInfo.type == Cell.Type.Put) {
          if (maxTimestamp == null || maxTimestamp < mostRecentCellInfo.timestamp) {
            maxTimestamp = mostRecentCellInfo.timestamp;
          }
        }
      }
    }
    return maxTimestamp;
  }

  /**
   * Returns the current wall-clock time; if it equals the given timestamp, sleeps for a
   * millisecond, reads the clock again and asserts that it has moved past the given timestamp.
   * @param timestamp the previously used timestamp
   * @return the newly read timestamp
   */
  private long getNewTimestamp(long timestamp) throws Exception {
    long newTimestamp = System.currentTimeMillis();
    if (timestamp == newTimestamp) {
      Thread.sleep(1);
      newTimestamp = System.currentTimeMillis();
      assertTrue(timestamp < newTimestamp);
    }
    return newTimestamp;
  }

  /**
   * Puts all qualifiers of {@code rowCount} randomly chosen rows to both regions, using an
   * explicit timestamp (also used as the cell value) that is refreshed after every put.
   * @param rowCount number of puts to issue
   */
  private void insertRows(int rowCount) throws Exception {
    int row;
    long timestamp = System.currentTimeMillis();
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Put put = new Put(Bytes.toBytes(String.valueOf(row)), timestamp);
      for (int q = 0; q < qualifiers.length; q++) {
        put.addColumn(HBaseTestingUtil.fam1, qualifiers[q],
          Bytes.toBytes(String.valueOf(timestamp)));
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Put));
      }
      regions[0].put(put);
      regions[1].put(put);
      timestamp = getNewTimestamp(timestamp);
    }
  }

  /**
   * Issues a whole-row delete for {@code rowCount} randomly chosen rows on both regions.
   * @param rowCount number of deletes to issue
   */
  private void deleteRows(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      // The Delete below is built without an explicit timestamp; the value recorded in
      // insertedCells is the client clock read here, just before the delete is issued.
      long timestamp = System.currentTimeMillis();
      row = RANDOM.nextInt(ROW_NUM);
      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
      regions[0].delete(delete);
      regions[1].delete(delete);
      // For simplicity, the family delete markers are inserted for all columns (instead of
      // allocating a separate column for them) in the memory representation of the data stored
      // to HBase
      for (int q = 0; q < qualifiers.length; q++) {
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamily));
      }
    }
  }

  /**
   * Issues a family-version delete for the given row and timestamp on both regions and records it.
   * @param row       index of the row
   * @param timestamp timestamp passed to {@code Delete.addFamilyVersion}
   */
  private void deleteSingleRowVersion(int row, long timestamp) throws IOException {
    Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
    delete.addFamilyVersion(HBaseTestingUtil.fam1, timestamp);
    regions[0].delete(delete);
    regions[1].delete(delete);
    // For simplicity, the family delete version markers are inserted for all columns (instead of
    // allocating a separate column for them) in the memory representation of the data stored
    // to HBase
    for (int q = 0; q < qualifiers.length; q++) {
      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamilyVersion));
    }
  }

  /**
   * For {@code rowCount} randomly chosen rows, deletes the row version at the timestamp returned
   * by {@link #getRowTimestamp(int)} (rows for which it returns {@code null} are skipped), then
   * adds one extra family-version delete marker at the current time.
   * @param rowCount number of rows to try
   */
  private void deleteRowVersions(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Long timestamp = getRowTimestamp(row);
      if (timestamp != null) {
        deleteSingleRowVersion(row, timestamp);
      }
    }
    // Insert one more delete marker, which possibly does not delete any row version
    row = RANDOM.nextInt(ROW_NUM);
    deleteSingleRowVersion(row, System.currentTimeMillis());
  }

  /**
   * Issues {@code rowCount} column deletes ({@code Delete.addColumns}), each on a randomly chosen
   * row and qualifier with the current time as explicit timestamp, on both regions.
   * @param rowCount number of deletes to issue
   */
  private void deleteColumns(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      long timestamp = System.currentTimeMillis();
      row = RANDOM.nextInt(ROW_NUM);
      int q = RANDOM.nextInt(qualifiers.length);
      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)), timestamp);
      delete.addColumns(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
      regions[0].delete(delete);
      regions[1].delete(delete);
      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteColumn));
    }
  }

  /**
   * For {@code rowCount} randomly chosen rows, issues a single-version column delete
   * ({@code Delete.addColumn}) on a random qualifier at the timestamp returned by
   * {@link #getRowTimestamp(int)}; rows for which it returns {@code null} are skipped.
   * @param rowCount number of rows to try
   */
  private void deleteColumnVersions(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Long timestamp = getRowTimestamp(row);
      if (timestamp != null) {
        Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
        int q = RANDOM.nextInt(qualifiers.length);
        delete.addColumn(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
        regions[0].delete(delete);
        regions[1].delete(delete);
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Delete));
      }
    }
  }

  /**
   * Builds a scan that reads all versions.
   * @param raw whether the scan is a raw scan
   */
  private Scan createScan(boolean raw) {
    Scan scan = new Scan();
    scan.readAllVersions();
    scan.setRaw(raw);
    return scan;
  }

  /**
   * Scans both regions in lock step and asserts that every batch returned by the two scanners has
   * the same number of cells and that corresponding cells agree on row, column, type and value.
   * @param raw whether to compare with raw scans
   */
  private void scanAndCompare(boolean raw) throws Exception {
    try (RegionScanner firstRS = regions[0].getScanner(createScan(raw));
      RegionScanner secondRS = regions[1].getScanner(createScan(raw))) {
      boolean firstHasMore;
      boolean secondHasMore;
      do {
        List<Cell> firstRowList = new ArrayList<>();
        List<Cell> secondRowList = new ArrayList<>();
        firstHasMore = firstRS.nextRaw(firstRowList);
        secondHasMore = secondRS.nextRaw(secondRowList);
        assertEquals(firstRowList.size(), secondRowList.size());
        int size = firstRowList.size();
        for (int i = 0; i < size; i++) {
          Cell firstCell = firstRowList.get(i);
          Cell secondCell = secondRowList.get(i);
          assertTrue(CellUtil.matchingRowColumn(firstCell, secondCell));
          assertEquals(firstCell.getType(), secondCell.getType());
          // Compare the value of the first table's cell against the second table's cell; comparing
          // a cell with itself would always pass and verify nothing.
          assertTrue(
            Bytes.equals(CellUtil.cloneValue(firstCell), CellUtil.cloneValue(secondCell)));
        }
      } while (firstHasMore && secondHasMore);
      assertEquals(firstHasMore, secondHasMore);
    }
  }
}