/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder.NEW_VERSION_BEHAVIOR;
import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.stream.Stream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseParameterizedTestTemplate;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.KeepDeletedCells;
import org.apache.hadoop.hbase.MemoryCompactionPolicy;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.TestTemplate;
import org.junit.jupiter.params.provider.Arguments;

/**
 * The store file writer does not do any compaction. Each cell is written to either the live or the
 * historical file. Regular (i.e., non-raw) scans that read the latest put cells scan only live
 * files. To ensure the correctness of the store file writer, we need to verify that live files
 * include all live cells. This test verifies this indirectly as follows. The test creates two
 * tables, each with one region and one store. Dual file writing (live vs. historical) is enabled
 * on only one of the tables. The test generates the exact same set of mutations on both tables.
 * These mutations include all types of cells, and these cells are written to multiple files using
 * multiple memstore flushes. After writing all cells, the test first verifies that both tables
 * return the same set of cells for regular and raw scans. Then the same verification is done after
 * the tables are minor and finally major compacted. The test also verifies that flushes do not
 * generate historical files and that historical files are generated only when historical file
 * generation is enabled (by the config hbase.enable.historical.compaction.files).
 */
@Tag(RegionServerTests.TAG)
@Tag(LargeTests.TAG)
@HBaseParameterizedTestTemplate(
    name = "{index}: keepDeletedCells={0}, maxVersions={1}, newVersionBehavior={2}")
public class TestStoreFileWriter {

  private final int ROW_NUM = 100;
  private final Random RANDOM = new Random(11);
  private final HBaseTestingUtil testUtil = new HBaseTestingUtil();
  private HRegion[] regions = new HRegion[2];
  private final byte[][] qualifiers =
    { Bytes.toBytes("0"), Bytes.toBytes("1"), Bytes.toBytes("2") };
  // This keeps track of all inserted cells. It is a list of rows; each row is a list of columns,
  // and each column is a list of CellInfo objects.
  private ArrayList<ArrayList<ArrayList<CellInfo>>> insertedCells;
  private TableName[] tableName = new TableName[2];
  private final Configuration conf = testUtil.getConfiguration();
  private int flushCount = 0;

  public KeepDeletedCells keepDeletedCells;
  public int maxVersions;
  public boolean newVersionBehavior;

  public TestStoreFileWriter(KeepDeletedCells keepDeletedCells, int maxVersions,
    boolean newVersionBehavior) {
    this.keepDeletedCells = keepDeletedCells;
    this.maxVersions = maxVersions;
    this.newVersionBehavior = newVersionBehavior;
  }

  public static synchronized Stream<Arguments> parameters() {
    return Stream.of(Arguments.of(KeepDeletedCells.FALSE, 1, true),
      Arguments.of(KeepDeletedCells.FALSE, 2, false), Arguments.of(KeepDeletedCells.FALSE, 3, true),
      Arguments.of(KeepDeletedCells.TRUE, 1, false),
      // { KeepDeletedCells.TRUE, 2, true }, see HBASE-28442
      Arguments.of(KeepDeletedCells.TRUE, 3, false));
  }

  // In-memory representation of a cell. We only need to know the timestamp and the type field for
  // our testing. Please note the row and column of a cell are implicit in its position within
  // insertedCells.
  private static class CellInfo {
    long timestamp;
    Cell.Type type;

    CellInfo(long timestamp, Cell.Type type) {
      this.timestamp = timestamp;
      this.type = type;
    }
  }

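  // Creates a table with a single column family configured with the parameterized maxVersions,
  // keepDeletedCells, and new version behavior settings. The dual file writer (historical
  // compaction files) is enabled only when enableDualFileWriter is true.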
  private void createTable(int index, boolean enableDualFileWriter) throws IOException {
    tableName[index] = TableName.valueOf(getClass().getSimpleName() + "_" + index);
    ColumnFamilyDescriptor familyDescriptor =
      ColumnFamilyDescriptorBuilder.newBuilder(HBaseTestingUtil.fam1).setMaxVersions(maxVersions)
        .setKeepDeletedCells(keepDeletedCells)
        .setValue(NEW_VERSION_BEHAVIOR, Boolean.toString(newVersionBehavior)).build();
    TableDescriptorBuilder builder =
      TableDescriptorBuilder.newBuilder(tableName[index]).setColumnFamily(familyDescriptor)
        .setValue(ENABLE_HISTORICAL_COMPACTION_FILES, Boolean.toString(enableDualFileWriter));
    testUtil.createTable(builder.build(), null);
    regions[index] = testUtil.getMiniHBaseCluster().getRegions(tableName[index]).get(0);
  }

  @BeforeEach
  public void setUp() throws Exception {
    conf.setInt(CompactionConfiguration.HBASE_HSTORE_COMPACTION_MAX_KEY, 6);
    conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
      String.valueOf(MemoryCompactionPolicy.NONE));
    testUtil.startMiniCluster();
    createTable(0, false);
    createTable(1, true);
    insertedCells = new ArrayList<>(ROW_NUM);
    for (int r = 0; r < ROW_NUM; r++) {
      insertedCells.add(new ArrayList<>(qualifiers.length));
      for (int q = 0; q < qualifiers.length; q++) {
        insertedCells.get(r).add(new ArrayList<>(10));
      }
    }
  }

  @AfterEach
  public void tearDown() throws Exception {
    this.testUtil.shutdownMiniCluster();
    testUtil.cleanupTestDir();
  }

  @TestTemplate
  public void testCompactedFiles() throws Exception {
    for (int i = 0; i < 10; i++) {
      insertRows(ROW_NUM * maxVersions);
      deleteRows(ROW_NUM / 8);
      deleteRowVersions(ROW_NUM / 8);
      deleteColumns(ROW_NUM / 8);
      deleteColumnVersions(ROW_NUM / 8);
      flushRegion();
    }

    verifyCells();

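    // Flushes must not generate historical files: both stores, including the one with the dual
    // file writer enabled, should have exactly one store file per flush.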
    HStore[] stores = new HStore[2];

    stores[0] = regions[0].getStore(HBaseTestingUtil.fam1);
    assertEquals(flushCount, stores[0].getStorefilesCount());

    stores[1] = regions[1].getStore(HBaseTestingUtil.fam1);
    assertEquals(flushCount, stores[1].getStorefilesCount());

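    // Minor compaction merges the selected files (at most hbase.hstore.compaction.max, set to 6
    // in setUp); without the dual file writer this yields a single new file.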
    regions[0].compact(false);
    assertEquals(flushCount - stores[0].getCompactedFiles().size() + 1,
      stores[0].getStorefilesCount());

    regions[1].compact(false);
    // HBASE-30036 skips redundant delete markers during minor compaction, so the historical
    // file may end up empty and not be created. The count can be +1 or +2.
    int minorCompactedCount = stores[1].getStorefilesCount();
    int expectedMin = flushCount - stores[1].getCompactedFiles().size() + 1;
    int expectedMax = flushCount - stores[1].getCompactedFiles().size() + 2;
    assertTrue(minorCompactedCount >= expectedMin && minorCompactedCount <= expectedMax,
      "Expected store file count between " + expectedMin + " and " + expectedMax + " but was "
        + minorCompactedCount);

    verifyCells();

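    // Major compaction compacts all store files into a single live file.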
    regions[0].compact(true);
    assertEquals(1, stores[0].getStorefilesCount());

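    // With the dual file writer, deleted cells are written to a historical file, so a second
    // file remains after major compaction only when deleted cells are retained.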
    regions[1].compact(true);
    assertEquals(keepDeletedCells == KeepDeletedCells.FALSE ? 1 : 2,
      stores[1].getStorefilesCount());

    verifyCells();
  }

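  // Compares the contents of the two tables with both a regular and a raw scan.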
  private void verifyCells() throws Exception {
    scanAndCompare(false);
    scanAndCompare(true);
  }

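  // Flushes the memstores of both regions and counts the flush.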
  private void flushRegion() throws Exception {
    regions[0].flush(true);
    regions[1].flush(true);
    flushCount++;
  }

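  // Returns the timestamp of the most recent put cell in the given row, or null if no column's
  // latest mutation is a put.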
  private Long getRowTimestamp(int row) {
    Long maxTimestamp = null;
    for (int q = 0; q < qualifiers.length; q++) {
      int size = insertedCells.get(row).get(q).size();
      if (size > 0) {
        CellInfo mostRecentCellInfo = insertedCells.get(row).get(q).get(size - 1);
        if (mostRecentCellInfo.type == Cell.Type.Put) {
          if (maxTimestamp == null || maxTimestamp < mostRecentCellInfo.timestamp) {
            maxTimestamp = mostRecentCellInfo.timestamp;
          }
        }
      }
    }
    return maxTimestamp;
  }

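  // Returns the current time, sleeping briefly if needed so that the returned timestamp is
  // strictly greater than the given one.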
  private long getNewTimestamp(long timestamp) throws Exception {
    long newTimestamp = System.currentTimeMillis();
    if (timestamp == newTimestamp) {
      Thread.sleep(1);
      newTimestamp = System.currentTimeMillis();
      assertTrue(timestamp < newTimestamp);
    }
    return newTimestamp;
  }

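  // Puts all columns of a randomly chosen row, rowCount times, with strictly increasing
  // timestamps, recording each cell in insertedCells.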
  private void insertRows(int rowCount) throws Exception {
    int row;
    long timestamp = System.currentTimeMillis();
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Put put = new Put(Bytes.toBytes(String.valueOf(row)), timestamp);
      for (int q = 0; q < qualifiers.length; q++) {
        put.addColumn(HBaseTestingUtil.fam1, qualifiers[q],
          Bytes.toBytes(String.valueOf(timestamp)));
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Put));
      }
      regions[0].put(put);
      regions[1].put(put);
      timestamp = getNewTimestamp(timestamp);
    }
  }

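  // Deletes rowCount randomly chosen rows by inserting family delete markers into both tables.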
  private void deleteRows(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      long timestamp = System.currentTimeMillis();
      row = RANDOM.nextInt(ROW_NUM);
      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
      regions[0].delete(delete);
      regions[1].delete(delete);
      // For simplicity, the family delete markers are recorded for all columns (instead of
      // allocating a separate column for them) in the in-memory representation of the data stored
      // in HBase
      for (int q = 0; q < qualifiers.length; q++) {
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamily));
      }
    }
  }

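  // Deletes the cells at the given timestamp in all columns of the given row by inserting a
  // family version delete marker.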
  private void deleteSingleRowVersion(int row, long timestamp) throws IOException {
    Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
    delete.addFamilyVersion(HBaseTestingUtil.fam1, timestamp);
    regions[0].delete(delete);
    regions[1].delete(delete);
    // For simplicity, the family delete version markers are recorded for all columns (instead of
    // allocating a separate column for them) in the in-memory representation of the data stored
    // in HBase
    for (int q = 0; q < qualifiers.length; q++) {
      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamilyVersion));
    }
  }

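  // Deletes the most recent put version of rowCount randomly chosen rows.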
  private void deleteRowVersions(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Long timestamp = getRowTimestamp(row);
      if (timestamp != null) {
        deleteSingleRowVersion(row, timestamp);
      }
    }
    // Insert one more delete marker that possibly does not delete any row version
    row = RANDOM.nextInt(ROW_NUM);
    deleteSingleRowVersion(row, System.currentTimeMillis());
  }

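  // Deletes all versions of one randomly chosen column in each of rowCount randomly chosen rows.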
  private void deleteColumns(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      long timestamp = System.currentTimeMillis();
      row = RANDOM.nextInt(ROW_NUM);
      int q = RANDOM.nextInt(qualifiers.length);
      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)), timestamp);
      delete.addColumns(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
      regions[0].delete(delete);
      regions[1].delete(delete);
      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteColumn));
    }
  }

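  // Deletes the cell version at the row's most recent put timestamp in one randomly chosen
  // column of rowCount randomly chosen rows.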
  private void deleteColumnVersions(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Long timestamp = getRowTimestamp(row);
      if (timestamp != null) {
        Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
        int q = RANDOM.nextInt(qualifiers.length);
        delete.addColumn(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
        regions[0].delete(delete);
        regions[1].delete(delete);
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Delete));
      }
    }
  }

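  // Creates a scan that reads all versions; a raw scan additionally returns delete markers and
  // deleted cells.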
  private Scan createScan(boolean raw) {
    Scan scan = new Scan();
    scan.readAllVersions();
    scan.setRaw(raw);
    return scan;
  }

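  // Scans both tables row by row and asserts that they return the same cells (matching row,
  // column, type, and value).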
  private void scanAndCompare(boolean raw) throws Exception {
    try (RegionScanner firstRS = regions[0].getScanner(createScan(raw))) {
      try (RegionScanner secondRS = regions[1].getScanner(createScan(raw))) {
        boolean firstHasMore;
        boolean secondHasMore;
        do {
          List<Cell> firstRowList = new ArrayList<>();
          List<Cell> secondRowList = new ArrayList<>();
          firstHasMore = firstRS.nextRaw(firstRowList);
          secondHasMore = secondRS.nextRaw(secondRowList);
          assertEquals(firstRowList.size(), secondRowList.size());
          int size = firstRowList.size();
          for (int i = 0; i < size; i++) {
            Cell firstCell = firstRowList.get(i);
            Cell secondCell = secondRowList.get(i);
            assertTrue(CellUtil.matchingRowColumn(firstCell, secondCell));
            assertEquals(firstCell.getType(), secondCell.getType());
            assertTrue(
              Bytes.equals(CellUtil.cloneValue(firstCell), CellUtil.cloneValue(secondCell)));
          }
        } while (firstHasMore && secondHasMore);
        assertEquals(firstHasMore, secondHasMore);
      }
    }
  }
}