/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder.NEW_VERSION_BEHAVIOR;
import static org.apache.hadoop.hbase.regionserver.StoreFileWriter.ENABLE_HISTORICAL_COMPACTION_FILES;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtil;
import org.apache.hadoop.hbase.KeepDeletedCells;
import org.apache.hadoop.hbase.MemoryCompactionPolicy;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionConfiguration;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.testclassification.RegionServerTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

/**
 * The store file writer does not do any compaction. Each cell is written to either the live or
 * the historical file. Regular (i.e., non-raw) scans that read the latest put cells scan only
 * live files. To ensure the correctness of the store file writer, we need to verify that live
 * files include all live cells. This test verifies that indirectly, as follows. The test creates
 * two tables, each with one region and one store. The dual file writing (live vs. historical) is
 * configured on only one of the tables. The test generates the exact same set of mutations on
 * both tables. These mutations include all types of cells, and these cells are written to
 * multiple files using multiple memstore flushes. After writing all cells, the test first
 * verifies that both tables return the same set of cells for regular and raw scans. Then the
 * same verification is done after the tables are minor compacted, and finally after they are
 * major compacted. The test also verifies that flushes do not generate historical files and that
 * historical files are generated only when historical file generation is enabled (by the config
 * hbase.enable.historical.compaction.files).
 */
@Category({ RegionServerTests.class, LargeTests.class })
@RunWith(Parameterized.class)
public class TestStoreFileWriter {
  @ClassRule
  public static final HBaseClassTestRule CLASS_RULE =
    HBaseClassTestRule.forClass(TestStoreFileWriter.class);
  private final int ROW_NUM = 100;
  private final Random RANDOM = new Random(11);
  private final HBaseTestingUtil testUtil = new HBaseTestingUtil();
  private HRegion[] regions = new HRegion[2];
  private final byte[][] qualifiers =
    { Bytes.toBytes("0"), Bytes.toBytes("1"), Bytes.toBytes("2") };
  // This keeps track of all cells. It is a list of rows, where each row is a list of columns and
  // each column is a list of CellInfo objects
  private ArrayList<ArrayList<ArrayList<CellInfo>>> insertedCells;
  private TableName[] tableName = new TableName[2];
  private final Configuration conf = testUtil.getConfiguration();
  private int flushCount = 0;

  @Parameterized.Parameter(0)
  public KeepDeletedCells keepDeletedCells;
  @Parameterized.Parameter(1)
  public int maxVersions;
  @Parameterized.Parameter(2)
  public boolean newVersionBehavior;

  @Parameterized.Parameters(name = "keepDeletedCells={0}, maxVersions={1}, newVersionBehavior={2}")
  public static synchronized Collection<Object[]> data() {
    return Arrays.asList(
      new Object[][] { { KeepDeletedCells.FALSE, 1, true }, { KeepDeletedCells.FALSE, 2, false },
        { KeepDeletedCells.FALSE, 3, true }, { KeepDeletedCells.TRUE, 1, false },
        // { KeepDeletedCells.TRUE, 2, true }, see HBASE-28442
        { KeepDeletedCells.TRUE, 3, false } });
  }

  // In-memory representation of a cell. We only need the timestamp and the type field for our
  // testing. Note that the row and column of a cell are implicit in its position within
  // insertedCells.
  private static class CellInfo {
    long timestamp;
    Cell.Type type;

    CellInfo(long timestamp, Cell.Type type) {
      this.timestamp = timestamp;
      this.type = type;
    }
  }

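  // Creates a table with a single region and a single store. enableDualFileWriter controls
  // whether compactions on this table write separate live and historical store files.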
  private void createTable(int index, boolean enableDualFileWriter) throws IOException {
    tableName[index] = TableName.valueOf(getClass().getSimpleName() + "_" + index);
    ColumnFamilyDescriptor familyDescriptor =
      ColumnFamilyDescriptorBuilder.newBuilder(HBaseTestingUtil.fam1).setMaxVersions(maxVersions)
        .setKeepDeletedCells(keepDeletedCells)
        .setValue(NEW_VERSION_BEHAVIOR, Boolean.toString(newVersionBehavior)).build();
    TableDescriptorBuilder builder =
      TableDescriptorBuilder.newBuilder(tableName[index]).setColumnFamily(familyDescriptor)
        .setValue(ENABLE_HISTORICAL_COMPACTION_FILES, Boolean.toString(enableDualFileWriter));
    testUtil.createTable(builder.build(), null);
    regions[index] = testUtil.getMiniHBaseCluster().getRegions(tableName[index]).get(0);
  }

  @Before
  public void setUp() throws Exception {
    conf.setInt(CompactionConfiguration.HBASE_HSTORE_COMPACTION_MAX_KEY, 6);
    conf.set(CompactingMemStore.COMPACTING_MEMSTORE_TYPE_KEY,
      String.valueOf(MemoryCompactionPolicy.NONE));
    testUtil.startMiniCluster();
    createTable(0, false);
    createTable(1, true);
    insertedCells = new ArrayList<>(ROW_NUM);
    for (int r = 0; r < ROW_NUM; r++) {
      insertedCells.add(new ArrayList<>(qualifiers.length));
      for (int q = 0; q < qualifiers.length; q++) {
        insertedCells.get(r).add(new ArrayList<>(10));
      }
    }
  }

  @After
  public void tearDown() throws Exception {
    this.testUtil.shutdownMiniCluster();
    testUtil.cleanupTestDir();
  }

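  // Generates identical mutations on both tables across multiple flushes, then compares the two
  // tables with regular and raw scans before compaction, after minor compaction, and after major
  // compaction. Also checks the expected store file count at each stage.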
  @Test
  public void testCompactedFiles() throws Exception {
    for (int i = 0; i < 10; i++) {
      insertRows(ROW_NUM * maxVersions);
      deleteRows(ROW_NUM / 8);
      deleteRowVersions(ROW_NUM / 8);
      deleteColumns(ROW_NUM / 8);
      deleteColumnVersions(ROW_NUM / 8);
      flushRegion();
    }

    verifyCells();

    HStore[] stores = new HStore[2];

    stores[0] = regions[0].getStore(HBaseTestingUtil.fam1);
    assertEquals(flushCount, stores[0].getStorefilesCount());

    stores[1] = regions[1].getStore(HBaseTestingUtil.fam1);
    assertEquals(flushCount, stores[1].getStorefilesCount());

    regions[0].compact(false);
    assertEquals(flushCount - stores[0].getCompactedFiles().size() + 1,
      stores[0].getStorefilesCount());

    regions[1].compact(false);
    // HBASE-30036 skips redundant delete markers during minor compaction, so the historical
    // file may end up empty and not be created. The count can be +1 or +2.
    int minorCompactedCount = stores[1].getStorefilesCount();
    int expectedMin = flushCount - stores[1].getCompactedFiles().size() + 1;
    int expectedMax = flushCount - stores[1].getCompactedFiles().size() + 2;
    assertTrue(
      "Expected store file count between " + expectedMin + " and " + expectedMax + " but was "
        + minorCompactedCount,
      minorCompactedCount >= expectedMin && minorCompactedCount <= expectedMax);

    verifyCells();

    regions[0].compact(true);
    assertEquals(1, stores[0].getStorefilesCount());

    regions[1].compact(true);
    assertEquals(keepDeletedCells == KeepDeletedCells.FALSE ? 1 : 2,
      stores[1].getStorefilesCount());

    verifyCells();
  }

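  // Compares the contents of the two regions with both a regular scan and a raw scan.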
  private void verifyCells() throws Exception {
    scanAndCompare(false);
    scanAndCompare(true);
  }

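  // Flushes both regions in lockstep. Each flush is expected to add exactly one store file per
  // store (flushes never write historical files); testCompactedFiles asserts this via flushCount.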
  private void flushRegion() throws Exception {
    regions[0].flush(true);
    regions[1].flush(true);
    flushCount++;
  }

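  // Returns the max timestamp among the columns of the given row whose most recent cell is a
  // put, or null if the latest cell of every column is a delete marker.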
  private Long getRowTimestamp(int row) {
    Long maxTimestamp = null;
    for (int q = 0; q < qualifiers.length; q++) {
      int size = insertedCells.get(row).get(q).size();
      if (size > 0) {
        CellInfo mostRecentCellInfo = insertedCells.get(row).get(q).get(size - 1);
        if (mostRecentCellInfo.type == Cell.Type.Put) {
          if (maxTimestamp == null || maxTimestamp < mostRecentCellInfo.timestamp) {
            maxTimestamp = mostRecentCellInfo.timestamp;
          }
        }
      }
    }
    return maxTimestamp;
  }

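  // Returns the current time, sleeping briefly if needed so that the returned timestamp is
  // guaranteed to differ from the given one. This keeps consecutive mutations from colliding on
  // the same timestamp.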
  private long getNewTimestamp(long timestamp) throws Exception {
    long newTimestamp = System.currentTimeMillis();
    if (timestamp == newTimestamp) {
      Thread.sleep(1);
      newTimestamp = System.currentTimeMillis();
      assertTrue(timestamp < newTimestamp);
    }
    return newTimestamp;
  }

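  // Puts rowCount rows (chosen randomly, so the same row may be picked more than once), writing
  // all qualifiers with the same timestamp to both tables and recording every cell in
  // insertedCells.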
  private void insertRows(int rowCount) throws Exception {
    int row;
    long timestamp = System.currentTimeMillis();
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Put put = new Put(Bytes.toBytes(String.valueOf(row)), timestamp);
      for (int q = 0; q < qualifiers.length; q++) {
        put.addColumn(HBaseTestingUtil.fam1, qualifiers[q],
          Bytes.toBytes(String.valueOf(timestamp)));
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Put));
      }
      regions[0].put(put);
      regions[1].put(put);
      timestamp = getNewTimestamp(timestamp);
    }
  }

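  // Deletes randomly chosen whole rows, which adds a family delete marker to both tables.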
  private void deleteRows(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      long timestamp = System.currentTimeMillis();
      row = RANDOM.nextInt(ROW_NUM);
      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
      regions[0].delete(delete);
      regions[1].delete(delete);
      // For simplicity, the in-memory representation records the family delete marker under
      // every column instead of allocating a separate column for it
      for (int q = 0; q < qualifiers.length; q++) {
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamily));
      }
    }
  }

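  // Deletes, from both tables, all cells of the given row that carry exactly the given
  // timestamp, using a family version delete marker.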
  private void deleteSingleRowVersion(int row, long timestamp) throws IOException {
    Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
    delete.addFamilyVersion(HBaseTestingUtil.fam1, timestamp);
    regions[0].delete(delete);
    regions[1].delete(delete);
    // For simplicity, the in-memory representation records the family version delete marker
    // under every column instead of allocating a separate column for it
    for (int q = 0; q < qualifiers.length; q++) {
      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteFamilyVersion));
    }
  }

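  // Deletes the most recent put version of randomly chosen rows.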
  private void deleteRowVersions(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Long timestamp = getRowTimestamp(row);
      if (timestamp != null) {
        deleteSingleRowVersion(row, timestamp);
      }
    }
    // Insert one more delete marker that possibly does not delete any row version
    row = RANDOM.nextInt(ROW_NUM);
    deleteSingleRowVersion(row, System.currentTimeMillis());
  }

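  // Deletes all versions (at or before the current time) of a randomly chosen column on randomly
  // chosen rows, which adds a DeleteColumn marker to both tables.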
  private void deleteColumns(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      long timestamp = System.currentTimeMillis();
      row = RANDOM.nextInt(ROW_NUM);
      int q = RANDOM.nextInt(qualifiers.length);
      Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)), timestamp);
      delete.addColumns(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
      regions[0].delete(delete);
      regions[1].delete(delete);
      insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.DeleteColumn));
    }
  }

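  // Deletes the version at the row's latest put timestamp of a randomly chosen column, which
  // adds a single-version Delete marker to both tables.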
  private void deleteColumnVersions(int rowCount) throws Exception {
    int row;
    for (int r = 0; r < rowCount; r++) {
      row = RANDOM.nextInt(ROW_NUM);
      Long timestamp = getRowTimestamp(row);
      if (timestamp != null) {
        Delete delete = new Delete(Bytes.toBytes(String.valueOf(row)));
        int q = RANDOM.nextInt(qualifiers.length);
        delete.addColumn(HBaseTestingUtil.fam1, qualifiers[q], timestamp);
        regions[0].delete(delete);
        regions[1].delete(delete);
        insertedCells.get(row).get(q).add(new CellInfo(timestamp, Cell.Type.Delete));
      }
    }
  }

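  // Builds a scan that reads all versions. A raw scan additionally returns delete markers and
  // uncollected deleted cells.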
  private Scan createScan(boolean raw) {
    Scan scan = new Scan();
    scan.readAllVersions();
    scan.setRaw(raw);
    return scan;
  }

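  // Scans both regions row by row and asserts that they return exactly the same cells.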
  private void scanAndCompare(boolean raw) throws Exception {
    try (RegionScanner firstRS = regions[0].getScanner(createScan(raw))) {
      try (RegionScanner secondRS = regions[1].getScanner(createScan(raw))) {
        boolean firstHasMore;
        boolean secondHasMore;
        do {
          List<Cell> firstRowList = new ArrayList<>();
          List<Cell> secondRowList = new ArrayList<>();
          firstHasMore = firstRS.nextRaw(firstRowList);
          secondHasMore = secondRS.nextRaw(secondRowList);
          assertEquals(firstRowList.size(), secondRowList.size());
          int size = firstRowList.size();
          for (int i = 0; i < size; i++) {
            Cell firstCell = firstRowList.get(i);
            Cell secondCell = secondRowList.get(i);
            assertTrue(CellUtil.matchingRowColumn(firstCell, secondCell));
            assertEquals(firstCell.getType(), secondCell.getType());
            assertTrue(
              Bytes.equals(CellUtil.cloneValue(firstCell), CellUtil.cloneValue(secondCell)));
          }
        } while (firstHasMore && secondHasMore);
        assertEquals(firstHasMore, secondHasMore);
      }
    }
  }
}