/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparatorImpl;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueTestUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
import org.apache.hadoop.hbase.util.BloomFilterUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
import org.junit.runners.Parameterized.Parameter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Tests optimized scanning of multiple columns. <br>
 * The original big test was split into several subclass UTs because, with the ROWCOL bloom type,
 * the row-col bloom filter is consulted every time the scanner switches from one column to
 * another, to save an HDFS seek. That is CPU-intensive (~45s per case), so the ROWCOL cases were
 * moved into separate LargeTests to avoid timeout failures. <br>
 * <br>
 * To be clear: TestMultiColumnScanner flushes 10 HFiles (NUM_FLUSHES=10), and the table holds
 * ~1000 cells (rows=20, ts=6, qualifiers=8, total=20*6*8 ~ 1000). Each full-table scan checks the
 * ROWCOL bloom filter 20 (rows) * 8 (columns) * 10 (hfiles) = 1600 times, and the table is
 * scanned 6*2^8=1536 times, so in the end the bloom filter is tested 1600*1536=2457600 times.
 * (See HBASE-21520)
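 * <p>
 * For reference, a concrete subclass is expected to wire the JUnit parameters into the
 * {@code @Parameter} fields below in roughly the following way. This is only a sketch: the
 * subclass name and codec choice here are hypothetical, though {@code generateParams} is the
 * helper defined in this class.
 *
 * <pre>
 * &#64;RunWith(Parameterized.class)
 * public class TestMultiColumnScannerWithSomeCodec extends TestMultiColumnScanner {
 *   &#64;Parameters
 *   public static Collection&lt;Object[]&gt; parameters() {
 *     return TestMultiColumnScanner.generateParams(Compression.Algorithm.GZ, false);
 *   }
 * }
 * </pre>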
 */
public abstract class TestMultiColumnScanner {

  private static final Logger LOG = LoggerFactory.getLogger(TestMultiColumnScanner.class);

  private static final String TABLE_NAME = TestMultiColumnScanner.class.getSimpleName();

  static final int MAX_VERSIONS = 50;

  private static final String FAMILY = "CF";
  private static final byte[] FAMILY_BYTES = Bytes.toBytes(FAMILY);

  /**
   * The size of the column qualifier set used. Increasing this parameter exponentially increases
   * test time.
   */
  private static final int NUM_COLUMNS = 8;

  // Note: '-' binds tighter than '<<', so this evaluates as 1 << (NUM_COLUMNS - 1) == 128.
  private static final int MAX_COLUMN_BIT_MASK = 1 << NUM_COLUMNS - 1;
  private static final int NUM_FLUSHES = 10;
  private static final int NUM_ROWS = 20;

  /** A large value of type long for use as a timestamp */
  private static final long BIG_LONG = 9111222333444555666L;

  /**
   * Timestamps to test with. Cannot use {@link Long#MAX_VALUE} here, because it would be replaced
   * by a timestamp auto-generated from the current time.
   */
  private static final long[] TIMESTAMPS =
    new long[] { 1, 3, 5, Integer.MAX_VALUE, BIG_LONG, Long.MAX_VALUE - 1 };

  /** The probability that a column is skipped in a store file. */
  private static final double COLUMN_SKIP_IN_STORE_FILE_PROB = 0.7;

  /** The probability of deleting a row/column pair. */
  private static final double DELETE_PROBABILITY = 0.02;

  private static final HBaseTestingUtility TEST_UTIL = HBaseTestingUtility.createLocalHTU();

  @Parameter(0)
  public Compression.Algorithm comprAlgo;

  @Parameter(1)
  public BloomType bloomType;

  @Parameter(2)
  public DataBlockEncoding dataBlockEncoding;

  // Some static sanity-checking.
  static {
    assertTrue(BIG_LONG > 0.9 * Long.MAX_VALUE); // Guard against typos.

    // Ensure TIMESTAMPS are sorted.
    for (int i = 0; i < TIMESTAMPS.length - 1; ++i) {
      assertTrue(TIMESTAMPS[i] < TIMESTAMPS[i + 1]);
    }
  }

  public static Collection<Object[]> generateParams(Compression.Algorithm algo,
    boolean useDataBlockEncoding) {
    List<Object[]> parameters = new ArrayList<>();
    for (BloomType bloomType : BloomType.values()) {
      DataBlockEncoding dataBlockEncoding =
        useDataBlockEncoding ? DataBlockEncoding.PREFIX : DataBlockEncoding.NONE;
      parameters.add(new Object[] { algo, bloomType, dataBlockEncoding });
    }
    return parameters;
  }

  @Test
  public void testMultiColumnScanner() throws IOException {
    TEST_UTIL.getConfiguration().setInt(BloomFilterUtil.PREFIX_LENGTH_KEY, 10);
    HRegion region = TEST_UTIL.createTestRegion(TABLE_NAME,
      ColumnFamilyDescriptorBuilder.newBuilder(FAMILY_BYTES).setCompressionType(comprAlgo)
        .setBloomFilterType(bloomType).setMaxVersions(MAX_VERSIONS)
        .setDataBlockEncoding(dataBlockEncoding).build(),
      BlockCacheFactory.createBlockCache(TEST_UTIL.getConfiguration()));
    List<String> rows = sequentialStrings("row", NUM_ROWS);
    List<String> qualifiers = sequentialStrings("qual", NUM_COLUMNS);
    List<KeyValue> kvs = new ArrayList<>();
    Set<String> keySet = new HashSet<>();
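    // Phase 1: load the region. For each flush, every qualifier is independently skipped with
    // probability COLUMN_SKIP_IN_STORE_FILE_PROB, so each of the NUM_FLUSHES store files ends up
    // with a different subset of columns, and a small fraction of row/column/timestamp cells is
    // deleted along the way.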
    // A map from <row>_<qualifier> to the most recent delete timestamp for
    // that column.
    Map<String, Long> lastDelTimeMap = new HashMap<>();

    Random rand = ThreadLocalRandom.current();
    for (int iFlush = 0; iFlush < NUM_FLUSHES; ++iFlush) {
      for (String qual : qualifiers) {
        // This is where we decide whether to include this column in this store file,
        // regardless of row and timestamp.
        if (rand.nextDouble() < COLUMN_SKIP_IN_STORE_FILE_PROB) continue;

        byte[] qualBytes = Bytes.toBytes(qual);
        for (String row : rows) {
          Put p = new Put(Bytes.toBytes(row));
          for (long ts : TIMESTAMPS) {
            String value = createValue(row, qual, ts);
            KeyValue kv = KeyValueTestUtil.create(row, FAMILY, qual, ts, value);
            assertEquals(kv.getTimestamp(), ts);
            p.add(kv);
            String keyAsString = kv.toString();
            if (!keySet.contains(keyAsString)) {
              keySet.add(keyAsString);
              kvs.add(kv);
            }
          }
          region.put(p);

          Delete d = new Delete(Bytes.toBytes(row));
          boolean deletedSomething = false;
          for (long ts : TIMESTAMPS) {
            if (rand.nextDouble() < DELETE_PROBABILITY) {
              d.addColumns(FAMILY_BYTES, qualBytes, ts);
              String rowAndQual = row + "_" + qual;
              Long whenDeleted = lastDelTimeMap.get(rowAndQual);
              lastDelTimeMap.put(rowAndQual, whenDeleted == null ? ts : Math.max(ts, whenDeleted));
              deletedSomething = true;
            }
          }
          if (deletedSomething) {
            region.delete(d);
          }
        }
      }
      region.flush(true);
    }

    // Phase 2: scan with every combination of maxVersions and column subset, and verify the
    // results against the expected key/values in kvs.
    Collections.sort(kvs, CellComparatorImpl.COMPARATOR);
    for (int maxVersions = 1; maxVersions <= TIMESTAMPS.length; ++maxVersions) {
      for (int columnBitMask = 1; columnBitMask <= MAX_COLUMN_BIT_MASK; ++columnBitMask) {
        Scan scan = new Scan();
        scan.readVersions(maxVersions);
        Set<String> qualSet = new TreeSet<>();
        {
          // Decode the bitmask: bit i of columnBitMask selects qualifiers.get(i).
          int columnMaskTmp = columnBitMask;
          for (String qual : qualifiers) {
            if ((columnMaskTmp & 1) != 0) {
              scan.addColumn(FAMILY_BYTES, Bytes.toBytes(qual));
              qualSet.add(qual);
            }
            columnMaskTmp >>= 1;
          }
          assertEquals(0, columnMaskTmp);
        }

        InternalScanner scanner = region.getScanner(scan);
        List<Cell> results = new ArrayList<>();

        int kvPos = 0;
        int numResults = 0;
        String queryInfo = "columns queried: " + qualSet + " (columnBitMask=" + columnBitMask
          + "), maxVersions=" + maxVersions;

        while (scanner.next(results) || results.size() > 0) {
          for (Cell kv : results) {
            while (
              kvPos < kvs.size()
                && !matchesQuery(kvs.get(kvPos), qualSet, maxVersions, lastDelTimeMap)
            ) {
              ++kvPos;
            }
            String rowQual = getRowQualStr(kv);
            String deleteInfo = "";
            Long lastDelTS = lastDelTimeMap.get(rowQual);
            if (lastDelTS != null) {
              deleteInfo =
                "; last timestamp when row/column " + rowQual + " was deleted: " + lastDelTS;
            }
            assertTrue(
              "Scanner returned additional key/value: " + kv + ", " + queryInfo + deleteInfo + ";",
              kvPos < kvs.size());
            assertTrue("Scanner returned wrong key/value; " + queryInfo + deleteInfo + ";",
              PrivateCellUtil.equalsIgnoreMvccVersion(kvs.get(kvPos), kv));
            ++kvPos;
            ++numResults;
          }
          results.clear();
        }
        for (; kvPos < kvs.size(); ++kvPos) {
          KeyValue remainingKV = kvs.get(kvPos);
          assertFalse(
            "Matching column not returned by scanner: " + remainingKV + ", " + queryInfo
              + ", results returned: " + numResults,
            matchesQuery(remainingKV, qualSet, maxVersions, lastDelTimeMap));
        }
      }
    }
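    // Sanity check: DELETE_PROBABILITY = 0.02 is applied to every row/column/timestamp cell that
    // made it into a store file (in expectation roughly 0.3 * 8 * 20 * 6 = ~288 draws per flush
    // over 10 flushes), so at least one delete is all but guaranteed to have happened by now.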
    assertTrue("This test is supposed to delete at least some row/column pairs",
      lastDelTimeMap.size() > 0);
    LOG.info("Number of row/col pairs deleted at least once: " + lastDelTimeMap.size());
    HBaseTestingUtility.closeRegionAndWAL(region);
  }

  private static String getRowQualStr(Cell kv) {
    String rowStr = Bytes.toString(CellUtil.cloneRow(kv));
    String qualStr = Bytes.toString(CellUtil.cloneQualifier(kv));
    return rowStr + "_" + qualStr;
  }

  private static boolean matchesQuery(KeyValue kv, Set<String> qualSet, int maxVersions,
    Map<String, Long> lastDelTimeMap) {
    Long lastDelTS = lastDelTimeMap.get(getRowQualStr(kv));
    long ts = kv.getTimestamp();
    // Each (row, qualifier) pair is written at every timestamp in TIMESTAMPS, and a delete at ts
    // masks all versions <= ts, so the versions a scan may return are those among the maxVersions
    // largest timestamps that are newer than the last delete.
    return qualSet.contains(qualStr(kv)) && ts >= TIMESTAMPS[TIMESTAMPS.length - maxVersions]
      && (lastDelTS == null || ts > lastDelTS);
  }

  private static String qualStr(KeyValue kv) {
    return Bytes.toString(kv.getQualifierArray(), kv.getQualifierOffset(), kv.getQualifierLength());
  }

  static String createValue(String row, String qual, long ts) {
    return "value_for_" + row + "_" + qual + "_" + ts;
  }

  private static List<String> sequentialStrings(String prefix, int n) {
    List<String> lst = new ArrayList<>();
    for (int i = 0; i < n; ++i) {
      StringBuilder sb = new StringBuilder();
      sb.append(prefix + i);

      // Make the qualifier length depend on i by appending the bits of i as 'a'/'b' characters.
      int iBitShifted = i;
      while (iBitShifted != 0) {
        sb.append((iBitShifted & 1) == 0 ? 'a' : 'b');
        iBitShifted >>= 1;
      }

      lst.add(sb.toString());
    }
    return lst;
  }
}