001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.IOException;
021import java.nio.ByteBuffer;
022import org.apache.hadoop.hbase.ByteBufferKeyValue;
023import org.apache.hadoop.hbase.Cell;
024import org.apache.hadoop.hbase.CellComparator;
025import org.apache.hadoop.hbase.CellUtil;
026import org.apache.hadoop.hbase.ExtendedCell;
027import org.apache.hadoop.hbase.KeyValue;
028import org.apache.hadoop.hbase.util.ByteBufferUtils;
029import org.apache.hadoop.hbase.util.ClassSize;
030import org.apache.yetus.audience.InterfaceAudience;
031
/**
 * CellChunkImmutableSegment extends the API supported by a {@link Segment} and
 * {@link ImmutableSegment}. This immutable segment works with a CellSet whose delegatee is a
 * CellChunkMap.
 */
037@InterfaceAudience.Private
038public class CellChunkImmutableSegment extends ImmutableSegment {
039
  /**
   * Fixed overhead accounted for a segment of this type: {@code ImmutableSegment.DEEP_OVERHEAD}
   * plus {@code ClassSize.CELL_CHUNK_MAP}.
   */
  public static final long DEEP_OVERHEAD_CCM =
    ImmutableSegment.DEEP_OVERHEAD + ClassSize.CELL_CHUNK_MAP;
  /**
   * Threshold, as a fraction of a data chunk's usable space (chunk size minus chunk header). When
   * the last chunk of the index would leave more than this fraction unused, index chunks are used
   * for the index instead of data chunks.
   */
  public static final float INDEX_CHUNK_UNUSED_SPACE_PRECENTAGE = 0.1f;
043
044  ///////////////////// CONSTRUCTORS /////////////////////
045  /**
046   * ------------------------------------------------------------------------ C-tor to be used when
047   * new CellChunkImmutableSegment is built as a result of compaction/merge of a list of older
048   * ImmutableSegments. The given iterator returns the Cells that "survived" the compaction.
049   */
050  protected CellChunkImmutableSegment(CellComparator comparator, MemStoreSegmentsIterator iterator,
051    MemStoreLAB memStoreLAB, int numOfCells, MemStoreCompactionStrategy.Action action) {
052    super(null, comparator, memStoreLAB); // initialize the CellSet with NULL
053    long indexOverhead = DEEP_OVERHEAD_CCM;
054    // memStoreLAB cannot be null in this class
055    boolean onHeap = getMemStoreLAB().isOnHeap();
056    // initiate the heapSize with the size of the segment metadata
057    if (onHeap) {
058      incMemStoreSize(0, indexOverhead, 0, 0);
059    } else {
060      incMemStoreSize(0, 0, indexOverhead, 0);
061    }
062    // build the new CellSet based on CellArrayMap and update the CellSet of the new Segment
063    initializeCellSet(numOfCells, iterator, action);
064  }
065
066  /**
067   * ------------------------------------------------------------------------ C-tor to be used when
068   * new CellChunkImmutableSegment is built as a result of flattening of CSLMImmutableSegment The
069   * given iterator returns the Cells that "survived" the compaction.
070   */
071  protected CellChunkImmutableSegment(CSLMImmutableSegment segment, MemStoreSizing memstoreSizing,
072    MemStoreCompactionStrategy.Action action) {
073    super(segment); // initiailize the upper class
074    long indexOverhead = -CSLMImmutableSegment.DEEP_OVERHEAD_CSLM + DEEP_OVERHEAD_CCM;
075    // memStoreLAB cannot be null in this class
076    boolean onHeap = getMemStoreLAB().isOnHeap();
077    // initiate the heapSize with the size of the segment metadata
078    if (onHeap) {
079      incMemStoreSize(0, indexOverhead, 0, 0);
080      memstoreSizing.incMemStoreSize(0, indexOverhead, 0, 0);
081    } else {
082      incMemStoreSize(0, -CSLMImmutableSegment.DEEP_OVERHEAD_CSLM, DEEP_OVERHEAD_CCM, 0);
083      memstoreSizing.incMemStoreSize(0, -CSLMImmutableSegment.DEEP_OVERHEAD_CSLM, DEEP_OVERHEAD_CCM,
084        0);
085    }
086    int numOfCells = segment.getCellsCount();
087    // build the new CellSet based on CellChunkMap
088    reinitializeCellSet(numOfCells, segment.getScanner(Long.MAX_VALUE), segment.getCellSet(),
089      memstoreSizing, action);
090    // arrange the meta-data size, decrease all meta-data sizes related to SkipList;
091    // add sizes of CellChunkMap entry, decrease also Cell object sizes
092    // (reinitializeCellSet doesn't take the care for the sizes)
093    long newSegmentSizeDelta =
094      numOfCells * (indexEntrySize() - ClassSize.CONCURRENT_SKIPLISTMAP_ENTRY);
095    if (onHeap) {
096      incMemStoreSize(0, newSegmentSizeDelta, 0, 0);
097      memstoreSizing.incMemStoreSize(0, newSegmentSizeDelta, 0, 0);
098    } else {
099      incMemStoreSize(0, 0, newSegmentSizeDelta, 0);
100      memstoreSizing.incMemStoreSize(0, 0, newSegmentSizeDelta, 0);
101
102    }
103  }
104
105  @Override
106  protected long indexEntryOnHeapSize(boolean onHeap) {
107    if (onHeap) {
108      return indexEntrySize();
109    }
110    // else the index is allocated off-heap
111    return 0;
112  }
113
114  @Override
115  protected long indexEntryOffHeapSize(boolean offHeap) {
116    if (offHeap) {
117      return indexEntrySize();
118    }
119    // else the index is allocated on-heap
120    return 0;
121  }
122
123  @Override
124  protected long indexEntrySize() {
125    return ((long) ClassSize.CELL_CHUNK_MAP_ENTRY - KeyValue.FIXED_OVERHEAD);
126  }
127
  /**
   * Always returns {@code false} for this segment type.
   */
  @Override
  protected boolean canBeFlattened() {
    return false;
  }
132
133  ///////////////////// PRIVATE METHODS /////////////////////
134  /*------------------------------------------------------------------------*/
135  // Create CellSet based on CellChunkMap from compacting iterator
136  private void initializeCellSet(int numOfCells, MemStoreSegmentsIterator iterator,
137    MemStoreCompactionStrategy.Action action) {
138
139    int numOfCellsAfterCompaction = 0;
140    int currentChunkIdx = 0;
141    int offsetInCurentChunk = ChunkCreator.SIZEOF_CHUNK_HEADER;
142    int numUniqueKeys = 0;
143    Cell prev = null;
144    Chunk[] chunks = allocIndexChunks(numOfCells);
145    while (iterator.hasNext()) { // the iterator hides the elimination logic for compaction
146      boolean alreadyCopied = false;
147      Cell c = iterator.next();
148      numOfCellsAfterCompaction++;
149      assert (c instanceof ExtendedCell);
150      if (((ExtendedCell) c).getChunkId() == ExtendedCell.CELL_NOT_BASED_ON_CHUNK) {
151        // CellChunkMap assumes all cells are allocated on MSLAB.
152        // Therefore, cells which are not allocated on MSLAB initially,
153        // are copied into MSLAB here.
154        c = copyCellIntoMSLAB(c, null); // no memstore sizing object to update
155        alreadyCopied = true;
156      }
157      if (offsetInCurentChunk + ClassSize.CELL_CHUNK_MAP_ENTRY > chunks[currentChunkIdx].size) {
158        currentChunkIdx++; // continue to the next index chunk
159        offsetInCurentChunk = ChunkCreator.SIZEOF_CHUNK_HEADER;
160      }
161      if (action == MemStoreCompactionStrategy.Action.COMPACT && !alreadyCopied) {
162
163        // For compaction copy cell to the new segment (MSLAB copy),here we set forceCloneOfBigCell
164        // to true, because the chunk which the cell is allocated may be freed after the compaction
165        // is completed, see HBASE-27464.
166        c = maybeCloneWithAllocator(c, true);
167      }
168      offsetInCurentChunk = // add the Cell reference to the index chunk
169        createCellReference((ByteBufferKeyValue) c, chunks[currentChunkIdx].getData(),
170          offsetInCurentChunk);
171      // the sizes still need to be updated in the new segment
172      // second parameter true, because in compaction/merge the addition of the cell to new segment
173      // is always successful
174      updateMetaInfo(c, true, null); // updates the size per cell
175      if (action == MemStoreCompactionStrategy.Action.MERGE_COUNT_UNIQUE_KEYS) {
176        // counting number of unique keys
177        if (prev != null) {
178          if (!CellUtil.matchingRowColumnBytes(prev, c)) {
179            numUniqueKeys++;
180          }
181        } else {
182          numUniqueKeys++;
183        }
184      }
185      prev = c;
186    }
187    if (action == MemStoreCompactionStrategy.Action.COMPACT) {
188      numUniqueKeys = numOfCells;
189    } else if (action != MemStoreCompactionStrategy.Action.MERGE_COUNT_UNIQUE_KEYS) {
190      numUniqueKeys = CellSet.UNKNOWN_NUM_UNIQUES;
191    }
192    // build the immutable CellSet
193    CellChunkMap ccm =
194      new CellChunkMap(getComparator(), chunks, 0, numOfCellsAfterCompaction, false);
195    this.setCellSet(null, new CellSet(ccm, numUniqueKeys)); // update the CellSet of this Segment
196  }
197
198  /*------------------------------------------------------------------------*/
199  // Create CellSet based on CellChunkMap from current ConcurrentSkipListMap based CellSet
200  // (without compacting iterator)
201  // This is a service for not-flat immutable segments
202  private void reinitializeCellSet(int numOfCells, KeyValueScanner segmentScanner,
203    CellSet oldCellSet, MemStoreSizing memstoreSizing, MemStoreCompactionStrategy.Action action) {
204    Cell curCell;
205    Chunk[] chunks = allocIndexChunks(numOfCells);
206
207    int currentChunkIdx = 0;
208    int offsetInCurentChunk = ChunkCreator.SIZEOF_CHUNK_HEADER;
209
210    int numUniqueKeys = 0;
211    Cell prev = null;
212    try {
213      while ((curCell = segmentScanner.next()) != null) {
214        assert (curCell instanceof ExtendedCell);
215        if (((ExtendedCell) curCell).getChunkId() == ExtendedCell.CELL_NOT_BASED_ON_CHUNK) {
216          // CellChunkMap assumes all cells are allocated on MSLAB.
217          // Therefore, cells which are not allocated on MSLAB initially,
218          // are copied into MSLAB here.
219          curCell = copyCellIntoMSLAB(curCell, memstoreSizing);
220        }
221        if (offsetInCurentChunk + ClassSize.CELL_CHUNK_MAP_ENTRY > chunks[currentChunkIdx].size) {
222          // continue to the next metadata chunk
223          currentChunkIdx++;
224          offsetInCurentChunk = ChunkCreator.SIZEOF_CHUNK_HEADER;
225        }
226        offsetInCurentChunk = createCellReference((ByteBufferKeyValue) curCell,
227          chunks[currentChunkIdx].getData(), offsetInCurentChunk);
228        if (action == MemStoreCompactionStrategy.Action.FLATTEN_COUNT_UNIQUE_KEYS) {
229          // counting number of unique keys
230          if (prev != null) {
231            if (!CellUtil.matchingRowColumn(prev, curCell)) {
232              numUniqueKeys++;
233            }
234          } else {
235            numUniqueKeys++;
236          }
237        }
238        prev = curCell;
239      }
240      if (action != MemStoreCompactionStrategy.Action.FLATTEN_COUNT_UNIQUE_KEYS) {
241        numUniqueKeys = CellSet.UNKNOWN_NUM_UNIQUES;
242      }
243    } catch (IOException ie) {
244      throw new IllegalStateException(ie);
245    } finally {
246      segmentScanner.close();
247    }
248
249    CellChunkMap ccm = new CellChunkMap(getComparator(), chunks, 0, numOfCells, false);
250    // update the CellSet of this Segment
251    this.setCellSet(oldCellSet, new CellSet(ccm, numUniqueKeys));
252  }
253
254  /*------------------------------------------------------------------------*/
255  // for a given cell, write the cell representation on the index chunk
256  private int createCellReference(ByteBufferKeyValue cell, ByteBuffer idxBuffer, int idxOffset) {
257    int offset = idxOffset;
258    int dataChunkID = cell.getChunkId();
259
260    offset = ByteBufferUtils.putInt(idxBuffer, offset, dataChunkID); // write data chunk id
261    offset = ByteBufferUtils.putInt(idxBuffer, offset, cell.getOffset()); // offset
262    offset = ByteBufferUtils.putInt(idxBuffer, offset, cell.getSerializedSize()); // length
263    offset = ByteBufferUtils.putLong(idxBuffer, offset, cell.getSequenceId()); // seqId
264
265    return offset;
266  }
267
268  private int calculateNumberOfChunks(int numOfCells, int chunkSize) {
269    int numOfCellsInChunk = calcNumOfCellsInChunk(chunkSize);
270    int numberOfChunks = numOfCells / numOfCellsInChunk;
271    if (numOfCells % numOfCellsInChunk != 0) { // if cells cannot be divided evenly between chunks
272      numberOfChunks++; // add one additional chunk
273    }
274    return numberOfChunks;
275  }
276
277  // Assuming we are going to use regular data chunks as index chunks,
278  // we check here how much free space will remain in the last allocated chunk
279  // (the least occupied one).
280  // If the percentage of its remaining free space is above the INDEX_CHUNK_UNUSED_SPACE
281  // threshold, then we will use index chunks (which are smaller) instead.
282  private ChunkCreator.ChunkType useIndexChunks(int numOfCells) {
283    int dataChunkSize = ChunkCreator.getInstance().getChunkSize();
284    int numOfCellsInChunk = calcNumOfCellsInChunk(dataChunkSize);
285    int cellsInLastChunk = numOfCells % numOfCellsInChunk;
286    if (cellsInLastChunk == 0) { // There is no free space in the last chunk and thus,
287      return ChunkCreator.ChunkType.DATA_CHUNK; // no need to use index chunks.
288    } else {
289      int chunkSpace = dataChunkSize - ChunkCreator.SIZEOF_CHUNK_HEADER;
290      int freeSpaceInLastChunk = chunkSpace - cellsInLastChunk * ClassSize.CELL_CHUNK_MAP_ENTRY;
291      if (freeSpaceInLastChunk > INDEX_CHUNK_UNUSED_SPACE_PRECENTAGE * chunkSpace) {
292        return ChunkCreator.ChunkType.INDEX_CHUNK;
293      }
294      return ChunkCreator.ChunkType.DATA_CHUNK;
295    }
296  }
297
298  private int calcNumOfCellsInChunk(int chunkSize) {
299    int chunkSpace = chunkSize - ChunkCreator.SIZEOF_CHUNK_HEADER;
300    int numOfCellsInChunk = chunkSpace / ClassSize.CELL_CHUNK_MAP_ENTRY;
301    return numOfCellsInChunk;
302  }
303
304  private Chunk[] allocIndexChunks(int numOfCells) {
305    // Decide whether to use regular or small chunks and then
306    // calculate how many chunks we will need for index
307
308    ChunkCreator.ChunkType chunkType = useIndexChunks(numOfCells);
309    int chunkSize = ChunkCreator.getInstance().getChunkSize(chunkType);
310    int numberOfChunks = calculateNumberOfChunks(numOfCells, chunkSize);
311    // all index Chunks are allocated from ChunkCreator
312    Chunk[] chunks = new Chunk[numberOfChunks];
313    // all index Chunks are allocated from ChunkCreator
314    for (int i = 0; i < numberOfChunks; i++) {
315      chunks[i] = this.getMemStoreLAB().getNewExternalChunk(chunkType);
316    }
317    return chunks;
318  }
319
320  private Cell copyCellIntoMSLAB(Cell cell, MemStoreSizing memstoreSizing) {
321    // Take care for a special case when a cell is copied from on-heap to (probably off-heap) MSLAB.
322    // The cell allocated as an on-heap JVM object (byte array) occupies slightly different
323    // amount of memory, than when the cell serialized and allocated on the MSLAB.
324    // Here, we update the heap size of the new segment only for the difference between object and
325    // serialized size. This is a decrease of the size as serialized cell is a bit smaller.
326    // The actual size of the cell is not added yet, and will be added (only in compaction)
327    // in initializeCellSet#updateMetaInfo().
328    long oldHeapSize = heapSizeChange(cell, true);
329    long oldOffHeapSize = offHeapSizeChange(cell, true);
330    long oldCellSize = getCellLength(cell);
331    cell = maybeCloneWithAllocator(cell, true);
332    long newHeapSize = heapSizeChange(cell, true);
333    long newOffHeapSize = offHeapSizeChange(cell, true);
334    long newCellSize = getCellLength(cell);
335    long heapOverhead = newHeapSize - oldHeapSize;
336    long offHeapOverhead = newOffHeapSize - oldOffHeapSize;
337    incMemStoreSize(newCellSize - oldCellSize, heapOverhead, offHeapOverhead, 0);
338    if (memstoreSizing != null) {
339      memstoreSizing.incMemStoreSize(newCellSize - oldCellSize, heapOverhead, offHeapOverhead, 0);
340    }
341    return cell;
342  }
343}