001/*
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.regionserver;
020
021import java.util.Collections;
022import java.util.Iterator;
023import java.util.List;
024import java.util.Objects;
025import java.util.SortedSet;
026import java.util.concurrent.atomic.AtomicReference;
027
028import org.apache.hadoop.hbase.Cell;
029import org.apache.hadoop.hbase.CellComparator;
030import org.apache.hadoop.hbase.PrivateCellUtil;
031import org.apache.hadoop.hbase.KeyValue;
032import org.apache.hadoop.hbase.KeyValueUtil;
033import org.apache.hadoop.hbase.io.TimeRange;
034import org.apache.hadoop.hbase.util.Bytes;
035import org.apache.hadoop.hbase.util.ClassSize;
036import org.apache.yetus.audience.InterfaceAudience;
037import org.slf4j.Logger;
038import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
039
040/**
041 * This is an abstraction of a segment maintained in a memstore, e.g., the active
042 * cell set or its snapshot.
043 *
044 * This abstraction facilitates the management of the compaction pipeline and the shifts of these
045 * segments from active set to snapshot set in the default implementation.
046 */
047@InterfaceAudience.Private
048public abstract class Segment implements MemStoreSizing {
049
050  public final static long FIXED_OVERHEAD = ClassSize.align(ClassSize.OBJECT
051      + 5 * ClassSize.REFERENCE // cellSet, comparator, memStoreLAB, memStoreSizing,
052                                // and timeRangeTracker
053      + Bytes.SIZEOF_LONG // minSequenceId
054      + Bytes.SIZEOF_BOOLEAN); // tagsPresent
055  public final static long DEEP_OVERHEAD = FIXED_OVERHEAD + ClassSize.ATOMIC_REFERENCE
056      + ClassSize.CELL_SET + 2 * ClassSize.ATOMIC_LONG;
057
058  private AtomicReference<CellSet> cellSet= new AtomicReference<>();
059  private final CellComparator comparator;
060  protected long minSequenceId;
061  private MemStoreLAB memStoreLAB;
062  // Sum of sizes of all Cells added to this Segment. Cell's HeapSize is considered. This is not
063  // including the heap overhead of this class.
064  protected final MemStoreSizing memStoreSizing;
065  protected final TimeRangeTracker timeRangeTracker;
066  protected volatile boolean tagsPresent;
067
068  // Empty constructor to be used when Segment is used as interface,
069  // and there is no need in true Segments state
070  protected Segment(CellComparator comparator, TimeRangeTracker trt) {
071    this.comparator = comparator;
072    // Do we need to be thread safe always? What if ImmutableSegment?
073    // DITTO for the TimeRangeTracker below.
074    this.memStoreSizing = new ThreadSafeMemStoreSizing();
075    this.timeRangeTracker = trt;
076  }
077
078  protected Segment(CellComparator comparator, List<ImmutableSegment> segments,
079      TimeRangeTracker trt) {
080    long dataSize = 0;
081    long heapSize = 0;
082    long OffHeapSize = 0;
083    int cellsCount = 0;
084    for (Segment segment : segments) {
085      MemStoreSize memStoreSize = segment.getMemStoreSize();
086      dataSize += memStoreSize.getDataSize();
087      heapSize += memStoreSize.getHeapSize();
088      OffHeapSize += memStoreSize.getOffHeapSize();
089      cellsCount += memStoreSize.getCellsCount();
090    }
091    this.comparator = comparator;
092    // Do we need to be thread safe always? What if ImmutableSegment?
093    // DITTO for the TimeRangeTracker below.
094    this.memStoreSizing = new ThreadSafeMemStoreSizing(dataSize, heapSize, OffHeapSize, cellsCount);
095    this.timeRangeTracker = trt;
096  }
097
098  // This constructor is used to create empty Segments.
099  protected Segment(CellSet cellSet, CellComparator comparator, MemStoreLAB memStoreLAB,
100      TimeRangeTracker trt) {
101    this.cellSet.set(cellSet);
102    this.comparator = comparator;
103    this.minSequenceId = Long.MAX_VALUE;
104    this.memStoreLAB = memStoreLAB;
105    // Do we need to be thread safe always? What if ImmutableSegment?
106    // DITTO for the TimeRangeTracker below.
107    this.memStoreSizing = new ThreadSafeMemStoreSizing();
108    this.tagsPresent = false;
109    this.timeRangeTracker = trt;
110  }
111
112  protected Segment(Segment segment) {
113    this.cellSet.set(segment.getCellSet());
114    this.comparator = segment.getComparator();
115    this.minSequenceId = segment.getMinSequenceId();
116    this.memStoreLAB = segment.getMemStoreLAB();
117    this.memStoreSizing = new ThreadSafeMemStoreSizing(segment.memStoreSizing.getMemStoreSize());
118    this.tagsPresent = segment.isTagsPresent();
119    this.timeRangeTracker = segment.getTimeRangeTracker();
120  }
121
122  /**
123   * Creates the scanner for the given read point
124   * @return a scanner for the given read point
125   */
126  protected KeyValueScanner getScanner(long readPoint) {
127    return new SegmentScanner(this, readPoint);
128  }
129
130  /**
131   * Creates the scanner for the given read point, and a specific order in a list
132   * @return a scanner for the given read point
133   */
134  public KeyValueScanner getScanner(long readPoint, long order) {
135    return new SegmentScanner(this, readPoint, order);
136  }
137
138  public List<KeyValueScanner> getScanners(long readPoint, long order) {
139    return Collections.singletonList(new SegmentScanner(this, readPoint, order));
140  }
141
142  /**
143   * @return whether the segment has any cells
144   */
145  public boolean isEmpty() {
146    return getCellSet().isEmpty();
147  }
148
149
150  /**
151   * Closing a segment before it is being discarded
152   */
153  public void close() {
154    if (this.memStoreLAB != null) {
155      this.memStoreLAB.close();
156    }
157    // do not set MSLab to null as scanners may still be reading the data here and need to decrease
158    // the counter when they finish
159  }
160
161  /**
162   * If the segment has a memory allocator the cell is being cloned to this space, and returned;
163   * otherwise the given cell is returned
164   *
165   * When a cell's size is too big (bigger than maxAlloc), it is not allocated on MSLAB.
166   * Since the process of flattening to CellChunkMap assumes that all cells
167   * are allocated on MSLAB, during this process, the input parameter
168   * forceCloneOfBigCell is set to 'true' and the cell is copied into MSLAB.
169   *
170   * @return either the given cell or its clone
171   */
172  public Cell maybeCloneWithAllocator(Cell cell, boolean forceCloneOfBigCell) {
173    if (this.memStoreLAB == null) {
174      return cell;
175    }
176
177    Cell cellFromMslab;
178    if (forceCloneOfBigCell) {
179      cellFromMslab = this.memStoreLAB.forceCopyOfBigCellInto(cell);
180    } else {
181      cellFromMslab = this.memStoreLAB.copyCellInto(cell);
182    }
183    return (cellFromMslab != null) ? cellFromMslab : cell;
184  }
185
186  /**
187   * Get cell length after serialized in {@link KeyValue}
188   */
189  @VisibleForTesting
190  static int getCellLength(Cell cell) {
191    return KeyValueUtil.length(cell);
192  }
193
194  public boolean shouldSeek(TimeRange tr, long oldestUnexpiredTS) {
195    return !isEmpty()
196        && (tr.isAllTime() || timeRangeTracker.includesTimeRange(tr))
197        && timeRangeTracker.getMax() >= oldestUnexpiredTS;
198  }
199
200  public boolean isTagsPresent() {
201    return tagsPresent;
202  }
203
204  public void incScannerCount() {
205    if (this.memStoreLAB != null) {
206      this.memStoreLAB.incScannerCount();
207    }
208  }
209
210  public void decScannerCount() {
211    if (this.memStoreLAB != null) {
212      this.memStoreLAB.decScannerCount();
213    }
214  }
215
216  /**
217   * Setting the CellSet of the segment - used only for flat immutable segment for setting
218   * immutable CellSet after its creation in immutable segment constructor
219   * @return this object
220   */
221
222  protected Segment setCellSet(CellSet cellSetOld, CellSet cellSetNew) {
223    this.cellSet.compareAndSet(cellSetOld, cellSetNew);
224    return this;
225  }
226
227  @Override
228  public MemStoreSize getMemStoreSize() {
229    return this.memStoreSizing.getMemStoreSize();
230  }
231
232  @Override
233  public long getDataSize() {
234    return this.memStoreSizing.getDataSize();
235  }
236
237  @Override
238  public long getHeapSize() {
239    return this.memStoreSizing.getHeapSize();
240  }
241
242  @Override
243  public long getOffHeapSize() {
244    return this.memStoreSizing.getOffHeapSize();
245  }
246
247  @Override
248  public int getCellsCount() {
249    return this.memStoreSizing.getCellsCount();
250  }
251
252  @Override
253  public long incMemStoreSize(long delta, long heapOverhead, long offHeapOverhead, int cellsCount) {
254    return this.memStoreSizing.incMemStoreSize(delta, heapOverhead, offHeapOverhead, cellsCount);
255  }
256
257  public long getMinSequenceId() {
258    return minSequenceId;
259  }
260
261  public TimeRangeTracker getTimeRangeTracker() {
262    return this.timeRangeTracker;
263  }
264
265  //*** Methods for SegmentsScanner
266  public Cell last() {
267    return getCellSet().last();
268  }
269
270  public Iterator<Cell> iterator() {
271    return getCellSet().iterator();
272  }
273
274  public SortedSet<Cell> headSet(Cell firstKeyOnRow) {
275    return getCellSet().headSet(firstKeyOnRow);
276  }
277
278  public int compare(Cell left, Cell right) {
279    return getComparator().compare(left, right);
280  }
281
282  public int compareRows(Cell left, Cell right) {
283    return getComparator().compareRows(left, right);
284  }
285
286  /**
287   * @return a set of all cells in the segment
288   */
289  protected CellSet getCellSet() {
290    return cellSet.get();
291  }
292
293  /**
294   * Returns the Cell comparator used by this segment
295   * @return the Cell comparator used by this segment
296   */
297  protected CellComparator getComparator() {
298    return comparator;
299  }
300
301  protected void internalAdd(Cell cell, boolean mslabUsed, MemStoreSizing memstoreSizing) {
302    boolean succ = getCellSet().add(cell);
303    updateMetaInfo(cell, succ, mslabUsed, memstoreSizing);
304  }
305
306  protected void updateMetaInfo(Cell cellToAdd, boolean succ, boolean mslabUsed,
307      MemStoreSizing memstoreSizing) {
308    long cellSize = 0;
309    int cellsCount = succ ? 1 : 0;
310    // If there's already a same cell in the CellSet and we are using MSLAB, we must count in the
311    // MSLAB allocation size as well, or else there will be memory leak (occupied heap size larger
312    // than the counted number)
313    boolean sizeChanged = succ || mslabUsed;
314    if (sizeChanged) {
315      cellSize = getCellLength(cellToAdd);
316    }
317    // same as above, if MSLAB is used, we need to inc the heap/offheap size, otherwise there will
318    // be a memory miscount. Since we are now use heapSize + offHeapSize to decide whether a flush
319    // is needed.
320    long heapSize = heapSizeChange(cellToAdd, sizeChanged);
321    long offHeapSize = offHeapSizeChange(cellToAdd, sizeChanged);
322    incMemStoreSize(cellSize, heapSize, offHeapSize, cellsCount);
323    if (memstoreSizing != null) {
324      memstoreSizing.incMemStoreSize(cellSize, heapSize, offHeapSize, cellsCount);
325    }
326    getTimeRangeTracker().includeTimestamp(cellToAdd);
327    minSequenceId = Math.min(minSequenceId, cellToAdd.getSequenceId());
328    // In no tags case this NoTagsKeyValue.getTagsLength() is a cheap call.
329    // When we use ACL CP or Visibility CP which deals with Tags during
330    // mutation, the TagRewriteCell.getTagsLength() is a cheaper call. We do not
331    // parse the byte[] to identify the tags length.
332    if (cellToAdd.getTagsLength() > 0) {
333      tagsPresent = true;
334    }
335  }
336
337  protected void updateMetaInfo(Cell cellToAdd, boolean succ, MemStoreSizing memstoreSizing) {
338    updateMetaInfo(cellToAdd, succ, (getMemStoreLAB()!=null), memstoreSizing);
339  }
340
341  /**
342   * @return The increase in heap size because of this cell addition. This includes this cell POJO's
343   *         heap size itself and additional overhead because of addition on to CSLM.
344   */
345  protected long heapSizeChange(Cell cell, boolean succ) {
346    long res = 0;
347    if (succ) {
348      boolean onHeap = true;
349      MemStoreLAB memStoreLAB = getMemStoreLAB();
350      if(memStoreLAB != null) {
351        onHeap = memStoreLAB.isOnHeap();
352      }
353      res += indexEntryOnHeapSize(onHeap);
354      if(onHeap) {
355        res += PrivateCellUtil.estimatedSizeOfCell(cell);
356      }
357      res = ClassSize.align(res);
358    }
359    return res;
360  }
361
362  protected long offHeapSizeChange(Cell cell, boolean succ) {
363    long res = 0;
364    if (succ) {
365      boolean offHeap = false;
366      MemStoreLAB memStoreLAB = getMemStoreLAB();
367      if(memStoreLAB != null) {
368        offHeap = memStoreLAB.isOffHeap();
369      }
370      res += indexEntryOffHeapSize(offHeap);
371      if(offHeap) {
372        res += PrivateCellUtil.estimatedSizeOfCell(cell);
373      }
374      res = ClassSize.align(res);
375    }
376    return res;
377  }
378
379  protected long indexEntryOnHeapSize(boolean onHeap) {
380    // in most cases index is allocated on-heap
381    // override this method when it is not always the case, e.g., in CCM
382    return indexEntrySize();
383  }
384
385  protected long indexEntryOffHeapSize(boolean offHeap) {
386    // in most cases index is allocated on-heap
387    // override this method when it is not always the case, e.g., in CCM
388    return 0;
389  }
390
391  protected abstract long indexEntrySize();
392
393  /**
394   * Returns a subset of the segment cell set, which starts with the given cell
395   * @param firstCell a cell in the segment
396   * @return a subset of the segment cell set, which starts with the given cell
397   */
398  protected SortedSet<Cell> tailSet(Cell firstCell) {
399    return getCellSet().tailSet(firstCell);
400  }
401
402  @VisibleForTesting
403  MemStoreLAB getMemStoreLAB() {
404    return memStoreLAB;
405  }
406
407  // Debug methods
408  /**
409   * Dumps all cells of the segment into the given log
410   */
411  void dump(Logger log) {
412    for (Cell cell: getCellSet()) {
413      log.debug(Objects.toString(cell));
414    }
415  }
416
417  @Override
418  public String toString() {
419    String res = "type=" + this.getClass().getSimpleName() + ", ";
420    res += "empty=" + (isEmpty()? "yes": "no") + ", ";
421    res += "cellCount=" + getCellsCount() + ", ";
422    res += "cellSize=" + getDataSize() + ", ";
423    res += "totalHeapSize=" + getHeapSize() + ", ";
424    res += "min timestamp=" + timeRangeTracker.getMin() + ", ";
425    res += "max timestamp=" + timeRangeTracker.getMax();
426    return res;
427  }
428}