001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import org.apache.hadoop.hbase.io.HeapSize;
021import org.apache.yetus.audience.InterfaceAudience;
022
023/**
024 * The unit of storage in HBase consisting of the following fields: <br>
025 *
026 * <pre>
027 * 1) row
028 * 2) column family
029 * 3) column qualifier
030 * 4) timestamp
031 * 5) type
032 * 6) MVCC version
033 * 7) value
034 * </pre>
035 * <p>
036 * Uniqueness is determined by the combination of row, column family, column qualifier, timestamp,
037 * and type.
038 * </p>
039 * <p>
040 * The natural comparator will perform a bitwise comparison on row, column family, and column
041 * qualifier. Less intuitively, it will then treat the greater timestamp as the lesser value with
042 * the goal of sorting newer cells first.
043 * </p>
044 * <p>
045 * Cell implements Comparable&lt;Cell&gt; which is only meaningful when comparing to other keys in
046 * the same table. It uses CellComparator which does not work on the -ROOT- and hbase:meta tables.
047 * </p>
048 * <p>
049 * In the future, we may consider adding a boolean isOnHeap() method and a getValueBuffer() method
050 * that can be used to pass a value directly from an off-heap ByteBuffer to the network without
051 * copying into an on-heap byte[].
052 * </p>
053 * <p>
054 * Historic note: the original Cell implementation (KeyValue) requires that all fields be encoded as
055 * consecutive bytes in the same byte[], whereas this interface allows fields to reside in separate
056 * byte[]'s.
057 * </p>
058 */
059@InterfaceAudience.Public
060public interface Cell extends HeapSize {
061
062  // 1) Row
063
064  /**
065   * Contiguous raw bytes that may start at any index in the containing array. Max length is
066   * Short.MAX_VALUE which is 32,767 bytes.
067   * @return The array containing the row bytes.
068   */
069  byte[] getRowArray();
070
071  /** Returns Array index of first row byte */
072  int getRowOffset();
073
074  /** Returns Number of row bytes. Must be &lt; rowArray.length - offset. */
075  short getRowLength();
076
077  // 2) Family
078
079  /**
080   * Contiguous bytes composed of legal HDFS filename characters which may start at any index in the
081   * containing array. Max length is Byte.MAX_VALUE, which is 127 bytes.
082   * @return the array containing the family bytes.
083   */
084  byte[] getFamilyArray();
085
086  /** Returns Array index of first family byte */
087  int getFamilyOffset();
088
089  /** Returns Number of family bytes. Must be &lt; familyArray.length - offset. */
090  byte getFamilyLength();
091
092  // 3) Qualifier
093
094  /**
095   * Contiguous raw bytes that may start at any index in the containing array.
096   * @return The array containing the qualifier bytes.
097   */
098  byte[] getQualifierArray();
099
100  /** Returns Array index of first qualifier byte */
101  int getQualifierOffset();
102
103  /** Returns Number of qualifier bytes. Must be &lt; qualifierArray.length - offset. */
104  int getQualifierLength();
105
106  // 4) Timestamp
107
108  /**
109   * Return a long value representing time at which this cell was "Put" into the row. Typically
110   * represents the time of insertion, but can be any value from 0 to Long.MAX_VALUE.
111   */
112  long getTimestamp();
113
114  // 5) Type
115
116  /**
117   * Return the byte representation of the KeyValue.TYPE of this cell: one of Put, Delete, etc
118   * @deprecated As of HBase-2.0. Will be removed in HBase-3.0. Use {@link #getType()}.
119   */
120  @Deprecated
121  byte getTypeByte();
122
123  // 6) SequenceId
124
125  /**
126   * A region-specific unique monotonically increasing sequence ID given to each Cell. It always
127   * exists for cells in the memstore but is not retained forever. It will be kept for
128   * {@link HConstants#KEEP_SEQID_PERIOD} days, but generally becomes irrelevant after the cell's
129   * row is no longer involved in any operations that require strict consistency.
130   * @return seqId (always &gt; 0 if exists), or 0 if it no longer exists
131   * @deprecated As of HBase-2.0. Will be removed in HBase-3.0.
132   */
133  @Deprecated
134  long getSequenceId();
135
136  // 7) Value
137
138  /**
139   * Contiguous raw bytes that may start at any index in the containing array. Max length is
140   * Integer.MAX_VALUE which is 2,147,483,647 bytes.
141   * @return The array containing the value bytes.
142   */
143  byte[] getValueArray();
144
145  /** Returns Array index of first value byte */
146  int getValueOffset();
147
148  /** Returns Number of value bytes. Must be &lt; valueArray.length - offset. */
149  int getValueLength();
150
151  /** Returns Serialized size (defaults to include tag length if has some tags). */
152  int getSerializedSize();
153
154  /**
155   * Contiguous raw bytes representing tags that may start at any index in the containing array.
156   * @return the tags byte array
157   * @deprecated As of HBase-2.0. Will be removed in HBase-3.0. Tags are are now internal.
158   */
159  @Deprecated
160  byte[] getTagsArray();
161
162  /**
163   * Return the first offset where the tags start in the Cell
164   * @deprecated As of HBase-2.0. Will be removed in HBase-3.0. Tags are are now internal.
165   */
166  @Deprecated
167  int getTagsOffset();
168
169  /**
170   * HBase internally uses 2 bytes to store tags length in Cell. As the tags length is always a
171   * non-negative number, to make good use of the sign bit, the max of tags length is defined 2 *
172   * Short.MAX_VALUE + 1 = 65535. As a result, the return type is int, because a short is not
173   * capable of handling that. Please note that even if the return type is int, the max tags length
174   * is far less than Integer.MAX_VALUE.
175   * @return the total length of the tags in the Cell.
176   * @deprecated As of HBase-2.0. Will be removed in HBase-3.0. Tags are are now internal.
177   */
178  @Deprecated
179  int getTagsLength();
180
181  /**
182   * Returns the type of cell in a human readable format using {@link Type}. Note : This does not
183   * expose the internal types of Cells like {@link KeyValue.Type#Maximum} and
184   * {@link KeyValue.Type#Minimum}
185   * @return The data type this cell: one of Put, Delete, etc
186   */
187  default Type getType() {
188    byte byteType = getTypeByte();
189    Type t = Type.CODE_ARRAY[byteType & 0xff];
190    if (t != null) {
191      return t;
192    }
193    throw new UnsupportedOperationException("Invalid type of cell " + byteType);
194  }
195
196  /**
197   * The valid types for user to build the cell. Currently, This is subset of {@link KeyValue.Type}.
198   */
199  enum Type {
200    Put((byte) 4),
201
202    Delete((byte) 8),
203
204    DeleteFamilyVersion((byte) 10),
205
206    DeleteColumn((byte) 12),
207
208    DeleteFamily((byte) 14);
209
210    private final byte code;
211
212    Type(final byte c) {
213      this.code = c;
214    }
215
216    public byte getCode() {
217      return this.code;
218    }
219
220    private static final Type[] CODE_ARRAY = new Type[256];
221
222    static {
223      for (Type t : Type.values()) {
224        CODE_ARRAY[t.code & 0xff] = t;
225      }
226    }
227  }
228}