001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hbase; 020 021import org.apache.hadoop.hbase.io.HeapSize; 022import org.apache.yetus.audience.InterfaceAudience; 023 024 025/** 026 * The unit of storage in HBase consisting of the following fields: 027 * <br> 028 * <pre> 029 * 1) row 030 * 2) column family 031 * 3) column qualifier 032 * 4) timestamp 033 * 5) type 034 * 6) MVCC version 035 * 7) value 036 * </pre> 037 * <p> 038 * Uniqueness is determined by the combination of row, column family, column qualifier, 039 * timestamp, and type. 040 * </p> 041 * <p> 042 * The natural comparator will perform a bitwise comparison on row, column family, and column 043 * qualifier. Less intuitively, it will then treat the greater timestamp as the lesser value with 044 * the goal of sorting newer cells first. 045 * </p> 046 * <p> 047 * Cell implements Comparable<Cell> which is only meaningful when 048 * comparing to other keys in the 049 * same table. It uses CellComparator which does not work on the -ROOT- and hbase:meta tables. 050 * </p> 051 * <p> 052 * In the future, we may consider adding a boolean isOnHeap() method and a getValueBuffer() method 053 * that can be used to pass a value directly from an off-heap ByteBuffer to the network without 054 * copying into an on-heap byte[]. 055 * </p> 056 * <p> 057 * Historic note: the original Cell implementation (KeyValue) requires that all fields be encoded as 058 * consecutive bytes in the same byte[], whereas this interface allows fields to reside in separate 059 * byte[]'s. 060 * </p> 061 */ 062@InterfaceAudience.Public 063public interface Cell extends HeapSize { 064 065 //1) Row 066 067 /** 068 * Contiguous raw bytes that may start at any index in the containing array. Max length is 069 * Short.MAX_VALUE which is 32,767 bytes. 070 * @return The array containing the row bytes. 071 */ 072 byte[] getRowArray(); 073 074 /** 075 * @return Array index of first row byte 076 */ 077 int getRowOffset(); 078 079 /** 080 * @return Number of row bytes. Must be < rowArray.length - offset. 081 */ 082 short getRowLength(); 083 084 085 //2) Family 086 087 /** 088 * Contiguous bytes composed of legal HDFS filename characters which may start at any index in the 089 * containing array. Max length is Byte.MAX_VALUE, which is 127 bytes. 090 * @return the array containing the family bytes. 091 */ 092 byte[] getFamilyArray(); 093 094 /** 095 * @return Array index of first family byte 096 */ 097 int getFamilyOffset(); 098 099 /** 100 * @return Number of family bytes. Must be < familyArray.length - offset. 101 */ 102 byte getFamilyLength(); 103 104 105 //3) Qualifier 106 107 /** 108 * Contiguous raw bytes that may start at any index in the containing array. 109 * @return The array containing the qualifier bytes. 110 */ 111 byte[] getQualifierArray(); 112 113 /** 114 * @return Array index of first qualifier byte 115 */ 116 int getQualifierOffset(); 117 118 /** 119 * @return Number of qualifier bytes. Must be < qualifierArray.length - offset. 120 */ 121 int getQualifierLength(); 122 123 124 //4) Timestamp 125 126 /** 127 * @return Long value representing time at which this cell was "Put" into the row. Typically 128 * represents the time of insertion, but can be any value from 0 to Long.MAX_VALUE. 129 */ 130 long getTimestamp(); 131 132 133 //5) Type 134 135 /** 136 * @return The byte representation of the KeyValue.TYPE of this cell: one of Put, Delete, etc 137 * @deprecated As of HBase-2.0. Will be removed in HBase-3.0. Use {@link #getType()}. 138 */ 139 @Deprecated 140 byte getTypeByte(); 141 142 143 //6) SequenceId 144 145 /** 146 * A region-specific unique monotonically increasing sequence ID given to each Cell. It always 147 * exists for cells in the memstore but is not retained forever. It will be kept for 148 * {@link HConstants#KEEP_SEQID_PERIOD} days, but generally becomes irrelevant after the cell's 149 * row is no longer involved in any operations that require strict consistency. 150 * @return seqId (always > 0 if exists), or 0 if it no longer exists 151 * @deprecated As of HBase-2.0. Will be removed in HBase-3.0. 152 */ 153 @Deprecated 154 long getSequenceId(); 155 156 //7) Value 157 158 /** 159 * Contiguous raw bytes that may start at any index in the containing array. Max length is 160 * Integer.MAX_VALUE which is 2,147,483,647 bytes. 161 * @return The array containing the value bytes. 162 */ 163 byte[] getValueArray(); 164 165 /** 166 * @return Array index of first value byte 167 */ 168 int getValueOffset(); 169 170 /** 171 * @return Number of value bytes. Must be < valueArray.length - offset. 172 */ 173 int getValueLength(); 174 175 /** 176 * @return Serialized size (defaults to include tag length if has some tags). 177 */ 178 int getSerializedSize(); 179 180 /** 181 * Contiguous raw bytes representing tags that may start at any index in the containing array. 182 * @return the tags byte array 183 * @deprecated As of HBase-2.0. Will be removed in HBase-3.0. Tags are are now internal. 184 */ 185 @Deprecated 186 byte[] getTagsArray(); 187 188 /** 189 * @return the first offset where the tags start in the Cell 190 * @deprecated As of HBase-2.0. Will be removed in HBase-3.0. Tags are are now internal. 191 */ 192 @Deprecated 193 int getTagsOffset(); 194 195 /** 196 * HBase internally uses 2 bytes to store tags length in Cell. 197 * As the tags length is always a non-negative number, to make good use of the sign bit, 198 * the max of tags length is defined 2 * Short.MAX_VALUE + 1 = 65535. 199 * As a result, the return type is int, because a short is not capable of handling that. 200 * Please note that even if the return type is int, the max tags length is far 201 * less than Integer.MAX_VALUE. 202 * 203 * @return the total length of the tags in the Cell. 204 * @deprecated As of HBase-2.0. Will be removed in HBase-3.0. Tags are are now internal. 205 */ 206 @Deprecated 207 int getTagsLength(); 208 209 /** 210 * Returns the type of cell in a human readable format using {@link Type}. 211 * Note : This does not expose the internal types of Cells like {@link KeyValue.Type#Maximum} and 212 * {@link KeyValue.Type#Minimum} 213 * @return The data type this cell: one of Put, Delete, etc 214 */ 215 default Type getType() { 216 byte byteType = getTypeByte(); 217 Type t = Type.CODE_ARRAY[byteType & 0xff]; 218 if (t != null) { 219 return t; 220 } 221 throw new UnsupportedOperationException("Invalid type of cell " + byteType); 222 } 223 224 /** 225 * The valid types for user to build the cell. Currently, This is subset of {@link KeyValue.Type}. 226 */ 227 enum Type { 228 Put((byte) 4), 229 230 Delete((byte) 8), 231 232 DeleteFamilyVersion((byte) 10), 233 234 DeleteColumn((byte) 12), 235 236 DeleteFamily((byte) 14); 237 238 private final byte code; 239 240 Type(final byte c) { 241 this.code = c; 242 } 243 244 public byte getCode() { 245 return this.code; 246 } 247 248 private static final Type[] CODE_ARRAY = new Type[256]; 249 250 static { 251 for (Type t : Type.values()) { 252 CODE_ARRAY[t.code & 0xff] = t; 253 } 254 } 255 } 256}