001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.io.encoding; 019 020import java.io.IOException; 021import java.io.OutputStream; 022import org.apache.hadoop.hbase.util.Bytes; 023import org.apache.yetus.audience.InterfaceAudience; 024 025/** 026 * Provide access to all data block encoding algorithms. All of the algorithms are required to have 027 * unique id which should <b>NEVER</b> be changed. If you want to add a new algorithm/version, 028 * assign it a new id. Announce the new id in the HBase mailing list to prevent collisions. 029 */ 030@InterfaceAudience.Public 031@SuppressWarnings("ImmutableEnumChecker") 032public enum DataBlockEncoding { 033 034 /** Disable data block encoding. */ 035 NONE(0, null), 036 // id 1 is reserved for the BITSET algorithm to be added later 037 PREFIX(2, "org.apache.hadoop.hbase.io.encoding.PrefixKeyDeltaEncoder"), 038 DIFF(3, "org.apache.hadoop.hbase.io.encoding.DiffKeyDeltaEncoder"), 039 FAST_DIFF(4, "org.apache.hadoop.hbase.io.encoding.FastDiffDeltaEncoder"), 040 // id 5 is reserved for the COPY_KEY algorithm for benchmarking 041 // COPY_KEY(5, "org.apache.hadoop.hbase.io.encoding.CopyKeyDataBlockEncoder"), 042 // PREFIX_TREE(6, "org.apache.hadoop.hbase.codec.prefixtree.PrefixTreeCodec"), 043 ROW_INDEX_V1(7, "org.apache.hadoop.hbase.io.encoding.RowIndexCodecV1"); 044 045 private final short id; 046 private final byte[] idInBytes; 047 private DataBlockEncoder encoder; 048 private final String encoderCls; 049 050 public static final int ID_SIZE = Bytes.SIZEOF_SHORT; 051 052 /** Maps data block encoding ids to enum instances. */ 053 private static DataBlockEncoding[] idArray = new DataBlockEncoding[Byte.MAX_VALUE + 1]; 054 055 static { 056 for (DataBlockEncoding algo : values()) { 057 if (idArray[algo.id] != null) { 058 throw new RuntimeException( 059 String.format("Two data block encoder algorithms '%s' and '%s' have " + "the same id %d", 060 idArray[algo.id].toString(), algo.toString(), (int) algo.id)); 061 } 062 idArray[algo.id] = algo; 063 } 064 } 065 066 private DataBlockEncoding(int id, String encoderClsName) { 067 if (id < 0 || id > Byte.MAX_VALUE) { 068 throw new AssertionError("Data block encoding algorithm id is out of range: " + id); 069 } 070 this.id = (short) id; 071 this.idInBytes = Bytes.toBytes(this.id); 072 if (idInBytes.length != ID_SIZE) { 073 // White this may seem redundant, if we accidentally serialize 074 // the id as e.g. an int instead of a short, all encoders will break. 075 throw new RuntimeException("Unexpected length of encoder ID byte " + "representation: " 076 + Bytes.toStringBinary(idInBytes)); 077 } 078 this.encoderCls = encoderClsName; 079 } 080 081 /** Returns name converted to bytes. */ 082 public byte[] getNameInBytes() { 083 return Bytes.toBytes(toString()); 084 } 085 086 /** Returns The id of a data block encoder. */ 087 public short getId() { 088 return id; 089 } 090 091 /** 092 * Writes id in bytes. 093 * @param stream where the id should be written. 094 */ 095 public void writeIdInBytes(OutputStream stream) throws IOException { 096 stream.write(idInBytes); 097 } 098 099 /** 100 * Writes id bytes to the given array starting from offset. 101 * @param dest output array 102 * @param offset starting offset of the output array n 103 */ 104 // System.arraycopy is static native. Nothing we can do this until we have minimum JDK 9. 105 @SuppressWarnings("UnsafeFinalization") 106 public void writeIdInBytes(byte[] dest, int offset) throws IOException { 107 System.arraycopy(idInBytes, 0, dest, offset, ID_SIZE); 108 } 109 110 /** 111 * Return new data block encoder for given algorithm type. 112 * @return data block encoder if algorithm is specified, null if none is selected. 113 */ 114 public DataBlockEncoder getEncoder() { 115 if (encoder == null && id != 0) { 116 // lazily create the encoder 117 encoder = createEncoder(encoderCls); 118 } 119 return encoder; 120 } 121 122 /** 123 * Find and create data block encoder for given id; 124 * @param encoderId id of data block encoder. 125 * @return Newly created data block encoder. 126 */ 127 public static DataBlockEncoder getDataBlockEncoderById(short encoderId) { 128 return getEncodingById(encoderId).getEncoder(); 129 } 130 131 /** 132 * Find and return the name of data block encoder for the given id. 133 * @param encoderId id of data block encoder 134 * @return name, same as used in options in column family 135 */ 136 public static String getNameFromId(short encoderId) { 137 return getEncodingById(encoderId).toString(); 138 } 139 140 /** 141 * Check if given encoder has this id. 142 * @param encoder encoder which id will be checked 143 * @param encoderId id which we except 144 * @return true if id is right for given encoder, false otherwise 145 * @exception IllegalArgumentException thrown when there is no matching data block encoder 146 */ 147 public static boolean isCorrectEncoder(DataBlockEncoder encoder, short encoderId) { 148 DataBlockEncoding algorithm = getEncodingById(encoderId); 149 String encoderCls = encoder.getClass().getName(); 150 return encoderCls.equals(algorithm.encoderCls); 151 } 152 153 public static DataBlockEncoding getEncodingById(short dataBlockEncodingId) { 154 DataBlockEncoding algorithm = null; 155 if (dataBlockEncodingId >= 0 && dataBlockEncodingId <= Byte.MAX_VALUE) { 156 algorithm = idArray[dataBlockEncodingId]; 157 } 158 if (algorithm == null) { 159 throw new IllegalArgumentException(String 160 .format("There is no data block encoder for given id '%d'", (int) dataBlockEncodingId)); 161 } 162 return algorithm; 163 } 164 165 static DataBlockEncoder createEncoder(String fullyQualifiedClassName) { 166 try { 167 return Class.forName(fullyQualifiedClassName).asSubclass(DataBlockEncoder.class) 168 .getDeclaredConstructor().newInstance(); 169 } catch (Exception e) { 170 throw new RuntimeException(e); 171 } 172 } 173 174}