001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018 019package org.apache.hadoop.hbase.io.util; 020 021import java.io.IOException; 022import java.io.OutputStream; 023import java.nio.ByteBuffer; 024 025import org.apache.hadoop.hbase.util.ByteBufferUtils; 026import org.apache.yetus.audience.InterfaceAudience; 027 028/** 029 * Dictionary interface 030 * 031 * Dictionary indexes should be either bytes or shorts, only positive. (The 032 * first bit is reserved for detecting whether something is compressed or not). 033 */ 034@InterfaceAudience.Private 035public interface Dictionary { 036 byte NOT_IN_DICTIONARY = -1; 037 038 void init(int initialSize); 039 /** 040 * Gets an entry from the dictionary. 041 * 042 * @param idx index of the entry 043 * @return the entry, or null if non existent 044 */ 045 byte[] getEntry(short idx); 046 047 /** 048 * Finds the index of an entry. 049 * If no entry found, we add it. 050 * 051 * @param data the byte array that we're looking up 052 * @param offset Offset into <code>data</code> to add to Dictionary. 053 * @param length Length beyond <code>offset</code> that comprises entry; must be > 0. 054 * @return the index of the entry, or {@link #NOT_IN_DICTIONARY} if not found 055 */ 056 short findEntry(byte[] data, int offset, int length); 057 058 /** 059 * Finds the index of an entry. 060 * If no entry found, we add it. 061 * @param data the ByteBuffer that we're looking up 062 * @param offset Offset into <code>data</code> to add to Dictionary. 063 * @param length Length beyond <code>offset</code> that comprises entry; must be > 0. 064 * @return the index of the entry, or {@link #NOT_IN_DICTIONARY} if not found 065 */ 066 short findEntry(ByteBuffer data, int offset, int length); 067 068 /** 069 * Adds an entry to the dictionary. 070 * Be careful using this method. It will add an entry to the 071 * dictionary even if it already has an entry for the same data. 072 * Call {{@link #findEntry(byte[], int, int)}} to add without duplicating 073 * dictionary entries. 074 * 075 * @param data the entry to add 076 * @param offset Offset into <code>data</code> to add to Dictionary. 077 * @param length Length beyond <code>offset</code> that comprises entry; must be > 0. 078 * @return the index of the entry 079 */ 080 short addEntry(byte[] data, int offset, int length); 081 082 /** 083 * Flushes the dictionary, empties all values. 084 */ 085 void clear(); 086 087 /** 088 * Helper methods to write the dictionary data to the OutputStream 089 * @param out the outputstream to which data needs to be written 090 * @param data the data to be written in byte[] 091 * @param offset the offset 092 * @param length length to be written 093 * @param dict the dictionary whose contents are to written 094 * @throws IOException 095 */ 096 public static void write(OutputStream out, byte[] data, int offset, int length, Dictionary dict) 097 throws IOException { 098 short dictIdx = Dictionary.NOT_IN_DICTIONARY; 099 if (dict != null) { 100 dictIdx = dict.findEntry(data, offset, length); 101 } 102 if (dictIdx == Dictionary.NOT_IN_DICTIONARY) { 103 out.write(Dictionary.NOT_IN_DICTIONARY); 104 StreamUtils.writeRawVInt32(out, length); 105 out.write(data, offset, length); 106 } else { 107 StreamUtils.writeShort(out, dictIdx); 108 } 109 } 110 111 /** 112 * Helper methods to write the dictionary data to the OutputStream 113 * @param out the outputstream to which data needs to be written 114 * @param data the data to be written in ByteBuffer 115 * @param offset the offset 116 * @param length length to be written 117 * @param dict the dictionary whose contents are to written 118 * @throws IOException 119 */ 120 public static void write(OutputStream out, ByteBuffer data, int offset, int length, 121 Dictionary dict) throws IOException { 122 short dictIdx = Dictionary.NOT_IN_DICTIONARY; 123 if (dict != null) { 124 dictIdx = dict.findEntry(data, offset, length); 125 } 126 if (dictIdx == Dictionary.NOT_IN_DICTIONARY) { 127 out.write(Dictionary.NOT_IN_DICTIONARY); 128 StreamUtils.writeRawVInt32(out, length); 129 ByteBufferUtils.copyBufferToStream(out, data, offset, length); 130 } else { 131 StreamUtils.writeShort(out, dictIdx); 132 } 133 } 134}