/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.io.util;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import org.apache.hadoop.hbase.util.ByteBufferUtils;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Dictionary interface. Dictionary indexes should be either bytes or shorts, and only positive,
 * because the first bit is reserved for detecting whether something is compressed or not.
 */
@InterfaceAudience.Private
public interface Dictionary {
  /**
   * Sentinel reported by the lookup methods when an entry is not in the dictionary. The static
   * {@code write} helpers also emit it as a single marker byte ahead of data that is written out
   * in full rather than as a dictionary index.
   */
  byte NOT_IN_DICTIONARY = -1;

  /**
   * Initializes the dictionary.
   * @param initialSize sizing hint for the dictionary; presumably its initial capacity, but the
   *                    exact meaning is left to the implementation
   */
  void init(int initialSize);

  /**
   * Gets an entry from the dictionary.
   * @param idx index of the entry
   * @return the entry, or null if non existent
   */
  byte[] getEntry(short idx);

  /**
   * Looks up the index of an entry held in a byte array. If no entry is found, it is added.
   * @param data   the byte array that we're looking up
   * @param offset offset into <code>data</code> at which the entry starts
   * @param length number of bytes beyond <code>offset</code> that comprise the entry; must be
   *               greater than 0
   * @return the index of the entry, or {@link #NOT_IN_DICTIONARY} if not found
   */
  short findEntry(byte[] data, int offset, int length);

  /**
   * Looks up the index of an entry held in a ByteBuffer. If no entry is found, it is added.
   * @param data   the ByteBuffer that we're looking up
   * @param offset offset into <code>data</code> at which the entry starts
   * @param length number of bytes beyond <code>offset</code> that comprise the entry; must be
   *               greater than 0
   * @return the index of the entry, or {@link #NOT_IN_DICTIONARY} if not found
   */
  short findEntry(ByteBuffer data, int offset, int length);

  /**
   * Adds an entry to the dictionary. Be careful using this method: it adds an entry even if the
   * dictionary already holds one for the same data. Call {@link #findEntry(byte[], int, int)} to
   * add without duplicating dictionary entries.
   * @param data   the entry to add
   * @param offset offset into <code>data</code> at which the entry starts
   * @param length number of bytes beyond <code>offset</code> that comprise the entry; must be
   *               greater than 0
   * @return the index of the entry
   */
  short addEntry(byte[] data, int offset, int length);

  /**
   * Flushes the dictionary, emptying all values.
   */
  void clear();

  /**
   * Helper that writes byte[] data to the OutputStream, going through the dictionary when one is
   * supplied. If <code>dict</code> is null or reports {@link #NOT_IN_DICTIONARY} for the data, the
   * {@link #NOT_IN_DICTIONARY} marker byte is written, followed by <code>length</code> (via
   * <code>StreamUtils.writeRawVInt32</code>) and then the data bytes themselves. Otherwise only
   * the dictionary index is written (via <code>StreamUtils.writeShort</code>).
   * @param out    the output stream to which the data is written
   * @param data   the data to be written, as a byte[]
   * @param offset offset into <code>data</code> at which the bytes to write start
   * @param length number of bytes to write
   * @param dict   the dictionary to consult; may be null, in which case the data is always
   *               written in full
   * @throws IOException if writing to <code>out</code> fails
   */
  public static void write(OutputStream out, byte[] data, int offset, int length, Dictionary dict)
    throws IOException {
    final short index = dict == null ? NOT_IN_DICTIONARY : dict.findEntry(data, offset, length);
    if (index != NOT_IN_DICTIONARY) {
      // Dictionary hit: the index alone stands in for the data.
      StreamUtils.writeShort(out, index);
      return;
    }
    // No usable dictionary index: marker byte, then length, then the bytes themselves.
    out.write(NOT_IN_DICTIONARY);
    StreamUtils.writeRawVInt32(out, length);
    out.write(data, offset, length);
  }

  /**
   * Helper that writes ByteBuffer data to the OutputStream, going through the dictionary when one
   * is supplied. If <code>dict</code> is null or reports {@link #NOT_IN_DICTIONARY} for the data,
   * the {@link #NOT_IN_DICTIONARY} marker byte is written, followed by <code>length</code> (via
   * <code>StreamUtils.writeRawVInt32</code>) and then the data copied out of the buffer (via
   * <code>ByteBufferUtils.copyBufferToStream</code>). Otherwise only the dictionary index is
   * written (via <code>StreamUtils.writeShort</code>).
   * @param out    the output stream to which the data is written
   * @param data   the data to be written, as a ByteBuffer
   * @param offset offset into <code>data</code> at which the bytes to write start
   * @param length number of bytes to write
   * @param dict   the dictionary to consult; may be null, in which case the data is always
   *               written in full
   * @throws IOException if writing to <code>out</code> fails
   */
  public static void write(OutputStream out, ByteBuffer data, int offset, int length,
    Dictionary dict) throws IOException {
    final short index = dict == null ? NOT_IN_DICTIONARY : dict.findEntry(data, offset, length);
    if (index != NOT_IN_DICTIONARY) {
      // Dictionary hit: the index alone stands in for the data.
      StreamUtils.writeShort(out, index);
      return;
    }
    // No usable dictionary index: marker byte, then length, then the bytes themselves.
    out.write(NOT_IN_DICTIONARY);
    StreamUtils.writeRawVInt32(out, length);
    ByteBufferUtils.copyBufferToStream(out, data, offset, length);
  }
}