001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.io.util;
020
021import java.nio.ByteBuffer;
022import java.util.HashMap;
023
024import org.apache.hadoop.hbase.util.ByteBufferUtils;
025import org.apache.hadoop.hbase.util.Bytes;
026import org.apache.yetus.audience.InterfaceAudience;
027
028import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
029
030/**
031 * WALDictionary using an LRU eviction algorithm. Uses a linked list running
032 * through a hashtable.  Currently has max of 2^15 entries.  Will start
033 * evicting if exceeds this number  The maximum memory we expect this dictionary
034 * to take in the worst case is about:
035 * <code>(2 ^ 15) * 5 (Regionname, Row key, CF, Column qual, table) * 100 bytes 
036 * (these are some big names) = ~16MB</code>.
037 * If you want to get silly, even at 1kb entries, it maxes out at 160 megabytes.
038 */
039@InterfaceAudience.Private
040public class LRUDictionary implements Dictionary {
041
042  BidirectionalLRUMap backingStore;
043  @Override
044  public byte[] getEntry(short idx) {
045    return backingStore.get(idx);
046  }
047
048  @Override
049  public void init(int initialSize) {
050    backingStore = new BidirectionalLRUMap(initialSize);
051  }
052  @Override
053  public short findEntry(byte[] data, int offset, int length) {
054    short ret = backingStore.findIdx(data, offset, length);
055    if (ret == NOT_IN_DICTIONARY) {
056      addEntry(data, offset, length);
057    }
058    return ret;
059  }
060
061  @Override
062  public short addEntry(byte[] data, int offset, int length) {
063    return addEntryInternal(data, offset, length, true);
064  }
065
066  private short addEntryInternal(byte[] data, int offset, int length, boolean copy) {
067    if (length <= 0) return NOT_IN_DICTIONARY;
068    return backingStore.put(data, offset, length, copy);
069  }
070
071  @Override
072  public void clear() {
073    backingStore.clear();
074  }
075
076  /*
077   * Internal class used to implement LRU eviction and dual lookup (by key and
078   * value).
079   * 
080   * This is not thread safe. Don't use in multi-threaded applications.
081   */
082  static class BidirectionalLRUMap {
083    private int currSize = 0;
084
085    // Head and tail of the LRU list.
086    private Node head;
087    private Node tail;
088
089    private HashMap<Node, Short> nodeToIndex = new HashMap<>();
090    private Node[] indexToNode;
091    private int initSize = 0;
092
093    public BidirectionalLRUMap(int initialSize) {
094      initSize = initialSize;
095      indexToNode = new Node[initialSize];
096    }
097
098    private short put(byte[] array, int offset, int length, boolean copy) {
099      if (copy) {
100        // We copy the bytes we want, otherwise we might be holding references to
101        // massive arrays in our dictionary (or those arrays might change)
102        byte[] stored = new byte[length];
103        Bytes.putBytes(stored, 0, array, offset, length);
104        return putInternal(stored);
105      } else {
106        return putInternal(array);
107      }
108    }
109
110    private short putInternal(byte[] stored) {
111      if (currSize < initSize) {
112        // There is space to add without evicting.
113        if (indexToNode[currSize] == null) {
114          indexToNode[currSize] = new ByteArrayBackedNode();
115        }
116        indexToNode[currSize].setContents(stored, 0, stored.length);
117        setHead(indexToNode[currSize]);
118        short ret = (short) currSize++;
119        nodeToIndex.put(indexToNode[ret], ret);
120        return ret;
121      } else {
122        short s = nodeToIndex.remove(tail);
123        tail.setContents(stored, 0, stored.length);
124        // we need to rehash this.
125        nodeToIndex.put(tail, s);
126        moveToHead(tail);
127        return s;
128      }
129    }
130
131    private short findIdx(byte[] array, int offset, int length) {
132      Short s;
133      final Node comparisonNode = new ByteArrayBackedNode();
134      comparisonNode.setContents(array, offset, length);
135      if ((s = nodeToIndex.get(comparisonNode)) != null) {
136        moveToHead(indexToNode[s]);
137        return s;
138      } else {
139        return -1;
140      }
141    }
142
143    private short findIdx(ByteBuffer buf, int offset, int length) {
144      Short s;
145      final ByteBufferBackedNode comparisonNode = new ByteBufferBackedNode();
146      comparisonNode.setContents(buf, offset, length);
147      if ((s = nodeToIndex.get(comparisonNode)) != null) {
148        moveToHead(indexToNode[s]);
149        return s;
150      } else {
151        return -1;
152      }
153    }
154
155    private byte[] get(short idx) {
156      Preconditions.checkElementIndex(idx, currSize);
157      moveToHead(indexToNode[idx]);
158      return indexToNode[idx].getContents();
159    }
160
161    private void moveToHead(Node n) {
162      if (head == n) {
163        // no-op -- it's already the head.
164        return;
165      }
166      // At this point we definitely have prev, since it's not the head.
167      assert n.prev != null;
168      // Unlink prev.
169      n.prev.next = n.next;
170
171      // Unlink next
172      if (n.next != null) {
173        n.next.prev = n.prev;
174      } else {
175        assert n == tail;
176        tail = n.prev;
177      }
178      // Node is now removed from the list. Re-add it at the head.
179      setHead(n);
180    }
181
182    private void setHead(Node n) {
183      // assume it's already unlinked from the list at this point.
184      n.prev = null;
185      n.next = head;
186      if (head != null) {
187        assert head.prev == null;
188        head.prev = n;
189      }
190
191      head = n;
192
193      // First entry
194      if (tail == null) {
195        tail = n;
196      }
197    }
198
199    private void clear() {
200      for (int i = 0; i < currSize; i++) {
201        indexToNode[i].next = null;
202        indexToNode[i].prev = null;
203        indexToNode[i].resetContents();
204      }
205      currSize = 0;
206      nodeToIndex.clear();
207      tail = null;
208      head = null;
209    }
210
211    private static abstract class Node {
212      int offset;
213      int length;
214      Node next; // link towards the tail
215      Node prev; // link towards the head
216      abstract void setContents(byte[] container, int offset, int length);
217      abstract byte[] getContents();
218      abstract void resetContents();
219    }
220    // The actual contents of the LRUDictionary are of ByteArrayBackedNode type 
221    private static class ByteArrayBackedNode extends Node {
222      private byte[] container;
223
224      @Override
225      void setContents(byte[] container, int offset, int length) {
226        this.container = container;
227        this.offset = offset;
228        this.length = length;
229      }
230
231      @Override
232      byte[] getContents() {
233        return this.container;
234      }
235
236      @Override
237      public int hashCode() {
238        return Bytes.hashCode(container, offset, length);
239      }
240
241      @Override
242      void resetContents() {
243        this.container = null;
244      }
245
246      @Override
247      public boolean equals(Object other) {
248        if (!(other instanceof Node)) {
249          return false;
250        }
251
252        Node casted = (Node) other;
253        return Bytes.equals(container, offset, length, casted.getContents(),
254            casted.offset, casted.length);
255      }
256    }
257
258    // Currently only used for finding the index and hence this node acts
259    // as a temporary holder to look up in the indexToNode map
260    // which is formed by ByteArrayBackedNode
261    private static class ByteBufferBackedNode extends Node {
262      private ByteBuffer container;
263
264      public ByteBufferBackedNode() {
265      }
266
267      @Override
268      void setContents(byte[] container, int offset, int length) {
269        this.container = ByteBuffer.wrap(container);
270        this.offset = offset;
271        this.length = length;
272      }
273
274      void setContents(ByteBuffer container, int offset, int length) {
275        this.container = container;
276        this.offset = offset;
277        this.length = length;
278      }
279
280      @Override
281      void resetContents() {
282        this.container = null;
283      }
284
285      @Override
286      byte[] getContents() {
287        // This ideally should not be called
288        byte[] copy = new byte[this.length];
289        ByteBufferUtils.copyFromBufferToArray(copy, (ByteBuffer) this.container, this.offset, 0,
290            length);
291        return copy;
292      }
293
294      @Override
295      public int hashCode() {
296        return ByteBufferUtils.hashCode(container, offset, length);
297      }
298
299      @Override
300      public boolean equals(Object other) {
301        if (!(other instanceof Node)) {
302          return false;
303        }
304        // This was done to avoid findbugs comment
305        Node casted = (Node) other;
306        // The other side should be a byte array backed node only as we add only
307        // ByteArrayBackedNode to the indexToNode map.
308        return ByteBufferUtils.equals(this.container, offset, length,
309            casted.getContents(), casted.offset, casted.length);
310      }
311    }
312  }
313
314  @Override
315  public short findEntry(ByteBuffer data, int offset, int length) {
316    short ret = backingStore.findIdx(data, offset, length);
317    if (ret == NOT_IN_DICTIONARY) {
318      byte[] copy = new byte[length];
319      ByteBufferUtils.copyFromBufferToArray(copy, data, offset, 0, length);
320      addEntryInternal(copy, 0, length, false);
321    }
322    return ret;
323  }
324}