View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase;
20  
21  import com.google.common.annotations.VisibleForTesting;
22  import com.google.common.base.Preconditions;
23  import java.io.IOException;
24  import java.util.Collections;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.Locale;
28  import java.util.Map;
29  import java.util.Set;
30
31  import org.apache.hadoop.hbase.classification.InterfaceAudience;
32  import org.apache.hadoop.hbase.classification.InterfaceStability;
33  import org.apache.hadoop.hbase.exceptions.DeserializationException;
34  import org.apache.hadoop.hbase.exceptions.HBaseException;
35  import org.apache.hadoop.hbase.io.compress.Compression;
36  import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
37  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
38  import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ColumnFamilySchema;
39  import org.apache.hadoop.hbase.regionserver.BloomType;
40  import org.apache.hadoop.hbase.util.Bytes;
41  import org.apache.hadoop.hbase.util.PrettyPrinter;
42  import org.apache.hadoop.hbase.util.PrettyPrinter.Unit;
43
44
45  /**
46   * An HColumnDescriptor contains information about a column family such as the
47   * number of versions, compression settings, etc.
48   *
49   * It is used as input when creating a table or adding a column.
50   */
51  @InterfaceAudience.Public
52  @InterfaceStability.Evolving
53  public class HColumnDescriptor implements Comparable<HColumnDescriptor> {
  // For future backward compatibility

  // Version  3 was when column names become byte arrays and when we picked up
  // Time-to-live feature.  Version 4 was when we moved to byte arrays, HBASE-82.
  // Version  5 was when bloom filter descriptors were removed.
  // Version  6 adds metadata as a map where keys and values are byte[].
  // Version  7 -- add new compression and hfile blocksize to HColumnDescriptor (HBASE-1217)
  // Version  8 -- reintroduction of bloom filters, changed from boolean to enum
  // Version  9 -- add data block encoding
  // Version 10 -- change metadata to standard type.
  // Version 11 -- add column family level configuration.
  /** Current serialization version of this descriptor; see history above. */
  private static final byte COLUMN_DESCRIPTOR_VERSION = (byte) 11;

  /** Key for whether the memstore of this family is kept compacted in memory. */
  public static final String IN_MEMORY_COMPACTION = "IN_MEMORY_COMPACTION";

  // These constants are used as FileInfo keys
  /** Key for the compression algorithm used on flush/minor compaction writes. */
  public static final String COMPRESSION = "COMPRESSION";
  /** Key for the compression algorithm used on (major) compaction writes. */
  public static final String COMPRESSION_COMPACT = "COMPRESSION_COMPACT";
  public static final String ENCODE_ON_DISK = // To be removed, it is not used anymore
      "ENCODE_ON_DISK";
  /** Key for the data block encoding algorithm; see {@link DataBlockEncoding}. */
  public static final String DATA_BLOCK_ENCODING =
      "DATA_BLOCK_ENCODING";
  /**
   * Key for the BLOCKCACHE attribute.
   * A more exact name would be CACHE_DATA_ON_READ because this flag sets whether or not we
   * cache DATA blocks.  We always cache INDEX and BLOOM blocks; caching these blocks cannot be
   * disabled.
   */
  public static final String BLOCKCACHE = "BLOCKCACHE";
  /** Key for whether DATA blocks are added to the cache as they are written. */
  public static final String CACHE_DATA_ON_WRITE = "CACHE_DATA_ON_WRITE";
  /** Key for whether INDEX blocks are added to the cache as they are written. */
  public static final String CACHE_INDEX_ON_WRITE = "CACHE_INDEX_ON_WRITE";
  /** Key for whether BLOOM blocks are added to the cache as they are written. */
  public static final String CACHE_BLOOMS_ON_WRITE = "CACHE_BLOOMS_ON_WRITE";
  /** Key for whether cached blocks are evicted when a store file is closed. */
  public static final String EVICT_BLOCKS_ON_CLOSE = "EVICT_BLOCKS_ON_CLOSE";
  /**
   * Key for cache data into L1 if cache is set up with more than one tier.
   * To set in the shell, do something like this:
   * <code>hbase(main):003:0&gt; create 't',
   *    {NAME =&gt; 't', CONFIGURATION =&gt; {CACHE_DATA_IN_L1 =&gt; 'true'}}</code>
   */
  public static final String CACHE_DATA_IN_L1 = "CACHE_DATA_IN_L1";

  /**
   * Key for the PREFETCH_BLOCKS_ON_OPEN attribute.
   * If set, all INDEX, BLOOM, and DATA blocks of HFiles belonging to this
   * family will be loaded into the cache as soon as the file is opened. These
   * loads will not count as cache misses.
   */
  public static final String PREFETCH_BLOCKS_ON_OPEN = "PREFETCH_BLOCKS_ON_OPEN";

  /**
   * Size of storefile/hfile 'blocks'.  Default is {@link #DEFAULT_BLOCKSIZE}.
   * Use smaller block sizes for faster random-access at expense of larger
   * indices (more memory consumption).
   */
  public static final String BLOCKSIZE = "BLOCKSIZE";

  public static final String LENGTH = "LENGTH";
  /** Key for the time-to-live of cell contents, in seconds. */
  public static final String TTL = "TTL";
  /** Key for the bloom filter type; see {@link BloomType}. */
  public static final String BLOOMFILTER = "BLOOMFILTER";
  public static final String FOREVER = "FOREVER";
  /** Key for the replication scope of this family. */
  public static final String REPLICATION_SCOPE = "REPLICATION_SCOPE";
  /** Byte form of {@link #REPLICATION_SCOPE}, precomputed for byte[]-keyed lookups. */
  public static final byte[] REPLICATION_SCOPE_BYTES = Bytes.toBytes(REPLICATION_SCOPE);
  /** Key for the minimum number of versions to retain (meaningful with TTL). */
  public static final String MIN_VERSIONS = "MIN_VERSIONS";
  /**
   * Retain all cells across flushes and compactions even if they fall behind
   * a delete tombstone. To see all retained cells, do a 'raw' scan; see
   * Scan#setRaw or pass RAW =&gt; true attribute in the shell.
   */
  public static final String KEEP_DELETED_CELLS = "KEEP_DELETED_CELLS";
  /** Key for whether cell tags are compressed along with data block encoding. */
  public static final String COMPRESS_TAGS = "COMPRESS_TAGS";

  /** Key naming the encryption algorithm for this family. */
  public static final String ENCRYPTION = "ENCRYPTION";
  /** Key holding the encryption key material for this family. */
  public static final String ENCRYPTION_KEY = "ENCRYPTION_KEY";

  /** Key for whether this family uses MOB (medium object) storage. */
  public static final String IS_MOB = "IS_MOB";
  public static final byte[] IS_MOB_BYTES = Bytes.toBytes(IS_MOB);
  /** Key for the value-size threshold above which a cell is treated as a MOB. */
  public static final String MOB_THRESHOLD = "MOB_THRESHOLD";
  public static final byte[] MOB_THRESHOLD_BYTES = Bytes.toBytes(MOB_THRESHOLD);
  public static final long DEFAULT_MOB_THRESHOLD = 100 * 1024; // 100k

  /** Key for the DFS replication factor of this family's files. */
  public static final String DFS_REPLICATION = "DFS_REPLICATION";
  // NOTE(review): presumably 0 means "use the filesystem default" -- confirm at use site.
  public static final short DEFAULT_DFS_REPLICATION = 0;
136
  /**
   * Default compression type.
   */
  public static final String DEFAULT_COMPRESSION =
    Compression.Algorithm.NONE.getName();

  /**
   * Default value of the flag that enables data block encoding on disk, as
   * opposed to encoding in cache only. We encode blocks everywhere by default,
   * as long as {@link #DATA_BLOCK_ENCODING} is not NONE.
   */
  public static final boolean DEFAULT_ENCODE_ON_DISK = true;

  /** Default data block encoding algorithm. */
  public static final String DEFAULT_DATA_BLOCK_ENCODING =
      DataBlockEncoding.NONE.toString();

  /**
   * Default number of versions of a record to keep.
   * NOTE(review): resolved once at class-load time from a freshly created
   * configuration ("hbase.column.max.version", falling back to 1); later
   * configuration changes are not reflected in this constant.
   */
  public static final int DEFAULT_VERSIONS = HBaseConfiguration.create().getInt(
      "hbase.column.max.version", 1);

  /**
   * Default is not to keep a minimum of versions.
   */
  public static final int DEFAULT_MIN_VERSIONS = 0;

  /*
   * Cache here the HCD value.
   * Question: its OK to cache since when we're reenable, we create a new HCD?
   */
  // Lazily populated by getBlocksize(); reset to null by setBlocksize().
  private volatile Integer blocksize = null;
170
  /**
   * Default setting for whether to try and serve this column family from memory or not.
   */
  public static final boolean DEFAULT_IN_MEMORY = false;

  /**
   * Default setting for whether to set the memstore of this column family as compacting or not.
   */
  public static final boolean DEFAULT_IN_MEMORY_COMPACTION = false;

  /**
   * Default setting for preventing deleted from being collected immediately.
   */
  public static final KeepDeletedCells DEFAULT_KEEP_DELETED = KeepDeletedCells.FALSE;

  /**
   * Default setting for whether to use a block cache or not.
   */
  public static final boolean DEFAULT_BLOCKCACHE = true;

  /**
   * Default setting for whether to cache data blocks on write if block caching
   * is enabled.
   */
  public static final boolean DEFAULT_CACHE_DATA_ON_WRITE = false;

  /**
   * Default setting for whether to cache data blocks in L1 tier.  Only makes sense if more than
   * one tier in operations: i.e. if we have an L1 and a L2.  This will be the cases if we are
   * using BucketCache.
   */
  public static final boolean DEFAULT_CACHE_DATA_IN_L1 = false;

  /**
   * Default setting for whether to cache index blocks on write if block
   * caching is enabled.
   */
  public static final boolean DEFAULT_CACHE_INDEX_ON_WRITE = false;

  /**
   * Default size of blocks in files stored to the filesytem (hfiles).
   */
  public static final int DEFAULT_BLOCKSIZE = HConstants.DEFAULT_BLOCKSIZE;

  /**
   * Default setting for whether or not to use bloomfilters.
   */
  public static final String DEFAULT_BLOOMFILTER = BloomType.ROW.toString();

  /**
   * Default setting for whether to cache bloom filter blocks on write if block
   * caching is enabled.
   */
  public static final boolean DEFAULT_CACHE_BLOOMS_ON_WRITE = false;

  /**
   * Default time to live of cell contents.
   */
  public static final int DEFAULT_TTL = HConstants.FOREVER;

  /**
   * Default scope.
   */
  public static final int DEFAULT_REPLICATION_SCOPE = HConstants.REPLICATION_SCOPE_LOCAL;

  /**
   * Default setting for whether to evict cached blocks from the blockcache on
   * close.
   */
  public static final boolean DEFAULT_EVICT_BLOCKS_ON_CLOSE = false;

  /**
   * Default compress tags along with any type of DataBlockEncoding.
   */
  public static final boolean DEFAULT_COMPRESS_TAGS = true;

  /**
   * Default setting for whether to prefetch blocks into the blockcache on open.
   */
  public static final boolean DEFAULT_PREFETCH_BLOCKS_ON_OPEN = false;

  /** Stringified default value for each attribute key; populated in the static block below. */
  private final static Map<String, String> DEFAULT_VALUES
    = new HashMap<String, String>();
  /** Attribute keys reserved by HBase itself; populated in the static block below. */
  private final static Set<Bytes> RESERVED_KEYWORDS
      = new HashSet<Bytes>();
256
  static {
    // Register the string form of every attribute's default value.
    DEFAULT_VALUES.put(BLOOMFILTER, DEFAULT_BLOOMFILTER);
    DEFAULT_VALUES.put(REPLICATION_SCOPE, String.valueOf(DEFAULT_REPLICATION_SCOPE));
    DEFAULT_VALUES.put(HConstants.VERSIONS, String.valueOf(DEFAULT_VERSIONS));
    DEFAULT_VALUES.put(MIN_VERSIONS, String.valueOf(DEFAULT_MIN_VERSIONS));
    DEFAULT_VALUES.put(COMPRESSION, DEFAULT_COMPRESSION);
    DEFAULT_VALUES.put(TTL, String.valueOf(DEFAULT_TTL));
    DEFAULT_VALUES.put(BLOCKSIZE, String.valueOf(DEFAULT_BLOCKSIZE));
    DEFAULT_VALUES.put(HConstants.IN_MEMORY, String.valueOf(DEFAULT_IN_MEMORY));
    DEFAULT_VALUES.put(IN_MEMORY_COMPACTION, String.valueOf(DEFAULT_IN_MEMORY_COMPACTION));
    DEFAULT_VALUES.put(BLOCKCACHE, String.valueOf(DEFAULT_BLOCKCACHE));
    DEFAULT_VALUES.put(KEEP_DELETED_CELLS, String.valueOf(DEFAULT_KEEP_DELETED));
    DEFAULT_VALUES.put(DATA_BLOCK_ENCODING, String.valueOf(DEFAULT_DATA_BLOCK_ENCODING));
    DEFAULT_VALUES.put(CACHE_DATA_ON_WRITE, String.valueOf(DEFAULT_CACHE_DATA_ON_WRITE));
    DEFAULT_VALUES.put(CACHE_DATA_IN_L1, String.valueOf(DEFAULT_CACHE_DATA_IN_L1));
    DEFAULT_VALUES.put(CACHE_INDEX_ON_WRITE, String.valueOf(DEFAULT_CACHE_INDEX_ON_WRITE));
    DEFAULT_VALUES.put(CACHE_BLOOMS_ON_WRITE, String.valueOf(DEFAULT_CACHE_BLOOMS_ON_WRITE));
    DEFAULT_VALUES.put(EVICT_BLOCKS_ON_CLOSE, String.valueOf(DEFAULT_EVICT_BLOCKS_ON_CLOSE));
    DEFAULT_VALUES.put(PREFETCH_BLOCKS_ON_OPEN, String.valueOf(DEFAULT_PREFETCH_BLOCKS_ON_OPEN));
    // Every key with a default is reserved ...
    for (String s : DEFAULT_VALUES.keySet()) {
      RESERVED_KEYWORDS.add(new Bytes(Bytes.toBytes(s)));
    }
    // ... plus these keys, which have no entry in DEFAULT_VALUES.
    RESERVED_KEYWORDS.add(new Bytes(Bytes.toBytes(ENCRYPTION)));
    RESERVED_KEYWORDS.add(new Bytes(Bytes.toBytes(ENCRYPTION_KEY)));
    RESERVED_KEYWORDS.add(new Bytes(IS_MOB_BYTES));
    RESERVED_KEYWORDS.add(new Bytes(MOB_THRESHOLD_BYTES));
  }
284
  /** Sentinel meaning {@link #cachedMaxVersions} has not been computed yet. */
  private static final int UNINITIALIZED = -1;

  // Column family name
  private byte [] name;

  // Column metadata: attribute key -> value, both as byte wrappers.
  private final Map<Bytes, Bytes> values =
      new HashMap<Bytes, Bytes>();

  /**
   * A map which holds the configuration specific to the column family.
   * The keys of the map have the same names as config keys and override the defaults with
   * cf-specific settings. Example usage may be for compactions, etc.
   */
  private final Map<String, String> configuration = new HashMap<String, String>();

  /*
   * Cache the max versions rather than calculate it every time.
   */
  private int cachedMaxVersions = UNINITIALIZED;
305
  /**
   * Construct a column descriptor specifying only the family name
   * The other attributes are defaulted.
   *
   * @param familyName Column family name. Must be 'printable' -- digit or
   * letter -- and may not contain a <code>:</code>
   * @throws IllegalArgumentException if the name fails the checks performed by
   * {@link #isLegalFamilyName(byte[])} in the delegated constructor
   */
  public HColumnDescriptor(final String familyName) {
    this(Bytes.toBytes(familyName));
  }
316
  /**
   * Construct a column descriptor specifying only the family name
   * The other attributes are defaulted.
   *
   * @param familyName Column family name. Must be 'printable' -- digit or
   * letter -- and may not contain a <code>:</code>
   * @throws IllegalArgumentException if the name fails {@link #isLegalFamilyName(byte[])}
   */
  public HColumnDescriptor(final byte [] familyName) {
    isLegalFamilyName(familyName);
    this.name = familyName;

    // Prime every attribute with its default value.  Note setMaxVersions()
    // validates against getMinVersions(), which returns 0 while MIN_VERSIONS
    // is still unset, so calling it first is safe.
    setMaxVersions(DEFAULT_VERSIONS);
    setMinVersions(DEFAULT_MIN_VERSIONS);
    setKeepDeletedCells(DEFAULT_KEEP_DELETED);
    setInMemory(DEFAULT_IN_MEMORY);
    setInMemoryCompaction(DEFAULT_IN_MEMORY_COMPACTION);
    setBlockCacheEnabled(DEFAULT_BLOCKCACHE);
    setTimeToLive(DEFAULT_TTL);
    setCompressionType(Compression.Algorithm.valueOf(DEFAULT_COMPRESSION.toUpperCase(Locale.ROOT)));
    setDataBlockEncoding(DataBlockEncoding.valueOf(DEFAULT_DATA_BLOCK_ENCODING.toUpperCase(Locale.ROOT)));
    setBloomFilterType(BloomType.valueOf(DEFAULT_BLOOMFILTER.toUpperCase(Locale.ROOT)));
    setBlocksize(DEFAULT_BLOCKSIZE);
    setScope(DEFAULT_REPLICATION_SCOPE);
  }
341
  /**
   * Constructor.
   * Makes a deep copy of the supplied descriptor.
   * Can make a modifiable descriptor from an UnmodifyableHColumnDescriptor.
   * @param desc The descriptor.
   */
  public HColumnDescriptor(HColumnDescriptor desc) {
    super();
    this.name = desc.name.clone();
    // Copy the maps entry by entry; the Bytes key/value objects themselves are
    // shared with the source descriptor (only the maps are fresh).
    for (Map.Entry<Bytes, Bytes> e :
        desc.values.entrySet()) {
      this.values.put(e.getKey(), e.getValue());
    }
    for (Map.Entry<String, String> e : desc.configuration.entrySet()) {
      this.configuration.put(e.getKey(), e.getValue());
    }
    // Re-apply max versions so this instance's cachedMaxVersions is primed.
    setMaxVersions(desc.getMaxVersions());
  }
360
361   /**
362    * @param b Family name.
363    * @return <code>b</code>
364    * @throws IllegalArgumentException If not null and not a legitimate family
365    * name: i.e. 'printable' and ends in a ':' (Null passes are allowed because
366    * <code>b</code> can be null when deserializing).  Cannot start with a '.'
367    * either. Also Family can not be an empty value or equal "recovered.edits".
368    */
369   public static byte [] isLegalFamilyName(final byte [] b) {
370     if (b == null) {
371       return b;
372     }
373     Preconditions.checkArgument(b.length != 0, "Family name can not be empty");
374     if (b[0] == '.') {
375       throw new IllegalArgumentException("Family names cannot start with a " +
376         "period: " + Bytes.toString(b));
377     }
378     for (int i = 0; i < b.length; i++) {
379       if (Character.isISOControl(b[i]) || b[i] == ':' || b[i] == '\\' || b[i] == '/') {
380         throw new IllegalArgumentException("Illegal character <" + b[i] +
381           ">. Family names cannot contain control characters or colons: " +
382           Bytes.toString(b));
383       }
384     }
385     byte[] recoveredEdit = Bytes.toBytes(HConstants.RECOVERED_EDITS_DIR);
386     if (Bytes.equals(recoveredEdit, b)) {
387       throw new IllegalArgumentException("Family name cannot be: " +
388           HConstants.RECOVERED_EDITS_DIR);
389     }
390     return b;
391   }
392
393   /**
394    * @return Name of this column family
395    */
396   public byte [] getName() {
397     return name;
398   }
399
400   /**
401    * @return Name of this column family
402    */
403   public String getNameAsString() {
404     return Bytes.toString(this.name);
405   }
406
407   /**
408    * @param key The key.
409    * @return The value.
410    */
411   public byte[] getValue(byte[] key) {
412     Bytes ibw = values.get(new Bytes(key));
413     if (ibw == null)
414       return null;
415     return ibw.get();
416   }
417
418   /**
419    * @param key The key.
420    * @return The value as a string.
421    */
422   public String getValue(String key) {
423     byte[] value = getValue(Bytes.toBytes(key));
424     if (value == null)
425       return null;
426     return Bytes.toString(value);
427   }
428
429   /**
430    * @return All values.
431    */
432   public Map<Bytes, Bytes> getValues() {
433     // shallow pointer copy
434     return Collections.unmodifiableMap(values);
435   }
436
437   /**
438    * @param key The key.
439    * @param value The value.
440    * @return this (for chained invocation)
441    */
442   public HColumnDescriptor setValue(byte[] key, byte[] value) {
443     if (Bytes.compareTo(Bytes.toBytes(HConstants.VERSIONS), key) == 0) {
444       cachedMaxVersions = UNINITIALIZED;
445     }
446     values.put(new Bytes(key),
447         new Bytes(value));
448     return this;
449   }
450
451   /**
452    * @param key Key whose key and value we're to remove from HCD parameters.
453    */
454   public void remove(final byte [] key) {
455     values.remove(new Bytes(key));
456   }
457
458   /**
459    * @param key The key.
460    * @param value The value.
461    * @return this (for chained invocation)
462    */
463   public HColumnDescriptor setValue(String key, String value) {
464     if (value == null) {
465       remove(Bytes.toBytes(key));
466     } else {
467       setValue(Bytes.toBytes(key), Bytes.toBytes(value));
468     }
469     return this;
470   }
471
  /**
   * @return compression type being used for the column family
   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0
   *             (<a href="https://issues.apache.org/jira/browse/HBASE-13655">HBASE-13655</a>).
   *             Use {@link #getCompressionType()}.
   */
  @Deprecated
  public Compression.Algorithm getCompression() {
    // Plain delegation, kept only for source compatibility.
    return getCompressionType();
  }
482
  /**
   *  @return compression type being used for the column family for major compaction
   *  @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0
   *             (<a href="https://issues.apache.org/jira/browse/HBASE-13655">HBASE-13655</a>).
   *             Use {@link #getCompactionCompressionType()}.
   */
  @Deprecated
  public Compression.Algorithm getCompactionCompression() {
    // Plain delegation, kept only for source compatibility.
    return getCompactionCompressionType();
  }
493
494   /** @return maximum number of versions */
495   public int getMaxVersions() {
496     if (this.cachedMaxVersions == UNINITIALIZED) {
497       String v = getValue(HConstants.VERSIONS);
498       this.cachedMaxVersions = Integer.parseInt(v);
499     }
500     return this.cachedMaxVersions;
501   }
502
503   /**
504    * @param maxVersions maximum number of versions
505    * @return this (for chained invocation)
506    */
507   public HColumnDescriptor setMaxVersions(int maxVersions) {
508     if (maxVersions <= 0) {
509       // TODO: Allow maxVersion of 0 to be the way you say "Keep all versions".
510       // Until there is support, consider 0 or < 0 -- a configuration error.
511       throw new IllegalArgumentException("Maximum versions must be positive");
512     }
513     if (maxVersions < this.getMinVersions()) {
514         throw new IllegalArgumentException("Set MaxVersion to " + maxVersions
515             + " while minVersion is " + this.getMinVersions()
516             + ". Maximum versions must be >= minimum versions ");
517     }
518     setValue(HConstants.VERSIONS, Integer.toString(maxVersions));
519     cachedMaxVersions = maxVersions;
520     return this;
521   }
522
523   /**
524    * Set minimum and maximum versions to keep
525    *
526    * @param minVersions minimal number of versions
527    * @param maxVersions maximum number of versions
528    * @return this (for chained invocation)
529    */
530   public HColumnDescriptor setVersions(int minVersions, int maxVersions) {
531     if (minVersions <= 0) {
532       // TODO: Allow minVersion and maxVersion of 0 to be the way you say "Keep all versions".
533       // Until there is support, consider 0 or < 0 -- a configuration error.
534       throw new IllegalArgumentException("Minimum versions must be positive");
535     }
536
537     if (maxVersions < minVersions) {
538       throw new IllegalArgumentException("Unable to set MaxVersion to " + maxVersions
539         + " and set MinVersion to " + minVersions
540         + ", as maximum versions must be >= minimum versions.");
541     }
542     setMinVersions(minVersions);
543     setMaxVersions(maxVersions);
544     return this;
545   }
546
547   /**
548    * @return The storefile/hfile blocksize for this column family.
549    */
550   public synchronized int getBlocksize() {
551     if (this.blocksize == null) {
552       String value = getValue(BLOCKSIZE);
553       this.blocksize = (value != null)?
554         Integer.decode(value): Integer.valueOf(DEFAULT_BLOCKSIZE);
555     }
556     return this.blocksize.intValue();
557
558   }
559
560   /**
561    * @param s Blocksize to use when writing out storefiles/hfiles on this
562    * column family.
563    * @return this (for chained invocation)
564    */
565   public HColumnDescriptor setBlocksize(int s) {
566     setValue(BLOCKSIZE, Integer.toString(s));
567     this.blocksize = null;
568     return this;
569   }
570
571   /**
572    * @return Compression type setting.
573    */
574   public Compression.Algorithm getCompressionType() {
575     String n = getValue(COMPRESSION);
576     if (n == null) {
577       return Compression.Algorithm.NONE;
578     }
579     return Compression.Algorithm.valueOf(n.toUpperCase(Locale.ROOT));
580   }
581
582   /**
583    * Compression types supported in hbase.
584    * LZO is not bundled as part of the hbase distribution.
585    * See <a href="http://wiki.apache.org/hadoop/UsingLzoCompression">LZO Compression</a>
586    * for how to enable it.
587    * @param type Compression type setting.
588    * @return this (for chained invocation)
589    */
590   public HColumnDescriptor setCompressionType(Compression.Algorithm type) {
591     return setValue(COMPRESSION, type.getName().toUpperCase(Locale.ROOT));
592   }
593
594   /**
595    * @return the data block encoding algorithm used in block cache and
596    *         optionally on disk
597    */
598   public DataBlockEncoding getDataBlockEncoding() {
599     String type = getValue(DATA_BLOCK_ENCODING);
600     if (type == null) {
601       type = DEFAULT_DATA_BLOCK_ENCODING;
602     }
603     return DataBlockEncoding.valueOf(type);
604   }
605
606   /**
607    * Set data block encoding algorithm used in block cache.
608    * @param type What kind of data block encoding will be used.
609    * @return this (for chained invocation)
610    */
611   public HColumnDescriptor setDataBlockEncoding(DataBlockEncoding type) {
612     String name;
613     if (type != null) {
614       name = type.toString();
615     } else {
616       name = DataBlockEncoding.NONE.toString();
617     }
618     return setValue(DATA_BLOCK_ENCODING, name);
619   }
620
621   /**
622    * Set whether the tags should be compressed along with DataBlockEncoding. When no
623    * DataBlockEncoding is been used, this is having no effect.
624    *
625    * @param compressTags
626    * @return this (for chained invocation)
627    */
628   public HColumnDescriptor setCompressTags(boolean compressTags) {
629     return setValue(COMPRESS_TAGS, String.valueOf(compressTags));
630   }
631
632   /**
633    * @return Whether KV tags should be compressed along with DataBlockEncoding. When no
634    *         DataBlockEncoding is been used, this is having no effect.
635    */
636   public boolean isCompressTags() {
637     String compressTagsStr = getValue(COMPRESS_TAGS);
638     boolean compressTags = DEFAULT_COMPRESS_TAGS;
639     if (compressTagsStr != null) {
640       compressTags = Boolean.parseBoolean(compressTagsStr);
641     }
642     return compressTags;
643   }
644
645   /**
646    * @return Compression type setting.
647    */
648   public Compression.Algorithm getCompactionCompressionType() {
649     String n = getValue(COMPRESSION_COMPACT);
650     if (n == null) {
651       return getCompressionType();
652     }
653     return Compression.Algorithm.valueOf(n.toUpperCase(Locale.ROOT));
654   }
655
656   /**
657    * Compression types supported in hbase.
658    * LZO is not bundled as part of the hbase distribution.
659    * See <a href="http://wiki.apache.org/hadoop/UsingLzoCompression">LZO Compression</a>
660    * for how to enable it.
661    * @param type Compression type setting.
662    * @return this (for chained invocation)
663    */
664   public HColumnDescriptor setCompactionCompressionType(
665       Compression.Algorithm type) {
666     return setValue(COMPRESSION_COMPACT, type.getName().toUpperCase(Locale.ROOT));
667   }
668
669   /**
670    * @return True if we are to favor keeping all values for this column family in the
671    * HRegionServer cache.
672    */
673   public boolean isInMemory() {
674     String value = getValue(HConstants.IN_MEMORY);
675     if (value != null) {
676       return Boolean.parseBoolean(value);
677     }
678     return DEFAULT_IN_MEMORY;
679   }
680
681   /**
682    * @param inMemory True if we are to favor keeping all values for this column family in the
683    * HRegionServer cache
684    * @return this (for chained invocation)
685    */
686   public HColumnDescriptor setInMemory(boolean inMemory) {
687     return setValue(HConstants.IN_MEMORY, Boolean.toString(inMemory));
688   }
689
690   /**
691    * @return True if we prefer to keep the in-memory data compacted
692    *          for this column family
693    */
694   public boolean isInMemoryCompaction() {
695     String value = getValue(IN_MEMORY_COMPACTION);
696     if (value != null) {
697       return Boolean.parseBoolean(value);
698     }
699     return DEFAULT_IN_MEMORY_COMPACTION;
700   }
701
702   /**
703    * @param inMemoryCompaction True if we prefer to keep the in-memory data compacted
704    *                  for this column family
705    * @return this (for chained invocation)
706    */
707   public HColumnDescriptor setInMemoryCompaction(boolean inMemoryCompaction) {
708     return setValue(IN_MEMORY_COMPACTION, Boolean.toString(inMemoryCompaction));
709   }
710
711   public KeepDeletedCells getKeepDeletedCells() {
712     String value = getValue(KEEP_DELETED_CELLS);
713     if (value != null) {
714       // toUpperCase for backwards compatibility
715       return KeepDeletedCells.valueOf(value.toUpperCase(Locale.ROOT));
716     }
717     return DEFAULT_KEEP_DELETED;
718   }
719
720   /**
721    * @param keepDeletedCells True if deleted rows should not be collected
722    * immediately.
723    * @return this (for chained invocation)
724    */
725   public HColumnDescriptor setKeepDeletedCells(KeepDeletedCells keepDeletedCells) {
726     return setValue(KEEP_DELETED_CELLS, keepDeletedCells.toString());
727   }
728
729   /**
730    * @return Time-to-live of cell contents, in seconds.
731    */
732   public int getTimeToLive() {
733     String value = getValue(TTL);
734     return (value != null)? Integer.parseInt(value) : DEFAULT_TTL;
735   }
736
737   /**
738    * @param timeToLive Time-to-live of cell contents, in seconds.
739    * @return this (for chained invocation)
740    */
741   public HColumnDescriptor setTimeToLive(int timeToLive) {
742     return setValue(TTL, Integer.toString(timeToLive));
743   }
744
745   /**
746    * @param timeToLive Time to live of cell contents, in human readable format
747    *                   @see org.apache.hadoop.hbase.util.PrettyPrinter#format(String, Unit)
748    * @return this (for chained invocation)
749    */
750   public HColumnDescriptor setTimeToLive(String timeToLive) throws HBaseException {
751     return setValue(TTL, PrettyPrinter.valueOf(timeToLive, Unit.TIME_INTERVAL));
752   }
753
754   /**
755    * @return The minimum number of versions to keep.
756    */
757   public int getMinVersions() {
758     String value = getValue(MIN_VERSIONS);
759     return (value != null)? Integer.parseInt(value) : 0;
760   }
761
762   /**
763    * @param minVersions The minimum number of versions to keep.
764    * (used when timeToLive is set)
765    * @return this (for chained invocation)
766    */
767   public HColumnDescriptor setMinVersions(int minVersions) {
768     return setValue(MIN_VERSIONS, Integer.toString(minVersions));
769   }
770
771   /**
772    * @return True if hfile DATA type blocks should be cached (You cannot disable caching of INDEX
773    * and BLOOM type blocks).
774    */
775   public boolean isBlockCacheEnabled() {
776     String value = getValue(BLOCKCACHE);
777     if (value != null) {
778       return Boolean.parseBoolean(value);
779     }
780     return DEFAULT_BLOCKCACHE;
781   }
782
783   /**
784    * @param blockCacheEnabled True if hfile DATA type blocks should be cached (We always cache
785    * INDEX and BLOOM blocks; you cannot turn this off).
786    * @return this (for chained invocation)
787    */
788   public HColumnDescriptor setBlockCacheEnabled(boolean blockCacheEnabled) {
789     return setValue(BLOCKCACHE, Boolean.toString(blockCacheEnabled));
790   }
791
792   /**
793    * @return bloom filter type used for new StoreFiles in ColumnFamily
794    */
795   public BloomType getBloomFilterType() {
796     String n = getValue(BLOOMFILTER);
797     if (n == null) {
798       n = DEFAULT_BLOOMFILTER;
799     }
800     return BloomType.valueOf(n.toUpperCase(Locale.ROOT));
801   }
802
803   /**
804    * @param bt bloom filter type
805    * @return this (for chained invocation)
806    */
807   public HColumnDescriptor setBloomFilterType(final BloomType bt) {
808     return setValue(BLOOMFILTER, bt.toString());
809   }
810
811    /**
812     * @return the scope tag
813     */
814   public int getScope() {
815     byte[] value = getValue(REPLICATION_SCOPE_BYTES);
816     if (value != null) {
817       return Integer.parseInt(Bytes.toString(value));
818     }
819     return DEFAULT_REPLICATION_SCOPE;
820   }
821
822  /**
823   * @param scope the scope tag
824   * @return this (for chained invocation)
825   */
826   public HColumnDescriptor setScope(int scope) {
827     return setValue(REPLICATION_SCOPE, Integer.toString(scope));
828   }
829
830   /**
831    * @return true if we should cache data blocks on write
832    */
833   public boolean isCacheDataOnWrite() {
834     return setAndGetBoolean(CACHE_DATA_ON_WRITE, DEFAULT_CACHE_DATA_ON_WRITE);
835   }
836
837   /**
838    * @param value true if we should cache data blocks on write
839    * @return this (for chained invocation)
840    */
841   public HColumnDescriptor setCacheDataOnWrite(boolean value) {
842     return setValue(CACHE_DATA_ON_WRITE, Boolean.toString(value));
843   }
844
845   /**
846    * @return true if we should cache data blocks in the L1 cache (if block cache deploy has more
847    *         than one tier; e.g. we are using CombinedBlockCache).
848    */
849   public boolean isCacheDataInL1() {
850     return setAndGetBoolean(CACHE_DATA_IN_L1, DEFAULT_CACHE_DATA_IN_L1);
851   }
852
853   /**
854    * @param value true if we should cache data blocks in the L1 cache (if block cache deploy
855    * has more than one tier; e.g. we are using CombinedBlockCache).
856    * @return this (for chained invocation)
857    */
858   public HColumnDescriptor setCacheDataInL1(boolean value) {
859     return setValue(CACHE_DATA_IN_L1, Boolean.toString(value));
860   }
861
862   private boolean setAndGetBoolean(final String key, final boolean defaultSetting) {
863     String value = getValue(key);
864     if (value != null) {
865       return Boolean.parseBoolean(value);
866     }
867     return defaultSetting;
868   }
869
870   /**
871    * @return true if we should cache index blocks on write
872    */
873   public boolean isCacheIndexesOnWrite() {
874     return setAndGetBoolean(CACHE_INDEX_ON_WRITE, DEFAULT_CACHE_INDEX_ON_WRITE);
875   }
876
877   /**
878    * @param value true if we should cache index blocks on write
879    * @return this (for chained invocation)
880    */
881   public HColumnDescriptor setCacheIndexesOnWrite(boolean value) {
882     return setValue(CACHE_INDEX_ON_WRITE, Boolean.toString(value));
883   }
884
885   /**
886    * @return true if we should cache bloomfilter blocks on write
887    */
888   public boolean isCacheBloomsOnWrite() {
889     return setAndGetBoolean(CACHE_BLOOMS_ON_WRITE, DEFAULT_CACHE_BLOOMS_ON_WRITE);
890   }
891
892   /**
893    * @param value true if we should cache bloomfilter blocks on write
894    * @return this (for chained invocation)
895    */
896   public HColumnDescriptor setCacheBloomsOnWrite(boolean value) {
897     return setValue(CACHE_BLOOMS_ON_WRITE, Boolean.toString(value));
898   }
899
900   /**
901    * @return true if we should evict cached blocks from the blockcache on close
902    */
903   public boolean isEvictBlocksOnClose() {
904     return setAndGetBoolean(EVICT_BLOCKS_ON_CLOSE, DEFAULT_EVICT_BLOCKS_ON_CLOSE);
905   }
906
907   /**
908    * @param value true if we should evict cached blocks from the blockcache on
909    * close
910    * @return this (for chained invocation)
911    */
912   public HColumnDescriptor setEvictBlocksOnClose(boolean value) {
913     return setValue(EVICT_BLOCKS_ON_CLOSE, Boolean.toString(value));
914   }
915
916   /**
917    * @return true if we should prefetch blocks into the blockcache on open
918    */
919   public boolean isPrefetchBlocksOnOpen() {
920     return setAndGetBoolean(PREFETCH_BLOCKS_ON_OPEN, DEFAULT_PREFETCH_BLOCKS_ON_OPEN);
921   }
922
923   /**
924    * @param value true if we should prefetch blocks into the blockcache on open
925    * @return this (for chained invocation)
926    */
927   public HColumnDescriptor setPrefetchBlocksOnOpen(boolean value) {
928     return setValue(PREFETCH_BLOCKS_ON_OPEN, Boolean.toString(value));
929   }
930
931   /**
932    * @see java.lang.Object#toString()
933    */
934   @Override
935   public String toString() {
936     StringBuilder s = new StringBuilder();
937
938     s.append('{');
939     s.append(HConstants.NAME);
940     s.append(" => '");
941     s.append(Bytes.toString(name));
942     s.append("'");
943     s.append(getValues(true));
944     s.append('}');
945     return s.toString();
946   }
947
948   /**
949    * @return Column family descriptor with only the customized attributes.
950    */
951   public String toStringCustomizedValues() {
952     StringBuilder s = new StringBuilder();
953     s.append('{');
954     s.append(HConstants.NAME);
955     s.append(" => '");
956     s.append(Bytes.toString(name));
957     s.append("'");
958     s.append(getValues(false));
959     s.append('}');
960     return s.toString();
961   }
962
963   private StringBuilder getValues(boolean printDefaults) {
964     StringBuilder s = new StringBuilder();
965
966     boolean hasConfigKeys = false;
967
968     // print all reserved keys first
969     for (Map.Entry<Bytes, Bytes> entry : values.entrySet()) {
970       if (!RESERVED_KEYWORDS.contains(entry.getKey())) {
971         hasConfigKeys = true;
972         continue;
973       }
974       String key = Bytes.toString(entry.getKey().get());
975       String value = Bytes.toStringBinary(entry.getValue().get());
976       if (printDefaults
977           || !DEFAULT_VALUES.containsKey(key)
978           || !DEFAULT_VALUES.get(key).equalsIgnoreCase(value)) {
979         s.append(", ");
980         s.append(key);
981         s.append(" => ");
982         s.append('\'').append(PrettyPrinter.format(value, getUnit(key))).append('\'');
983       }
984     }
985
986     // print all non-reserved, advanced config keys as a separate subset
987     if (hasConfigKeys) {
988       s.append(", ");
989       s.append(HConstants.METADATA).append(" => ");
990       s.append('{');
991       boolean printComma = false;
992       for (Bytes k : values.keySet()) {
993         if (RESERVED_KEYWORDS.contains(k)) {
994           continue;
995         }
996         String key = Bytes.toString(k.get());
997         String value = Bytes.toStringBinary(values.get(k).get());
998         if (printComma) {
999           s.append(", ");
1000         }
1001         printComma = true;
1002         s.append('\'').append(key).append('\'');
1003         s.append(" => ");
1004         s.append('\'').append(PrettyPrinter.format(value, getUnit(key))).append('\'');
1005       }
1006       s.append('}');
1007     }
1008
1009     if (!configuration.isEmpty()) {
1010       s.append(", ");
1011       s.append(HConstants.CONFIGURATION).append(" => ");
1012       s.append('{');
1013       boolean printCommaForConfiguration = false;
1014       for (Map.Entry<String, String> e : configuration.entrySet()) {
1015         if (printCommaForConfiguration) s.append(", ");
1016         printCommaForConfiguration = true;
1017         s.append('\'').append(e.getKey()).append('\'');
1018         s.append(" => ");
1019         s.append('\'').append(PrettyPrinter.format(e.getValue(), getUnit(e.getKey()))).append('\'');
1020       }
1021       s.append("}");
1022     }
1023     return s;
1024   }
1025
1026   public static Unit getUnit(String key) {
1027     Unit unit;
1028       /* TTL for now, we can add more as we neeed */
1029     if (key.equals(HColumnDescriptor.TTL)) {
1030       unit = Unit.TIME_INTERVAL;
1031     } else if (key.equals(HColumnDescriptor.MOB_THRESHOLD)) {
1032       unit = Unit.LONG;
1033     } else if (key.equals(HColumnDescriptor.IS_MOB)) {
1034       unit = Unit.BOOLEAN;
1035     } else {
1036       unit = Unit.NONE;
1037     }
1038     return unit;
1039   }
1040
1041   public static Map<String, String> getDefaultValues() {
1042     return Collections.unmodifiableMap(DEFAULT_VALUES);
1043   }
1044
1045   /**
1046    * @see java.lang.Object#equals(java.lang.Object)
1047    */
1048   @Override
1049   public boolean equals(Object obj) {
1050     if (this == obj) {
1051       return true;
1052     }
1053     if (obj == null) {
1054       return false;
1055     }
1056     if (!(obj instanceof HColumnDescriptor)) {
1057       return false;
1058     }
1059     return compareTo((HColumnDescriptor)obj) == 0;
1060   }
1061
1062   /**
1063    * @see java.lang.Object#hashCode()
1064    */
1065   @Override
1066   public int hashCode() {
1067     int result = Bytes.hashCode(this.name);
1068     result ^= (int) COLUMN_DESCRIPTOR_VERSION;
1069     result ^= values.hashCode();
1070     result ^= configuration.hashCode();
1071     return result;
1072   }
1073
1074   // Comparable
1075   @Override
1076   public int compareTo(HColumnDescriptor o) {
1077     int result = Bytes.compareTo(this.name, o.getName());
1078     if (result == 0) {
1079       // punt on comparison for ordering, just calculate difference
1080       result = this.values.hashCode() - o.values.hashCode();
1081       if (result < 0)
1082         result = -1;
1083       else if (result > 0)
1084         result = 1;
1085     }
1086     if (result == 0) {
1087       result = this.configuration.hashCode() - o.configuration.hashCode();
1088       if (result < 0)
1089         result = -1;
1090       else if (result > 0)
1091         result = 1;
1092     }
1093     return result;
1094   }
1095
1096   /**
1097    * @return This instance serialized with pb with pb magic prefix
1098    * @see #parseFrom(byte[])
1099    */
1100   public byte[] toByteArray() {
1101     return ProtobufUtil
1102         .prependPBMagic(ProtobufUtil.convertToColumnFamilySchema(this).toByteArray());
1103   }
1104
1105   /**
1106    * @param bytes A pb serialized {@link HColumnDescriptor} instance with pb magic prefix
1107    * @return An instance of {@link HColumnDescriptor} made from <code>bytes</code>
1108    * @throws DeserializationException
1109    * @see #toByteArray()
1110    */
1111   public static HColumnDescriptor parseFrom(final byte [] bytes) throws DeserializationException {
1112     if (!ProtobufUtil.isPBMagicPrefix(bytes)) throw new DeserializationException("No magic");
1113     int pblen = ProtobufUtil.lengthOfPBMagic();
1114     ColumnFamilySchema.Builder builder = ColumnFamilySchema.newBuilder();
1115     ColumnFamilySchema cfs = null;
1116     try {
1117       ProtobufUtil.mergeFrom(builder, bytes, pblen, bytes.length - pblen);
1118       cfs = builder.build();
1119     } catch (IOException e) {
1120       throw new DeserializationException(e);
1121     }
1122     return ProtobufUtil.convertToHColumnDesc(cfs);
1123   }
1124
1125   /**
1126    * Getter for accessing the configuration value by key.
1127    */
1128   public String getConfigurationValue(String key) {
1129     return configuration.get(key);
1130   }
1131
1132   /**
1133    * Getter for fetching an unmodifiable {@link #configuration} map.
1134    */
1135   public Map<String, String> getConfiguration() {
1136     // shallow pointer copy
1137     return Collections.unmodifiableMap(configuration);
1138   }
1139
1140   /**
1141    * Setter for storing a configuration setting in {@link #configuration} map.
1142    * @param key Config key. Same as XML config key e.g. hbase.something.or.other.
1143    * @param value String value. If null, removes the configuration.
1144    */
1145   public HColumnDescriptor setConfiguration(String key, String value) {
1146     if (value == null) {
1147       removeConfiguration(key);
1148     } else {
1149       configuration.put(key, value);
1150     }
1151     return this;
1152   }
1153
1154   /**
1155    * Remove a configuration setting represented by the key from the {@link #configuration} map.
1156    */
1157   public void removeConfiguration(final String key) {
1158     configuration.remove(key);
1159   }
1160
1161   /**
1162    * Return the encryption algorithm in use by this family
1163    */
1164   public String getEncryptionType() {
1165     return getValue(ENCRYPTION);
1166   }
1167
1168   /**
1169    * Set the encryption algorithm for use with this family
1170    * @param algorithm
1171    */
1172   public HColumnDescriptor setEncryptionType(String algorithm) {
1173     setValue(ENCRYPTION, algorithm);
1174     return this;
1175   }
1176
1177   /** Return the raw crypto key attribute for the family, or null if not set  */
1178   public byte[] getEncryptionKey() {
1179     return getValue(Bytes.toBytes(ENCRYPTION_KEY));
1180   }
1181
1182   /** Set the raw crypto key attribute for the family */
1183   public HColumnDescriptor setEncryptionKey(byte[] keyBytes) {
1184     setValue(Bytes.toBytes(ENCRYPTION_KEY), keyBytes);
1185     return this;
1186   }
1187
1188   /**
1189    * Gets the mob threshold of the family.
1190    * If the size of a cell value is larger than this threshold, it's regarded as a mob.
1191    * The default threshold is 1024*100(100K)B.
1192    * @return The mob threshold.
1193    */
1194   public long getMobThreshold() {
1195     byte[] threshold = getValue(MOB_THRESHOLD_BYTES);
1196     return threshold != null && threshold.length == Bytes.SIZEOF_LONG ? Bytes.toLong(threshold)
1197         : DEFAULT_MOB_THRESHOLD;
1198   }
1199
1200   /**
1201    * Sets the mob threshold of the family.
1202    * @param threshold The mob threshold.
1203    * @return this (for chained invocation)
1204    */
1205   public HColumnDescriptor setMobThreshold(long threshold) {
1206     setValue(MOB_THRESHOLD_BYTES, Bytes.toBytes(threshold));
1207     return this;
1208   }
1209
1210   /**
1211    * Gets whether the mob is enabled for the family.
1212    * @return True if the mob is enabled for the family.
1213    */
1214   public boolean isMobEnabled() {
1215     byte[] isMobEnabled = getValue(IS_MOB_BYTES);
1216     return isMobEnabled != null && isMobEnabled.length == Bytes.SIZEOF_BOOLEAN
1217         && Bytes.toBoolean(isMobEnabled);
1218   }
1219
1220   /**
1221    * Enables the mob for the family.
1222    * @param isMobEnabled Whether to enable the mob for the family.
1223    * @return this (for chained invocation)
1224    */
1225   public HColumnDescriptor setMobEnabled(boolean isMobEnabled) {
1226     setValue(IS_MOB_BYTES, Bytes.toBytes(isMobEnabled));
1227     return this;
1228   }
1229
1230   /**
1231    * @return replication factor set for this CF or {@link #DEFAULT_DFS_REPLICATION} if not set.
1232    *         <p>
1233    *         {@link #DEFAULT_DFS_REPLICATION} value indicates that user has explicitly not set any
1234    *         block replication factor for this CF, hence use the default replication factor set in
1235    *         the file system.
1236    */
1237   public short getDFSReplication() {
1238     String rf = getValue(DFS_REPLICATION);
1239     return rf == null ? DEFAULT_DFS_REPLICATION : Short.valueOf(rf);
1240   }
1241
1242   /**
1243    * Set the replication factor to hfile(s) belonging to this family
1244    * @param replication number of replicas the blocks(s) belonging to this CF should have, or
1245    *          {@link #DEFAULT_DFS_REPLICATION} for the default replication factor set in the
1246    *          filesystem
1247    * @return this (for chained invocation)
1248    */
1249   public HColumnDescriptor setDFSReplication(short replication) {
1250     if (replication < 1 && replication != DEFAULT_DFS_REPLICATION) {
1251       throw new IllegalArgumentException(
1252           "DFS replication factor cannot be less than 1 if explictly set.");
1253     }
1254     setValue(DFS_REPLICATION, Short.toString(replication));
1255     return this;
1256   }
1257 }