View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase;
20  
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.exceptions.HBaseException;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ColumnFamilySchema;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.PrettyPrinter;
import org.apache.hadoop.hbase.util.PrettyPrinter.Unit;
42  
43
44  /**
45   * An HColumnDescriptor contains information about a column family such as the
46   * number of versions, compression settings, etc.
47   *
48   * It is used as input when creating a table or adding a column.
49   */
50  @InterfaceAudience.Public
51  @InterfaceStability.Evolving
52  public class HColumnDescriptor implements Comparable<HColumnDescriptor> {
  // For future backward compatibility

  // Version  3 was when column names become byte arrays and when we picked up
  // Time-to-live feature.  Version 4 was when we moved to byte arrays, HBASE-82.
  // Version  5 was when bloom filter descriptors were removed.
  // Version  6 adds metadata as a map where keys and values are byte[].
  // Version  7 -- add new compression and hfile blocksize to HColumnDescriptor (HBASE-1217)
  // Version  8 -- reintroduction of bloom filters, changed from boolean to enum
  // Version  9 -- add data block encoding
  // Version 10 -- change metadata to standard type.
  // Version 11 -- add column family level configuration.
  private static final byte COLUMN_DESCRIPTOR_VERSION = (byte) 11;

  // Attribute key flagging this family's memstore as compacting; kept private
  // (not yet public API), exposed only through is/setInMemoryCompaction.
  private static final String IN_MEMORY_COMPACTION = "IN_MEMORY_COMPACTION";

  // These constants are used as FileInfo keys
  public static final String COMPRESSION = "COMPRESSION";
  public static final String COMPRESSION_COMPACT = "COMPRESSION_COMPACT";
  public static final String ENCODE_ON_DISK = // To be removed, it is not used anymore
      "ENCODE_ON_DISK";
  public static final String DATA_BLOCK_ENCODING =
      "DATA_BLOCK_ENCODING";
  /**
   * Key for the BLOCKCACHE attribute.
   * A more exact name would be CACHE_DATA_ON_READ because this flag sets whether or not we
   * cache DATA blocks.  We always cache INDEX and BLOOM blocks; caching these blocks cannot be
   * disabled.
   */
  public static final String BLOCKCACHE = "BLOCKCACHE";
  public static final String CACHE_DATA_ON_WRITE = "CACHE_DATA_ON_WRITE";
  public static final String CACHE_INDEX_ON_WRITE = "CACHE_INDEX_ON_WRITE";
  public static final String CACHE_BLOOMS_ON_WRITE = "CACHE_BLOOMS_ON_WRITE";
  public static final String EVICT_BLOCKS_ON_CLOSE = "EVICT_BLOCKS_ON_CLOSE";
  /**
   * Key for cache data into L1 if cache is set up with more than one tier.
   * To set in the shell, do something like this:
   * <code>hbase(main):003:0&gt; create 't',
   *    {NAME =&gt; 't', CONFIGURATION =&gt; {CACHE_DATA_IN_L1 =&gt; 'true'}}</code>
   */
  public static final String CACHE_DATA_IN_L1 = "CACHE_DATA_IN_L1";

  /**
   * Key for the PREFETCH_BLOCKS_ON_OPEN attribute.
   * If set, all INDEX, BLOOM, and DATA blocks of HFiles belonging to this
   * family will be loaded into the cache as soon as the file is opened. These
   * loads will not count as cache misses.
   */
  public static final String PREFETCH_BLOCKS_ON_OPEN = "PREFETCH_BLOCKS_ON_OPEN";

  /**
   * Size of storefile/hfile 'blocks'.  Default is {@link #DEFAULT_BLOCKSIZE}.
   * Use smaller block sizes for faster random-access at expense of larger
   * indices (more memory consumption).
   */
  public static final String BLOCKSIZE = "BLOCKSIZE";

  public static final String LENGTH = "LENGTH";
  public static final String TTL = "TTL";
  public static final String BLOOMFILTER = "BLOOMFILTER";
  public static final String FOREVER = "FOREVER";
  public static final String REPLICATION_SCOPE = "REPLICATION_SCOPE";
  public static final byte[] REPLICATION_SCOPE_BYTES = Bytes.toBytes(REPLICATION_SCOPE);
  public static final String MIN_VERSIONS = "MIN_VERSIONS";
  /**
   * Retain all cells across flushes and compactions even if they fall behind
   * a delete tombstone. To see all retained cells, do a 'raw' scan; see
   * Scan#setRaw or pass RAW =&gt; true attribute in the shell.
   */
  public static final String KEEP_DELETED_CELLS = "KEEP_DELETED_CELLS";
  public static final String COMPRESS_TAGS = "COMPRESS_TAGS";

  public static final String ENCRYPTION = "ENCRYPTION";
  public static final String ENCRYPTION_KEY = "ENCRYPTION_KEY";

  public static final String IS_MOB = "IS_MOB";
  public static final byte[] IS_MOB_BYTES = Bytes.toBytes(IS_MOB);
  public static final String MOB_THRESHOLD = "MOB_THRESHOLD";
  public static final byte[] MOB_THRESHOLD_BYTES = Bytes.toBytes(MOB_THRESHOLD);
  public static final long DEFAULT_MOB_THRESHOLD = 100 * 1024; // 100k

  public static final String DFS_REPLICATION = "DFS_REPLICATION";
  public static final short DEFAULT_DFS_REPLICATION = 0;

  /**
   * Default compression type.
   */
  public static final String DEFAULT_COMPRESSION =
    Compression.Algorithm.NONE.getName();

  /**
   * Default value of the flag that enables data block encoding on disk, as
   * opposed to encoding in cache only. We encode blocks everywhere by default,
   * as long as {@link #DATA_BLOCK_ENCODING} is not NONE.
   */
  public static final boolean DEFAULT_ENCODE_ON_DISK = true;

  /** Default data block encoding algorithm. */
  public static final String DEFAULT_DATA_BLOCK_ENCODING =
      DataBlockEncoding.NONE.toString();

  /**
   * Default number of versions of a record to keep.
   * NOTE(review): this reads "hbase.column.max.version" from a freshly created
   * Configuration at class-load time, so the value is fixed for the lifetime
   * of the JVM regardless of later configuration changes.
   */
  public static final int DEFAULT_VERSIONS = HBaseConfiguration.create().getInt(
      "hbase.column.max.version", 1);

  /**
   * Default is not to keep a minimum of versions.
   */
  public static final int DEFAULT_MIN_VERSIONS = 0;

  /*
   * Cache here the HCD value.
   * Question: its OK to cache since when we're reenable, we create a new HCD?
   * (volatile so readers see the reset performed by setBlocksize.)
   */
  private volatile Integer blocksize = null;

  /**
   * Default setting for whether to try and serve this column family from memory or not.
   */
  public static final boolean DEFAULT_IN_MEMORY = false;

  /**
   * Default setting for whether to set the memstore of this column family as compacting or not.
   */
  public static final boolean DEFAULT_IN_MEMORY_COMPACTION = false;

  /**
   * Default setting for preventing deleted cells from being collected immediately.
   */
  public static final KeepDeletedCells DEFAULT_KEEP_DELETED = KeepDeletedCells.FALSE;

  /**
   * Default setting for whether to use a block cache or not.
   */
  public static final boolean DEFAULT_BLOCKCACHE = true;

  /**
   * Default setting for whether to cache data blocks on write if block caching
   * is enabled.
   */
  public static final boolean DEFAULT_CACHE_DATA_ON_WRITE = false;

  /**
   * Default setting for whether to cache data blocks in L1 tier.  Only makes sense if more than
   * one tier in operations: i.e. if we have an L1 and a L2.  This will be the cases if we are
   * using BucketCache.
   */
  public static final boolean DEFAULT_CACHE_DATA_IN_L1 = false;

  /**
   * Default setting for whether to cache index blocks on write if block
   * caching is enabled.
   */
  public static final boolean DEFAULT_CACHE_INDEX_ON_WRITE = false;

  /**
   * Default size of blocks in files stored to the filesystem (hfiles).
   */
  public static final int DEFAULT_BLOCKSIZE = HConstants.DEFAULT_BLOCKSIZE;

  /**
   * Default setting for whether or not to use bloomfilters.
   */
  public static final String DEFAULT_BLOOMFILTER = BloomType.ROW.toString();

  /**
   * Default setting for whether to cache bloom filter blocks on write if block
   * caching is enabled.
   */
  public static final boolean DEFAULT_CACHE_BLOOMS_ON_WRITE = false;

  /**
   * Default time to live of cell contents.
   */
  public static final int DEFAULT_TTL = HConstants.FOREVER;

  /**
   * Default scope.
   */
  public static final int DEFAULT_REPLICATION_SCOPE = HConstants.REPLICATION_SCOPE_LOCAL;

  /**
   * Default setting for whether to evict cached blocks from the blockcache on
   * close.
   */
  public static final boolean DEFAULT_EVICT_BLOCKS_ON_CLOSE = false;

  /**
   * Default compress tags along with any type of DataBlockEncoding.
   */
  public static final boolean DEFAULT_COMPRESS_TAGS = true;

  /**
   * Default setting for whether to prefetch blocks into the blockcache on open.
   */
  public static final boolean DEFAULT_PREFETCH_BLOCKS_ON_OPEN = false;

  // Attribute-key -> default-value map used by shell/pretty-printing code to
  // suppress values that are still at their default.
  private final static Map<String, String> DEFAULT_VALUES
    = new HashMap<String, String>();
  // Attribute keys callers may not use for arbitrary user metadata.
  private final static Set<Bytes> RESERVED_KEYWORDS
      = new HashSet<Bytes>();

  static {
    DEFAULT_VALUES.put(BLOOMFILTER, DEFAULT_BLOOMFILTER);
    DEFAULT_VALUES.put(REPLICATION_SCOPE, String.valueOf(DEFAULT_REPLICATION_SCOPE));
    DEFAULT_VALUES.put(HConstants.VERSIONS, String.valueOf(DEFAULT_VERSIONS));
    DEFAULT_VALUES.put(MIN_VERSIONS, String.valueOf(DEFAULT_MIN_VERSIONS));
    DEFAULT_VALUES.put(COMPRESSION, DEFAULT_COMPRESSION);
    DEFAULT_VALUES.put(TTL, String.valueOf(DEFAULT_TTL));
    DEFAULT_VALUES.put(BLOCKSIZE, String.valueOf(DEFAULT_BLOCKSIZE));
    DEFAULT_VALUES.put(HConstants.IN_MEMORY, String.valueOf(DEFAULT_IN_MEMORY));
    DEFAULT_VALUES.put(IN_MEMORY_COMPACTION, String.valueOf(DEFAULT_IN_MEMORY_COMPACTION));
    DEFAULT_VALUES.put(BLOCKCACHE, String.valueOf(DEFAULT_BLOCKCACHE));
    DEFAULT_VALUES.put(KEEP_DELETED_CELLS, String.valueOf(DEFAULT_KEEP_DELETED));
    DEFAULT_VALUES.put(DATA_BLOCK_ENCODING, String.valueOf(DEFAULT_DATA_BLOCK_ENCODING));
    DEFAULT_VALUES.put(CACHE_DATA_ON_WRITE, String.valueOf(DEFAULT_CACHE_DATA_ON_WRITE));
    DEFAULT_VALUES.put(CACHE_DATA_IN_L1, String.valueOf(DEFAULT_CACHE_DATA_IN_L1));
    DEFAULT_VALUES.put(CACHE_INDEX_ON_WRITE, String.valueOf(DEFAULT_CACHE_INDEX_ON_WRITE));
    DEFAULT_VALUES.put(CACHE_BLOOMS_ON_WRITE, String.valueOf(DEFAULT_CACHE_BLOOMS_ON_WRITE));
    DEFAULT_VALUES.put(EVICT_BLOCKS_ON_CLOSE, String.valueOf(DEFAULT_EVICT_BLOCKS_ON_CLOSE));
    DEFAULT_VALUES.put(PREFETCH_BLOCKS_ON_OPEN, String.valueOf(DEFAULT_PREFETCH_BLOCKS_ON_OPEN));
    // Every key with a default is reserved, plus a few keys that have no default.
    for (String s : DEFAULT_VALUES.keySet()) {
      RESERVED_KEYWORDS.add(new Bytes(Bytes.toBytes(s)));
    }
    RESERVED_KEYWORDS.add(new Bytes(Bytes.toBytes(ENCRYPTION)));
    RESERVED_KEYWORDS.add(new Bytes(Bytes.toBytes(ENCRYPTION_KEY)));
    RESERVED_KEYWORDS.add(new Bytes(IS_MOB_BYTES));
    RESERVED_KEYWORDS.add(new Bytes(MOB_THRESHOLD_BYTES));
  }

  // Sentinel meaning "cachedMaxVersions has not been parsed yet".
  private static final int UNINITIALIZED = -1;

  // Column family name
  private byte [] name;

  // Column metadata
  private final Map<Bytes, Bytes> values =
      new HashMap<Bytes, Bytes>();

  /**
   * A map which holds the configuration specific to the column family.
   * The keys of the map have the same names as config keys and override the defaults with
   * cf-specific settings. Example usage may be for compactions, etc.
   */
  private final Map<String, String> configuration = new HashMap<String, String>();

  /*
   * Cache the max versions rather than calculate it every time.
   * Invalidated by setValue(byte[], byte[]) whenever the VERSIONS key is written.
   */
  private int cachedMaxVersions = UNINITIALIZED;
304
  /**
   * Construct a column descriptor specifying only the family name
   * The other attributes are defaulted.
   *
   * @param familyName Column family name. Must be 'printable' -- digit or
   * letter -- and may not contain a <code>:</code>
   * @throws IllegalArgumentException if the family name is not legal
   */
  public HColumnDescriptor(final String familyName) {
    // Delegate to the byte[] constructor, which validates the name.
    this(Bytes.toBytes(familyName));
  }

  /**
   * Construct a column descriptor specifying only the family name
   * The other attributes are defaulted.
   *
   * @param familyName Column family name. Must be 'printable' -- digit or
   * letter -- and may not contain a <code>:</code>
   * @throws IllegalArgumentException if the family name is not legal
   */
  public HColumnDescriptor(final byte [] familyName) {
    // Throws on an illegal name; null is allowed through (isLegalFamilyName
    // permits null for the deserialization path).
    isLegalFamilyName(familyName);
    this.name = familyName;

    // Seed every known attribute with its default so the getters always find
    // a stored value for these keys.
    setMaxVersions(DEFAULT_VERSIONS);
    setMinVersions(DEFAULT_MIN_VERSIONS);
    setKeepDeletedCells(DEFAULT_KEEP_DELETED);
    setInMemory(DEFAULT_IN_MEMORY);
    setInMemoryCompaction(DEFAULT_IN_MEMORY_COMPACTION);
    setBlockCacheEnabled(DEFAULT_BLOCKCACHE);
    setTimeToLive(DEFAULT_TTL);
    setCompressionType(Compression.Algorithm.valueOf(DEFAULT_COMPRESSION.toUpperCase()));
    setDataBlockEncoding(DataBlockEncoding.valueOf(DEFAULT_DATA_BLOCK_ENCODING.toUpperCase()));
    setBloomFilterType(BloomType.valueOf(DEFAULT_BLOOMFILTER.toUpperCase()));
    setBlocksize(DEFAULT_BLOCKSIZE);
    setScope(DEFAULT_REPLICATION_SCOPE);
  }
340
341   /**
342    * Constructor.
343    * Makes a deep copy of the supplied descriptor.
344    * Can make a modifiable descriptor from an UnmodifyableHColumnDescriptor.
345    * @param desc The descriptor.
346    */
347   public HColumnDescriptor(HColumnDescriptor desc) {
348     super();
349     this.name = desc.name.clone();
350     for (Map.Entry<Bytes, Bytes> e :
351         desc.values.entrySet()) {
352       this.values.put(e.getKey(), e.getValue());
353     }
354     for (Map.Entry<String, String> e : desc.configuration.entrySet()) {
355       this.configuration.put(e.getKey(), e.getValue());
356     }
357     setMaxVersions(desc.getMaxVersions());
358   }
359
360   /**
361    * @param b Family name.
362    * @return <code>b</code>
363    * @throws IllegalArgumentException If not null and not a legitimate family
364    * name: i.e. 'printable' and ends in a ':' (Null passes are allowed because
365    * <code>b</code> can be null when deserializing).  Cannot start with a '.'
366    * either. Also Family can not be an empty value or equal "recovered.edits".
367    */
368   public static byte [] isLegalFamilyName(final byte [] b) {
369     if (b == null) {
370       return b;
371     }
372     Preconditions.checkArgument(b.length != 0, "Family name can not be empty");
373     if (b[0] == '.') {
374       throw new IllegalArgumentException("Family names cannot start with a " +
375         "period: " + Bytes.toString(b));
376     }
377     for (int i = 0; i < b.length; i++) {
378       if (Character.isISOControl(b[i]) || b[i] == ':' || b[i] == '\\' || b[i] == '/') {
379         throw new IllegalArgumentException("Illegal character <" + b[i] +
380           ">. Family names cannot contain control characters or colons: " +
381           Bytes.toString(b));
382       }
383     }
384     byte[] recoveredEdit = Bytes.toBytes(HConstants.RECOVERED_EDITS_DIR);
385     if (Bytes.equals(recoveredEdit, b)) {
386       throw new IllegalArgumentException("Family name cannot be: " +
387           HConstants.RECOVERED_EDITS_DIR);
388     }
389     return b;
390   }
391
  /**
   * @return Name of this column family. Note: this is the internal byte array,
   * not a copy -- callers must not mutate it.
   */
  public byte [] getName() {
    return name;
  }

  /**
   * @return Name of this column family rendered as a String
   */
  public String getNameAsString() {
    return Bytes.toString(this.name);
  }
405
406   /**
407    * @param key The key.
408    * @return The value.
409    */
410   public byte[] getValue(byte[] key) {
411     Bytes ibw = values.get(new Bytes(key));
412     if (ibw == null)
413       return null;
414     return ibw.get();
415   }
416
417   /**
418    * @param key The key.
419    * @return The value as a string.
420    */
421   public String getValue(String key) {
422     byte[] value = getValue(Bytes.toBytes(key));
423     if (value == null)
424       return null;
425     return Bytes.toString(value);
426   }
427
428   /**
429    * @return All values.
430    */
431   public Map<Bytes, Bytes> getValues() {
432     // shallow pointer copy
433     return Collections.unmodifiableMap(values);
434   }
435
436   /**
437    * @param key The key.
438    * @param value The value.
439    * @return this (for chained invocation)
440    */
441   public HColumnDescriptor setValue(byte[] key, byte[] value) {
442     if (Bytes.compareTo(Bytes.toBytes(HConstants.VERSIONS), key) == 0) {
443       cachedMaxVersions = UNINITIALIZED;
444     }
445     values.put(new Bytes(key),
446         new Bytes(value));
447     return this;
448   }
449
450   /**
451    * @param key Key whose key and value we're to remove from HCD parameters.
452    */
453   public void remove(final byte [] key) {
454     values.remove(new Bytes(key));
455   }
456
457   /**
458    * @param key The key.
459    * @param value The value.
460    * @return this (for chained invocation)
461    */
462   public HColumnDescriptor setValue(String key, String value) {
463     if (value == null) {
464       remove(Bytes.toBytes(key));
465     } else {
466       setValue(Bytes.toBytes(key), Bytes.toBytes(value));
467     }
468     return this;
469   }
470
  /**
   * @return compression type being used for the column family
   * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0
   *             (<a href="https://issues.apache.org/jira/browse/HBASE-13655">HBASE-13655</a>).
   *             Use {@link #getCompressionType()}.
   */
  @Deprecated
  public Compression.Algorithm getCompression() {
    // Thin alias kept only for source compatibility.
    return getCompressionType();
  }

  /**
   *  @return compression type being used for the column family for major compaction
   *  @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0
   *             (<a href="https://issues.apache.org/jira/browse/HBASE-13655">HBASE-13655</a>).
   *             Use {@link #getCompactionCompressionType()}.
   */
  @Deprecated
  public Compression.Algorithm getCompactionCompression() {
    // Thin alias kept only for source compatibility.
    return getCompactionCompressionType();
  }
492
  /**
   * @return maximum number of versions, parsed from the VERSIONS attribute and
   * cached on first use
   */
  public int getMaxVersions() {
    if (this.cachedMaxVersions == UNINITIALIZED) {
      // NOTE(review): assumes the VERSIONS attribute is present (the
      // constructors always seed it); parseInt would throw if it were removed.
      String v = getValue(HConstants.VERSIONS);
      this.cachedMaxVersions = Integer.parseInt(v);
    }
    return this.cachedMaxVersions;
  }
501
502   /**
503    * @param maxVersions maximum number of versions
504    * @return this (for chained invocation)
505    */
506   public HColumnDescriptor setMaxVersions(int maxVersions) {
507     if (maxVersions <= 0) {
508       // TODO: Allow maxVersion of 0 to be the way you say "Keep all versions".
509       // Until there is support, consider 0 or < 0 -- a configuration error.
510       throw new IllegalArgumentException("Maximum versions must be positive");
511     }
512     if (maxVersions < this.getMinVersions()) {
513         throw new IllegalArgumentException("Set MaxVersion to " + maxVersions
514             + " while minVersion is " + this.getMinVersions()
515             + ". Maximum versions must be >= minimum versions ");
516     }
517     setValue(HConstants.VERSIONS, Integer.toString(maxVersions));
518     cachedMaxVersions = maxVersions;
519     return this;
520   }
521
522   /**
523    * Set minimum and maximum versions to keep
524    *
525    * @param minVersions minimal number of versions
526    * @param maxVersions maximum number of versions
527    * @return this (for chained invocation)
528    */
529   public HColumnDescriptor setVersions(int minVersions, int maxVersions) {
530     if (minVersions <= 0) {
531       // TODO: Allow minVersion and maxVersion of 0 to be the way you say "Keep all versions".
532       // Until there is support, consider 0 or < 0 -- a configuration error.
533       throw new IllegalArgumentException("Minimum versions must be positive");
534     }
535 
536     if (maxVersions < minVersions) {
537       throw new IllegalArgumentException("Unable to set MaxVersion to " + maxVersions
538         + " and set MinVersion to " + minVersions
539         + ", as maximum versions must be >= minimum versions.");
540     }
541     setMinVersions(minVersions);
542     setMaxVersions(maxVersions);
543     return this;
544   }
545
546   /**
547    * @return The storefile/hfile blocksize for this column family.
548    */
549   public synchronized int getBlocksize() {
550     if (this.blocksize == null) {
551       String value = getValue(BLOCKSIZE);
552       this.blocksize = (value != null)?
553         Integer.decode(value): Integer.valueOf(DEFAULT_BLOCKSIZE);
554     }
555     return this.blocksize.intValue();
556
557   }
558
559   /**
560    * @param s Blocksize to use when writing out storefiles/hfiles on this
561    * column family.
562    * @return this (for chained invocation)
563    */
564   public HColumnDescriptor setBlocksize(int s) {
565     setValue(BLOCKSIZE, Integer.toString(s));
566     this.blocksize = null;
567     return this;
568   }
569
570   /**
571    * @return Compression type setting.
572    */
573   public Compression.Algorithm getCompressionType() {
574     String n = getValue(COMPRESSION);
575     if (n == null) {
576       return Compression.Algorithm.NONE;
577     }
578     return Compression.Algorithm.valueOf(n.toUpperCase());
579   }
580
581   /**
582    * Compression types supported in hbase.
583    * LZO is not bundled as part of the hbase distribution.
584    * See <a href="http://wiki.apache.org/hadoop/UsingLzoCompression">LZO Compression</a>
585    * for how to enable it.
586    * @param type Compression type setting.
587    * @return this (for chained invocation)
588    */
589   public HColumnDescriptor setCompressionType(Compression.Algorithm type) {
590     return setValue(COMPRESSION, type.getName().toUpperCase());
591   }
592
593   /**
594    * @return the data block encoding algorithm used in block cache and
595    *         optionally on disk
596    */
597   public DataBlockEncoding getDataBlockEncoding() {
598     String type = getValue(DATA_BLOCK_ENCODING);
599     if (type == null) {
600       type = DEFAULT_DATA_BLOCK_ENCODING;
601     }
602     return DataBlockEncoding.valueOf(type);
603   }
604
605   /**
606    * Set data block encoding algorithm used in block cache.
607    * @param type What kind of data block encoding will be used.
608    * @return this (for chained invocation)
609    */
610   public HColumnDescriptor setDataBlockEncoding(DataBlockEncoding type) {
611     String name;
612     if (type != null) {
613       name = type.toString();
614     } else {
615       name = DataBlockEncoding.NONE.toString();
616     }
617     return setValue(DATA_BLOCK_ENCODING, name);
618   }
619
620   /**
621    * Set whether the tags should be compressed along with DataBlockEncoding. When no
622    * DataBlockEncoding is been used, this is having no effect.
623    *
624    * @param compressTags
625    * @return this (for chained invocation)
626    */
627   public HColumnDescriptor setCompressTags(boolean compressTags) {
628     return setValue(COMPRESS_TAGS, String.valueOf(compressTags));
629   }
630
631   /**
632    * @return Whether KV tags should be compressed along with DataBlockEncoding. When no
633    *         DataBlockEncoding is been used, this is having no effect.
634    */
635   public boolean isCompressTags() {
636     String compressTagsStr = getValue(COMPRESS_TAGS);
637     boolean compressTags = DEFAULT_COMPRESS_TAGS;
638     if (compressTagsStr != null) {
639       compressTags = Boolean.parseBoolean(compressTagsStr);
640     }
641     return compressTags;
642   }
643
644   /**
645    * @return Compression type setting.
646    */
647   public Compression.Algorithm getCompactionCompressionType() {
648     String n = getValue(COMPRESSION_COMPACT);
649     if (n == null) {
650       return getCompressionType();
651     }
652     return Compression.Algorithm.valueOf(n.toUpperCase());
653   }
654
655   /**
656    * Compression types supported in hbase.
657    * LZO is not bundled as part of the hbase distribution.
658    * See <a href="http://wiki.apache.org/hadoop/UsingLzoCompression">LZO Compression</a>
659    * for how to enable it.
660    * @param type Compression type setting.
661    * @return this (for chained invocation)
662    */
663   public HColumnDescriptor setCompactionCompressionType(
664       Compression.Algorithm type) {
665     return setValue(COMPRESSION_COMPACT, type.getName().toUpperCase());
666   }
667
668   /**
669    * @return True if we are to favor keeping all values for this column family in the
670    * HRegionServer cache.
671    */
672   public boolean isInMemory() {
673     String value = getValue(HConstants.IN_MEMORY);
674     if (value != null) {
675       return Boolean.parseBoolean(value);
676     }
677     return DEFAULT_IN_MEMORY;
678   }
679
680   /**
681    * @param inMemory True if we are to favor keeping all values for this column family in the
682    * HRegionServer cache
683    * @return this (for chained invocation)
684    */
685   public HColumnDescriptor setInMemory(boolean inMemory) {
686     return setValue(HConstants.IN_MEMORY, Boolean.toString(inMemory));
687   }
688
689   /**
690    * @return True if we prefer to keep the in-memory data compacted
691    *          for this column family
692    */
693   public boolean isInMemoryCompaction() {
694     String value = getValue(IN_MEMORY_COMPACTION);
695     if (value != null) {
696       return Boolean.parseBoolean(value);
697     }
698     return DEFAULT_IN_MEMORY_COMPACTION;
699   }
700
701   /**
702    * @param inMemoryCompaction True if we prefer to keep the in-memory data compacted
703    *                  for this column family
704    * @return this (for chained invocation)
705    */
706   public HColumnDescriptor setInMemoryCompaction(boolean inMemoryCompaction) {
707     return setValue(IN_MEMORY_COMPACTION, Boolean.toString(inMemoryCompaction));
708   }
709
710   public KeepDeletedCells getKeepDeletedCells() {
711     String value = getValue(KEEP_DELETED_CELLS);
712     if (value != null) {
713       // toUpperCase for backwards compatibility
714       return KeepDeletedCells.valueOf(value.toUpperCase());
715     }
716     return DEFAULT_KEEP_DELETED;
717   }
718
719   /**
720    * @param keepDeletedCells True if deleted rows should not be collected
721    * immediately.
722    * @return this (for chained invocation)
723    */
724   public HColumnDescriptor setKeepDeletedCells(KeepDeletedCells keepDeletedCells) {
725     return setValue(KEEP_DELETED_CELLS, keepDeletedCells.toString());
726   }
727
  /**
   * @return Time-to-live of cell contents, in seconds; {@link #DEFAULT_TTL}
   * (forever) when the attribute is absent.
   */
  public int getTimeToLive() {
    String value = getValue(TTL);
    return (value != null)? Integer.parseInt(value) : DEFAULT_TTL;
  }

  /**
   * @param timeToLive Time-to-live of cell contents, in seconds.
   * @return this (for chained invocation)
   */
  public HColumnDescriptor setTimeToLive(int timeToLive) {
    return setValue(TTL, Integer.toString(timeToLive));
  }

  /**
   * @param timeToLive Time to live of cell contents, in human readable format
   *                   @see org.apache.hadoop.hbase.util.PrettyPrinter#format(String, Unit)
   * @return this (for chained invocation)
   * @throws HBaseException if the human readable value cannot be parsed
   */
  public HColumnDescriptor setTimeToLive(String timeToLive) throws HBaseException {
    return setValue(TTL, PrettyPrinter.valueOf(timeToLive, Unit.TIME_INTERVAL));
  }
752
753   /**
754    * @return The minimum number of versions to keep.
755    */
756   public int getMinVersions() {
757     String value = getValue(MIN_VERSIONS);
758     return (value != null)? Integer.parseInt(value) : 0;
759   }
760
761   /**
762    * @param minVersions The minimum number of versions to keep.
763    * (used when timeToLive is set)
764    * @return this (for chained invocation)
765    */
766   public HColumnDescriptor setMinVersions(int minVersions) {
767     return setValue(MIN_VERSIONS, Integer.toString(minVersions));
768   }
769
770   /**
771    * @return True if hfile DATA type blocks should be cached (You cannot disable caching of INDEX
772    * and BLOOM type blocks).
773    */
774   public boolean isBlockCacheEnabled() {
775     String value = getValue(BLOCKCACHE);
776     if (value != null) {
777       return Boolean.parseBoolean(value);
778     }
779     return DEFAULT_BLOCKCACHE;
780   }
781
782   /**
783    * @param blockCacheEnabled True if hfile DATA type blocks should be cached (We always cache
784    * INDEX and BLOOM blocks; you cannot turn this off).
785    * @return this (for chained invocation)
786    */
787   public HColumnDescriptor setBlockCacheEnabled(boolean blockCacheEnabled) {
788     return setValue(BLOCKCACHE, Boolean.toString(blockCacheEnabled));
789   }
790
791   /**
792    * @return bloom filter type used for new StoreFiles in ColumnFamily
793    */
794   public BloomType getBloomFilterType() {
795     String n = getValue(BLOOMFILTER);
796     if (n == null) {
797       n = DEFAULT_BLOOMFILTER;
798     }
799     return BloomType.valueOf(n.toUpperCase());
800   }
801
802   /**
803    * @param bt bloom filter type
804    * @return this (for chained invocation)
805    */
806   public HColumnDescriptor setBloomFilterType(final BloomType bt) {
807     return setValue(BLOOMFILTER, bt.toString());
808   }
809
  /**
   * @return the replication scope tag; {@link #DEFAULT_REPLICATION_SCOPE}
   * when the attribute is absent
   */
  public int getScope() {
    byte[] value = getValue(REPLICATION_SCOPE_BYTES);
    if (value != null) {
      return Integer.parseInt(Bytes.toString(value));
    }
    return DEFAULT_REPLICATION_SCOPE;
  }

  /**
   * @param scope the scope tag
   * @return this (for chained invocation)
   */
  public HColumnDescriptor setScope(int scope) {
    return setValue(REPLICATION_SCOPE, Integer.toString(scope));
  }
828
829   /**
830    * @return true if we should cache data blocks on write
831    */
832   public boolean isCacheDataOnWrite() {
833     return setAndGetBoolean(CACHE_DATA_ON_WRITE, DEFAULT_CACHE_DATA_ON_WRITE);
834   }
835
836   /**
837    * @param value true if we should cache data blocks on write
838    * @return this (for chained invocation)
839    */
840   public HColumnDescriptor setCacheDataOnWrite(boolean value) {
841     return setValue(CACHE_DATA_ON_WRITE, Boolean.toString(value));
842   }
843
844   /**
845    * @return true if we should cache data blocks in the L1 cache (if block cache deploy has more
846    *         than one tier; e.g. we are using CombinedBlockCache).
847    */
848   public boolean isCacheDataInL1() {
849     return setAndGetBoolean(CACHE_DATA_IN_L1, DEFAULT_CACHE_DATA_IN_L1);
850   }
851
852   /**
853    * @param value true if we should cache data blocks in the L1 cache (if block cache deploy
854    * has more than one tier; e.g. we are using CombinedBlockCache).
855    * @return this (for chained invocation)
856    */
857   public HColumnDescriptor setCacheDataInL1(boolean value) {
858     return setValue(CACHE_DATA_IN_L1, Boolean.toString(value));
859   }
860
861   private boolean setAndGetBoolean(final String key, final boolean defaultSetting) {
862     String value = getValue(key);
863     if (value != null) {
864       return Boolean.parseBoolean(value);
865     }
866     return defaultSetting;
867   }
868
869   /**
870    * @return true if we should cache index blocks on write
871    */
872   public boolean isCacheIndexesOnWrite() {
873     return setAndGetBoolean(CACHE_INDEX_ON_WRITE, DEFAULT_CACHE_INDEX_ON_WRITE);
874   }
875
876   /**
877    * @param value true if we should cache index blocks on write
878    * @return this (for chained invocation)
879    */
880   public HColumnDescriptor setCacheIndexesOnWrite(boolean value) {
881     return setValue(CACHE_INDEX_ON_WRITE, Boolean.toString(value));
882   }
883 
884   /**
885    * @return true if we should cache bloomfilter blocks on write
886    */
887   public boolean isCacheBloomsOnWrite() {
888     return setAndGetBoolean(CACHE_BLOOMS_ON_WRITE, DEFAULT_CACHE_BLOOMS_ON_WRITE);
889   }
890 
891   /**
892    * @param value true if we should cache bloomfilter blocks on write
893    * @return this (for chained invocation)
894    */
895   public HColumnDescriptor setCacheBloomsOnWrite(boolean value) {
896     return setValue(CACHE_BLOOMS_ON_WRITE, Boolean.toString(value));
897   }
898 
899   /**
900    * @return true if we should evict cached blocks from the blockcache on close
901    */
902   public boolean isEvictBlocksOnClose() {
903     return setAndGetBoolean(EVICT_BLOCKS_ON_CLOSE, DEFAULT_EVICT_BLOCKS_ON_CLOSE);
904   }
905 
906   /**
907    * @param value true if we should evict cached blocks from the blockcache on
908    * close
909    * @return this (for chained invocation)
910    */
911   public HColumnDescriptor setEvictBlocksOnClose(boolean value) {
912     return setValue(EVICT_BLOCKS_ON_CLOSE, Boolean.toString(value));
913   }
914
915   /**
916    * @return true if we should prefetch blocks into the blockcache on open
917    */
918   public boolean isPrefetchBlocksOnOpen() {
919     return setAndGetBoolean(PREFETCH_BLOCKS_ON_OPEN, DEFAULT_PREFETCH_BLOCKS_ON_OPEN);
920   }
921
922   /**
923    * @param value true if we should prefetch blocks into the blockcache on open
924    * @return this (for chained invocation)
925    */
926   public HColumnDescriptor setPrefetchBlocksOnOpen(boolean value) {
927     return setValue(PREFETCH_BLOCKS_ON_OPEN, Boolean.toString(value));
928   }
929
930   /**
931    * @see java.lang.Object#toString()
932    */
933   @Override
934   public String toString() {
935     StringBuilder s = new StringBuilder();
936
937     s.append('{');
938     s.append(HConstants.NAME);
939     s.append(" => '");
940     s.append(Bytes.toString(name));
941     s.append("'");
942     s.append(getValues(true));
943     s.append('}');
944     return s.toString();
945   }
946
947   /**
948    * @return Column family descriptor with only the customized attributes.
949    */
950   public String toStringCustomizedValues() {
951     StringBuilder s = new StringBuilder();
952     s.append('{');
953     s.append(HConstants.NAME);
954     s.append(" => '");
955     s.append(Bytes.toString(name));
956     s.append("'");
957     s.append(getValues(false));
958     s.append('}');
959     return s.toString();
960   }
961
962   private StringBuilder getValues(boolean printDefaults) {
963     StringBuilder s = new StringBuilder();
964
965     boolean hasConfigKeys = false;
966
967     // print all reserved keys first
968     for (Map.Entry<Bytes, Bytes> entry : values.entrySet()) {
969       if (!RESERVED_KEYWORDS.contains(entry.getKey())) {
970         hasConfigKeys = true;
971         continue;
972       }
973       String key = Bytes.toString(entry.getKey().get());
974       String value = Bytes.toStringBinary(entry.getValue().get());
975       if (printDefaults
976           || !DEFAULT_VALUES.containsKey(key)
977           || !DEFAULT_VALUES.get(key).equalsIgnoreCase(value)) {
978         s.append(", ");
979         s.append(key);
980         s.append(" => ");
981         s.append('\'').append(PrettyPrinter.format(value, getUnit(key))).append('\'');
982       }
983     }
984
985     // print all non-reserved, advanced config keys as a separate subset
986     if (hasConfigKeys) {
987       s.append(", ");
988       s.append(HConstants.METADATA).append(" => ");
989       s.append('{');
990       boolean printComma = false;
991       for (Bytes k : values.keySet()) {
992         if (RESERVED_KEYWORDS.contains(k)) {
993           continue;
994         }
995         String key = Bytes.toString(k.get());
996         String value = Bytes.toStringBinary(values.get(k).get());
997         if (printComma) {
998           s.append(", ");
999         }
1000         printComma = true;
1001         s.append('\'').append(key).append('\'');
1002         s.append(" => ");
1003         s.append('\'').append(PrettyPrinter.format(value, getUnit(key))).append('\'');
1004       }
1005       s.append('}');
1006     }
1007
1008     if (!configuration.isEmpty()) {
1009       s.append(", ");
1010       s.append(HConstants.CONFIGURATION).append(" => ");
1011       s.append('{');
1012       boolean printCommaForConfiguration = false;
1013       for (Map.Entry<String, String> e : configuration.entrySet()) {
1014         if (printCommaForConfiguration) s.append(", ");
1015         printCommaForConfiguration = true;
1016         s.append('\'').append(e.getKey()).append('\'');
1017         s.append(" => ");
1018         s.append('\'').append(PrettyPrinter.format(e.getValue(), getUnit(e.getKey()))).append('\'');
1019       }
1020       s.append("}");
1021     }
1022     return s;
1023   }
1024
1025   public static Unit getUnit(String key) {
1026     Unit unit;
1027       /* TTL for now, we can add more as we neeed */
1028     if (key.equals(HColumnDescriptor.TTL)) {
1029       unit = Unit.TIME_INTERVAL;
1030     } else if (key.equals(HColumnDescriptor.MOB_THRESHOLD)) {
1031       unit = Unit.LONG;
1032     } else if (key.equals(HColumnDescriptor.IS_MOB)) {
1033       unit = Unit.BOOLEAN;
1034     } else {
1035       unit = Unit.NONE;
1036     }
1037     return unit;
1038   }
1039
1040   public static Map<String, String> getDefaultValues() {
1041     return Collections.unmodifiableMap(DEFAULT_VALUES);
1042   }
1043
1044   /**
1045    * @see java.lang.Object#equals(java.lang.Object)
1046    */
1047   @Override
1048   public boolean equals(Object obj) {
1049     if (this == obj) {
1050       return true;
1051     }
1052     if (obj == null) {
1053       return false;
1054     }
1055     if (!(obj instanceof HColumnDescriptor)) {
1056       return false;
1057     }
1058     return compareTo((HColumnDescriptor)obj) == 0;
1059   }
1060
1061   /**
1062    * @see java.lang.Object#hashCode()
1063    */
1064   @Override
1065   public int hashCode() {
1066     int result = Bytes.hashCode(this.name);
1067     result ^= (int) COLUMN_DESCRIPTOR_VERSION;
1068     result ^= values.hashCode();
1069     result ^= configuration.hashCode();
1070     return result;
1071   }
1072 
1073   // Comparable
1074   @Override
1075   public int compareTo(HColumnDescriptor o) {
1076     int result = Bytes.compareTo(this.name, o.getName());
1077     if (result == 0) {
1078       // punt on comparison for ordering, just calculate difference
1079       result = this.values.hashCode() - o.values.hashCode();
1080       if (result < 0)
1081         result = -1;
1082       else if (result > 0)
1083         result = 1;
1084     }
1085     if (result == 0) {
1086       result = this.configuration.hashCode() - o.configuration.hashCode();
1087       if (result < 0)
1088         result = -1;
1089       else if (result > 0)
1090         result = 1;
1091     }
1092     return result;
1093   }
1094
1095   /**
1096    * @return This instance serialized with pb with pb magic prefix
1097    * @see #parseFrom(byte[])
1098    */
1099   public byte[] toByteArray() {
1100     return ProtobufUtil
1101         .prependPBMagic(ProtobufUtil.convertToColumnFamilySchema(this).toByteArray());
1102   }
1103
1104   /**
1105    * @param bytes A pb serialized {@link HColumnDescriptor} instance with pb magic prefix
1106    * @return An instance of {@link HColumnDescriptor} made from <code>bytes</code>
1107    * @throws DeserializationException
1108    * @see #toByteArray()
1109    */
1110   public static HColumnDescriptor parseFrom(final byte [] bytes) throws DeserializationException {
1111     if (!ProtobufUtil.isPBMagicPrefix(bytes)) throw new DeserializationException("No magic");
1112     int pblen = ProtobufUtil.lengthOfPBMagic();
1113     ColumnFamilySchema.Builder builder = ColumnFamilySchema.newBuilder();
1114     ColumnFamilySchema cfs = null;
1115     try {
1116       ProtobufUtil.mergeFrom(builder, bytes, pblen, bytes.length - pblen);
1117       cfs = builder.build();
1118     } catch (IOException e) {
1119       throw new DeserializationException(e);
1120     }
1121     return ProtobufUtil.convertToHColumnDesc(cfs);
1122   }
1123
1124   /**
1125    * Getter for accessing the configuration value by key.
1126    */
1127   public String getConfigurationValue(String key) {
1128     return configuration.get(key);
1129   }
1130
1131   /**
1132    * Getter for fetching an unmodifiable {@link #configuration} map.
1133    */
1134   public Map<String, String> getConfiguration() {
1135     // shallow pointer copy
1136     return Collections.unmodifiableMap(configuration);
1137   }
1138
1139   /**
1140    * Setter for storing a configuration setting in {@link #configuration} map.
1141    * @param key Config key. Same as XML config key e.g. hbase.something.or.other.
1142    * @param value String value. If null, removes the configuration.
1143    */
1144   public HColumnDescriptor setConfiguration(String key, String value) {
1145     if (value == null) {
1146       removeConfiguration(key);
1147     } else {
1148       configuration.put(key, value);
1149     }
1150     return this;
1151   }
1152
1153   /**
1154    * Remove a configuration setting represented by the key from the {@link #configuration} map.
1155    */
1156   public void removeConfiguration(final String key) {
1157     configuration.remove(key);
1158   }
1159
1160   /**
1161    * Return the encryption algorithm in use by this family
1162    */
1163   public String getEncryptionType() {
1164     return getValue(ENCRYPTION);
1165   }
1166
1167   /**
1168    * Set the encryption algorithm for use with this family
1169    * @param algorithm
1170    */
1171   public HColumnDescriptor setEncryptionType(String algorithm) {
1172     setValue(ENCRYPTION, algorithm);
1173     return this;
1174   }
1175
1176   /** Return the raw crypto key attribute for the family, or null if not set  */
1177   public byte[] getEncryptionKey() {
1178     return getValue(Bytes.toBytes(ENCRYPTION_KEY));
1179   }
1180
1181   /** Set the raw crypto key attribute for the family */
1182   public HColumnDescriptor setEncryptionKey(byte[] keyBytes) {
1183     setValue(Bytes.toBytes(ENCRYPTION_KEY), keyBytes);
1184     return this;
1185   }
1186
1187   /**
1188    * Gets the mob threshold of the family.
1189    * If the size of a cell value is larger than this threshold, it's regarded as a mob.
1190    * The default threshold is 1024*100(100K)B.
1191    * @return The mob threshold.
1192    */
1193   public long getMobThreshold() {
1194     byte[] threshold = getValue(MOB_THRESHOLD_BYTES);
1195     return threshold != null && threshold.length == Bytes.SIZEOF_LONG ? Bytes.toLong(threshold)
1196         : DEFAULT_MOB_THRESHOLD;
1197   }
1198
1199   /**
1200    * Sets the mob threshold of the family.
1201    * @param threshold The mob threshold.
1202    * @return this (for chained invocation)
1203    */
1204   public HColumnDescriptor setMobThreshold(long threshold) {
1205     setValue(MOB_THRESHOLD_BYTES, Bytes.toBytes(threshold));
1206     return this;
1207   }
1208
1209   /**
1210    * Gets whether the mob is enabled for the family.
1211    * @return True if the mob is enabled for the family.
1212    */
1213   public boolean isMobEnabled() {
1214     byte[] isMobEnabled = getValue(IS_MOB_BYTES);
1215     return isMobEnabled != null && isMobEnabled.length == Bytes.SIZEOF_BOOLEAN
1216         && Bytes.toBoolean(isMobEnabled);
1217   }
1218
1219   /**
1220    * Enables the mob for the family.
1221    * @param isMobEnabled Whether to enable the mob for the family.
1222    * @return this (for chained invocation)
1223    */
1224   public HColumnDescriptor setMobEnabled(boolean isMobEnabled) {
1225     setValue(IS_MOB_BYTES, Bytes.toBytes(isMobEnabled));
1226     return this;
1227   }
1228
1229   /**
1230    * @return replication factor set for this CF or {@link #DEFAULT_DFS_REPLICATION} if not set.
1231    *         <p>
1232    *         {@link #DEFAULT_DFS_REPLICATION} value indicates that user has explicitly not set any
1233    *         block replication factor for this CF, hence use the default replication factor set in
1234    *         the file system.
1235    */
1236   public short getDFSReplication() {
1237     String rf = getValue(DFS_REPLICATION);
1238     return rf == null ? DEFAULT_DFS_REPLICATION : Short.valueOf(rf);
1239   }
1240
1241   /**
1242    * Set the replication factor to hfile(s) belonging to this family
1243    * @param replication number of replicas the blocks(s) belonging to this CF should have, or
1244    *          {@link #DEFAULT_DFS_REPLICATION} for the default replication factor set in the
1245    *          filesystem
1246    * @return this (for chained invocation)
1247    */
1248   public HColumnDescriptor setDFSReplication(short replication) {
1249     if (replication < 1 && replication != DEFAULT_DFS_REPLICATION) {
1250       throw new IllegalArgumentException(
1251           "DFS replication factor cannot be less than 1 if explictly set.");
1252     }
1253     setValue(DFS_REPLICATION, Short.toString(replication));
1254     return this;
1255   }
1256 }