View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase;
20  
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.exceptions.HBaseException;
import org.apache.hadoop.hbase.io.compress.Compression;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ColumnFamilySchema;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.PrettyPrinter;
import org.apache.hadoop.hbase.util.PrettyPrinter.Unit;

import com.google.common.base.Preconditions;
42  
43  /**
44   * An HColumnDescriptor contains information about a column family such as the
45   * number of versions, compression settings, etc.
46   *
47   * It is used as input when creating a table or adding a column.
48   */
49  @InterfaceAudience.Public
50  @InterfaceStability.Evolving
51  public class HColumnDescriptor implements Comparable<HColumnDescriptor> {
52    // For future backward compatibility
53  
54    // Version  3 was when column names become byte arrays and when we picked up
55    // Time-to-live feature.  Version 4 was when we moved to byte arrays, HBASE-82.
56    // Version  5 was when bloom filter descriptors were removed.
57    // Version  6 adds metadata as a map where keys and values are byte[].
58    // Version  7 -- add new compression and hfile blocksize to HColumnDescriptor (HBASE-1217)
59    // Version  8 -- reintroduction of bloom filters, changed from boolean to enum
60    // Version  9 -- add data block encoding
61    // Version 10 -- change metadata to standard type.
62    // Version 11 -- add column family level configuration.
63    private static final byte COLUMN_DESCRIPTOR_VERSION = (byte) 11;
64  
65    // These constants are used as FileInfo keys
66    public static final String COMPRESSION = "COMPRESSION";
67    public static final String COMPRESSION_COMPACT = "COMPRESSION_COMPACT";
68    public static final String ENCODE_ON_DISK = // To be removed, it is not used anymore
69        "ENCODE_ON_DISK";
70    public static final String DATA_BLOCK_ENCODING =
71        "DATA_BLOCK_ENCODING";
72    /**
73     * Key for the BLOCKCACHE attribute.
74     * A more exact name would be CACHE_DATA_ON_READ because this flag sets whether or not we
75     * cache DATA blocks.  We always cache INDEX and BLOOM blocks; caching these blocks cannot be
76     * disabled.
77     */
78    public static final String BLOCKCACHE = "BLOCKCACHE";
79    public static final String CACHE_DATA_ON_WRITE = "CACHE_DATA_ON_WRITE";
80    public static final String CACHE_INDEX_ON_WRITE = "CACHE_INDEX_ON_WRITE";
81    public static final String CACHE_BLOOMS_ON_WRITE = "CACHE_BLOOMS_ON_WRITE";
82    public static final String EVICT_BLOCKS_ON_CLOSE = "EVICT_BLOCKS_ON_CLOSE";
83    /**
84     * Key for cache data into L1 if cache is set up with more than one tier.
85     * To set in the shell, do something like this:
86     * <code>hbase(main):003:0&gt; create 't',
87     *    {NAME =&gt; 't', CONFIGURATION =&gt; {CACHE_DATA_IN_L1 =&gt; 'true'}}</code>
88     */
89    public static final String CACHE_DATA_IN_L1 = "CACHE_DATA_IN_L1";
90  
91    /**
92     * Key for the PREFETCH_BLOCKS_ON_OPEN attribute.
93     * If set, all INDEX, BLOOM, and DATA blocks of HFiles belonging to this
94     * family will be loaded into the cache as soon as the file is opened. These
95     * loads will not count as cache misses.
96     */
97    public static final String PREFETCH_BLOCKS_ON_OPEN = "PREFETCH_BLOCKS_ON_OPEN";
98  
99    /**
100    * Size of storefile/hfile 'blocks'.  Default is {@link #DEFAULT_BLOCKSIZE}.
101    * Use smaller block sizes for faster random-access at expense of larger
102    * indices (more memory consumption).
103    */
104   public static final String BLOCKSIZE = "BLOCKSIZE";
105 
106   public static final String LENGTH = "LENGTH";
107   public static final String TTL = "TTL";
108   public static final String BLOOMFILTER = "BLOOMFILTER";
109   public static final String FOREVER = "FOREVER";
110   public static final String REPLICATION_SCOPE = "REPLICATION_SCOPE";
111   public static final byte[] REPLICATION_SCOPE_BYTES = Bytes.toBytes(REPLICATION_SCOPE);
112   public static final String MIN_VERSIONS = "MIN_VERSIONS";
113   /**
114    * Retain all cells across flushes and compactions even if they fall behind
115    * a delete tombstone. To see all retained cells, do a 'raw' scan; see
116    * Scan#setRaw or pass RAW =&gt; true attribute in the shell.
117    */
118   public static final String KEEP_DELETED_CELLS = "KEEP_DELETED_CELLS";
119   public static final String COMPRESS_TAGS = "COMPRESS_TAGS";
120 
121   public static final String ENCRYPTION = "ENCRYPTION";
122   public static final String ENCRYPTION_KEY = "ENCRYPTION_KEY";
123 
124   public static final String IS_MOB = "IS_MOB";
125   public static final byte[] IS_MOB_BYTES = Bytes.toBytes(IS_MOB);
126   public static final String MOB_THRESHOLD = "MOB_THRESHOLD";
127   public static final byte[] MOB_THRESHOLD_BYTES = Bytes.toBytes(MOB_THRESHOLD);
128   public static final long DEFAULT_MOB_THRESHOLD = 100 * 1024; // 100k
129 
130   public static final String DFS_REPLICATION = "DFS_REPLICATION";
131   public static final short DEFAULT_DFS_REPLICATION = 0;
132 
133   /**
134    * Default compression type.
135    */
136   public static final String DEFAULT_COMPRESSION =
137     Compression.Algorithm.NONE.getName();
138 
139   /**
140    * Default value of the flag that enables data block encoding on disk, as
141    * opposed to encoding in cache only. We encode blocks everywhere by default,
142    * as long as {@link #DATA_BLOCK_ENCODING} is not NONE.
143    */
144   public static final boolean DEFAULT_ENCODE_ON_DISK = true;
145 
146   /** Default data block encoding algorithm. */
147   public static final String DEFAULT_DATA_BLOCK_ENCODING =
148       DataBlockEncoding.NONE.toString();
149 
150   /**
151    * Default number of versions of a record to keep.
152    */
153   public static final int DEFAULT_VERSIONS = HBaseConfiguration.create().getInt(
154     "hbase.column.max.version", 1);
155 
156   /**
157    * Default is not to keep a minimum of versions.
158    */
159   public static final int DEFAULT_MIN_VERSIONS = 0;
160 
161   /*
162    * Cache here the HCD value.
163    * Question: its OK to cache since when we're reenable, we create a new HCD?
164    */
165   private volatile Integer blocksize = null;
166 
167   /**
168    * Default setting for whether to try and serve this column family from memory or not.
169    */
170   public static final boolean DEFAULT_IN_MEMORY = false;
171 
172   /**
173    * Default setting for preventing deleted from being collected immediately.
174    */
175   public static final KeepDeletedCells DEFAULT_KEEP_DELETED = KeepDeletedCells.FALSE;
176 
177   /**
178    * Default setting for whether to use a block cache or not.
179    */
180   public static final boolean DEFAULT_BLOCKCACHE = true;
181 
182   /**
183    * Default setting for whether to cache data blocks on write if block caching
184    * is enabled.
185    */
186   public static final boolean DEFAULT_CACHE_DATA_ON_WRITE = false;
187 
188   /**
189    * Default setting for whether to cache data blocks in L1 tier.  Only makes sense if more than
190    * one tier in operations: i.e. if we have an L1 and a L2.  This will be the cases if we are
191    * using BucketCache.
192    */
193   public static final boolean DEFAULT_CACHE_DATA_IN_L1 = false;
194 
195   /**
196    * Default setting for whether to cache index blocks on write if block
197    * caching is enabled.
198    */
199   public static final boolean DEFAULT_CACHE_INDEX_ON_WRITE = false;
200 
201   /**
202    * Default size of blocks in files stored to the filesytem (hfiles).
203    */
204   public static final int DEFAULT_BLOCKSIZE = HConstants.DEFAULT_BLOCKSIZE;
205 
206   /**
207    * Default setting for whether or not to use bloomfilters.
208    */
209   public static final String DEFAULT_BLOOMFILTER = BloomType.ROW.toString();
210 
211   /**
212    * Default setting for whether to cache bloom filter blocks on write if block
213    * caching is enabled.
214    */
215   public static final boolean DEFAULT_CACHE_BLOOMS_ON_WRITE = false;
216 
217   /**
218    * Default time to live of cell contents.
219    */
220   public static final int DEFAULT_TTL = HConstants.FOREVER;
221 
222   /**
223    * Default scope.
224    */
225   public static final int DEFAULT_REPLICATION_SCOPE = HConstants.REPLICATION_SCOPE_LOCAL;
226 
227   /**
228    * Default setting for whether to evict cached blocks from the blockcache on
229    * close.
230    */
231   public static final boolean DEFAULT_EVICT_BLOCKS_ON_CLOSE = false;
232 
233   /**
234    * Default compress tags along with any type of DataBlockEncoding.
235    */
236   public static final boolean DEFAULT_COMPRESS_TAGS = true;
237 
238   /*
239    * Default setting for whether to prefetch blocks into the blockcache on open.
240    */
241   public static final boolean DEFAULT_PREFETCH_BLOCKS_ON_OPEN = false;
242 
243   private final static Map<String, String> DEFAULT_VALUES
244     = new HashMap<String, String>();
245   private final static Set<Bytes> RESERVED_KEYWORDS
246       = new HashSet<Bytes>();
247 
248   static {
249       DEFAULT_VALUES.put(BLOOMFILTER, DEFAULT_BLOOMFILTER);
250       DEFAULT_VALUES.put(REPLICATION_SCOPE, String.valueOf(DEFAULT_REPLICATION_SCOPE));
251       DEFAULT_VALUES.put(HConstants.VERSIONS, String.valueOf(DEFAULT_VERSIONS));
252       DEFAULT_VALUES.put(MIN_VERSIONS, String.valueOf(DEFAULT_MIN_VERSIONS));
253       DEFAULT_VALUES.put(COMPRESSION, DEFAULT_COMPRESSION);
254       DEFAULT_VALUES.put(TTL, String.valueOf(DEFAULT_TTL));
255       DEFAULT_VALUES.put(BLOCKSIZE, String.valueOf(DEFAULT_BLOCKSIZE));
256       DEFAULT_VALUES.put(HConstants.IN_MEMORY, String.valueOf(DEFAULT_IN_MEMORY));
257       DEFAULT_VALUES.put(BLOCKCACHE, String.valueOf(DEFAULT_BLOCKCACHE));
258       DEFAULT_VALUES.put(KEEP_DELETED_CELLS, String.valueOf(DEFAULT_KEEP_DELETED));
259       DEFAULT_VALUES.put(DATA_BLOCK_ENCODING, String.valueOf(DEFAULT_DATA_BLOCK_ENCODING));
260       DEFAULT_VALUES.put(CACHE_DATA_ON_WRITE, String.valueOf(DEFAULT_CACHE_DATA_ON_WRITE));
261       DEFAULT_VALUES.put(CACHE_DATA_IN_L1, String.valueOf(DEFAULT_CACHE_DATA_IN_L1));
262       DEFAULT_VALUES.put(CACHE_INDEX_ON_WRITE, String.valueOf(DEFAULT_CACHE_INDEX_ON_WRITE));
263       DEFAULT_VALUES.put(CACHE_BLOOMS_ON_WRITE, String.valueOf(DEFAULT_CACHE_BLOOMS_ON_WRITE));
264       DEFAULT_VALUES.put(EVICT_BLOCKS_ON_CLOSE, String.valueOf(DEFAULT_EVICT_BLOCKS_ON_CLOSE));
265       DEFAULT_VALUES.put(PREFETCH_BLOCKS_ON_OPEN, String.valueOf(DEFAULT_PREFETCH_BLOCKS_ON_OPEN));
266       for (String s : DEFAULT_VALUES.keySet()) {
267         RESERVED_KEYWORDS.add(new Bytes(Bytes.toBytes(s)));
268       }
269       RESERVED_KEYWORDS.add(new Bytes(Bytes.toBytes(ENCRYPTION)));
270       RESERVED_KEYWORDS.add(new Bytes(Bytes.toBytes(ENCRYPTION_KEY)));
271       RESERVED_KEYWORDS.add(new Bytes(IS_MOB_BYTES));
272       RESERVED_KEYWORDS.add(new Bytes(MOB_THRESHOLD_BYTES));
273   }
274 
275   private static final int UNINITIALIZED = -1;
276 
277   // Column family name
278   private byte [] name;
279 
280   // Column metadata
281   private final Map<Bytes, Bytes> values =
282       new HashMap<Bytes, Bytes>();
283 
284   /**
285    * A map which holds the configuration specific to the column family.
286    * The keys of the map have the same names as config keys and override the defaults with
287    * cf-specific settings. Example usage may be for compactions, etc.
288    */
289   private final Map<String, String> configuration = new HashMap<String, String>();
290 
291   /*
292    * Cache the max versions rather than calculate it every time.
293    */
294   private int cachedMaxVersions = UNINITIALIZED;
295 
296   /**
297    * Construct a column descriptor specifying only the family name
298    * The other attributes are defaulted.
299    *
300    * @param familyName Column family name. Must be 'printable' -- digit or
301    * letter -- and may not contain a <code>:</code>
302    */
303   public HColumnDescriptor(final String familyName) {
304     this(Bytes.toBytes(familyName));
305   }
306 
307   /**
308    * Construct a column descriptor specifying only the family name
309    * The other attributes are defaulted.
310    *
311    * @param familyName Column family name. Must be 'printable' -- digit or
312    * letter -- and may not contain a <code>:</code>
313    */
314   public HColumnDescriptor(final byte [] familyName) {
315     isLegalFamilyName(familyName);
316     this.name = familyName;
317 
318     setMaxVersions(DEFAULT_VERSIONS);
319     setMinVersions(DEFAULT_MIN_VERSIONS);
320     setKeepDeletedCells(DEFAULT_KEEP_DELETED);
321     setInMemory(DEFAULT_IN_MEMORY);
322     setBlockCacheEnabled(DEFAULT_BLOCKCACHE);
323     setTimeToLive(DEFAULT_TTL);
324     setCompressionType(Compression.Algorithm.valueOf(DEFAULT_COMPRESSION.toUpperCase()));
325     setDataBlockEncoding(DataBlockEncoding.valueOf(DEFAULT_DATA_BLOCK_ENCODING.toUpperCase()));
326     setBloomFilterType(BloomType.valueOf(DEFAULT_BLOOMFILTER.toUpperCase()));
327     setBlocksize(DEFAULT_BLOCKSIZE);
328     setScope(DEFAULT_REPLICATION_SCOPE);
329   }
330 
331   /**
332    * Constructor.
333    * Makes a deep copy of the supplied descriptor.
334    * Can make a modifiable descriptor from an UnmodifyableHColumnDescriptor.
335    * @param desc The descriptor.
336    */
337   public HColumnDescriptor(HColumnDescriptor desc) {
338     super();
339     this.name = desc.name.clone();
340     for (Map.Entry<Bytes, Bytes> e :
341         desc.values.entrySet()) {
342       this.values.put(e.getKey(), e.getValue());
343     }
344     for (Map.Entry<String, String> e : desc.configuration.entrySet()) {
345       this.configuration.put(e.getKey(), e.getValue());
346     }
347     setMaxVersions(desc.getMaxVersions());
348   }
349 
350   /**
351    * @param b Family name.
352    * @return <code>b</code>
353    * @throws IllegalArgumentException If not null and not a legitimate family
354    * name: i.e. 'printable' and ends in a ':' (Null passes are allowed because
355    * <code>b</code> can be null when deserializing).  Cannot start with a '.'
356    * either. Also Family can not be an empty value or equal "recovered.edits".
357    */
358   public static byte [] isLegalFamilyName(final byte [] b) {
359     if (b == null) {
360       return b;
361     }
362     Preconditions.checkArgument(b.length != 0, "Family name can not be empty");
363     if (b[0] == '.') {
364       throw new IllegalArgumentException("Family names cannot start with a " +
365         "period: " + Bytes.toString(b));
366     }
367     for (int i = 0; i < b.length; i++) {
368       if (Character.isISOControl(b[i]) || b[i] == ':' || b[i] == '\\' || b[i] == '/') {
369         throw new IllegalArgumentException("Illegal character <" + b[i] +
370           ">. Family names cannot contain control characters or colons: " +
371           Bytes.toString(b));
372       }
373     }
374     byte[] recoveredEdit = Bytes.toBytes(HConstants.RECOVERED_EDITS_DIR);
375     if (Bytes.equals(recoveredEdit, b)) {
376       throw new IllegalArgumentException("Family name cannot be: " +
377           HConstants.RECOVERED_EDITS_DIR);
378     }
379     return b;
380   }
381 
382   /**
383    * @return Name of this column family
384    */
385   public byte [] getName() {
386     return name;
387   }
388 
389   /**
390    * @return Name of this column family
391    */
392   public String getNameAsString() {
393     return Bytes.toString(this.name);
394   }
395 
396   /**
397    * @param key The key.
398    * @return The value.
399    */
400   public byte[] getValue(byte[] key) {
401     Bytes ibw = values.get(new Bytes(key));
402     if (ibw == null)
403       return null;
404     return ibw.get();
405   }
406 
407   /**
408    * @param key The key.
409    * @return The value as a string.
410    */
411   public String getValue(String key) {
412     byte[] value = getValue(Bytes.toBytes(key));
413     if (value == null)
414       return null;
415     return Bytes.toString(value);
416   }
417 
418   /**
419    * @return All values.
420    */
421   public Map<Bytes, Bytes> getValues() {
422     // shallow pointer copy
423     return Collections.unmodifiableMap(values);
424   }
425 
426   /**
427    * @param key The key.
428    * @param value The value.
429    * @return this (for chained invocation)
430    */
431   public HColumnDescriptor setValue(byte[] key, byte[] value) {
432     if (Bytes.compareTo(Bytes.toBytes(HConstants.VERSIONS), key) == 0) {
433       cachedMaxVersions = UNINITIALIZED;
434     }
435     values.put(new Bytes(key),
436         new Bytes(value));
437     return this;
438   }
439 
440   /**
441    * @param key Key whose key and value we're to remove from HCD parameters.
442    */
443   public void remove(final byte [] key) {
444     values.remove(new Bytes(key));
445   }
446 
447   /**
448    * @param key The key.
449    * @param value The value.
450    * @return this (for chained invocation)
451    */
452   public HColumnDescriptor setValue(String key, String value) {
453     if (value == null) {
454       remove(Bytes.toBytes(key));
455     } else {
456       setValue(Bytes.toBytes(key), Bytes.toBytes(value));
457     }
458     return this;
459   }
460 
461   /**
462    * @return compression type being used for the column family
463    * @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0
464    *             (<a href="https://issues.apache.org/jira/browse/HBASE-13655">HBASE-13655</a>).
465    *             Use {@link #getCompressionType()}.
466    */
467   @Deprecated
468   public Compression.Algorithm getCompression() {
469     return getCompressionType();
470   }
471 
472   /**
473    *  @return compression type being used for the column family for major compaction
474    *  @deprecated As of release 2.0.0, this will be removed in HBase 3.0.0
475    *             (<a href="https://issues.apache.org/jira/browse/HBASE-13655">HBASE-13655</a>).
476    *             Use {@link #getCompactionCompressionType()}.
477    */
478   @Deprecated
479   public Compression.Algorithm getCompactionCompression() {
480     return getCompactionCompressionType();
481   }
482 
483   /** @return maximum number of versions */
484   public int getMaxVersions() {
485     if (this.cachedMaxVersions == UNINITIALIZED) {
486       String v = getValue(HConstants.VERSIONS);
487       this.cachedMaxVersions = Integer.parseInt(v);
488     }
489     return this.cachedMaxVersions;
490   }
491 
492   /**
493    * @param maxVersions maximum number of versions
494    * @return this (for chained invocation)
495    */
496   public HColumnDescriptor setMaxVersions(int maxVersions) {
497     if (maxVersions <= 0) {
498       // TODO: Allow maxVersion of 0 to be the way you say "Keep all versions".
499       // Until there is support, consider 0 or < 0 -- a configuration error.
500       throw new IllegalArgumentException("Maximum versions must be positive");
501     }
502     if (maxVersions < this.getMinVersions()) {
503         throw new IllegalArgumentException("Set MaxVersion to " + maxVersions
504             + " while minVersion is " + this.getMinVersions()
505             + ". Maximum versions must be >= minimum versions ");
506     }
507     setValue(HConstants.VERSIONS, Integer.toString(maxVersions));
508     cachedMaxVersions = maxVersions;
509     return this;
510   }
511 
512   /**
513    * Set minimum and maximum versions to keep
514    *
515    * @param minVersions minimal number of versions
516    * @param maxVersions maximum number of versions
517    * @return this (for chained invocation)
518    */
519   public HColumnDescriptor setVersions(int minVersions, int maxVersions) {
520     if (minVersions <= 0) {
521       // TODO: Allow minVersion and maxVersion of 0 to be the way you say "Keep all versions".
522       // Until there is support, consider 0 or < 0 -- a configuration error.
523       throw new IllegalArgumentException("Minimum versions must be positive");
524     }
525 
526     if (maxVersions < minVersions) {
527       throw new IllegalArgumentException("Unable to set MaxVersion to " + maxVersions
528         + " and set MinVersion to " + minVersions
529         + ", as maximum versions must be >= minimum versions.");
530     }
531     setMinVersions(minVersions);
532     setMaxVersions(maxVersions);
533     return this;
534   }
535 
536   /**
537    * @return The storefile/hfile blocksize for this column family.
538    */
539   public synchronized int getBlocksize() {
540     if (this.blocksize == null) {
541       String value = getValue(BLOCKSIZE);
542       this.blocksize = (value != null)?
543         Integer.decode(value): Integer.valueOf(DEFAULT_BLOCKSIZE);
544     }
545     return this.blocksize.intValue();
546 
547   }
548 
549   /**
550    * @param s Blocksize to use when writing out storefiles/hfiles on this
551    * column family.
552    * @return this (for chained invocation)
553    */
554   public HColumnDescriptor setBlocksize(int s) {
555     setValue(BLOCKSIZE, Integer.toString(s));
556     this.blocksize = null;
557     return this;
558   }
559 
560   /**
561    * @return Compression type setting.
562    */
563   public Compression.Algorithm getCompressionType() {
564     String n = getValue(COMPRESSION);
565     if (n == null) {
566       return Compression.Algorithm.NONE;
567     }
568     return Compression.Algorithm.valueOf(n.toUpperCase());
569   }
570 
571   /**
572    * Compression types supported in hbase.
573    * LZO is not bundled as part of the hbase distribution.
574    * See <a href="http://wiki.apache.org/hadoop/UsingLzoCompression">LZO Compression</a>
575    * for how to enable it.
576    * @param type Compression type setting.
577    * @return this (for chained invocation)
578    */
579   public HColumnDescriptor setCompressionType(Compression.Algorithm type) {
580     return setValue(COMPRESSION, type.getName().toUpperCase());
581   }
582 
583   /**
584    * @return the data block encoding algorithm used in block cache and
585    *         optionally on disk
586    */
587   public DataBlockEncoding getDataBlockEncoding() {
588     String type = getValue(DATA_BLOCK_ENCODING);
589     if (type == null) {
590       type = DEFAULT_DATA_BLOCK_ENCODING;
591     }
592     return DataBlockEncoding.valueOf(type);
593   }
594 
595   /**
596    * Set data block encoding algorithm used in block cache.
597    * @param type What kind of data block encoding will be used.
598    * @return this (for chained invocation)
599    */
600   public HColumnDescriptor setDataBlockEncoding(DataBlockEncoding type) {
601     String name;
602     if (type != null) {
603       name = type.toString();
604     } else {
605       name = DataBlockEncoding.NONE.toString();
606     }
607     return setValue(DATA_BLOCK_ENCODING, name);
608   }
609 
610   /**
611    * Set whether the tags should be compressed along with DataBlockEncoding. When no
612    * DataBlockEncoding is been used, this is having no effect.
613    *
614    * @param compressTags
615    * @return this (for chained invocation)
616    */
617   public HColumnDescriptor setCompressTags(boolean compressTags) {
618     return setValue(COMPRESS_TAGS, String.valueOf(compressTags));
619   }
620 
621   /**
622    * @return Whether KV tags should be compressed along with DataBlockEncoding. When no
623    *         DataBlockEncoding is been used, this is having no effect.
624    */
625   public boolean isCompressTags() {
626     String compressTagsStr = getValue(COMPRESS_TAGS);
627     boolean compressTags = DEFAULT_COMPRESS_TAGS;
628     if (compressTagsStr != null) {
629       compressTags = Boolean.parseBoolean(compressTagsStr);
630     }
631     return compressTags;
632   }
633 
634   /**
635    * @return Compression type setting.
636    */
637   public Compression.Algorithm getCompactionCompressionType() {
638     String n = getValue(COMPRESSION_COMPACT);
639     if (n == null) {
640       return getCompressionType();
641     }
642     return Compression.Algorithm.valueOf(n.toUpperCase());
643   }
644 
645   /**
646    * Compression types supported in hbase.
647    * LZO is not bundled as part of the hbase distribution.
648    * See <a href="http://wiki.apache.org/hadoop/UsingLzoCompression">LZO Compression</a>
649    * for how to enable it.
650    * @param type Compression type setting.
651    * @return this (for chained invocation)
652    */
653   public HColumnDescriptor setCompactionCompressionType(
654       Compression.Algorithm type) {
655     return setValue(COMPRESSION_COMPACT, type.getName().toUpperCase());
656   }
657 
658   /**
659    * @return True if we are to favor keeping all values for this column family in the
660    * HRegionServer cache.
661    */
662   public boolean isInMemory() {
663     String value = getValue(HConstants.IN_MEMORY);
664     if (value != null) {
665       return Boolean.parseBoolean(value);
666     }
667     return DEFAULT_IN_MEMORY;
668   }
669 
670   /**
671    * @param inMemory True if we are to favor keeping all values for this column family in the
672    * HRegionServer cache
673    * @return this (for chained invocation)
674    */
675   public HColumnDescriptor setInMemory(boolean inMemory) {
676     return setValue(HConstants.IN_MEMORY, Boolean.toString(inMemory));
677   }
678 
679   public KeepDeletedCells getKeepDeletedCells() {
680     String value = getValue(KEEP_DELETED_CELLS);
681     if (value != null) {
682       // toUpperCase for backwards compatibility
683       return KeepDeletedCells.valueOf(value.toUpperCase());
684     }
685     return DEFAULT_KEEP_DELETED;
686   }
687 
688   /**
689    * @param keepDeletedCells True if deleted rows should not be collected
690    * immediately.
691    * @return this (for chained invocation)
692    */
693   public HColumnDescriptor setKeepDeletedCells(KeepDeletedCells keepDeletedCells) {
694     return setValue(KEEP_DELETED_CELLS, keepDeletedCells.toString());
695   }
696 
697   /**
698    * @return Time-to-live of cell contents, in seconds.
699    */
700   public int getTimeToLive() {
701     String value = getValue(TTL);
702     return (value != null)? Integer.parseInt(value) : DEFAULT_TTL;
703   }
704 
705   /**
706    * @param timeToLive Time-to-live of cell contents, in seconds.
707    * @return this (for chained invocation)
708    */
709   public HColumnDescriptor setTimeToLive(int timeToLive) {
710     return setValue(TTL, Integer.toString(timeToLive));
711   }
712 
713   /**
714    * @param timeToLive Time to live of cell contents, in human readable format
715    *                   @see org.apache.hadoop.hbase.util.PrettyPrinter#format(String, Unit)
716    * @return this (for chained invocation)
717    */
718   public HColumnDescriptor setTimeToLive(String timeToLive) throws HBaseException {
719     return setValue(TTL, PrettyPrinter.valueOf(timeToLive, Unit.TIME_INTERVAL));
720   }
721 
722   /**
723    * @return The minimum number of versions to keep.
724    */
725   public int getMinVersions() {
726     String value = getValue(MIN_VERSIONS);
727     return (value != null)? Integer.parseInt(value) : 0;
728   }
729 
730   /**
731    * @param minVersions The minimum number of versions to keep.
732    * (used when timeToLive is set)
733    * @return this (for chained invocation)
734    */
735   public HColumnDescriptor setMinVersions(int minVersions) {
736     return setValue(MIN_VERSIONS, Integer.toString(minVersions));
737   }
738 
739   /**
740    * @return True if hfile DATA type blocks should be cached (You cannot disable caching of INDEX
741    * and BLOOM type blocks).
742    */
743   public boolean isBlockCacheEnabled() {
744     String value = getValue(BLOCKCACHE);
745     if (value != null) {
746       return Boolean.parseBoolean(value);
747     }
748     return DEFAULT_BLOCKCACHE;
749   }
750 
751   /**
752    * @param blockCacheEnabled True if hfile DATA type blocks should be cached (We always cache
753    * INDEX and BLOOM blocks; you cannot turn this off).
754    * @return this (for chained invocation)
755    */
756   public HColumnDescriptor setBlockCacheEnabled(boolean blockCacheEnabled) {
757     return setValue(BLOCKCACHE, Boolean.toString(blockCacheEnabled));
758   }
759 
760   /**
761    * @return bloom filter type used for new StoreFiles in ColumnFamily
762    */
763   public BloomType getBloomFilterType() {
764     String n = getValue(BLOOMFILTER);
765     if (n == null) {
766       n = DEFAULT_BLOOMFILTER;
767     }
768     return BloomType.valueOf(n.toUpperCase());
769   }
770 
771   /**
772    * @param bt bloom filter type
773    * @return this (for chained invocation)
774    */
775   public HColumnDescriptor setBloomFilterType(final BloomType bt) {
776     return setValue(BLOOMFILTER, bt.toString());
777   }
778 
779    /**
780     * @return the scope tag
781     */
782   public int getScope() {
783     byte[] value = getValue(REPLICATION_SCOPE_BYTES);
784     if (value != null) {
785       return Integer.parseInt(Bytes.toString(value));
786     }
787     return DEFAULT_REPLICATION_SCOPE;
788   }
789 
790  /**
791   * @param scope the scope tag
792   * @return this (for chained invocation)
793   */
794   public HColumnDescriptor setScope(int scope) {
795     return setValue(REPLICATION_SCOPE, Integer.toString(scope));
796   }
797 
798   /**
799    * @return true if we should cache data blocks on write
800    */
801   public boolean isCacheDataOnWrite() {
802     return setAndGetBoolean(CACHE_DATA_ON_WRITE, DEFAULT_CACHE_DATA_ON_WRITE);
803   }
804 
805   /**
806    * @param value true if we should cache data blocks on write
807    * @return this (for chained invocation)
808    */
809   public HColumnDescriptor setCacheDataOnWrite(boolean value) {
810     return setValue(CACHE_DATA_ON_WRITE, Boolean.toString(value));
811   }
812 
813   /**
814    * @return true if we should cache data blocks in the L1 cache (if block cache deploy has more
815    *         than one tier; e.g. we are using CombinedBlockCache).
816    */
817   public boolean isCacheDataInL1() {
818     return setAndGetBoolean(CACHE_DATA_IN_L1, DEFAULT_CACHE_DATA_IN_L1);
819   }
820 
821   /**
822    * @param value true if we should cache data blocks in the L1 cache (if block cache deploy
823    * has more than one tier; e.g. we are using CombinedBlockCache).
824    * @return this (for chained invocation)
825    */
826   public HColumnDescriptor setCacheDataInL1(boolean value) {
827     return setValue(CACHE_DATA_IN_L1, Boolean.toString(value));
828   }
829 
830   private boolean setAndGetBoolean(final String key, final boolean defaultSetting) {
831     String value = getValue(key);
832     if (value != null) {
833       return Boolean.parseBoolean(value);
834     }
835     return defaultSetting;
836   }
837 
838   /**
839    * @return true if we should cache index blocks on write
840    */
841   public boolean isCacheIndexesOnWrite() {
842     return setAndGetBoolean(CACHE_INDEX_ON_WRITE, DEFAULT_CACHE_INDEX_ON_WRITE);
843   }
844 
845   /**
846    * @param value true if we should cache index blocks on write
847    * @return this (for chained invocation)
848    */
849   public HColumnDescriptor setCacheIndexesOnWrite(boolean value) {
850     return setValue(CACHE_INDEX_ON_WRITE, Boolean.toString(value));
851   }
852 
853   /**
854    * @return true if we should cache bloomfilter blocks on write
855    */
856   public boolean isCacheBloomsOnWrite() {
857     return setAndGetBoolean(CACHE_BLOOMS_ON_WRITE, DEFAULT_CACHE_BLOOMS_ON_WRITE);
858   }
859 
860   /**
861    * @param value true if we should cache bloomfilter blocks on write
862    * @return this (for chained invocation)
863    */
864   public HColumnDescriptor setCacheBloomsOnWrite(boolean value) {
865     return setValue(CACHE_BLOOMS_ON_WRITE, Boolean.toString(value));
866   }
867 
868   /**
869    * @return true if we should evict cached blocks from the blockcache on close
870    */
871   public boolean isEvictBlocksOnClose() {
872     return setAndGetBoolean(EVICT_BLOCKS_ON_CLOSE, DEFAULT_EVICT_BLOCKS_ON_CLOSE);
873   }
874 
875   /**
876    * @param value true if we should evict cached blocks from the blockcache on
877    * close
878    * @return this (for chained invocation)
879    */
880   public HColumnDescriptor setEvictBlocksOnClose(boolean value) {
881     return setValue(EVICT_BLOCKS_ON_CLOSE, Boolean.toString(value));
882   }
883 
884   /**
885    * @return true if we should prefetch blocks into the blockcache on open
886    */
887   public boolean isPrefetchBlocksOnOpen() {
888     return setAndGetBoolean(PREFETCH_BLOCKS_ON_OPEN, DEFAULT_PREFETCH_BLOCKS_ON_OPEN);
889   }
890 
891   /**
892    * @param value true if we should prefetch blocks into the blockcache on open
893    * @return this (for chained invocation)
894    */
895   public HColumnDescriptor setPrefetchBlocksOnOpen(boolean value) {
896     return setValue(PREFETCH_BLOCKS_ON_OPEN, Boolean.toString(value));
897   }
898 
899   /**
900    * @see java.lang.Object#toString()
901    */
902   @Override
903   public String toString() {
904     StringBuilder s = new StringBuilder();
905 
906     s.append('{');
907     s.append(HConstants.NAME);
908     s.append(" => '");
909     s.append(Bytes.toString(name));
910     s.append("'");
911     s.append(getValues(true));
912     s.append('}');
913     return s.toString();
914   }
915 
916   /**
917    * @return Column family descriptor with only the customized attributes.
918    */
919   public String toStringCustomizedValues() {
920     StringBuilder s = new StringBuilder();
921     s.append('{');
922     s.append(HConstants.NAME);
923     s.append(" => '");
924     s.append(Bytes.toString(name));
925     s.append("'");
926     s.append(getValues(false));
927     s.append('}');
928     return s.toString();
929   }
930 
  /**
   * Renders this family's attributes in the shell's "KEY => 'value'" form.
   * Output comes in up to three sections, in order: reserved attributes,
   * then non-reserved attributes grouped under METADATA, then the
   * {@link #configuration} map grouped under CONFIGURATION.
   *
   * @param printDefaults when false, reserved attributes whose value equals
   *          the known default (case-insensitive) are omitted
   * @return the rendered text (note: always starts with ", " when non-empty,
   *         so callers can append it directly after the NAME entry)
   */
  private StringBuilder getValues(boolean printDefaults) {
    StringBuilder s = new StringBuilder();

    boolean hasConfigKeys = false;

    // print all reserved keys first; non-reserved keys are only noted here
    // and emitted later in the METADATA subset
    for (Map.Entry<Bytes, Bytes> entry : values.entrySet()) {
      if (!RESERVED_KEYWORDS.contains(entry.getKey())) {
        hasConfigKeys = true;
        continue;
      }
      String key = Bytes.toString(entry.getKey().get());
      String value = Bytes.toStringBinary(entry.getValue().get());
      // skip entries still at their default unless defaults were requested
      if (printDefaults
          || !DEFAULT_VALUES.containsKey(key)
          || !DEFAULT_VALUES.get(key).equalsIgnoreCase(value)) {
        s.append(", ");
        s.append(key);
        s.append(" => ");
        s.append('\'').append(PrettyPrinter.format(value, getUnit(key))).append('\'');
      }
    }

    // print all non-reserved, advanced config keys as a separate subset
    if (hasConfigKeys) {
      s.append(", ");
      s.append(HConstants.METADATA).append(" => ");
      s.append('{');
      boolean printComma = false;
      for (Bytes k : values.keySet()) {
        if (RESERVED_KEYWORDS.contains(k)) {
          continue;
        }
        String key = Bytes.toString(k.get());
        String value = Bytes.toStringBinary(values.get(k).get());
        if (printComma) {
          s.append(", ");
        }
        printComma = true;
        s.append('\'').append(key).append('\'');
        s.append(" => ");
        s.append('\'').append(PrettyPrinter.format(value, getUnit(key))).append('\'');
      }
      s.append('}');
    }

    // finally, the per-family Configuration overrides, if any
    if (!configuration.isEmpty()) {
      s.append(", ");
      s.append(HConstants.CONFIGURATION).append(" => ");
      s.append('{');
      boolean printCommaForConfiguration = false;
      for (Map.Entry<String, String> e : configuration.entrySet()) {
        if (printCommaForConfiguration) s.append(", ");
        printCommaForConfiguration = true;
        s.append('\'').append(e.getKey()).append('\'');
        s.append(" => ");
        s.append('\'').append(PrettyPrinter.format(e.getValue(), getUnit(e.getKey()))).append('\'');
      }
      s.append("}");
    }
    return s;
  }
993 
994   public static Unit getUnit(String key) {
995     Unit unit;
996       /* TTL for now, we can add more as we neeed */
997     if (key.equals(HColumnDescriptor.TTL)) {
998       unit = Unit.TIME_INTERVAL;
999     } else if (key.equals(HColumnDescriptor.MOB_THRESHOLD)) {
1000       unit = Unit.LONG;
1001     } else if (key.equals(HColumnDescriptor.IS_MOB)) {
1002       unit = Unit.BOOLEAN;
1003     } else {
1004       unit = Unit.NONE;
1005     }
1006     return unit;
1007   }
1008 
  /**
   * @return an unmodifiable view of the default values of the reserved
   *         column family attributes
   */
  public static Map<String, String> getDefaultValues() {
    return Collections.unmodifiableMap(DEFAULT_VALUES);
  }
1012 
1013   /**
1014    * @see java.lang.Object#equals(java.lang.Object)
1015    */
1016   @Override
1017   public boolean equals(Object obj) {
1018     if (this == obj) {
1019       return true;
1020     }
1021     if (obj == null) {
1022       return false;
1023     }
1024     if (!(obj instanceof HColumnDescriptor)) {
1025       return false;
1026     }
1027     return compareTo((HColumnDescriptor)obj) == 0;
1028   }
1029 
1030   /**
1031    * @see java.lang.Object#hashCode()
1032    */
1033   @Override
1034   public int hashCode() {
1035     int result = Bytes.hashCode(this.name);
1036     result ^= (int) COLUMN_DESCRIPTOR_VERSION;
1037     result ^= values.hashCode();
1038     result ^= configuration.hashCode();
1039     return result;
1040   }
1041 
1042   // Comparable
1043   @Override
1044   public int compareTo(HColumnDescriptor o) {
1045     int result = Bytes.compareTo(this.name, o.getName());
1046     if (result == 0) {
1047       // punt on comparison for ordering, just calculate difference
1048       result = this.values.hashCode() - o.values.hashCode();
1049       if (result < 0)
1050         result = -1;
1051       else if (result > 0)
1052         result = 1;
1053     }
1054     if (result == 0) {
1055       result = this.configuration.hashCode() - o.configuration.hashCode();
1056       if (result < 0)
1057         result = -1;
1058       else if (result > 0)
1059         result = 1;
1060     }
1061     return result;
1062   }
1063 
1064   /**
1065    * @return This instance serialized with pb with pb magic prefix
1066    * @see #parseFrom(byte[])
1067    */
1068   public byte[] toByteArray() {
1069     return ProtobufUtil
1070         .prependPBMagic(ProtobufUtil.convertToColumnFamilySchema(this).toByteArray());
1071   }
1072 
1073   /**
1074    * @param bytes A pb serialized {@link HColumnDescriptor} instance with pb magic prefix
1075    * @return An instance of {@link HColumnDescriptor} made from <code>bytes</code>
1076    * @throws DeserializationException
1077    * @see #toByteArray()
1078    */
1079   public static HColumnDescriptor parseFrom(final byte [] bytes) throws DeserializationException {
1080     if (!ProtobufUtil.isPBMagicPrefix(bytes)) throw new DeserializationException("No magic");
1081     int pblen = ProtobufUtil.lengthOfPBMagic();
1082     ColumnFamilySchema.Builder builder = ColumnFamilySchema.newBuilder();
1083     ColumnFamilySchema cfs = null;
1084     try {
1085       ProtobufUtil.mergeFrom(builder, bytes, pblen, bytes.length - pblen);
1086       cfs = builder.build();
1087     } catch (IOException e) {
1088       throw new DeserializationException(e);
1089     }
1090     return ProtobufUtil.convertToHColumnDesc(cfs);
1091   }
1092 
1093   /**
1094    * Getter for accessing the configuration value by key.
1095    */
1096   public String getConfigurationValue(String key) {
1097     return configuration.get(key);
1098   }
1099 
1100   /**
1101    * Getter for fetching an unmodifiable {@link #configuration} map.
1102    */
1103   public Map<String, String> getConfiguration() {
1104     // shallow pointer copy
1105     return Collections.unmodifiableMap(configuration);
1106   }
1107 
1108   /**
1109    * Setter for storing a configuration setting in {@link #configuration} map.
1110    * @param key Config key. Same as XML config key e.g. hbase.something.or.other.
1111    * @param value String value. If null, removes the configuration.
1112    */
1113   public HColumnDescriptor setConfiguration(String key, String value) {
1114     if (value == null) {
1115       removeConfiguration(key);
1116     } else {
1117       configuration.put(key, value);
1118     }
1119     return this;
1120   }
1121 
1122   /**
1123    * Remove a configuration setting represented by the key from the {@link #configuration} map.
1124    */
1125   public void removeConfiguration(final String key) {
1126     configuration.remove(key);
1127   }
1128 
1129   /**
1130    * Return the encryption algorithm in use by this family
1131    */
1132   public String getEncryptionType() {
1133     return getValue(ENCRYPTION);
1134   }
1135 
1136   /**
1137    * Set the encryption algorithm for use with this family
1138    * @param algorithm
1139    */
1140   public HColumnDescriptor setEncryptionType(String algorithm) {
1141     setValue(ENCRYPTION, algorithm);
1142     return this;
1143   }
1144 
  /** Return the raw crypto key attribute for the family, or null if not set  */
  public byte[] getEncryptionKey() {
    return getValue(Bytes.toBytes(ENCRYPTION_KEY));
  }
1149 
  /** Set the raw crypto key attribute for the family; returns this for chaining */
  public HColumnDescriptor setEncryptionKey(byte[] keyBytes) {
    setValue(Bytes.toBytes(ENCRYPTION_KEY), keyBytes);
    return this;
  }
1155 
1156   /**
1157    * Gets the mob threshold of the family.
1158    * If the size of a cell value is larger than this threshold, it's regarded as a mob.
1159    * The default threshold is 1024*100(100K)B.
1160    * @return The mob threshold.
1161    */
1162   public long getMobThreshold() {
1163     byte[] threshold = getValue(MOB_THRESHOLD_BYTES);
1164     return threshold != null && threshold.length == Bytes.SIZEOF_LONG ? Bytes.toLong(threshold)
1165         : DEFAULT_MOB_THRESHOLD;
1166   }
1167 
1168   /**
1169    * Sets the mob threshold of the family.
1170    * @param threshold The mob threshold.
1171    * @return this (for chained invocation)
1172    */
1173   public HColumnDescriptor setMobThreshold(long threshold) {
1174     setValue(MOB_THRESHOLD_BYTES, Bytes.toBytes(threshold));
1175     return this;
1176   }
1177 
1178   /**
1179    * Gets whether the mob is enabled for the family.
1180    * @return True if the mob is enabled for the family.
1181    */
1182   public boolean isMobEnabled() {
1183     byte[] isMobEnabled = getValue(IS_MOB_BYTES);
1184     return isMobEnabled != null && isMobEnabled.length == Bytes.SIZEOF_BOOLEAN
1185         && Bytes.toBoolean(isMobEnabled);
1186   }
1187 
1188   /**
1189    * Enables the mob for the family.
1190    * @param isMobEnabled Whether to enable the mob for the family.
1191    * @return this (for chained invocation)
1192    */
1193   public HColumnDescriptor setMobEnabled(boolean isMobEnabled) {
1194     setValue(IS_MOB_BYTES, Bytes.toBytes(isMobEnabled));
1195     return this;
1196   }
1197 
1198   /**
1199    * @return replication factor set for this CF or {@link #DEFAULT_DFS_REPLICATION} if not set.
1200    *         <p>
1201    *         {@link #DEFAULT_DFS_REPLICATION} value indicates that user has explicitly not set any
1202    *         block replication factor for this CF, hence use the default replication factor set in
1203    *         the file system.
1204    */
1205   public short getDFSReplication() {
1206     String rf = getValue(DFS_REPLICATION);
1207     return rf == null ? DEFAULT_DFS_REPLICATION : Short.valueOf(rf);
1208   }
1209 
1210   /**
1211    * Set the replication factor to hfile(s) belonging to this family
1212    * @param replication number of replicas the blocks(s) belonging to this CF should have, or
1213    *          {@link #DEFAULT_DFS_REPLICATION} for the default replication factor set in the
1214    *          filesystem
1215    * @return this (for chained invocation)
1216    */
1217   public HColumnDescriptor setDFSReplication(short replication) {
1218     if (replication < 1 && replication != DEFAULT_DFS_REPLICATION) {
1219       throw new IllegalArgumentException(
1220           "DFS replication factor cannot be less than 1 if explictly set.");
1221     }
1222     setValue(DFS_REPLICATION, Short.toString(replication));
1223     return this;
1224   }
1225 }