001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.types;
019
020import java.util.Iterator;
021import org.apache.hadoop.hbase.util.Order;
022import org.apache.hadoop.hbase.util.PositionedByteRange;
023import org.apache.yetus.audience.InterfaceAudience;
024
025/**
026 * <p>
027 * {@code Struct} is a simple {@link DataType} for implementing "compound rowkey" and "compound
028 * qualifier" schema design strategies.
029 * </p>
030 * <h3>Encoding</h3>
031 * <p>
032 * {@code Struct} member values are encoded onto the target byte[] in the order in which they are
033 * declared. A {@code Struct} may be used as a member of another {@code Struct}. {@code Struct}s are
034 * not {@code nullable} but their component fields may be.
035 * </p>
036 * <h3>Trailing Nulls</h3>
037 * <p>
038 * {@code Struct} treats the right-most nullable field members as special. Rather than writing null
039 * values to the output buffer, {@code Struct} omits those records all together. When reading back a
040 * value, it will look for the scenario where the end of the buffer has been reached but there are
041 * still nullable fields remaining in the {@code Struct} definition. When this happens, it will
042 * produce null entries for the remaining values. For example:
043 * </p>
044 *
045 * <pre>
046 * StructBuilder builder = new StructBuilder()
047 *     .add(OrderedNumeric.ASCENDING) // nullable
048 *     .add(OrderedString.ASCENDING)  // nullable
049 * Struct shorter = builder.toStruct();
050 * Struct longer = builder.add(OrderedNumeric.ASCENDING) // nullable
051 *     .toStruct();
052 *
053 * PositionedByteRange buf1 = new SimplePositionedByteRange(7);
054 * PositionedByteRange buf2 = new SimplePositionedByteRange(7);
055 * Object[] val = new Object[] { BigDecimal.ONE, "foo" };
056 * shorter.encode(buf1, val); // write short value with short Struct
057 * buf1.setPosition(0); // reset position marker, prepare for read
058 * longer.decode(buf1); // =&gt; { BigDecimal.ONE, "foo", null } ; long Struct reads implied null
059 * longer.encode(buf2, val); // write short value with long struct
060 * Bytes.equals(buf1.getBytes(), buf2.getBytes()); // =&gt; true; long Struct skips writing null
061 * </pre>
062 *
063 * <h3>Sort Order</h3>
064 * <p>
065 * {@code Struct} instances sort according to the composite order of their fields, that is,
066 * left-to-right and depth-first. This can also be thought of as lexicographic comparison of
067 * concatenated members.
068 * </p>
069 * <p>
070 * {@link StructIterator} is provided as a convenience for consuming the sequence of values. Users
071 * may find it more appropriate to provide their own custom {@link DataType} for encoding
072 * application objects rather than using this {@code Object[]} implementation. Examples are provided
073 * in test.
074 * </p>
075 * @see StructIterator
076 * @see DataType#isNullable()
077 */
078@InterfaceAudience.Public
079public class Struct implements DataType<Object[]> {
080
081  @SuppressWarnings("rawtypes")
082  protected final DataType[] fields;
083  protected final boolean isOrderPreserving;
084  protected final boolean isSkippable;
085
086  /**
087   * Create a new {@code Struct} instance defined as the sequence of {@code HDataType}s in
088   * {@code memberTypes}.
089   * <p>
090   * A {@code Struct} is {@code orderPreserving} when all of its fields are {@code orderPreserving}.
091   * A {@code Struct} is {@code skippable} when all of its fields are {@code skippable}.
092   * </p>
093   */
094  @SuppressWarnings("rawtypes")
095  public Struct(DataType[] memberTypes) {
096    this.fields = memberTypes;
097    // a Struct is not orderPreserving when any of its fields are not.
098    boolean preservesOrder = true;
099    // a Struct is not skippable when any of its fields are not.
100    boolean skippable = true;
101    for (int i = 0; i < this.fields.length; i++) {
102      DataType dt = this.fields[i];
103      if (!dt.isOrderPreserving()) {
104        preservesOrder = false;
105      }
106      if (i < this.fields.length - 2 && !dt.isSkippable()) {
107        throw new IllegalArgumentException("Field in position " + i
108          + " is not skippable. Non-right-most struct fields must be skippable.");
109      }
110      if (!dt.isSkippable()) {
111        skippable = false;
112      }
113    }
114    this.isOrderPreserving = preservesOrder;
115    this.isSkippable = skippable;
116  }
117
118  @Override
119  public boolean isOrderPreserving() {
120    return isOrderPreserving;
121  }
122
123  @Override
124  public Order getOrder() {
125    return null;
126  }
127
128  @Override
129  public boolean isNullable() {
130    return false;
131  }
132
133  @Override
134  public boolean isSkippable() {
135    return isSkippable;
136  }
137
138  @SuppressWarnings("unchecked")
139  @Override
140  public int encodedLength(Object[] val) {
141    assert fields.length >= val.length;
142    int sum = 0;
143    for (int i = 0; i < val.length; i++) {
144      sum += fields[i].encodedLength(val[i]);
145    }
146    return sum;
147  }
148
149  @Override
150  public Class<Object[]> encodedClass() {
151    return Object[].class;
152  }
153
154  /**
155   * Retrieve an {@link Iterator} over the values encoded in {@code src}. {@code src}'s position is
156   * consumed by consuming this iterator.
157   */
158  public StructIterator iterator(PositionedByteRange src) {
159    return new StructIterator(src, fields);
160  }
161
162  @Override
163  public int skip(PositionedByteRange src) {
164    StructIterator it = iterator(src);
165    int skipped = 0;
166    while (it.hasNext()) {
167      skipped += it.skip();
168    }
169    return skipped;
170  }
171
172  @Override
173  public Object[] decode(PositionedByteRange src) {
174    int i = 0;
175    Object[] ret = new Object[fields.length];
176    Iterator<Object> it = iterator(src);
177    while (it.hasNext()) {
178      ret[i++] = it.next();
179    }
180    return ret;
181  }
182
183  /**
184   * Read the field at {@code index}. {@code src}'s position is not affected.
185   */
186  public Object decode(PositionedByteRange src, int index) {
187    assert index >= 0;
188    StructIterator it = iterator(src.shallowCopy());
189    for (; index > 0; index--) {
190      it.skip();
191    }
192    return it.next();
193  }
194
195  @SuppressWarnings("unchecked")
196  @Override
197  public int encode(PositionedByteRange dst, Object[] val) {
198    if (val.length == 0) {
199      return 0;
200    }
201    assert fields.length >= val.length;
202    int end, written = 0;
203    // find the last occurrence of a non-null or null and non-nullable value
204    for (end = val.length - 1; end > -1; end--) {
205      if (null != val[end] || (null == val[end] && !fields[end].isNullable())) {
206        break;
207      }
208    }
209    for (int i = 0; i <= end; i++) {
210      written += fields[i].encode(dst, val[i]);
211    }
212    return written;
213  }
214}