View Javadoc

1   /**
2    * Copyright 2011 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  
21  package org.apache.hadoop.hbase.coprocessor;
22  
23  import java.io.IOException;
24  import java.util.List;
25  
26  import org.apache.hadoop.hbase.client.Scan;
27  import org.apache.hadoop.hbase.client.coprocessor.AggregationClient;
28  import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
29  import org.apache.hadoop.hbase.util.Pair;
30  
31  /**
32   * Defines the aggregation functions supported by this Coprocessor. Each
33   * method takes a ColumnInterpreter and a Scan object. The scan object should
34   * have a column family (else an exception will be thrown), and an optional
35   * column qualifier. In the current implementation,
36   * {@link AggregateImplementation}, only one column family and column qualifier
37   * combination is served; if more than one is given, only the first one is
38   * picked. Refer to {@link AggregationClient} for some general conditions on
39   * input parameters.
40   */
41  public interface AggregateProtocol extends CoprocessorProtocol {
42    public static final long VERSION = 1L; // NOTE(review): presumably the protocol version -- confirm
43  
44    /**
45     * Gives the maximum for a given combination of column qualifier and column
46     * family, in the given row range as defined in the Scan object. In its
47     * current implementation, it takes one column family and one column qualifier
48     * (if provided). If the column qualifier is null, the maximum value for the
49     * entire column family is returned.
50     * @param ci column interpreter used to interpret the column values
51     * @param scan scan giving the column family, optional qualifier and row range
52     * @return max value as described above
53     * @throws IOException if the aggregation call fails
54     */
55    <T, S> T getMax(ColumnInterpreter<T, S> ci, Scan scan) throws IOException;
56  
57    /**
58     * Gives the minimum for a given combination of column qualifier and column
59     * family, in the given row range as defined in the Scan object. In its
60     * current implementation, it takes one column family and one column qualifier
61     * (if provided). If the column qualifier is null, the minimum value for the
62     * entire column family is returned.
63     * @param ci column interpreter used to interpret the column values
64     * @param scan scan giving the column family, optional qualifier and row range
65     * @return min value as described above
66     * @throws IOException if the aggregation call fails
67     */
68    <T, S> T getMin(ColumnInterpreter<T, S> ci, Scan scan) throws IOException;
69  
70    /**
71     * Gives the sum for a given combination of column qualifier and column
72     * family, in the given row range as defined in the Scan object. In its
73     * current implementation, it takes one column family and one column qualifier
74     * (if provided). If the column qualifier is null, the sum for the entire
75     * column family is returned.
76     * @param ci column interpreter used to interpret the column values
77     * @param scan scan giving the column family, optional qualifier and row range
78     * @return sum of values as defined by the column interpreter
79     * @throws IOException if the aggregation call fails
80     */
81    <T, S> S getSum(ColumnInterpreter<T, S> ci, Scan scan) throws IOException;
82  
83    /**
84     * Counts the rows in the given row range as defined in the Scan object.
85     * @param ci column interpreter used to interpret the column values
86     * @param scan scan giving the column family, optional qualifier and row range
87     * @return row count for the given column family and column qualifier
88     * @throws IOException if the aggregation call fails
89     */
90    <T, S> long getRowNum(ColumnInterpreter<T, S> ci, Scan scan)
91        throws IOException;
92  
93    /**
94     * Gives a Pair with first object as Sum and second object as row count,
95     * computed for a given combination of column qualifier and column family in
96     * the given row range as defined in the Scan object. In its current
97     * implementation, it takes one column family and one column qualifier (if
98     * provided). If the column qualifier is null, an aggregate sum over the
99     * entire column family is returned.
100    * <p>
101    * The average itself is computed in
102    * {@link AggregationClient#avg(byte[], ColumnInterpreter, Scan)} by
103    * processing results from all regions, so it is fine for this method to
104    * return only the sum and the row count (as a Long).
105    * @param ci column interpreter used to interpret the column values
106    * @param scan scan giving the column family, optional qualifier and row range
107    * @return Pair of (sum, row count); this is not the average itself
108    * @throws IOException if the aggregation call fails
109    */
110   <T, S> Pair<S, Long> getAvg(ColumnInterpreter<T, S> ci, Scan scan)
111       throws IOException;
112 
113   /**
114    * Gives a Pair with first object a List containing Sum and sum of squares,
115    * and the second object as row count. It is computed for a given combination of
116    * column qualifier and column family in the given row range as defined in the
117    * Scan object. In its current implementation, it takes one column family and
118    * one column qualifier (if provided). The idea is to get the variance first
119    * (the average of the squares less the square of the average); the standard
120    * deviation is then the square root of the variance.
121    * @param ci column interpreter used to interpret the column values
122    * @param scan scan giving the column family, optional qualifier and row range
123    * @return Pair of (List containing sum and sum of squares, row count)
124    * @throws IOException if the aggregation call fails
125    */
126   <T, S> Pair<List<S>, Long> getStd(ColumnInterpreter<T, S> ci, Scan scan)
127       throws IOException;
128 
129   /**
130    * Gives a List containing sum of values and sum of weights.
131    * It is computed for the combination of column
132    * family and column qualifier(s) in the given row range as defined in the
133    * Scan object. In its current implementation, it takes one column family and
134    * two column qualifiers. The first qualifier is for the values column and
135    * the second qualifier (optional) is for the weight column.
136    * @param ci column interpreter used to interpret the column values
137    * @param scan scan giving the column family, qualifier(s) and row range
138    * @return List containing the sum of values and the sum of weights
139    * @throws IOException if the aggregation call fails
140    */
141   <T, S> List<S> getMedian(ColumnInterpreter<T, S> ci, Scan scan)
142       throws IOException;
143 
144 }