001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.mapreduce;
019
020import java.io.IOException;
021import java.util.TreeSet;
022import org.apache.hadoop.hbase.Cell;
023import org.apache.hadoop.hbase.CellComparator;
024import org.apache.hadoop.hbase.PrivateCellUtil;
025import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
026import org.apache.hadoop.hbase.util.MapReduceExtendedCell;
027import org.apache.hadoop.mapreduce.Reducer;
028import org.apache.yetus.audience.InterfaceAudience;
029
030/**
031 * Emits sorted Cells. Reads in all Cells from passed Iterator, sorts them, then emits Cells in
032 * sorted order. If lots of columns per row, it will use lots of memory sorting.
033 * @see HFileOutputFormat2
034 */
035@InterfaceAudience.Public
036public class CellSortReducer
037  extends Reducer<ImmutableBytesWritable, Cell, ImmutableBytesWritable, Cell> {
038  protected void reduce(ImmutableBytesWritable row, Iterable<Cell> kvs,
039    Reducer<ImmutableBytesWritable, Cell, ImmutableBytesWritable, Cell>.Context context)
040    throws java.io.IOException, InterruptedException {
041    TreeSet<Cell> map = new TreeSet<>(CellComparator.getInstance());
042    for (Cell kv : kvs) {
043      try {
044        map.add(PrivateCellUtil.deepClone(kv));
045      } catch (CloneNotSupportedException e) {
046        throw new IOException(e);
047      }
048    }
049    context.setStatus("Read " + map.getClass());
050    int index = 0;
051    for (Cell kv : map) {
052      context.write(row, new MapReduceExtendedCell(kv));
053      if (++index % 100 == 0) context.setStatus("Wrote " + index);
054    }
055  }
056}