001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.mapreduce; 020 021import java.io.IOException; 022import java.util.TreeSet; 023 024import org.apache.hadoop.hbase.Cell; 025import org.apache.hadoop.hbase.CellComparator; 026import org.apache.hadoop.hbase.PrivateCellUtil; 027import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 028import org.apache.hadoop.hbase.util.MapReduceExtendedCell; 029import org.apache.hadoop.mapreduce.Reducer; 030import org.apache.yetus.audience.InterfaceAudience; 031 032/** 033 * Emits sorted Cells. 034 * Reads in all Cells from passed Iterator, sorts them, then emits 035 * Cells in sorted order. If lots of columns per row, it will use lots of 036 * memory sorting. 037 * @see HFileOutputFormat2 038 */ 039@InterfaceAudience.Public 040public class CellSortReducer 041 extends Reducer<ImmutableBytesWritable, Cell, ImmutableBytesWritable, Cell> { 042 protected void reduce(ImmutableBytesWritable row, Iterable<Cell> kvs, 043 Reducer<ImmutableBytesWritable, Cell, ImmutableBytesWritable, Cell>.Context context) 044 throws java.io.IOException, InterruptedException { 045 TreeSet<Cell> map = new TreeSet<>(CellComparator.getInstance()); 046 for (Cell kv : kvs) { 047 try { 048 map.add(PrivateCellUtil.deepClone(kv)); 049 } catch (CloneNotSupportedException e) { 050 throw new IOException(e); 051 } 052 } 053 context.setStatus("Read " + map.getClass()); 054 int index = 0; 055 for (Cell kv: map) { 056 context.write(row, new MapReduceExtendedCell(kv)); 057 if (++index % 100 == 0) context.setStatus("Wrote " + index); 058 } 059 } 060}