001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.mapreduce; 019 020import java.io.DataInputStream; 021import java.io.DataOutputStream; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.OutputStream; 025import org.apache.hadoop.hbase.ExtendedCell; 026import org.apache.hadoop.hbase.KeyValue; 027import org.apache.hadoop.hbase.KeyValueUtil; 028import org.apache.hadoop.hbase.PrivateCellUtil; 029import org.apache.hadoop.hbase.util.Bytes; 030import org.apache.hadoop.io.serializer.Deserializer; 031import org.apache.hadoop.io.serializer.Serialization; 032import org.apache.hadoop.io.serializer.Serializer; 033import org.apache.yetus.audience.InterfaceAudience; 034 035/** 036 * Similar to CellSerialization, but includes the sequenceId from an ExtendedCell. This is necessary 037 * so that CellSortReducer can sort by sequenceId, if applicable. Note that these two serializations 038 * are not compatible -- data serialized by CellSerialization cannot be deserialized with 039 * ExtendedCellSerialization and vice versa. This is ok for {@link HFileOutputFormat2} because the 040 * serialization is not actually used for the actual written HFiles, just intermediate data (between 041 * mapper and reducer of a single job). 042 */ 043@InterfaceAudience.Private 044public class ExtendedCellSerialization implements Serialization<ExtendedCell> { 045 @Override 046 public boolean accept(Class<?> c) { 047 return ExtendedCell.class.isAssignableFrom(c); 048 } 049 050 @Override 051 public ExtendedCellDeserializer getDeserializer(Class<ExtendedCell> t) { 052 return new ExtendedCellDeserializer(); 053 } 054 055 @Override 056 public ExtendedCellSerializer getSerializer(Class<ExtendedCell> c) { 057 return new ExtendedCellSerializer(); 058 } 059 060 public static class ExtendedCellDeserializer implements Deserializer<ExtendedCell> { 061 private DataInputStream dis; 062 063 @Override 064 public void close() throws IOException { 065 this.dis.close(); 066 } 067 068 @Override 069 public KeyValue deserialize(ExtendedCell ignore) throws IOException { 070 KeyValue kv = KeyValueUtil.create(this.dis); 071 PrivateCellUtil.setSequenceId(kv, this.dis.readLong()); 072 return kv; 073 } 074 075 @Override 076 public void open(InputStream is) throws IOException { 077 this.dis = new DataInputStream(is); 078 } 079 } 080 081 public static class ExtendedCellSerializer implements Serializer<ExtendedCell> { 082 private DataOutputStream dos; 083 084 @Override 085 public void close() throws IOException { 086 this.dos.close(); 087 } 088 089 @Override 090 public void open(OutputStream os) throws IOException { 091 this.dos = new DataOutputStream(os); 092 } 093 094 @Override 095 public void serialize(ExtendedCell kv) throws IOException { 096 dos.writeInt(PrivateCellUtil.estimatedSerializedSizeOf(kv) - Bytes.SIZEOF_INT); 097 kv.write(dos, true); 098 dos.writeLong(kv.getSequenceId()); 099 } 100 } 101}