1 /** 2 * 3 * Licensed to the Apache Software Foundation (ASF) under one 4 * or more contributor license agreements. See the NOTICE file 5 * distributed with this work for additional information 6 * regarding copyright ownership. The ASF licenses this file 7 * to you under the Apache License, Version 2.0 (the 8 * "License"); you may not use this file except in compliance 9 * with the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 package org.apache.hadoop.hbase.client; 20 21 import org.apache.hadoop.hbase.classification.InterfaceAudience; 22 import org.apache.hadoop.hbase.HRegionLocation; 23 24 import java.io.IOException; 25 import java.io.InterruptedIOException; 26 import java.util.ArrayList; 27 import java.util.HashMap; 28 import java.util.List; 29 import java.util.Map; 30 31 /** 32 * Utility class for HTable. 33 * 34 * @deprecated since 1.0 35 */ 36 @InterfaceAudience.Private 37 @Deprecated 38 public class HTableUtil { 39 40 private static final int INITIAL_LIST_SIZE = 250; 41 42 /** 43 * Processes a List of Puts and writes them to an HTable instance in RegionServer buckets via the htable.put method. 44 * This will utilize the writeBuffer, thus the writeBuffer flush frequency may be tuned accordingly via htable.setWriteBufferSize. 45 * <br><br> 46 * The benefit of submitting Puts in this manner is to minimize the number of RegionServer RPCs in each flush. 47 * <br><br> 48 * Assumption #1: Regions have been pre-created for the table. If they haven't, then all of the Puts will go to the same region, 49 * defeating the purpose of this utility method. See the Apache HBase book for an explanation of how to do this. 50 * <br> 51 * Assumption #2: Row-keys are not monotonically increasing. See the Apache HBase book for an explanation of this problem. 52 * <br> 53 * Assumption #3: That the input list of Puts is big enough to be useful (in the thousands or more). The intent of this 54 * method is to process larger chunks of data. 55 * <br> 56 * Assumption #4: htable.setAutoFlush(false) has been set. This is a requirement to use the writeBuffer. 57 * <br><br> 58 * @param htable HTable instance for target HBase table 59 * @param puts List of Put instances 60 * @throws IOException if a remote or network exception occurs 61 * 62 */ 63 public static void bucketRsPut(HTable htable, List<Put> puts) throws IOException { 64 65 Map<String, List<Put>> putMap = createRsPutMap(htable.getRegionLocator(), puts); 66 for (List<Put> rsPuts: putMap.values()) { 67 htable.put( rsPuts ); 68 } 69 htable.flushCommits(); 70 } 71 72 /** 73 * Processes a List of Rows (Put, Delete) and writes them to an HTable instance in RegionServer buckets via the htable.batch method. 74 * <br><br> 75 * The benefit of submitting Puts in this manner is to minimize the number of RegionServer RPCs, thus this will 76 * produce one RPC of Puts per RegionServer. 77 * <br><br> 78 * Assumption #1: Regions have been pre-created for the table. If they haven't, then all of the Puts will go to the same region, 79 * defeating the purpose of this utility method. See the Apache HBase book for an explanation of how to do this. 80 * <br> 81 * Assumption #2: Row-keys are not monotonically increasing. See the Apache HBase book for an explanation of this problem. 82 * <br> 83 * Assumption #3: That the input list of Rows is big enough to be useful (in the thousands or more). The intent of this 84 * method is to process larger chunks of data. 85 * <br><br> 86 * This method accepts a list of Row objects because the underlying .batch method accepts a list of Row objects. 87 * <br><br> 88 * @param htable HTable instance for target HBase table 89 * @param rows List of Row instances 90 * @throws IOException if a remote or network exception occurs 91 */ 92 public static void bucketRsBatch(HTable htable, List<Row> rows) throws IOException { 93 94 try { 95 Map<String, List<Row>> rowMap = createRsRowMap(htable.getRegionLocator(), rows); 96 for (List<Row> rsRows: rowMap.values()) { 97 htable.batch( rsRows ); 98 } 99 } catch (InterruptedException e) { 100 throw (InterruptedIOException)new InterruptedIOException().initCause(e); 101 } 102 103 } 104 105 private static Map<String,List<Put>> createRsPutMap(RegionLocator htable, List<Put> puts) throws IOException { 106 107 Map<String, List<Put>> putMap = new HashMap<String, List<Put>>(); 108 for (Put put: puts) { 109 HRegionLocation rl = htable.getRegionLocation( put.getRow() ); 110 String hostname = rl.getHostname(); 111 List<Put> recs = putMap.get( hostname); 112 if (recs == null) { 113 recs = new ArrayList<Put>(INITIAL_LIST_SIZE); 114 putMap.put( hostname, recs); 115 } 116 recs.add(put); 117 } 118 return putMap; 119 } 120 121 private static Map<String,List<Row>> createRsRowMap(RegionLocator htable, List<Row> rows) throws IOException { 122 123 Map<String, List<Row>> rowMap = new HashMap<String, List<Row>>(); 124 for (Row row: rows) { 125 HRegionLocation rl = htable.getRegionLocation( row.getRow() ); 126 String hostname = rl.getHostname(); 127 List<Row> recs = rowMap.get( hostname); 128 if (recs == null) { 129 recs = new ArrayList<Row>(INITIAL_LIST_SIZE); 130 rowMap.put( hostname, recs); 131 } 132 recs.add(row); 133 } 134 return rowMap; 135 } 136 137 }