/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.function.Predicate;
import java.util.function.ToLongFunction;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompoundConfiguration;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility functions for the region server storage layer.
 */
@InterfaceAudience.Private
public final class StoreUtils {

  private static final Logger LOG = LoggerFactory.getLogger(StoreUtils.class);

  private StoreUtils() {
  }

  /**
   * Creates a deterministic seed for the store file collection, based on the hash code of the
   * first file's name. Returns empty if the collection is empty.
   */
  public static OptionalInt getDeterministicRandomSeed(Collection<HStoreFile> files) {
    return files.stream().mapToInt(f -> f.getPath().getName().hashCode()).findFirst();
  }

  /**
   * Determines whether any files in the collection are references.
   * @param files The files.
   */
  public static boolean hasReferences(Collection<HStoreFile> files) {
    // TODO: make sure that we won't pass null here in the future.
    return files != null && files.stream().anyMatch(HStoreFile::isReference);
  }

  /**
   * Gets the lowest modification timestamp among the candidate StoreFiles.
   */
  public static long getLowestTimestamp(Collection<HStoreFile> candidates) throws IOException {
    long minTs = Long.MAX_VALUE;
    for (HStoreFile storeFile : candidates) {
      minTs = Math.min(minTs, storeFile.getModificationTimestamp());
    }
    return minTs;
  }

  /**
   * Gets the largest file (with reader) out of the list of files.
   * @param candidates The files to choose from.
   * @return The largest file, or empty if no file has a reader.
   */
  static Optional<HStoreFile> getLargestFile(Collection<HStoreFile> candidates) {
    return candidates.stream().filter(f -> f.getReader() != null)
      .max((f1, f2) -> Long.compare(f1.getReader().length(), f2.getReader().length()));
  }

  /**
   * Return the largest memstoreTS found across all storefiles in the given list. Store files that
   * were created by a mapreduce bulk load are ignored, as they do not correspond to any specific
   * put operation, and thus do not have a memstoreTS associated with them.
   */
  public static OptionalLong getMaxMemStoreTSInList(Collection<HStoreFile> sfs) {
    return sfs.stream().filter(sf -> !sf.isBulkLoadResult()).mapToLong(HStoreFile::getMaxMemStoreTS)
      .max();
  }

  /**
   * Return the highest sequence ID found across all storefiles in the given list.
   */
  public static OptionalLong getMaxSequenceIdInList(Collection<HStoreFile> sfs) {
    return sfs.stream().mapToLong(HStoreFile::getMaxSequenceId).max();
  }

  /**
   * Gets the approximate mid-point of the given file that is optimal for use in splitting it.
   * @param file       the store file
   * @param comparator Comparator used to compare KVs.
   * @return The split point row, or empty if splitting is not possible or the reader is null.
   */
  static Optional<byte[]> getFileSplitPoint(HStoreFile file, CellComparator comparator)
    throws IOException {
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      LOG.warn("Storefile {} Reader is null; cannot get split point", file);
      return Optional.empty();
    }
    // Get first, last, and mid keys. Midkey is the key that starts block
    // in middle of hfile. Has column and timestamp. Need to return just
    // the row we want to split on as midkey.
    Optional<ExtendedCell> optionalMidKey = reader.midKey();
    if (!optionalMidKey.isPresent()) {
      return Optional.empty();
    }
    Cell midKey = optionalMidKey.get();
    Cell firstKey = reader.getFirstKey().get();
    Cell lastKey = reader.getLastKey().get();
    // if the midkey is the same as the first or last keys, we cannot (ever) split this region.
    if (
      comparator.compareRows(midKey, firstKey) == 0 || comparator.compareRows(midKey, lastKey) == 0
    ) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("cannot split {} because midkey is the same as first or last row", file);
      }
      return Optional.empty();
    }
    return Optional.of(CellUtil.cloneRow(midKey));
  }

  /**
   * Gets the mid point of the largest file in the collection as the split point.
   */
  static Optional<byte[]> getSplitPoint(Collection<HStoreFile> storefiles,
    CellComparator comparator) throws IOException {
    Optional<HStoreFile> largestFile = StoreUtils.getLargestFile(storefiles);
    return largestFile.isPresent()
      ? StoreUtils.getFileSplitPoint(largestFile.get(), comparator)
      : Optional.empty();
  }
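  /*
   * Illustrative sketch of how the two helpers above compose: given a store's files, a caller
   * could derive a split row as below. The 'storefiles' variable and the choice of
   * CellComparator.getInstance() are assumptions for illustration, not code from this class.
   *
   *   Optional<byte[]> splitRow =
   *     StoreUtils.getSplitPoint(storefiles, CellComparator.getInstance());
   *   splitRow.ifPresent(row -> LOG.debug("Split point: {}", Bytes.toStringBinary(row)));
   */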

  /**
   * Returns the configured checksum algorithm.
   * @param conf The configuration
   * @return The checksum algorithm that is set in the configuration
   */
  public static ChecksumType getChecksumType(Configuration conf) {
    return ChecksumType.nameToType(
      conf.get(HConstants.CHECKSUM_TYPE_NAME, ChecksumType.getDefaultChecksumType().getName()));
  }

  /**
   * Returns the configured bytesPerChecksum value.
   * @param conf The configuration
   * @return The bytesPerChecksum that is set in the configuration
   */
  public static int getBytesPerChecksum(Configuration conf) {
    return conf.getInt(HConstants.BYTES_PER_CHECKSUM, HFile.DEFAULT_BYTES_PER_CHECKSUM);
  }

  /**
   * Creates a store configuration by layering the column family overrides and metadata on top of
   * the table overrides, which in turn sit on top of the global configuration.
   */
  public static Configuration createStoreConfiguration(Configuration conf, TableDescriptor td,
    ColumnFamilyDescriptor cfd) {
    // CompoundConfiguration will look for keys in reverse order of addition, so we'd
    // add global config first, then table and cf overrides, then cf metadata.
    return new CompoundConfiguration().add(conf).addBytesMap(td.getValues())
      .addStringMap(cfd.getConfiguration()).addBytesMap(cfd.getValues());
  }
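  /*
   * Illustrative sketch of the resulting lookup precedence; the key below is a real HBase
   * setting, but the scenario is hypothetical:
   *
   *   Configuration storeConf = StoreUtils.createStoreConfiguration(conf, td, cfd);
   *   // If the table sets "hbase.hstore.blockingStoreFiles" and the column family does not,
   *   // storeConf.get("hbase.hstore.blockingStoreFiles") returns the table-level value;
   *   // a column family override would win over both the table and the global values.
   */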

  /**
   * Converts the given store files to a list of their StoreFileInfo.
   */
  public static List<StoreFileInfo> toStoreFileInfo(Collection<HStoreFile> storefiles) {
    return storefiles.stream().map(HStoreFile::getFileInfo).collect(Collectors.toList());
  }

  /**
   * Wraps the given StoreFileInfo list as HStoreFile instances with the given bloom type and
   * cache configuration.
   */
  public static List<HStoreFile> toHStoreFile(List<StoreFileInfo> storeFileInfoList,
    BloomType bloomType, CacheConfig cacheConf) throws IOException {
    List<HStoreFile> hStoreFiles = new ArrayList<>();
    for (StoreFileInfo storeFileInfo : storeFileInfoList) {
      hStoreFiles.add(new HStoreFile(storeFileInfo, bloomType, cacheConf));
    }
    return hStoreFiles;
  }

  /**
   * Returns the total uncompressed size, in bytes, of the given files. Files without an open
   * reader contribute zero.
   */
  public static long getTotalUncompressedBytes(List<HStoreFile> files) {
    return files.stream()
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::getTotalUncompressedBytes))
      .sum();
  }

  /**
   * Returns the total on-disk size, in bytes, of the files that match the given predicate.
   */
  public static long getStorefilesSize(Collection<HStoreFile> files,
    Predicate<HStoreFile> predicate) {
    return files.stream().filter(predicate)
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::length)).sum();
  }

  /**
   * Reads a long-valued field from the file's reader, returning 0 if the file or its reader is
   * null.
   */
  public static long getStorefileFieldSize(HStoreFile file, ToLongFunction<StoreFileReader> f) {
    if (file == null) {
      return 0L;
    }
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      return 0L;
    }
    return f.applyAsLong(reader);
  }
}