/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.function.Predicate;
import java.util.function.ToLongFunction;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompoundConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility functions for the region server storage layer.
 */
@InterfaceAudience.Private
public final class StoreUtils {

  private static final Logger LOG = LoggerFactory.getLogger(StoreUtils.class);

  private StoreUtils() {
  }

  /**
   * Creates a deterministic hash code for a store file collection, derived from the name of its
   * first file.
   */
  public static OptionalInt getDeterministicRandomSeed(Collection<HStoreFile> files) {
    return files.stream().mapToInt(f -> f.getPath().getName().hashCode()).findFirst();
  }

  /**
   * Determines whether any files in the collection are references.
   * @param files The files.
   */
  public static boolean hasReferences(Collection<HStoreFile> files) {
    // TODO: make sure that we won't pass null here in the future.
    return files != null && files.stream().anyMatch(HStoreFile::isReference);
  }

  /**
   * Gets the lowest modification timestamp from the candidate StoreFiles.
   */
  public static long getLowestTimestamp(Collection<HStoreFile> candidates) throws IOException {
    long minTs = Long.MAX_VALUE;
    for (HStoreFile storeFile : candidates) {
      minTs = Math.min(minTs, storeFile.getModificationTimestamp());
    }
    return minTs;
  }
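  /*
   * Usage sketch (illustrative only; 'now' and 'ttlMs' are hypothetical locals, not part of this
   * class): compaction policies combine the helpers above to decide whether a candidate set is
   * both old enough and safe to compact, along the lines of
   *
   *   if (!StoreUtils.hasReferences(candidates)
   *       && StoreUtils.getLowestTimestamp(candidates) < now - ttlMs) {
   *     // no candidate is a reference file and the oldest one has exceeded the TTL
   *   }
   */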
  /**
   * Gets the largest file (with reader) out of the list of files.
   * @param candidates The files to choose from.
   * @return The largest file, or an empty Optional if no file has a reader.
   */
  static Optional<HStoreFile> getLargestFile(Collection<HStoreFile> candidates) {
    return candidates.stream().filter(f -> f.getReader() != null)
      .max((f1, f2) -> Long.compare(f1.getReader().length(), f2.getReader().length()));
  }

  /**
   * Return the largest memstoreTS found across all storefiles in the given list. Store files that
   * were created by a mapreduce bulk load are ignored, as they do not correspond to any specific
   * put operation, and thus do not have a memstoreTS associated with them.
   */
  public static OptionalLong getMaxMemStoreTSInList(Collection<HStoreFile> sfs) {
    return sfs.stream().filter(sf -> !sf.isBulkLoadResult()).mapToLong(HStoreFile::getMaxMemStoreTS)
      .max();
  }

  /**
   * Return the highest sequence ID found across all storefiles in the given list.
   */
  public static OptionalLong getMaxSequenceIdInList(Collection<HStoreFile> sfs) {
    return sfs.stream().mapToLong(HStoreFile::getMaxSequenceId).max();
  }

  /**
   * Gets the approximate mid-point of the given file that is optimal for use in splitting it.
   * @param file       the store file
   * @param comparator Comparator used to compare KVs.
   * @return The split point row, or an empty Optional if splitting is not possible or the reader
   *         is null.
   */
  static Optional<byte[]> getFileSplitPoint(HStoreFile file, CellComparator comparator)
    throws IOException {
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      LOG.warn("Storefile {} Reader is null; cannot get split point", file);
      return Optional.empty();
    }
    // Get first, last, and mid keys. Midkey is the key that starts block
    // in middle of hfile. Has column and timestamp. Need to return just
    // the row we want to split on as midkey.
    Optional<Cell> optionalMidKey = reader.midKey();
    if (!optionalMidKey.isPresent()) {
      return Optional.empty();
    }
    Cell midKey = optionalMidKey.get();
    Cell firstKey = reader.getFirstKey().get();
    Cell lastKey = reader.getLastKey().get();
    // if the midkey is the same as the first or last keys, we cannot (ever) split this region.
    if (
      comparator.compareRows(midKey, firstKey) == 0 || comparator.compareRows(midKey, lastKey) == 0
    ) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("cannot split {} because midkey is the same as first or last row", file);
      }
      return Optional.empty();
    }
    return Optional.of(CellUtil.cloneRow(midKey));
  }

  /**
   * Gets the mid point of the largest file passed in as split point.
   */
  static Optional<byte[]> getSplitPoint(Collection<HStoreFile> storefiles,
    CellComparator comparator) throws IOException {
    Optional<HStoreFile> largestFile = StoreUtils.getLargestFile(storefiles);
    return largestFile.isPresent()
      ? StoreUtils.getFileSplitPoint(largestFile.get(), comparator)
      : Optional.empty();
  }
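  /*
   * Usage sketch (illustrative only; 'requestSplit' is a hypothetical callback): a split policy
   * can use getSplitPoint to pick the row on which to split a store, e.g.
   *
   *   Optional<byte[]> splitRow = StoreUtils.getSplitPoint(storefiles, comparator);
   *   splitRow.ifPresent(row -> requestSplit(row));
   */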
  /**
   * Returns the configured checksum algorithm.
   * @param conf The configuration
   * @return The checksum algorithm that is set in the configuration
   */
  public static ChecksumType getChecksumType(Configuration conf) {
    return ChecksumType.nameToType(
      conf.get(HConstants.CHECKSUM_TYPE_NAME, ChecksumType.getDefaultChecksumType().getName()));
  }

  /**
   * Returns the configured bytesPerChecksum value.
   * @param conf The configuration
   * @return The bytesPerChecksum that is set in the configuration
   */
  public static int getBytesPerChecksum(Configuration conf) {
    return conf.getInt(HConstants.BYTES_PER_CHECKSUM, HFile.DEFAULT_BYTES_PER_CHECKSUM);
  }

  /**
   * Returns a store-level configuration that layers the column family settings over the table
   * settings over the given base configuration.
   */
  public static Configuration createStoreConfiguration(Configuration conf, TableDescriptor td,
    ColumnFamilyDescriptor cfd) {
    // CompoundConfiguration will look for keys in reverse order of addition, so we add the
    // global config first, then table and cf overrides, then cf metadata.
    return new CompoundConfiguration().add(conf).addBytesMap(td.getValues())
      .addStringMap(cfd.getConfiguration()).addBytesMap(cfd.getValues());
  }

  /**
   * Returns the StoreFileInfo for each of the given store files.
   */
  public static List<StoreFileInfo> toStoreFileInfo(Collection<HStoreFile> storefiles) {
    return storefiles.stream().map(HStoreFile::getFileInfo).collect(Collectors.toList());
  }

  /**
   * Returns the total uncompressed data size of the given store files, counting files without an
   * open reader as zero.
   */
  public static long getTotalUncompressedBytes(List<HStoreFile> files) {
    return files.stream()
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::getTotalUncompressedBytes))
      .sum();
  }

  /**
   * Returns the aggregate on-disk size of the store files matching the given predicate.
   */
  public static long getStorefilesSize(Collection<HStoreFile> files,
    Predicate<HStoreFile> predicate) {
    return files.stream().filter(predicate)
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::length)).sum();
  }

  /**
   * Reads a long-valued field from the file's reader, returning 0 if the file or its reader is
   * null.
   */
  public static long getStorefileFieldSize(HStoreFile file, ToLongFunction<StoreFileReader> f) {
    if (file == null) {
      return 0L;
    }
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      return 0L;
    }
    return f.applyAsLong(reader);
  }
}
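/*
 * Usage sketch for createStoreConfiguration (illustrative only; the key shown is a real HBase
 * setting, but the surrounding variables are hypothetical): because CompoundConfiguration
 * consults its sources in reverse order of addition, a value set on the column family wins over
 * the same key on the table, which in turn wins over the global configuration:
 *
 *   Configuration storeConf = StoreUtils.createStoreConfiguration(conf, td, cfd);
 *   // returns the CF-level value of "hbase.hstore.compaction.min" if present, else the
 *   // table-level value, else the global value, else the supplied default of 3
 *   int minFilesToCompact = storeConf.getInt("hbase.hstore.compaction.min", 3);
 */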