/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility functions for the region server storage layer.
 */
@InterfaceAudience.Private
public class StoreUtils {

  private static final Logger LOG = LoggerFactory.getLogger(StoreUtils.class);

  /**
   * Creates a deterministic hash code for the given store file collection, suitable for use as a
   * pseudo-random seed: the hash code of the first file's name.
   * @return the seed, or an empty OptionalInt if the collection is empty
   */
  public static OptionalInt getDeterministicRandomSeed(Collection<HStoreFile> files) {
    return files.stream().mapToInt(f -> f.getPath().getName().hashCode()).findFirst();
  }

  /**
   * Determines whether any files in the collection are references.
   * @param files The files.
   * @return true if any of the files is a reference file
   */
  public static boolean hasReferences(Collection<HStoreFile> files) {
    // TODO: make sure that we won't pass null here in the future.
    return files != null && files.stream().anyMatch(HStoreFile::isReference);
  }

  /**
   * Gets the lowest modification timestamp among the candidate StoreFiles, or
   * {@code Long.MAX_VALUE} if there are no candidates.
   */
  public static long getLowestTimestamp(Collection<HStoreFile> candidates) throws IOException {
    long minTs = Long.MAX_VALUE;
    for (HStoreFile storeFile : candidates) {
      minTs = Math.min(minTs, storeFile.getModificationTimestamp());
    }
    return minTs;
  }

  /**
   * Gets the largest file (with reader) out of the list of files.
   * @param candidates The files to choose from.
   * @return The largest file, or an empty Optional if no file has a reader.
   */
  static Optional<HStoreFile> getLargestFile(Collection<HStoreFile> candidates) {
    return candidates.stream().filter(f -> f.getReader() != null)
        .max((f1, f2) -> Long.compare(f1.getReader().length(), f2.getReader().length()));
  }

  /**
   * Return the largest memstoreTS found across all storefiles in the given list. Store files that
   * were created by a mapreduce bulk load are ignored, as they do not correspond to any specific
   * put operation, and thus do not have a memstoreTS associated with them.
   */
  public static OptionalLong getMaxMemStoreTSInList(Collection<HStoreFile> sfs) {
    return sfs.stream().filter(sf -> !sf.isBulkLoadResult()).mapToLong(HStoreFile::getMaxMemStoreTS)
        .max();
  }

  /**
   * Return the highest sequence ID found across all storefiles in the given list.
   */
  public static OptionalLong getMaxSequenceIdInList(Collection<HStoreFile> sfs) {
    return sfs.stream().mapToLong(HStoreFile::getMaxSequenceId).max();
  }

  /**
   * Gets the approximate mid-point of the given file that is optimal for use in splitting it.
   * @param file the store file
   * @param comparator Comparator used to compare KVs.
   * @return The split point row, or an empty Optional if splitting is not possible or the reader
   *         is null.
   */
  static Optional<byte[]> getFileSplitPoint(HStoreFile file, CellComparator comparator)
      throws IOException {
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      LOG.warn("Storefile {} has a null Reader; cannot get split point", file);
      return Optional.empty();
    }
    // Get the first, last, and mid keys. The midkey is the key that starts the block in the
    // middle of the hfile; it carries column and timestamp, but we only need to return the row
    // we want to split on.
    Optional<Cell> optionalMidKey = reader.midKey();
    if (!optionalMidKey.isPresent()) {
      return Optional.empty();
    }
    Cell midKey = optionalMidKey.get();
    Cell firstKey = reader.getFirstKey().get();
    Cell lastKey = reader.getLastKey().get();
    // If the midkey shares its row with the first or last key, we cannot (ever) split this region.
    if (comparator.compareRows(midKey, firstKey) == 0 ||
        comparator.compareRows(midKey, lastKey) == 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("cannot split {} because midkey is the same as first or last row", file);
      }
      return Optional.empty();
    }
    return Optional.of(CellUtil.cloneRow(midKey));
  }

  /**
   * Gets the mid point of the largest file in the collection as the split point.
   */
  static Optional<byte[]> getSplitPoint(Collection<HStoreFile> storefiles,
      CellComparator comparator) throws IOException {
    Optional<HStoreFile> largestFile = StoreUtils.getLargestFile(storefiles);
    return largestFile.isPresent() ? StoreUtils.getFileSplitPoint(largestFile.get(), comparator)
        : Optional.empty();
  }
}
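
// Usage sketch (illustrative only, not part of this class): from within the
// org.apache.hadoop.hbase.regionserver package, the helpers above compose into a
// split-point check. Here `store` is an assumed HStore instance, and Bytes is
// org.apache.hadoop.hbase.util.Bytes.
//
//   Collection<HStoreFile> storeFiles = store.getStorefiles();
//   // Regions still carrying reference files from a previous split cannot split again.
//   if (!StoreUtils.hasReferences(storeFiles)) {
//     Optional<byte[]> splitRow = StoreUtils.getSplitPoint(storeFiles, store.getComparator());
//     splitRow.ifPresent(row -> System.out.println(Bytes.toStringBinary(row)));
//   }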