/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.function.Predicate;
import java.util.function.ToLongFunction;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompoundConfiguration;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility functions for region server storage layer.
 */
@InterfaceAudience.Private
public final class StoreUtils {

  private static final Logger LOG = LoggerFactory.getLogger(StoreUtils.class);

  private StoreUtils() {
  }

  /**
   * Creates a deterministic hash code for the store file collection, derived from the name of its
   * first file; empty if the collection is empty.
   */
  public static OptionalInt getDeterministicRandomSeed(Collection<HStoreFile> files) {
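    // Derive the seed from the first file's name hash so that repeated calls over the same files
    // yield the same seed.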
    return files.stream().mapToInt(f -> f.getPath().getName().hashCode()).findFirst();
  }

  /**
   * Determines whether any files in the collection are references.
   * @param files The files.
   * @return true if any of the given files is a reference file
   */
  public static boolean hasReferences(Collection<HStoreFile> files) {
    // TODO: make sure that we won't pass null here in the future.
    return files != null && files.stream().anyMatch(HStoreFile::isReference);
  }

  /**
   * Gets the lowest modification timestamp among the candidate store files.
   */
  public static long getLowestTimestamp(Collection<HStoreFile> candidates) throws IOException {
    long minTs = Long.MAX_VALUE;
    for (HStoreFile storeFile : candidates) {
      minTs = Math.min(minTs, storeFile.getModificationTimestamp());
    }
    return minTs;
  }

  /**
   * Gets the largest file (with reader) out of the list of files.
   * @param candidates The files to choose from.
   * @return The largest file; empty if no file has a reader.
   */
  static Optional<HStoreFile> getLargestFile(Collection<HStoreFile> candidates) {
    return candidates.stream().filter(f -> f.getReader() != null)
      .max((f1, f2) -> Long.compare(f1.getReader().length(), f2.getReader().length()));
  }

  /**
   * Return the largest memstoreTS found across all storefiles in the given list. Store files that
   * were created by a mapreduce bulk load are ignored, as they do not correspond to any specific
   * put operation, and thus do not have a memstoreTS associated with them.
   */
  public static OptionalLong getMaxMemStoreTSInList(Collection<HStoreFile> sfs) {
    return sfs.stream().filter(sf -> !sf.isBulkLoadResult()).mapToLong(HStoreFile::getMaxMemStoreTS)
      .max();
  }

  /**
   * Return the highest sequence ID found across all storefiles in the given list.
   */
  public static OptionalLong getMaxSequenceIdInList(Collection<HStoreFile> sfs) {
    return sfs.stream().mapToLong(HStoreFile::getMaxSequenceId).max();
  }

  /**
   * Gets the approximate mid-point of the given file that is optimal for use in splitting it.
   * @param file       the store file
   * @param comparator Comparator used to compare KVs.
   * @return The split point row, or empty if splitting is not possible or the reader is null.
   */
  static Optional<byte[]> getFileSplitPoint(HStoreFile file, CellComparator comparator)
    throws IOException {
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      LOG.warn("Storefile {} has a null Reader; cannot get split point", file);
      return Optional.empty();
    }
    // Get the first, last, and mid keys. The midkey is the key that starts the block in the
    // middle of the hfile; it carries a column and timestamp, but only its row is needed as the
    // split point.
    Optional<ExtendedCell> optionalMidKey = reader.midKey();
    if (!optionalMidKey.isPresent()) {
      return Optional.empty();
    }
    Cell midKey = optionalMidKey.get();
    Cell firstKey = reader.getFirstKey().get();
    Cell lastKey = reader.getLastKey().get();
    // if the midkey is the same as the first or last keys, we cannot (ever) split this region.
    if (
      comparator.compareRows(midKey, firstKey) == 0 || comparator.compareRows(midKey, lastKey) == 0
    ) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("cannot split {} because midkey is the same as first or last row", file);
      }
      return Optional.empty();
    }
    return Optional.of(CellUtil.cloneRow(midKey));
  }

  /**
   * Gets the mid-point of the largest of the given store files, for use as the split point.
   */
  static Optional<byte[]> getSplitPoint(Collection<HStoreFile> storefiles,
    CellComparator comparator) throws IOException {
    Optional<HStoreFile> largestFile = StoreUtils.getLargestFile(storefiles);
    return largestFile.isPresent()
      ? StoreUtils.getFileSplitPoint(largestFile.get(), comparator)
      : Optional.empty();
  }

  /**
   * Returns the configured checksum algorithm.
   * @param conf The configuration
   * @return The checksum algorithm that is set in the configuration
   */
  public static ChecksumType getChecksumType(Configuration conf) {
    return ChecksumType.nameToType(
      conf.get(HConstants.CHECKSUM_TYPE_NAME, ChecksumType.getDefaultChecksumType().getName()));
  }

  /**
   * Returns the configured bytesPerChecksum value.
   * @param conf The configuration
   * @return The bytesPerChecksum that is set in the configuration
   */
  public static int getBytesPerChecksum(Configuration conf) {
    return conf.getInt(HConstants.BYTES_PER_CHECKSUM, HFile.DEFAULT_BYTES_PER_CHECKSUM);
  }
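
  /**
   * Composes the effective store configuration by layering, in increasing precedence: the global
   * configuration, the table descriptor values, the column family configuration, and the column
   * family descriptor values. As a purely illustrative usage, a caller would typically do
   * something like {@code StoreUtils.createStoreConfiguration(conf, td, cfd)} and read
   * store-level keys from the result.
   */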
  public static Configuration createStoreConfiguration(Configuration conf, TableDescriptor td,
    ColumnFamilyDescriptor cfd) {
    // CompoundConfiguration will look for keys in reverse order of addition, so we'd
    // add global config first, then table and cf overrides, then cf metadata.
    return new CompoundConfiguration().add(conf).addBytesMap(td.getValues())
      .addStringMap(cfd.getConfiguration()).addBytesMap(cfd.getValues());
  }
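
  /**
   * Converts the given store files to their backing {@link StoreFileInfo} instances.
   */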
  public static List<StoreFileInfo> toStoreFileInfo(Collection<HStoreFile> storefiles) {
    return storefiles.stream().map(HStoreFile::getFileInfo).collect(Collectors.toList());
  }
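
  /**
   * Wraps each of the given {@link StoreFileInfo} instances in an {@link HStoreFile} with the
   * given bloom filter type and cache configuration.
   */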
  public static List<HStoreFile> toHStoreFile(List<StoreFileInfo> storeFileInfoList,
    BloomType bloomType, CacheConfig cacheConf) throws IOException {
    List<HStoreFile> hStoreFiles = new ArrayList<>();
    for (StoreFileInfo storeFileInfo : storeFileInfoList) {
      hStoreFiles.add(new HStoreFile(storeFileInfo, bloomType, cacheConf));
    }
    return hStoreFiles;
  }
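
  /**
   * Returns the total uncompressed size, in bytes, of the given store files; files without an
   * open reader contribute zero.
   */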
  public static long getTotalUncompressedBytes(List<HStoreFile> files) {
    return files.stream()
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::getTotalUncompressedBytes))
      .sum();
  }
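
  /**
   * Returns the total on-disk size, in bytes, of the store files matching the given predicate;
   * files without an open reader contribute zero. For example, the combined size of all
   * non-reference files can be computed with
   * {@code getStorefilesSize(files, sf -> !sf.isReference())}.
   */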
  public static long getStorefilesSize(Collection<HStoreFile> files,
    Predicate<HStoreFile> predicate) {
    return files.stream().filter(predicate)
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::length)).sum();
  }
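
  /**
   * Applies the given function to the file's reader and returns the result, or 0 if the file or
   * its reader is null.
   */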
  public static long getStorefileFieldSize(HStoreFile file, ToLongFunction<StoreFileReader> f) {
    if (file == null) {
      return 0L;
    }
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      return 0L;
    }
    return f.applyAsLong(reader);
  }
}