/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.function.Predicate;
import java.util.function.ToLongFunction;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompoundConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility functions for the region server storage layer.
 */
@InterfaceAudience.Private
public final class StoreUtils {

  private static final Logger LOG = LoggerFactory.getLogger(StoreUtils.class);

  private StoreUtils() {
  }

  /**
   * Creates a deterministic seed for a store file collection, derived from the hash code of the
   * first file's path name; empty if the collection is empty.
   */
  public static OptionalInt getDeterministicRandomSeed(Collection<HStoreFile> files) {
    return files.stream().mapToInt(f -> f.getPath().getName().hashCode()).findFirst();
  }
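
  // Illustrative sketch, not part of this class: the seed above is typically used to make
  // per-store jitter reproducible across restarts rather than truly random, e.g.
  //
  //   OptionalInt seed = StoreUtils.getDeterministicRandomSeed(storefiles);
  //   Random rand = seed.isPresent() ? new Random(seed.getAsInt()) : new Random();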

  /**
   * Determines whether any files in the collection are references.
   * @param files The files.
   */
  public static boolean hasReferences(Collection<HStoreFile> files) {
    // TODO: make sure that we won't pass null here in the future.
    return files != null && files.stream().anyMatch(HStoreFile::isReference);
  }

  /**
   * Gets the lowest modification timestamp from the candidate StoreFiles.
   */
  public static long getLowestTimestamp(Collection<HStoreFile> candidates) throws IOException {
    long minTs = Long.MAX_VALUE;
    for (HStoreFile storeFile : candidates) {
      minTs = Math.min(minTs, storeFile.getModificationTimestamp());
    }
    return minTs;
  }

  /**
   * Gets the largest file (with reader) out of the list of files.
   * @param candidates The files to choose from.
   * @return The largest file; an empty Optional if no file has a reader.
   */
  static Optional<HStoreFile> getLargestFile(Collection<HStoreFile> candidates) {
    return candidates.stream().filter(f -> f.getReader() != null)
      .max((f1, f2) -> Long.compare(f1.getReader().length(), f2.getReader().length()));
  }

  /**
   * Return the largest memstoreTS found across all storefiles in the given list. Store files that
   * were created by a mapreduce bulk load are ignored, as they do not correspond to any specific
   * put operation, and thus do not have a memstoreTS associated with them.
   */
  public static OptionalLong getMaxMemStoreTSInList(Collection<HStoreFile> sfs) {
    return sfs.stream().filter(sf -> !sf.isBulkLoadResult()).mapToLong(HStoreFile::getMaxMemStoreTS)
      .max();
  }

  /**
   * Return the highest sequence ID found across all storefiles in the given list.
   */
  public static OptionalLong getMaxSequenceIdInList(Collection<HStoreFile> sfs) {
    return sfs.stream().mapToLong(HStoreFile::getMaxSequenceId).max();
  }
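
  // Illustrative sketch, not part of this class: the maximum sequence id across a store's files
  // is what callers compare a replayed edit's sequence id against, e.g.
  //
  //   long maxSeqId = StoreUtils.getMaxSequenceIdInList(storefiles).orElse(0L);
  //   boolean alreadyPersisted = editSeqId <= maxSeqId;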

  /**
   * Gets the approximate mid-point of the given file that is optimal for use in splitting it.
   * @param file       the store file
   * @param comparator Comparator used to compare KVs.
   * @return The split point row, or an empty Optional if splitting is not possible or the reader
   *         is null.
   */
  static Optional<byte[]> getFileSplitPoint(HStoreFile file, CellComparator comparator)
    throws IOException {
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      LOG.warn("Storefile {} Reader is null; cannot get split point", file);
      return Optional.empty();
    }
    // Get the first, last, and mid keys. The midkey is the key that starts the block in the
    // middle of the hfile; it still carries column and timestamp, so only the row portion is
    // returned as the split point.
    Optional<Cell> optionalMidKey = reader.midKey();
    if (!optionalMidKey.isPresent()) {
      return Optional.empty();
    }
    Cell midKey = optionalMidKey.get();
    Cell firstKey = reader.getFirstKey().get();
    Cell lastKey = reader.getLastKey().get();
    // If the midkey row is the same as the first or last row, we cannot (ever) split this region.
    if (
      comparator.compareRows(midKey, firstKey) == 0 || comparator.compareRows(midKey, lastKey) == 0
    ) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("cannot split {} because midkey is the same as first or last row", file);
      }
      return Optional.empty();
    }
    return Optional.of(CellUtil.cloneRow(midKey));
  }

  /**
   * Gets the mid point of the largest file in the given collection, for use as a split point.
   */
  static Optional<byte[]> getSplitPoint(Collection<HStoreFile> storefiles,
    CellComparator comparator) throws IOException {
    Optional<HStoreFile> largestFile = StoreUtils.getLargestFile(storefiles);
    return largestFile.isPresent()
      ? StoreUtils.getFileSplitPoint(largestFile.get(), comparator)
      : Optional.empty();
  }
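
  // Illustrative sketch, not part of this class: a caller would typically combine the helpers
  // above to pick a region split row from a store's current files; store.getStorefiles() and
  // store.getComparator() are assumed accessors for the example.
  //
  //   Optional<byte[]> splitRow =
  //     StoreUtils.getSplitPoint(store.getStorefiles(), store.getComparator());
  //   splitRow.ifPresent(row -> LOG.debug("Candidate split row: {}", Bytes.toStringBinary(row)));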

  /**
   * Returns the configured checksum algorithm.
   * @param conf The configuration
   * @return The checksum algorithm that is set in the configuration
   */
  public static ChecksumType getChecksumType(Configuration conf) {
    return ChecksumType.nameToType(
      conf.get(HConstants.CHECKSUM_TYPE_NAME, ChecksumType.getDefaultChecksumType().getName()));
  }

  /**
   * Returns the configured bytesPerChecksum value.
   * @param conf The configuration
   * @return The bytesPerChecksum that is set in the configuration
   */
  public static int getBytesPerChecksum(Configuration conf) {
    return conf.getInt(HConstants.BYTES_PER_CHECKSUM, HFile.DEFAULT_BYTES_PER_CHECKSUM);
  }
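
  // Illustrative sketch, not part of this class: the two checksum helpers above are typically fed
  // into an HFileContextBuilder when store file writers are created; the builder method names
  // below are assumed from the hbase.io.hfile API.
  //
  //   HFileContext context = new HFileContextBuilder()
  //     .withChecksumType(StoreUtils.getChecksumType(conf))
  //     .withBytesPerCheckSum(StoreUtils.getBytesPerChecksum(conf))
  //     .build();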

  /**
   * Builds the effective store configuration by layering the column family settings on top of the
   * table settings and the global configuration.
   */
  public static Configuration createStoreConfiguration(Configuration conf, TableDescriptor td,
    ColumnFamilyDescriptor cfd) {
    // CompoundConfiguration will look for keys in reverse order of addition, so we add the global
    // config first, then the table and cf overrides, then the cf metadata.
    return new CompoundConfiguration().add(conf).addBytesMap(td.getValues())
      .addStringMap(cfd.getConfiguration()).addBytesMap(cfd.getValues());
  }
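
  // Illustrative sketch, not part of this class: a key present at several levels resolves to the
  // most specific value; the builder calls below are assumed for the example.
  //
  //   ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("cf"))
  //     .setConfiguration("hbase.hstore.compaction.min", "5").build();
  //   Configuration storeConf = StoreUtils.createStoreConfiguration(conf, td, cfd);
  //   storeConf.get("hbase.hstore.compaction.min"); // "5", overriding the global conf value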

  /**
   * Returns the {@link StoreFileInfo} for each of the given store files.
   */
  public static List<StoreFileInfo> toStoreFileInfo(Collection<HStoreFile> storefiles) {
    return storefiles.stream().map(HStoreFile::getFileInfo).collect(Collectors.toList());
  }

  /**
   * Returns the sum of the total uncompressed bytes across the given files; files without an open
   * reader contribute zero.
   */
  public static long getTotalUncompressedBytes(List<HStoreFile> files) {
    return files.stream()
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::getTotalUncompressedBytes))
      .sum();
  }

  /**
   * Returns the sum of the on-disk sizes (reader length) of the files matching the given
   * predicate; files without an open reader contribute zero.
   */
  public static long getStorefilesSize(Collection<HStoreFile> files,
    Predicate<HStoreFile> predicate) {
    return files.stream().filter(predicate)
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::length)).sum();
  }
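
  // Illustrative sketch, not part of this class: callers pass a method reference selecting the
  // size field they want; HStoreFile::isHFile is assumed to exist for the example.
  //
  //   long onDiskHFileBytes = StoreUtils.getStorefilesSize(files, HStoreFile::isHFile);
  //   long uncompressedBytes = StoreUtils.getTotalUncompressedBytes(new ArrayList<>(files));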

  /**
   * Applies the given size accessor to the file's reader, returning zero if the file or its reader
   * is null.
   */
  public static long getStorefileFieldSize(HStoreFile file, ToLongFunction<StoreFileReader> f) {
    if (file == null) {
      return 0L;
    }
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      return 0L;
    }
    return f.applyAsLong(reader);
  }
}