/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;
import java.util.function.Predicate;
import java.util.function.ToLongFunction;
import java.util.stream.Collectors;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompoundConfiguration;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility functions for region server storage layer.
 */
@InterfaceAudience.Private
public final class StoreUtils {

  private static final Logger LOG = LoggerFactory.getLogger(StoreUtils.class);

  private StoreUtils() {
  }

  /**
   * Creates a deterministic hash code for the store file collection, derived from the name of its
   * first file; empty if the collection is empty.
   */
  public static OptionalInt getDeterministicRandomSeed(Collection<HStoreFile> files) {
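    // Derive the seed from the first file's name hash so that repeated calls over the same files
    // yield the same seed.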
    return files.stream().mapToInt(f -> f.getPath().getName().hashCode()).findFirst();
  }

  /**
   * Determines whether any files in the collection are references.
   * @param files The files.
   * @return true if any of the given files is a reference file
   */
  public static boolean hasReferences(Collection<HStoreFile> files) {
    // TODO: make sure that we won't pass null here in the future.
    return files != null && files.stream().anyMatch(HStoreFile::isReference);
  }

  /**
   * Gets the lowest modification timestamp among the candidate store files.
   */
  public static long getLowestTimestamp(Collection<HStoreFile> candidates) throws IOException {
    long minTs = Long.MAX_VALUE;
    for (HStoreFile storeFile : candidates) {
      minTs = Math.min(minTs, storeFile.getModificationTimestamp());
    }
    return minTs;
  }

  /**
   * Gets the largest file (with reader) out of the list of files.
   * @param candidates The files to choose from.
   * @return The largest file; empty if no file has a reader.
   */
  static Optional<HStoreFile> getLargestFile(Collection<HStoreFile> candidates) {
    return candidates.stream().filter(f -> f.getReader() != null)
      .max((f1, f2) -> Long.compare(f1.getReader().length(), f2.getReader().length()));
  }

  /**
   * Return the largest memstoreTS found across all storefiles in the given list. Store files that
   * were created by a mapreduce bulk load are ignored, as they do not correspond to any specific
   * put operation, and thus do not have a memstoreTS associated with them.
   */
  public static OptionalLong getMaxMemStoreTSInList(Collection<HStoreFile> sfs) {
    return sfs.stream().filter(sf -> !sf.isBulkLoadResult()).mapToLong(HStoreFile::getMaxMemStoreTS)
      .max();
  }

  /**
   * Return the highest sequence ID found across all storefiles in the given list.
   */
  public static OptionalLong getMaxSequenceIdInList(Collection<HStoreFile> sfs) {
    return sfs.stream().mapToLong(HStoreFile::getMaxSequenceId).max();
  }

  /**
   * Gets the approximate mid-point of the given file that is optimal for use in splitting it.
   * @param file       the store file
   * @param comparator Comparator used to compare KVs.
   * @return The split point row, or empty if splitting is not possible or the reader is null.
   */
  static Optional<byte[]> getFileSplitPoint(HStoreFile file, CellComparator comparator)
    throws IOException {
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      LOG.warn("Storefile {} has a null Reader; cannot get split point", file);
      return Optional.empty();
    }
    // Get the first, last, and mid keys. The midkey is the key that starts the block in the
    // middle of the hfile; it carries a column and timestamp, but only its row is needed as the
    // split point.
    Optional<ExtendedCell> optionalMidKey = reader.midKey();
    if (!optionalMidKey.isPresent()) {
      return Optional.empty();
    }
    Cell midKey = optionalMidKey.get();
    Cell firstKey = reader.getFirstKey().get();
    Cell lastKey = reader.getLastKey().get();
    // if the midkey is the same as the first or last keys, we cannot (ever) split this region.
    if (
      comparator.compareRows(midKey, firstKey) == 0 || comparator.compareRows(midKey, lastKey) == 0
    ) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("cannot split {} because midkey is the same as first or last row", file);
      }
      return Optional.empty();
    }
    return Optional.of(CellUtil.cloneRow(midKey));
  }

  /**
   * Gets the mid-point of the largest of the given store files, for use as the split point.
   */
  static Optional<byte[]> getSplitPoint(Collection<HStoreFile> storefiles,
    CellComparator comparator) throws IOException {
    Optional<HStoreFile> largestFile = StoreUtils.getLargestFile(storefiles);
    return largestFile.isPresent()
      ? StoreUtils.getFileSplitPoint(largestFile.get(), comparator)
      : Optional.empty();
  }

  /**
   * Returns the configured checksum algorithm.
   * @param conf The configuration
   * @return The checksum algorithm that is set in the configuration
   */
  public static ChecksumType getChecksumType(Configuration conf) {
    return ChecksumType.nameToType(
      conf.get(HConstants.CHECKSUM_TYPE_NAME, ChecksumType.getDefaultChecksumType().getName()));
  }

  /**
   * Returns the configured bytesPerChecksum value.
   * @param conf The configuration
   * @return The bytesPerChecksum that is set in the configuration
   */
  public static int getBytesPerChecksum(Configuration conf) {
    return conf.getInt(HConstants.BYTES_PER_CHECKSUM, HFile.DEFAULT_BYTES_PER_CHECKSUM);
  }
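
  /**
   * Composes the effective store configuration by layering, in increasing precedence: the global
   * configuration, the table descriptor values, the column family configuration, and the column
   * family descriptor values. As a purely illustrative usage, a caller would typically do
   * something like {@code StoreUtils.createStoreConfiguration(conf, td, cfd)} and read
   * store-level keys from the result.
   */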
  public static Configuration createStoreConfiguration(Configuration conf, TableDescriptor td,
    ColumnFamilyDescriptor cfd) {
    // CompoundConfiguration will look for keys in reverse order of addition, so we'd
    // add global config first, then table and cf overrides, then cf metadata.
    return new CompoundConfiguration().add(conf).addBytesMap(td.getValues())
      .addStringMap(cfd.getConfiguration()).addBytesMap(cfd.getValues());
  }
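
  /**
   * Converts the given store files to their backing {@link StoreFileInfo} instances.
   */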
  public static List<StoreFileInfo> toStoreFileInfo(Collection<HStoreFile> storefiles) {
    return storefiles.stream().map(HStoreFile::getFileInfo).collect(Collectors.toList());
  }
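
  /**
   * Wraps each of the given {@link StoreFileInfo} instances in an {@link HStoreFile} with the
   * given bloom filter type and cache configuration.
   */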
  public static List<HStoreFile> toHStoreFile(List<StoreFileInfo> storeFileInfoList,
    BloomType bloomType, CacheConfig cacheConf) throws IOException {
    List<HStoreFile> hStoreFiles = new ArrayList<>();
    for (StoreFileInfo storeFileInfo : storeFileInfoList) {
      hStoreFiles.add(new HStoreFile(storeFileInfo, bloomType, cacheConf));
    }
    return hStoreFiles;
  }
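
  /**
   * Returns the total uncompressed size, in bytes, of the given store files; files without an
   * open reader contribute zero.
   */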
  public static long getTotalUncompressedBytes(List<HStoreFile> files) {
    return files.stream()
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::getTotalUncompressedBytes))
      .sum();
  }
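
  /**
   * Returns the total on-disk size, in bytes, of the store files matching the given predicate;
   * files without an open reader contribute zero. For example, the combined size of all
   * non-reference files can be computed with
   * {@code getStorefilesSize(files, sf -> !sf.isReference())}.
   */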
  public static long getStorefilesSize(Collection<HStoreFile> files,
    Predicate<HStoreFile> predicate) {
    return files.stream().filter(predicate)
      .mapToLong(file -> getStorefileFieldSize(file, StoreFileReader::length)).sum();
  }
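
  /**
   * Applies the given function to the file's reader and returns the result, or 0 if the file or
   * its reader is null.
   */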
  public static long getStorefileFieldSize(HStoreFile file, ToLongFunction<StoreFileReader> f) {
    if (file == null) {
      return 0L;
    }
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      return 0L;
    }
    return f.applyAsLong(reader);
  }
}