/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.CompoundConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.util.ChecksumType;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility functions for the region server storage layer.
 */
@InterfaceAudience.Private
public class StoreUtils {

  private static final Logger LOG = LoggerFactory.getLogger(StoreUtils.class);

  /**
   * Creates a deterministic hash code for a store file collection, for use as a random seed. The
   * value is the hash code of the path name of the first file in the collection.
   * @param files The store files.
   * @return The seed, or empty if the collection is empty.
   */
  public static OptionalInt getDeterministicRandomSeed(Collection<HStoreFile> files) {
    return files.stream().mapToInt(f -> f.getPath().getName().hashCode()).findFirst();
  }
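
  // Usage sketch (illustrative only, not called from this class): a caller wanting repeatable
  // behavior for the same file set could seed a java.util.Random from this value, falling back
  // to an unseeded Random when the collection is empty. The storeFiles variable is assumed to
  // be supplied by the caller.
  //
  //   OptionalInt seed = StoreUtils.getDeterministicRandomSeed(storeFiles);
  //   Random random = seed.isPresent() ? new Random(seed.getAsInt()) : new Random();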

  /**
   * Determines whether any files in the collection are references.
   * @param files The files.
   * @return True if any of the files is a reference file, false otherwise.
   */
  public static boolean hasReferences(Collection<HStoreFile> files) {
    // TODO: make sure that we won't pass null here in the future.
    return files != null && files.stream().anyMatch(HStoreFile::isReference);
  }

  /**
   * Gets the lowest timestamp from the candidate StoreFiles.
   * @param candidates The store files to examine.
   * @return The lowest modification timestamp among the candidates, or {@code Long.MAX_VALUE} if
   *         the collection is empty.
   */
  public static long getLowestTimestamp(Collection<HStoreFile> candidates) throws IOException {
    long minTs = Long.MAX_VALUE;
    for (HStoreFile storeFile : candidates) {
      minTs = Math.min(minTs, storeFile.getModificationTimestamp());
    }
    return minTs;
  }

  /**
   * Gets the largest file (with reader) out of the list of files.
   * @param candidates The files to choose from.
   * @return The largest file; empty if no file has a reader.
   */
  static Optional<HStoreFile> getLargestFile(Collection<HStoreFile> candidates) {
    return candidates.stream().filter(f -> f.getReader() != null)
        .max((f1, f2) -> Long.compare(f1.getReader().length(), f2.getReader().length()));
  }

  /**
   * Return the largest memstoreTS found across all storefiles in the given list. Store files that
   * were created by a mapreduce bulk load are ignored, as they do not correspond to any specific
   * put operation, and thus do not have a memstoreTS associated with them.
   */
  public static OptionalLong getMaxMemStoreTSInList(Collection<HStoreFile> sfs) {
    return sfs.stream().filter(sf -> !sf.isBulkLoadResult()).mapToLong(HStoreFile::getMaxMemStoreTS)
        .max();
  }

  /**
   * Return the highest sequence ID found across all storefiles in the given list.
   */
  public static OptionalLong getMaxSequenceIdInList(Collection<HStoreFile> sfs) {
    return sfs.stream().mapToLong(HStoreFile::getMaxSequenceId).max();
  }

  /**
   * Gets the approximate mid-point of the given file that is optimal for use in splitting it.
   * @param file the store file
   * @param comparator Comparator used to compare KVs.
   * @return The split point row, or an empty Optional if splitting is not possible or the reader
   *         is null.
   */
  static Optional<byte[]> getFileSplitPoint(HStoreFile file, CellComparator comparator)
      throws IOException {
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      LOG.warn("Storefile {} has a null Reader; cannot get split point", file);
      return Optional.empty();
    }
    // Get first, last, and mid keys. The midkey is the key that starts a block in the middle of
    // the hfile. It has a column and timestamp; we need to return just the row to split on.
    Optional<Cell> optionalMidKey = reader.midKey();
    if (!optionalMidKey.isPresent()) {
      return Optional.empty();
    }
    Cell midKey = optionalMidKey.get();
    Cell firstKey = reader.getFirstKey().get();
    Cell lastKey = reader.getLastKey().get();
    // If the midkey is the same as the first or last row, we cannot (ever) split this region.
    if (comparator.compareRows(midKey, firstKey) == 0 ||
        comparator.compareRows(midKey, lastKey) == 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("cannot split {} because midkey is the same as first or last row", file);
      }
      return Optional.empty();
    }
    return Optional.of(CellUtil.cloneRow(midKey));
  }

  /**
   * Gets the split point as the mid-point of the largest file among the given store files.
   * @param storefiles The store files to choose from.
   * @param comparator Comparator used to compare KVs.
   * @return The split point row, or an empty Optional if there is no largest file or it cannot be
   *         split.
   */
  static Optional<byte[]> getSplitPoint(Collection<HStoreFile> storefiles,
      CellComparator comparator) throws IOException {
    Optional<HStoreFile> largestFile = StoreUtils.getLargestFile(storefiles);
    return largestFile.isPresent() ? StoreUtils.getFileSplitPoint(largestFile.get(), comparator)
        : Optional.empty();
  }
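
  // Usage sketch (illustrative only): a caller such as a split policy could pick the split row
  // for a store like this. The storefiles and comparator variables are assumed to be supplied
  // by the caller.
  //
  //   Optional<byte[]> splitRow = StoreUtils.getSplitPoint(storefiles, comparator);
  //   if (splitRow.isPresent()) {
  //     // request a split of the region at splitRow.get()
  //   }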

  /**
   * Returns the configured checksum algorithm.
   * @param conf The configuration
   * @return The checksum algorithm that is set in the configuration
   */
  public static ChecksumType getChecksumType(Configuration conf) {
    return ChecksumType.nameToType(
      conf.get(HConstants.CHECKSUM_TYPE_NAME, ChecksumType.getDefaultChecksumType().getName()));
  }

  /**
   * Returns the configured bytesPerChecksum value.
   * @param conf The configuration
   * @return The bytesPerChecksum that is set in the configuration
   */
  public static int getBytesPerChecksum(Configuration conf) {
    return conf.getInt(HConstants.BYTES_PER_CHECKSUM,
        HFile.DEFAULT_BYTES_PER_CHECKSUM);
  }
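
  // Usage sketch (illustrative only): both getters read plain keys from a Hadoop Configuration,
  // so the values can be overridden before HFiles are written, e.g. in a test or a tool. The
  // "CRC32C" value is just an example; it must match a ChecksumType name.
  //
  //   Configuration conf = new Configuration();
  //   conf.set(HConstants.CHECKSUM_TYPE_NAME, "CRC32C");
  //   conf.setInt(HConstants.BYTES_PER_CHECKSUM, 16 * 1024);
  //   ChecksumType type = StoreUtils.getChecksumType(conf);        // CRC32C
  //   int bytesPerChecksum = StoreUtils.getBytesPerChecksum(conf); // 16384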

  /**
   * Builds the effective configuration for a store by layering the column family settings and
   * table descriptor values on top of the global configuration.
   */
  public static Configuration createStoreConfiguration(Configuration conf, TableDescriptor td,
      ColumnFamilyDescriptor cfd) {
    // CompoundConfiguration will look for keys in reverse order of addition, so we'd
    // add global config first, then table and cf overrides, then cf metadata.
    return new CompoundConfiguration().add(conf).addBytesMap(td.getValues())
        .addStringMap(cfd.getConfiguration()).addBytesMap(cfd.getValues());
  }
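
  // Usage sketch (illustrative only): because CompoundConfiguration resolves keys in reverse
  // order of addition, a key set at the column family level wins over the same key at the table
  // level, which in turn wins over the global configuration. The table name, family name, and
  // key below are made up for illustration.
  //
  //   TableDescriptor td = TableDescriptorBuilder.newBuilder(TableName.valueOf("t"))
  //       .setValue("hbase.hstore.blockingStoreFiles", "20").build();
  //   ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(Bytes.toBytes("f"))
  //       .setConfiguration("hbase.hstore.blockingStoreFiles", "30").build();
  //   Configuration storeConf = StoreUtils.createStoreConfiguration(conf, td, cfd);
  //   // storeConf.get("hbase.hstore.blockingStoreFiles") returns "30"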
}