/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.IOException;
import java.util.Collection;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.OptionalLong;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Utility functions for the region server storage layer.
 */
@InterfaceAudience.Private
public class StoreUtils {

  private static final Logger LOG = LoggerFactory.getLogger(StoreUtils.class);

  /**
   * Creates a deterministic hash code for the given store file collection, suitable for use as a
   * repeatable random seed. The value is derived from the name of the first file in the
   * collection; empty if the collection is empty.
   */
  public static OptionalInt getDeterministicRandomSeed(Collection<HStoreFile> files) {
    return files.stream().mapToInt(f -> f.getPath().getName().hashCode()).findFirst();
  }

  /**
   * Determines whether any files in the collection are references.
   * @param files The files.
   * @return true if any of the files is a reference file
   */
  public static boolean hasReferences(Collection<HStoreFile> files) {
    // TODO: make sure that we won't pass null here in the future.
    return files != null && files.stream().anyMatch(HStoreFile::isReference);
  }

  /**
   * Gets the lowest modification timestamp from the candidate StoreFiles.
   */
  public static long getLowestTimestamp(Collection<HStoreFile> candidates) throws IOException {
    long minTs = Long.MAX_VALUE;
    for (HStoreFile storeFile : candidates) {
      minTs = Math.min(minTs, storeFile.getModificationTimestamp());
    }
    return minTs;
  }

  /**
   * Gets the largest file (with reader) out of the list of files.
   * @param candidates The files to choose from.
   * @return The largest file; empty if no file has a reader.
   */
  static Optional<HStoreFile> getLargestFile(Collection<HStoreFile> candidates) {
    return candidates.stream().filter(f -> f.getReader() != null)
        .max((f1, f2) -> Long.compare(f1.getReader().length(), f2.getReader().length()));
  }

  /**
   * Return the largest memstoreTS found across all storefiles in the given list. Store files that
   * were created by a mapreduce bulk load are ignored, as they do not correspond to any specific
   * put operation, and thus do not have a memstoreTS associated with them.
   */
  public static OptionalLong getMaxMemStoreTSInList(Collection<HStoreFile> sfs) {
    return sfs.stream().filter(sf -> !sf.isBulkLoadResult()).mapToLong(HStoreFile::getMaxMemStoreTS)
        .max();
  }

  /**
   * Return the highest sequence ID found across all storefiles in the given list.
   */
  public static OptionalLong getMaxSequenceIdInList(Collection<HStoreFile> sfs) {
    return sfs.stream().mapToLong(HStoreFile::getMaxSequenceId).max();
  }

  /**
   * Gets the approximate mid-point of the given file that is optimal for use in splitting it.
   * @param file the store file
   * @param comparator Comparator used to compare KVs.
   * @return The split point row; empty if splitting is not possible or the reader is null.
   */
  static Optional<byte[]> getFileSplitPoint(HStoreFile file, CellComparator comparator)
      throws IOException {
    StoreFileReader reader = file.getReader();
    if (reader == null) {
      LOG.warn("Storefile {} Reader is null; cannot get split point", file);
      return Optional.empty();
    }
    // Get first, last, and mid keys. The midkey is the key that starts the block in the middle of
    // the hfile. It carries column and timestamp, so return just the row we want to split on.
    Optional<Cell> optionalMidKey = reader.midKey();
    if (!optionalMidKey.isPresent()) {
      return Optional.empty();
    }
    Cell midKey = optionalMidKey.get();
    Cell firstKey = reader.getFirstKey().get();
    Cell lastKey = reader.getLastKey().get();
    // If the midkey is the same as the first or last key, we cannot (ever) split this region.
    if (comparator.compareRows(midKey, firstKey) == 0 ||
        comparator.compareRows(midKey, lastKey) == 0) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("cannot split {} because midkey is the same as first or last row", file);
      }
      return Optional.empty();
    }
    return Optional.of(CellUtil.cloneRow(midKey));
  }

  /**
   * Gets the mid point of the largest of the given store files, for use as the split point.
   */
  static Optional<byte[]> getSplitPoint(Collection<HStoreFile> storefiles,
      CellComparator comparator) throws IOException {
    Optional<HStoreFile> largestFile = StoreUtils.getLargestFile(storefiles);
    return largestFile.isPresent() ? StoreUtils.getFileSplitPoint(largestFile.get(), comparator)
        : Optional.empty();
  }
}
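
/*
 * Usage sketch (illustrative only, not code from this class): a caller in this package could
 * derive a split row from a store's current files as shown below. The "storefiles" collection is
 * a hypothetical stand-in for whatever the enclosing store tracks; CellComparator.getInstance()
 * and Bytes.toStringBinary() are existing HBase utilities.
 *
 *   Collection<HStoreFile> storefiles = ...; // e.g. the store's current file set
 *   Optional<byte[]> splitRow =
 *       StoreUtils.getSplitPoint(storefiles, CellComparator.getInstance());
 *   splitRow.ifPresent(row -> LOG.debug("Chosen split row: {}", Bytes.toStringBinary(row)));
 *
 * Note that getSplitPoint yields an empty Optional when no file has an open reader or when the
 * largest file's midkey matches its first or last row, so callers must handle the empty case.
 */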