001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.IOException;
021import java.util.HashMap;
022import java.util.Iterator;
023import java.util.Map;
024import org.apache.hadoop.hbase.ScheduledChore;
025import org.apache.hadoop.hbase.Stoppable;
026import org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner;
027import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
028import org.apache.hadoop.util.StringUtils;
029import org.apache.yetus.audience.InterfaceAudience;
030import org.slf4j.Logger;
031import org.slf4j.LoggerFactory;
032
033/**
034 * A chore for refreshing the store files for secondary regions hosted in the region server. This
035 * chore should run periodically with a shorter interval than HFile TTL
036 * ("hbase.master.hfilecleaner.ttl", default 5 minutes). It ensures that if we cannot refresh files
037 * longer than that amount, the region will stop serving read requests because the referenced files
038 * might have been deleted (by the primary region).
039 */
040@InterfaceAudience.Private
041public class StorefileRefresherChore extends ScheduledChore {
042
043  private static final Logger LOG = LoggerFactory.getLogger(StorefileRefresherChore.class);
044
045  /**
046   * The period (in milliseconds) for refreshing the store files for the secondary regions.
047   */
048  public static final String REGIONSERVER_STOREFILE_REFRESH_PERIOD =
049    "hbase.regionserver.storefile.refresh.period";
050  static final int DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD = 0; // disabled by default
051
052  /**
053   * Whether all storefiles should be refreshed, as opposed to just hbase:meta's Meta region doesn't
054   * have WAL replication for replicas enabled yet
055   */
056  public static final String REGIONSERVER_META_STOREFILE_REFRESH_PERIOD =
057    "hbase.regionserver.meta.storefile.refresh.period";
058  private HRegionServer regionServer;
059  private long hfileTtl;
060  private int period;
061  private boolean onlyMetaRefresh = true;
062
063  // ts of last time regions store files are refreshed
064  private Map<String, Long> lastRefreshTimes; // encodedName -> long
065
066  public StorefileRefresherChore(int period, boolean onlyMetaRefresh, HRegionServer regionServer,
067    Stoppable stoppable) {
068    super("StorefileRefresherChore", stoppable, period);
069    this.period = period;
070    this.regionServer = regionServer;
071    this.hfileTtl = this.regionServer.getConfiguration()
072      .getLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, TimeToLiveHFileCleaner.DEFAULT_TTL);
073    this.onlyMetaRefresh = onlyMetaRefresh;
074    if (period > hfileTtl / 2) {
075      throw new RuntimeException(REGIONSERVER_STOREFILE_REFRESH_PERIOD
076        + " should be set smaller than half of " + TimeToLiveHFileCleaner.TTL_CONF_KEY);
077    }
078    lastRefreshTimes = new HashMap<>();
079  }
080
081  @Override
082  protected void chore() {
083    for (Region r : regionServer.getOnlineRegionsLocalContext()) {
084      if (!r.isReadOnly()) {
085        // skip checking for this region if it can accept writes
086        continue;
087      }
088      // don't refresh unless enabled for all files, or it the meta region
089      // meta region don't have WAL replication for replicas enabled yet
090      if (onlyMetaRefresh && !r.getRegionInfo().isMetaRegion()) continue;
091      String encodedName = r.getRegionInfo().getEncodedName();
092      long time = EnvironmentEdgeManager.currentTime();
093      if (!lastRefreshTimes.containsKey(encodedName)) {
094        lastRefreshTimes.put(encodedName, time);
095      }
096      try {
097        for (Store store : r.getStores()) {
098          // TODO: some stores might see new data from flush, while others do not which
099          // MIGHT break atomic edits across column families. We can fix this with setting
100          // mvcc read numbers that we know every store has seen
101          store.refreshStoreFiles();
102        }
103      } catch (IOException ex) {
104        LOG.warn("Exception while trying to refresh store files for region:" + r.getRegionInfo()
105          + ", exception:" + StringUtils.stringifyException(ex));
106
107        // Store files have a TTL in the archive directory. If we fail to refresh for that long, we
108        // stop serving reads
109        if (isRegionStale(encodedName, time)) {
110          ((HRegion) r).setReadsEnabled(false); // stop serving reads
111        }
112        continue;
113      }
114      lastRefreshTimes.put(encodedName, time);
115      ((HRegion) r).setReadsEnabled(true); // restart serving reads
116    }
117
118    // remove closed regions
119    Iterator<String> lastRefreshTimesIter = lastRefreshTimes.keySet().iterator();
120    while (lastRefreshTimesIter.hasNext()) {
121      String encodedName = lastRefreshTimesIter.next();
122      if (regionServer.getRegion(encodedName) == null) {
123        lastRefreshTimesIter.remove();
124      }
125    }
126  }
127
128  protected boolean isRegionStale(String encodedName, long time) {
129    long lastRefreshTime = lastRefreshTimes.get(encodedName);
130    return time - lastRefreshTime > hfileTtl - period;
131  }
132}