001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.regionserver;
020
021import java.io.IOException;
022import java.util.HashMap;
023import java.util.Iterator;
024import java.util.Map;
025
026import org.apache.hadoop.hbase.ScheduledChore;
027import org.apache.hadoop.hbase.Stoppable;
028import org.apache.yetus.audience.InterfaceAudience;
029import org.slf4j.Logger;
030import org.slf4j.LoggerFactory;
031import org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner;
032import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
033import org.apache.hadoop.util.StringUtils;
034
035/**
036 * A chore for refreshing the store files for secondary regions hosted in the region server.
037 *
038 * This chore should run periodically with a shorter interval than HFile TTL
039 * ("hbase.master.hfilecleaner.ttl", default 5 minutes).
040 * It ensures that if we cannot refresh files longer than that amount, the region
041 * will stop serving read requests because the referenced files might have been deleted (by the
042 * primary region).
043 */
044@InterfaceAudience.Private
045public class StorefileRefresherChore extends ScheduledChore {
046
047  private static final Logger LOG = LoggerFactory.getLogger(StorefileRefresherChore.class);
048
049  /**
050   * The period (in milliseconds) for refreshing the store files for the secondary regions.
051   */
052  public static final String REGIONSERVER_STOREFILE_REFRESH_PERIOD
053    = "hbase.regionserver.storefile.refresh.period";
054  static final int DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD = 0; //disabled by default
055
056  /**
057   * Whether all storefiles should be refreshed, as opposed to just hbase:meta's
058   * Meta region doesn't have WAL replication for replicas enabled yet
059   */
060  public static final String REGIONSERVER_META_STOREFILE_REFRESH_PERIOD
061     = "hbase.regionserver.meta.storefile.refresh.period";
062  private HRegionServer regionServer;
063  private long hfileTtl;
064  private int period;
065  private boolean onlyMetaRefresh = true;
066
067  //ts of last time regions store files are refreshed
068  private Map<String, Long> lastRefreshTimes; // encodedName -> long
069
070  public StorefileRefresherChore(int period, boolean onlyMetaRefresh, HRegionServer regionServer,
071      Stoppable stoppable) {
072    super("StorefileRefresherChore", stoppable, period);
073    this.period = period;
074    this.regionServer = regionServer;
075    this.hfileTtl = this.regionServer.getConfiguration().getLong(
076      TimeToLiveHFileCleaner.TTL_CONF_KEY, TimeToLiveHFileCleaner.DEFAULT_TTL);
077    this.onlyMetaRefresh = onlyMetaRefresh;
078    if (period > hfileTtl / 2) {
079      throw new RuntimeException(REGIONSERVER_STOREFILE_REFRESH_PERIOD +
080        " should be set smaller than half of " + TimeToLiveHFileCleaner.TTL_CONF_KEY);
081    }
082    lastRefreshTimes = new HashMap<>();
083  }
084
085  @Override
086  protected void chore() {
087    for (Region r : regionServer.getOnlineRegionsLocalContext()) {
088      if (!r.isReadOnly()) {
089        // skip checking for this region if it can accept writes
090        continue;
091      }
092      // don't refresh unless enabled for all files, or it the meta region
093      // meta region don't have WAL replication for replicas enabled yet
094      if (onlyMetaRefresh && !r.getRegionInfo().isMetaRegion()) continue;
095      String encodedName = r.getRegionInfo().getEncodedName();
096      long time = EnvironmentEdgeManager.currentTime();
097      if (!lastRefreshTimes.containsKey(encodedName)) {
098        lastRefreshTimes.put(encodedName, time);
099      }
100      try {
101        for (Store store : r.getStores()) {
102          // TODO: some stores might see new data from flush, while others do not which
103          // MIGHT break atomic edits across column families. We can fix this with setting
104          // mvcc read numbers that we know every store has seen
105          store.refreshStoreFiles();
106        }
107      } catch (IOException ex) {
108        LOG.warn("Exception while trying to refresh store files for region:" + r.getRegionInfo()
109          + ", exception:" + StringUtils.stringifyException(ex));
110
111        // Store files have a TTL in the archive directory. If we fail to refresh for that long, we stop serving reads
112        if (isRegionStale(encodedName, time)) {
113          ((HRegion)r).setReadsEnabled(false); // stop serving reads
114        }
115        continue;
116      }
117      lastRefreshTimes.put(encodedName, time);
118      ((HRegion)r).setReadsEnabled(true); // restart serving reads
119    }
120
121    // remove closed regions
122    Iterator<String> lastRefreshTimesIter = lastRefreshTimes.keySet().iterator();
123    while (lastRefreshTimesIter.hasNext()) {
124      String encodedName = lastRefreshTimesIter.next();
125      if (regionServer.getRegion(encodedName) == null) {
126        lastRefreshTimesIter.remove();
127      }
128    }
129  }
130
131  protected boolean isRegionStale(String encodedName, long time) {
132    long lastRefreshTime = lastRefreshTimes.get(encodedName);
133    return time - lastRefreshTime > hfileTtl - period;
134  }
135}