001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver;
019
020import java.io.IOException;
021import java.util.HashMap;
022import java.util.Iterator;
023import java.util.Map;
024import org.apache.hadoop.hbase.ScheduledChore;
025import org.apache.hadoop.hbase.Stoppable;
026import org.apache.hadoop.hbase.client.RegionInfo;
027import org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner;
028import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
029import org.apache.hadoop.util.StringUtils;
030import org.apache.yetus.audience.InterfaceAudience;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034/**
035 * A chore for refreshing the store files for secondary regions hosted in the region server. This
036 * chore should run periodically with a shorter interval than HFile TTL
037 * ("hbase.master.hfilecleaner.ttl", default 5 minutes). It ensures that if we cannot refresh files
038 * longer than that amount, the region will stop serving read requests because the referenced files
039 * might have been deleted (by the primary region).
040 */
041@InterfaceAudience.Private
042public class StorefileRefresherChore extends ScheduledChore {
043
044  private static final Logger LOG = LoggerFactory.getLogger(StorefileRefresherChore.class);
045
046  /**
047   * The period (in milliseconds) for refreshing the store files for the secondary regions.
048   */
049  public static final String REGIONSERVER_STOREFILE_REFRESH_PERIOD =
050    "hbase.regionserver.storefile.refresh.period";
051  static final int DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD = 0; // disabled by default
052
053  /**
054   * Whether all storefiles should be refreshed, as opposed to just hbase:meta's Meta region doesn't
055   * have WAL replication for replicas enabled yet
056   */
057  public static final String REGIONSERVER_META_STOREFILE_REFRESH_PERIOD =
058    "hbase.regionserver.meta.storefile.refresh.period";
059  private HRegionServer regionServer;
060  private long hfileTtl;
061  private int period;
062  private boolean onlyMetaRefresh = true;
063
064  // ts of last time regions store files are refreshed
065  private Map<String, Long> lastRefreshTimes; // encodedName -> long
066
067  public StorefileRefresherChore(int period, boolean onlyMetaRefresh, HRegionServer regionServer,
068    Stoppable stoppable) {
069    super("StorefileRefresherChore", stoppable, period);
070    this.period = period;
071    this.regionServer = regionServer;
072    this.hfileTtl = this.regionServer.getConfiguration()
073      .getLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, TimeToLiveHFileCleaner.DEFAULT_TTL);
074    this.onlyMetaRefresh = onlyMetaRefresh;
075    if (period > hfileTtl / 2) {
076      throw new RuntimeException(REGIONSERVER_STOREFILE_REFRESH_PERIOD
077        + " should be set smaller than half of " + TimeToLiveHFileCleaner.TTL_CONF_KEY);
078    }
079    lastRefreshTimes = new HashMap<>();
080  }
081
082  @Override
083  protected void chore() {
084    for (Region r : regionServer.getOnlineRegionsLocalContext()) {
085      if (
086        !r.isReadOnly() || r.getRegionInfo().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID
087          || r.getTableDescriptor().isReadOnly()
088      ) {
089        // Skip checking for this region if it can accept writes.
090        // The refresher is only for refreshing secondary replicas. And if the table is readonly,
091        // meaning no writes to the primary replica, skip checking the secondary replicas as well.
092        continue;
093      }
094      // don't refresh unless enabled for all files, or it the meta region
095      // meta region don't have WAL replication for replicas enabled yet
096      if (onlyMetaRefresh && !r.getRegionInfo().isMetaRegion()) continue;
097      String encodedName = r.getRegionInfo().getEncodedName();
098      long time = EnvironmentEdgeManager.currentTime();
099      if (!lastRefreshTimes.containsKey(encodedName)) {
100        lastRefreshTimes.put(encodedName, time);
101      }
102      try {
103        for (Store store : r.getStores()) {
104          // TODO: some stores might see new data from flush, while others do not which
105          // MIGHT break atomic edits across column families. We can fix this with setting
106          // mvcc read numbers that we know every store has seen
107          store.refreshStoreFiles();
108        }
109      } catch (IOException ex) {
110        LOG.warn("Exception while trying to refresh store files for region:" + r.getRegionInfo()
111          + ", exception:" + StringUtils.stringifyException(ex));
112
113        // Store files have a TTL in the archive directory. If we fail to refresh for that long, we
114        // stop serving reads
115        if (isRegionStale(encodedName, time)) {
116          ((HRegion) r).setReadsEnabled(false); // stop serving reads
117        }
118        continue;
119      }
120      lastRefreshTimes.put(encodedName, time);
121      ((HRegion) r).setReadsEnabled(true); // restart serving reads
122    }
123
124    // remove closed regions
125    Iterator<String> lastRefreshTimesIter = lastRefreshTimes.keySet().iterator();
126    while (lastRefreshTimesIter.hasNext()) {
127      String encodedName = lastRefreshTimesIter.next();
128      if (regionServer.getRegion(encodedName) == null) {
129        lastRefreshTimesIter.remove();
130      }
131    }
132  }
133
134  protected boolean isRegionStale(String encodedName, long time) {
135    long lastRefreshTime = lastRefreshTimes.get(encodedName);
136    return time - lastRefreshTime > hfileTtl - period;
137  }
138}