001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.IOException; 021import java.util.HashMap; 022import java.util.Iterator; 023import java.util.Map; 024import org.apache.hadoop.hbase.ScheduledChore; 025import org.apache.hadoop.hbase.Stoppable; 026import org.apache.hadoop.hbase.client.RegionInfo; 027import org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner; 028import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 029import org.apache.hadoop.util.StringUtils; 030import org.apache.yetus.audience.InterfaceAudience; 031import org.slf4j.Logger; 032import org.slf4j.LoggerFactory; 033 034/** 035 * A chore for refreshing the store files for secondary regions hosted in the region server. This 036 * chore should run periodically with a shorter interval than HFile TTL 037 * ("hbase.master.hfilecleaner.ttl", default 5 minutes). It ensures that if we cannot refresh files 038 * longer than that amount, the region will stop serving read requests because the referenced files 039 * might have been deleted (by the primary region). 040 */ 041@InterfaceAudience.Private 042public class StorefileRefresherChore extends ScheduledChore { 043 044 private static final Logger LOG = LoggerFactory.getLogger(StorefileRefresherChore.class); 045 046 /** 047 * The period (in milliseconds) for refreshing the store files for the secondary regions. 048 */ 049 public static final String REGIONSERVER_STOREFILE_REFRESH_PERIOD = 050 "hbase.regionserver.storefile.refresh.period"; 051 static final int DEFAULT_REGIONSERVER_STOREFILE_REFRESH_PERIOD = 0; // disabled by default 052 053 /** 054 * Whether all storefiles should be refreshed, as opposed to just hbase:meta's Meta region doesn't 055 * have WAL replication for replicas enabled yet 056 */ 057 public static final String REGIONSERVER_META_STOREFILE_REFRESH_PERIOD = 058 "hbase.regionserver.meta.storefile.refresh.period"; 059 private HRegionServer regionServer; 060 private long hfileTtl; 061 private int period; 062 private boolean onlyMetaRefresh = true; 063 064 // ts of last time regions store files are refreshed 065 private Map<String, Long> lastRefreshTimes; // encodedName -> long 066 067 public StorefileRefresherChore(int period, boolean onlyMetaRefresh, HRegionServer regionServer, 068 Stoppable stoppable) { 069 super("StorefileRefresherChore", stoppable, period); 070 this.period = period; 071 this.regionServer = regionServer; 072 this.hfileTtl = this.regionServer.getConfiguration() 073 .getLong(TimeToLiveHFileCleaner.TTL_CONF_KEY, TimeToLiveHFileCleaner.DEFAULT_TTL); 074 this.onlyMetaRefresh = onlyMetaRefresh; 075 if (period > hfileTtl / 2) { 076 throw new RuntimeException(REGIONSERVER_STOREFILE_REFRESH_PERIOD 077 + " should be set smaller than half of " + TimeToLiveHFileCleaner.TTL_CONF_KEY); 078 } 079 lastRefreshTimes = new HashMap<>(); 080 } 081 082 @Override 083 protected void chore() { 084 for (Region r : regionServer.getOnlineRegionsLocalContext()) { 085 if ( 086 !r.isReadOnly() || r.getRegionInfo().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID 087 || r.getTableDescriptor().isReadOnly() 088 ) { 089 // Skip checking for this region if it can accept writes. 090 // The refresher is only for refreshing secondary replicas. And if the table is readonly, 091 // meaning no writes to the primary replica, skip checking the secondary replicas as well. 092 continue; 093 } 094 // don't refresh unless enabled for all files, or it the meta region 095 // meta region don't have WAL replication for replicas enabled yet 096 if (onlyMetaRefresh && !r.getRegionInfo().isMetaRegion()) continue; 097 String encodedName = r.getRegionInfo().getEncodedName(); 098 long time = EnvironmentEdgeManager.currentTime(); 099 if (!lastRefreshTimes.containsKey(encodedName)) { 100 lastRefreshTimes.put(encodedName, time); 101 } 102 try { 103 for (Store store : r.getStores()) { 104 // TODO: some stores might see new data from flush, while others do not which 105 // MIGHT break atomic edits across column families. We can fix this with setting 106 // mvcc read numbers that we know every store has seen 107 store.refreshStoreFiles(); 108 } 109 } catch (IOException ex) { 110 LOG.warn("Exception while trying to refresh store files for region:" + r.getRegionInfo() 111 + ", exception:" + StringUtils.stringifyException(ex)); 112 113 // Store files have a TTL in the archive directory. If we fail to refresh for that long, we 114 // stop serving reads 115 if (isRegionStale(encodedName, time)) { 116 ((HRegion) r).setReadsEnabled(false); // stop serving reads 117 } 118 continue; 119 } 120 lastRefreshTimes.put(encodedName, time); 121 ((HRegion) r).setReadsEnabled(true); // restart serving reads 122 } 123 124 // remove closed regions 125 Iterator<String> lastRefreshTimesIter = lastRefreshTimes.keySet().iterator(); 126 while (lastRefreshTimesIter.hasNext()) { 127 String encodedName = lastRefreshTimesIter.next(); 128 if (regionServer.getRegion(encodedName) == null) { 129 lastRefreshTimesIter.remove(); 130 } 131 } 132 } 133 134 protected boolean isRegionStale(String encodedName, long time) { 135 long lastRefreshTime = lastRefreshTimes.get(encodedName); 136 return time - lastRefreshTime > hfileTtl - period; 137 } 138}