/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HbckRegionInfo;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Used to do the hbck checking job on the master side.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class HbckChore extends ScheduledChore {
  private static final Logger LOG = LoggerFactory.getLogger(HbckChore.class.getName());

  private static final String HBCK_CHORE_INTERVAL = "hbase.master.hbck.chore.interval";
  private static final int DEFAULT_HBCK_CHORE_INTERVAL = 60 * 60 * 1000;

  private final MasterServices master;

  /**
   * This map contains the state of all hbck items. It maps from encoded region
   * name to HbckRegionInfo structure. The information contained in HbckRegionInfo is used
   * to detect and correct consistency (hdfs/meta/deployment) problems.
   */
  private final Map<String, HbckRegionInfo> regionInfoMap = new HashMap<>();

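  // Regions of disabled tables and parents of split regions. Regions in these two sets are
  // expected to be unassigned, so they are skipped when looking for inconsistent regions.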
  private final Set<String> disabledTableRegions = new HashSet<>();
  private final Set<String> splitParentRegions = new HashSet<>();

  /**
   * The regions that are only opened on RegionServers, but have no region info in meta.
   */
  private final Map<String, ServerName> orphanRegionsOnRS = new HashMap<>();
  /**
   * The regions that have a directory on the FileSystem, but have no region info in meta.
   */
  private final Map<String, Path> orphanRegionsOnFS = new HashMap<>();
  /**
   * The inconsistent regions. There are three cases:
   * case 1. Master thought this region opened, but no regionserver reported it.
   * case 2. Master thought this region opened on Server1, but regionserver reported Server2.
   * case 3. More than one regionserver reported this region as opened.
   */
  private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
      new HashMap<>();

  /**
   * The "snapshot" is used to save the last round's HBCK checking report.
   */
  private final Map<String, ServerName> orphanRegionsOnRSSnapshot = new HashMap<>();
  private final Map<String, Path> orphanRegionsOnFSSnapshot = new HashMap<>();
  private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegionsSnapshot =
      new HashMap<>();

  /**
   * The "snapshot" may be changed after checking, and this checking report "snapshot" may be
   * accessed by the web ui. Use this rwLock to synchronize access.
   */
  ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();

  /**
   * While running, the "snapshot" will be replaced when this round's checking finishes.
   */
  private volatile boolean running = false;
  private volatile long checkingStartTimestamp = 0;
  private volatile long checkingEndTimestamp = 0;

  private boolean disabled = false;

  public HbckChore(MasterServices master) {
    super("HbckChore-", master,
        master.getConfiguration().getInt(HBCK_CHORE_INTERVAL, DEFAULT_HBCK_CHORE_INTERVAL));
    this.master = master;
    int interval =
        master.getConfiguration().getInt(HBCK_CHORE_INTERVAL, DEFAULT_HBCK_CHORE_INTERVAL);
    if (interval <= 0) {
      LOG.warn(HBCK_CHORE_INTERVAL + " is <=0 hence disabling hbck chore");
      disableChore();
    }
  }

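  /**
   * Runs one round of checking: rebuilds the working maps from the in-memory assignment state,
   * the regionservers' reports and the filesystem, and then publishes the result as a new
   * snapshot for the web ui.
   */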
  @Override
  protected synchronized void chore() {
    if (isDisabled() || isRunning()) {
      LOG.warn("hbckChore is either disabled or is already running. Can't run the chore");
      return;
    }
    regionInfoMap.clear();
    disabledTableRegions.clear();
    splitParentRegions.clear();
    orphanRegionsOnRS.clear();
    orphanRegionsOnFS.clear();
    inconsistentRegions.clear();
    checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
    running = true;
    try {
      loadRegionsFromInMemoryState();
      loadRegionsFromRSReport();
      try {
        loadRegionsFromFS(scanForMergedParentRegions());
      } catch (IOException e) {
        LOG.warn("Failed to load the regions from filesystem", e);
      }
      saveCheckResultToSnapshot();
    } catch (Throwable t) {
      LOG.warn("Unexpected", t);
    }
    running = false;
  }

  // This function does the sanity checks of making sure the chore is not run when it is
  // disabled or when it's already running. It returns whether the chore was actually run or not.
  protected boolean runChore() {
    if (isDisabled() || isRunning()) {
      if (isDisabled()) {
        LOG.warn("hbck chore is disabled! Set " + HBCK_CHORE_INTERVAL + " > 0 to enable it.");
      } else {
        LOG.warn("hbck chore already running. Can't run till it finishes.");
      }
      return false;
    }
    chore();
    return true;
  }

  private void disableChore() {
    this.disabled = true;
  }

  public boolean isDisabled() {
    return this.disabled;
  }

  private void saveCheckResultToSnapshot() {
    // Need synchronized here, as this "snapshot" may be accessed by the web ui.
    rwLock.writeLock().lock();
    try {
      orphanRegionsOnRSSnapshot.clear();
      orphanRegionsOnRS.entrySet()
          .forEach(e -> orphanRegionsOnRSSnapshot.put(e.getKey(), e.getValue()));
      orphanRegionsOnFSSnapshot.clear();
      orphanRegionsOnFS.entrySet()
          .forEach(e -> orphanRegionsOnFSSnapshot.put(e.getKey(), e.getValue()));
      inconsistentRegionsSnapshot.clear();
      inconsistentRegions.entrySet()
          .forEach(e -> inconsistentRegionsSnapshot.put(e.getKey(), e.getValue()));
      checkingEndTimestamp = EnvironmentEdgeManager.currentTime();
    } finally {
      rwLock.writeLock().unlock();
    }
  }

  /**
   * Scan hbase:meta to get the set of merged parent regions; this is a very heavy scan.
   *
   * @return the generated {@link HashSet} of encoded region names
   */
  private HashSet<String> scanForMergedParentRegions() throws IOException {
    HashSet<String> mergedParentRegions = new HashSet<>();
    // Null tablename means scan all of meta.
    MetaTableAccessor.scanMetaForTableRegions(this.master.getConnection(),
      r -> {
        List<RegionInfo> mergeParents = MetaTableAccessor.getMergeRegions(r.rawCells());
        if (mergeParents != null) {
          for (RegionInfo mergeRegion : mergeParents) {
            if (mergeRegion != null) {
              // This region is already being merged
              mergedParentRegions.add(mergeRegion.getEncodedName());
            }
          }
        }
        return true;
      },
      null);
    return mergedParentRegions;
  }

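  /**
   * Loads every region known to the AssignmentManager into {@link #regionInfoMap}, and records
   * which regions belong to disabled tables and which are split parents.
   */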
  private void loadRegionsFromInMemoryState() {
    List<RegionState> regionStates =
        master.getAssignmentManager().getRegionStates().getRegionStates();
    for (RegionState regionState : regionStates) {
      RegionInfo regionInfo = regionState.getRegion();
      if (master.getTableStateManager()
          .isTableState(regionInfo.getTable(), TableState.State.DISABLED)) {
        disabledTableRegions.add(regionInfo.getRegionNameAsString());
      }
      if (regionInfo.isSplitParent()) {
        splitParentRegions.add(regionInfo.getRegionNameAsString());
      }
      HbckRegionInfo.MetaEntry metaEntry =
          new HbckRegionInfo.MetaEntry(regionInfo, regionState.getServerName(),
              regionState.getStamp());
      regionInfoMap.put(regionInfo.getEncodedName(), new HbckRegionInfo(metaEntry));
    }
    LOG.info("Loaded {} regions from in-memory state of AssignmentManager", regionStates.size());
  }

  private void loadRegionsFromRSReport() {
    int numRegions = 0;
    Map<ServerName, Set<byte[]>> rsReports = master.getAssignmentManager().getRSReports();
    for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
      ServerName serverName = entry.getKey();
      for (byte[] regionName : entry.getValue()) {
        String encodedRegionName = RegionInfo.encodeRegionName(regionName);
        HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
        if (hri == null) {
          orphanRegionsOnRS.put(RegionInfo.getRegionNameAsString(regionName), serverName);
          continue;
        }
        hri.addServer(hri.getMetaEntry(), serverName);
      }
      numRegions += entry.getValue().size();
    }
    LOG.info("Loaded {} regions from {} regionservers' reports and found {} orphan regions",
        numRegions, rsReports.size(), orphanRegionsOnRS.size());

    for (Map.Entry<String, HbckRegionInfo> entry : regionInfoMap.entrySet()) {
      HbckRegionInfo hri = entry.getValue();
      ServerName locationInMeta = hri.getMetaEntry().getRegionServer();
      if (hri.getDeployedOn().size() == 0) {
        if (locationInMeta == null) {
          continue;
        }
        // Skip the offline regions which belong to disabled tables.
        if (disabledTableRegions.contains(hri.getRegionNameAsString())) {
          continue;
        }
        // Skip the split parent regions.
        if (splitParentRegions.contains(hri.getRegionNameAsString())) {
          continue;
        }
        // Master thought this region opened, but no regionserver reported it.
        inconsistentRegions.put(hri.getRegionNameAsString(),
            new Pair<>(locationInMeta, new LinkedList<>()));
      } else if (hri.getDeployedOn().size() > 1) {
        // More than one regionserver reported this region as opened.
        inconsistentRegions.put(hri.getRegionNameAsString(),
            new Pair<>(locationInMeta, hri.getDeployedOn()));
      } else if (!hri.getDeployedOn().get(0).equals(locationInMeta)) {
        // Master thought this region opened on Server1, but regionserver reported Server2.
        inconsistentRegions.put(hri.getRegionNameAsString(),
            new Pair<>(locationInMeta, hri.getDeployedOn()));
      }
    }
  }

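  /**
   * Walks the table directories under the HBase root directory and reports region directories
   * that are neither in {@link #regionInfoMap} nor known merged parents as orphan regions on
   * the filesystem.
   */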
  private void loadRegionsFromFS(final HashSet<String> mergedParentRegions) throws IOException {
    Path rootDir = master.getMasterFileSystem().getRootDir();
    FileSystem fs = master.getMasterFileSystem().getFileSystem();

    int numRegions = 0;
    List<Path> tableDirs = FSUtils.getTableDirs(fs, rootDir);
    for (Path tableDir : tableDirs) {
      List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
      for (Path regionDir : regionDirs) {
        String encodedRegionName = regionDir.getName();
        if (encodedRegionName == null) {
          LOG.warn("Failed get of encoded name from {}", regionDir);
          continue;
        }
        HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
        // If it is not in the in-memory region map and not a merged parent region,
        // report it as an orphan region.
        if (hri == null && !mergedParentRegions.contains(encodedRegionName)) {
          orphanRegionsOnFS.put(encodedRegionName, regionDir);
          continue;
        }
      }
      numRegions += regionDirs.size();
    }
    LOG.info("Loaded {} tables {} regions from filesystem and found {} orphan regions",
        tableDirs.size(), numRegions, orphanRegionsOnFS.size());
  }

  /**
   * When this returns true, a checking round is in progress and the HBCK report may still change.
   */
  public boolean isRunning() {
    return running;
  }

  /**
   * @return the regions that are only opened on RegionServers, but have no region info in meta.
   */
  public Map<String, ServerName> getOrphanRegionsOnRS() {
    // Need synchronized here, as this "snapshot" may be changed after checking.
    rwLock.readLock().lock();
    try {
      return this.orphanRegionsOnRSSnapshot;
    } finally {
      rwLock.readLock().unlock();
    }
  }

  /**
   * @return the regions that have a directory on the FileSystem, but have no region info in meta.
   */
  public Map<String, Path> getOrphanRegionsOnFS() {
    // Need synchronized here, as this "snapshot" may be changed after checking.
    rwLock.readLock().lock();
    try {
      return this.orphanRegionsOnFSSnapshot;
    } finally {
      rwLock.readLock().unlock();
    }
  }

  /**
   * The inconsistent regions that were found. There are three cases:
   * case 1. Master thought this region opened, but no regionserver reported it.
   * case 2. Master thought this region opened on Server1, but regionserver reported Server2.
   * case 3. More than one regionserver reported this region as opened.
   *
   * @return the map of inconsistent regions. Key is the region name. Value is a pair of the
   *         location in meta and the regionservers which reported this region as opened.
   */
  public Map<String, Pair<ServerName, List<ServerName>>> getInconsistentRegions() {
    // Need synchronized here, as this "snapshot" may be changed after checking.
    rwLock.readLock().lock();
    try {
      return this.inconsistentRegionsSnapshot;
    } finally {
      rwLock.readLock().unlock();
    }
  }

  /**
   * Used for the web ui to show when the HBCK checking started.
   */
  public long getCheckingStartTimestamp() {
    return this.checkingStartTimestamp;
  }

  /**
   * Used for the web ui to show when the HBCK checking report was generated.
   */
  public long getCheckingEndTimestamp() {
    return this.checkingEndTimestamp;
  }
}