001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import java.io.IOException;
021import java.util.HashMap;
022import java.util.HashSet;
023import java.util.LinkedList;
024import java.util.List;
025import java.util.Map;
026import java.util.Set;
027import java.util.concurrent.locks.ReentrantReadWriteLock;
028
029import org.apache.hadoop.fs.FileSystem;
030import org.apache.hadoop.fs.Path;
031import org.apache.hadoop.hbase.MetaTableAccessor;
032import org.apache.hadoop.hbase.ScheduledChore;
033import org.apache.hadoop.hbase.ServerName;
034import org.apache.hadoop.hbase.client.RegionInfo;
035import org.apache.hadoop.hbase.client.TableState;
036import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
037import org.apache.hadoop.hbase.util.FSUtils;
038import org.apache.hadoop.hbase.util.HbckRegionInfo;
039import org.apache.hadoop.hbase.util.Pair;
040import org.apache.yetus.audience.InterfaceAudience;
041import org.apache.yetus.audience.InterfaceStability;
042import org.slf4j.Logger;
043import org.slf4j.LoggerFactory;
044
045/**
046 * Used to do the hbck checking job at master side.
047 */
048@InterfaceAudience.Private
049@InterfaceStability.Evolving
050public class HbckChore extends ScheduledChore {
051  private static final Logger LOG = LoggerFactory.getLogger(HbckChore.class.getName());
052
053  private static final String HBCK_CHORE_INTERVAL = "hbase.master.hbck.chore.interval";
054  private static final int DEFAULT_HBCK_CHORE_INTERVAL = 60 * 60 * 1000;
055
056  private final MasterServices master;
057
058  /**
059   * This map contains the state of all hbck items.  It maps from encoded region
060   * name to HbckRegionInfo structure.  The information contained in HbckRegionInfo is used
061   * to detect and correct consistency (hdfs/meta/deployment) problems.
062   */
063  private final Map<String, HbckRegionInfo> regionInfoMap = new HashMap<>();
064
065  private final Set<String> disabledTableRegions = new HashSet<>();
066  private final Set<String> splitParentRegions = new HashSet<>();
067
068  /**
069   * The regions only opened on RegionServers, but no region info in meta.
070   */
071  private final Map<String, ServerName> orphanRegionsOnRS = new HashMap<>();
072  /**
073   * The regions have directory on FileSystem, but no region info in meta.
074   */
075  private final Map<String, Path> orphanRegionsOnFS = new HashMap<>();
076  /**
077   * The inconsistent regions. There are three case:
078   * case 1. Master thought this region opened, but no regionserver reported it.
079   * case 2. Master thought this region opened on Server1, but regionserver reported Server2
080   * case 3. More than one regionservers reported opened this region
081   */
082  private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
083      new HashMap<>();
084
085  /**
086   * The "snapshot" is used to save the last round's HBCK checking report.
087   */
088  private final Map<String, ServerName> orphanRegionsOnRSSnapshot = new HashMap<>();
089  private final Map<String, Path> orphanRegionsOnFSSnapshot = new HashMap<>();
090  private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegionsSnapshot =
091      new HashMap<>();
092
093  /**
094   * The "snapshot" may be changed after checking. And this checking report "snapshot" may be
095   * accessed by web ui. Use this rwLock to synchronize.
096   */
097  ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
098
099  /**
100   * When running, the "snapshot" may be changed when this round's checking finish.
101   */
102  private volatile boolean running = false;
103  private volatile long checkingStartTimestamp = 0;
104  private volatile long checkingEndTimestamp = 0;
105
106  private boolean disabled = false;
107
108  public HbckChore(MasterServices master) {
109    super("HbckChore-", master,
110        master.getConfiguration().getInt(HBCK_CHORE_INTERVAL, DEFAULT_HBCK_CHORE_INTERVAL));
111    this.master = master;
112    int interval =
113        master.getConfiguration().getInt(HBCK_CHORE_INTERVAL, DEFAULT_HBCK_CHORE_INTERVAL);
114    if (interval <= 0) {
115      LOG.warn(HBCK_CHORE_INTERVAL + " is <=0 hence disabling hbck chore");
116      disableChore();
117    }
118  }
119
120  @Override
121  protected synchronized void chore() {
122    if (isDisabled() || isRunning()) {
123      LOG.warn("hbckChore is either disabled or is already running. Can't run the chore");
124      return;
125    }
126    regionInfoMap.clear();
127    disabledTableRegions.clear();
128    splitParentRegions.clear();
129    orphanRegionsOnRS.clear();
130    orphanRegionsOnFS.clear();
131    inconsistentRegions.clear();
132    checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
133    running = true;
134    try {
135      loadRegionsFromInMemoryState();
136      loadRegionsFromRSReport();
137      try {
138        loadRegionsFromFS(scanForMergedParentRegions());
139      } catch (IOException e) {
140        LOG.warn("Failed to load the regions from filesystem", e);
141      }
142      saveCheckResultToSnapshot();
143    } catch (Throwable t) {
144      LOG.warn("Unexpected", t);
145    }
146    running = false;
147  }
148
149  // This function does the sanity checks of making sure the chore is not run when it is
150  // disabled or when it's already running. It returns whether the chore was actually run or not.
151  protected boolean runChore() {
152    if (isDisabled() || isRunning()) {
153      if (isDisabled()) {
154        LOG.warn("hbck chore is disabled! Set " + HBCK_CHORE_INTERVAL + " > 0 to enable it.");
155      } else {
156        LOG.warn("hbck chore already running. Can't run till it finishes.");
157      }
158      return false;
159    }
160    chore();
161    return true;
162  }
163
164  private void disableChore() {
165    this.disabled = true;
166  }
167
168  public boolean isDisabled() {
169    return this.disabled;
170  }
171
172  private void saveCheckResultToSnapshot() {
173    // Need synchronized here, as this "snapshot" may be access by web ui.
174    rwLock.writeLock().lock();
175    try {
176      orphanRegionsOnRSSnapshot.clear();
177      orphanRegionsOnRS.entrySet()
178          .forEach(e -> orphanRegionsOnRSSnapshot.put(e.getKey(), e.getValue()));
179      orphanRegionsOnFSSnapshot.clear();
180      orphanRegionsOnFS.entrySet()
181          .forEach(e -> orphanRegionsOnFSSnapshot.put(e.getKey(), e.getValue()));
182      inconsistentRegionsSnapshot.clear();
183      inconsistentRegions.entrySet()
184          .forEach(e -> inconsistentRegionsSnapshot.put(e.getKey(), e.getValue()));
185      checkingEndTimestamp = EnvironmentEdgeManager.currentTime();
186    } finally {
187      rwLock.writeLock().unlock();
188    }
189  }
190
191  /**
192   * Scan hbase:meta to get set of merged parent regions, this is a very heavy scan.
193   *
194   * @return Return generated {@link HashSet}
195   */
196  private HashSet<String> scanForMergedParentRegions() throws IOException {
197    HashSet<String> mergedParentRegions = new HashSet<>();
198    // Null tablename means scan all of meta.
199    MetaTableAccessor.scanMetaForTableRegions(this.master.getConnection(),
200      r -> {
201        List<RegionInfo> mergeParents = MetaTableAccessor.getMergeRegions(r.rawCells());
202        if (mergeParents != null) {
203          for (RegionInfo mergeRegion : mergeParents) {
204            if (mergeRegion != null) {
205              // This region is already being merged
206              mergedParentRegions.add(mergeRegion.getEncodedName());
207            }
208          }
209        }
210        return true;
211        },
212      null);
213    return mergedParentRegions;
214  }
215
216  private void loadRegionsFromInMemoryState() {
217    List<RegionState> regionStates =
218        master.getAssignmentManager().getRegionStates().getRegionStates();
219    for (RegionState regionState : regionStates) {
220      RegionInfo regionInfo = regionState.getRegion();
221      if (master.getTableStateManager()
222          .isTableState(regionInfo.getTable(), TableState.State.DISABLED)) {
223        disabledTableRegions.add(regionInfo.getRegionNameAsString());
224      }
225      if (regionInfo.isSplitParent()) {
226        splitParentRegions.add(regionInfo.getRegionNameAsString());
227      }
228      HbckRegionInfo.MetaEntry metaEntry =
229          new HbckRegionInfo.MetaEntry(regionInfo, regionState.getServerName(),
230              regionState.getStamp());
231      regionInfoMap.put(regionInfo.getEncodedName(), new HbckRegionInfo(metaEntry));
232    }
233    LOG.info("Loaded {} regions from in-memory state of AssignmentManager", regionStates.size());
234  }
235
236  private void loadRegionsFromRSReport() {
237    int numRegions = 0;
238    Map<ServerName, Set<byte[]>> rsReports = master.getAssignmentManager().getRSReports();
239    for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
240      ServerName serverName = entry.getKey();
241      for (byte[] regionName : entry.getValue()) {
242        String encodedRegionName = RegionInfo.encodeRegionName(regionName);
243        HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
244        if (hri == null) {
245          orphanRegionsOnRS.put(RegionInfo.getRegionNameAsString(regionName), serverName);
246          continue;
247        }
248        hri.addServer(hri.getMetaEntry(), serverName);
249      }
250      numRegions += entry.getValue().size();
251    }
252    LOG.info("Loaded {} regions from {} regionservers' reports and found {} orphan regions",
253        numRegions, rsReports.size(), orphanRegionsOnFS.size());
254
255    for (Map.Entry<String, HbckRegionInfo> entry : regionInfoMap.entrySet()) {
256      HbckRegionInfo hri = entry.getValue();
257      ServerName locationInMeta = hri.getMetaEntry().getRegionServer();
258      if (hri.getDeployedOn().size() == 0) {
259        if (locationInMeta == null) {
260          continue;
261        }
262        // skip the offline region which belong to disabled table.
263        if (disabledTableRegions.contains(hri.getRegionNameAsString())) {
264          continue;
265        }
266        // skip the split parent regions
267        if (splitParentRegions.contains(hri.getRegionNameAsString())) {
268          continue;
269        }
270        // Master thought this region opened, but no regionserver reported it.
271        inconsistentRegions.put(hri.getRegionNameAsString(),
272            new Pair<>(locationInMeta, new LinkedList<>()));
273      } else if (hri.getDeployedOn().size() > 1) {
274        // More than one regionserver reported opened this region
275        inconsistentRegions.put(hri.getRegionNameAsString(),
276            new Pair<>(locationInMeta, hri.getDeployedOn()));
277      } else if (!hri.getDeployedOn().get(0).equals(locationInMeta)) {
278        // Master thought this region opened on Server1, but regionserver reported Server2
279        inconsistentRegions.put(hri.getRegionNameAsString(),
280            new Pair<>(locationInMeta, hri.getDeployedOn()));
281      }
282    }
283  }
284
285  private void loadRegionsFromFS(final HashSet<String> mergedParentRegions) throws IOException {
286    Path rootDir = master.getMasterFileSystem().getRootDir();
287    FileSystem fs = master.getMasterFileSystem().getFileSystem();
288
289    int numRegions = 0;
290    List<Path> tableDirs = FSUtils.getTableDirs(fs, rootDir);
291    for (Path tableDir : tableDirs) {
292      List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
293      for (Path regionDir : regionDirs) {
294        String encodedRegionName = regionDir.getName();
295        if (encodedRegionName == null) {
296          LOG.warn("Failed get of encoded name from {}", regionDir);
297          continue;
298        }
299        HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
300        // If it is not in in-memory database and not a merged region,
301        // report it as an orphan region.
302        if (hri == null && !mergedParentRegions.contains(encodedRegionName)) {
303          orphanRegionsOnFS.put(encodedRegionName, regionDir);
304          continue;
305        }
306      }
307      numRegions += regionDirs.size();
308    }
309    LOG.info("Loaded {} tables {} regions from filesyetem and found {} orphan regions",
310        tableDirs.size(), numRegions, orphanRegionsOnFS.size());
311  }
312
313  /**
314   * When running, the HBCK report may be changed later.
315   */
316  public boolean isRunning() {
317    return running;
318  }
319
320  /**
321   * @return the regions only opened on RegionServers, but no region info in meta.
322   */
323  public Map<String, ServerName> getOrphanRegionsOnRS() {
324    // Need synchronized here, as this "snapshot" may be changed after checking.
325    rwLock.readLock().lock();
326    try {
327      return this.orphanRegionsOnRSSnapshot;
328    } finally {
329      rwLock.readLock().unlock();
330    }
331  }
332
333  /**
334   * @return the regions have directory on FileSystem, but no region info in meta.
335   */
336  public Map<String, Path> getOrphanRegionsOnFS() {
337    // Need synchronized here, as this "snapshot" may be changed after checking.
338    rwLock.readLock().lock();
339    try {
340      return this.orphanRegionsOnFSSnapshot;
341    } finally {
342      rwLock.readLock().unlock();
343    }
344  }
345
346  /**
347   * Found the inconsistent regions. There are three case:
348   * case 1. Master thought this region opened, but no regionserver reported it.
349   * case 2. Master thought this region opened on Server1, but regionserver reported Server2
350   * case 3. More than one regionservers reported opened this region
351   *
352   * @return the map of inconsistent regions. Key is the region name. Value is a pair of location in
353   *         meta and the regionservers which reported opened this region.
354   */
355  public Map<String, Pair<ServerName, List<ServerName>>> getInconsistentRegions() {
356    // Need synchronized here, as this "snapshot" may be changed after checking.
357    rwLock.readLock().lock();
358    try {
359      return this.inconsistentRegionsSnapshot;
360    } finally {
361      rwLock.readLock().unlock();
362    }
363  }
364
365  /**
366   * Used for web ui to show when the HBCK checking started.
367   */
368  public long getCheckingStartTimestamp() {
369    return this.checkingStartTimestamp;
370  }
371
372  /**
373   * Used for web ui to show when the HBCK checking report generated.
374   */
375  public long getCheckingEndTimestamp() {
376    return this.checkingEndTimestamp;
377  }
378}