/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.TableState;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HbckRegionInfo;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Used to do the hbck checking job on the master side.
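 * <p>
 * A minimal usage sketch (the chore-service wiring shown here is illustrative, not part of this
 * class): the master schedules the chore on its configured interval, and the web UI later reads
 * the snapshot report through the getters.
 * <pre>{@code
 * HbckChore hbckChore = new HbckChore(masterServices);
 * choreService.scheduleChore(hbckChore);
 * // ... after a round of checking has finished:
 * Map<String, Pair<ServerName, List<ServerName>>> inconsistent =
 *     hbckChore.getInconsistentRegions();
 * }</pre>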
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class HbckChore extends ScheduledChore {
  private static final Logger LOG = LoggerFactory.getLogger(HbckChore.class);

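  /**
   * Configuration key for the period, in milliseconds, at which this chore runs. The default is
   * one hour; a value of zero or less disables the chore (see the constructor).
   */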
  private static final String HBCK_CHORE_INTERVAL = "hbase.master.hbck.chore.interval";
  private static final int DEFAULT_HBCK_CHORE_INTERVAL = 60 * 60 * 1000;

  private final MasterServices master;

  /**
   * This map contains the state of all hbck items. It maps from encoded region name to an
   * HbckRegionInfo structure. The information contained in HbckRegionInfo is used to detect and
   * correct consistency (hdfs/meta/deployment) problems.
   */
  private final Map<String, HbckRegionInfo> regionInfoMap = new HashMap<>();

  private final Set<String> disabledTableRegions = new HashSet<>();
  private final Set<String> splitParentRegions = new HashSet<>();

  /**
   * Regions that are open on RegionServers but have no region info in meta.
   */
  private final Map<String, ServerName> orphanRegionsOnRS = new HashMap<>();
  /**
   * Regions that have a directory on the FileSystem but no region info in meta.
   */
  private final Map<String, Path> orphanRegionsOnFS = new HashMap<>();
  /**
   * The inconsistent regions. There are three cases:
   * case 1. Master thought this region opened, but no regionserver reported it.
   * case 2. Master thought this region opened on Server1, but a regionserver reported Server2.
   * case 3. More than one regionserver reported this region as opened.
   */
  private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegions =
      new HashMap<>();

  /**
   * These "snapshot" maps save the last round's HBCK checking report.
   */
  private final Map<String, ServerName> orphanRegionsOnRSSnapshot = new HashMap<>();
  private final Map<String, Path> orphanRegionsOnFSSnapshot = new HashMap<>();
  private final Map<String, Pair<ServerName, List<ServerName>>> inconsistentRegionsSnapshot =
      new HashMap<>();

  /**
   * The "snapshot" maps may be rewritten after each round of checking, and at the same time they
   * may be read by the web UI. Use this rwLock to synchronize the two.
   */
  private final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();

  /**
   * While the chore is running, the "snapshot" will be replaced when this round's checking
   * finishes.
   */
  private volatile boolean running = false;
  private volatile long checkingStartTimestamp = 0;
  private volatile long checkingEndTimestamp = 0;

  private boolean disabled = false;

  public HbckChore(MasterServices master) {
    super("HbckChore-", master,
        master.getConfiguration().getInt(HBCK_CHORE_INTERVAL, DEFAULT_HBCK_CHORE_INTERVAL));
    this.master = master;
    int interval =
        master.getConfiguration().getInt(HBCK_CHORE_INTERVAL, DEFAULT_HBCK_CHORE_INTERVAL);
    if (interval <= 0) {
      LOG.warn(HBCK_CHORE_INTERVAL + " is <=0 hence disabling hbck chore");
      disableChore();
    }
  }

  @Override
  protected synchronized void chore() {
    if (isDisabled() || isRunning()) {
      LOG.warn("hbckChore is either disabled or is already running. Can't run the chore");
      return;
    }
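    // Reset the per-round working state before starting a new round of checking.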
    regionInfoMap.clear();
    disabledTableRegions.clear();
    splitParentRegions.clear();
    orphanRegionsOnRS.clear();
    orphanRegionsOnFS.clear();
    inconsistentRegions.clear();
    checkingStartTimestamp = EnvironmentEdgeManager.currentTime();
    running = true;
    try {
      loadRegionsFromInMemoryState();
      loadRegionsFromRSReport();
      try {
        loadRegionsFromFS();
      } catch (IOException e) {
        LOG.warn("Failed to load the regions from filesystem", e);
      }
      saveCheckResultToSnapshot();
    } catch (Throwable t) {
      LOG.warn("Unexpected", t);
    }
    running = false;
  }

  // This function does the sanity checks of making sure the chore is not run when it is
  // disabled or when it's already running. It returns whether the chore was actually run or not.
  protected boolean runChore() {
    if (isDisabled() || isRunning()) {
      if (isDisabled()) {
        LOG.warn("hbck chore is disabled! Set " + HBCK_CHORE_INTERVAL + " > 0 to enable it.");
      } else {
        LOG.warn("hbck chore already running. Can't run till it finishes.");
      }
      return false;
    }
    chore();
    return true;
  }

  private void disableChore() {
    this.disabled = true;
  }

  public boolean isDisabled() {
    return this.disabled;
  }

  private void saveCheckResultToSnapshot() {
    // Need the write lock here, as this "snapshot" may be read by the web UI at the same time.
    rwLock.writeLock().lock();
    try {
      orphanRegionsOnRSSnapshot.clear();
      orphanRegionsOnRSSnapshot.putAll(orphanRegionsOnRS);
      orphanRegionsOnFSSnapshot.clear();
      orphanRegionsOnFSSnapshot.putAll(orphanRegionsOnFS);
      inconsistentRegionsSnapshot.clear();
      inconsistentRegionsSnapshot.putAll(inconsistentRegions);
      checkingEndTimestamp = EnvironmentEdgeManager.currentTime();
    } finally {
      rwLock.writeLock().unlock();
    }
  }

  private void loadRegionsFromInMemoryState() {
    List<RegionState> regionStates =
        master.getAssignmentManager().getRegionStates().getRegionStates();
    for (RegionState regionState : regionStates) {
      RegionInfo regionInfo = regionState.getRegion();
      if (master.getTableStateManager()
          .isTableState(regionInfo.getTable(), TableState.State.DISABLED)) {
        disabledTableRegions.add(regionInfo.getRegionNameAsString());
      }
      if (regionInfo.isSplitParent()) {
        splitParentRegions.add(regionInfo.getRegionNameAsString());
      }
      HbckRegionInfo.MetaEntry metaEntry =
          new HbckRegionInfo.MetaEntry(regionInfo, regionState.getServerName(),
              regionState.getStamp());
      regionInfoMap.put(regionInfo.getEncodedName(), new HbckRegionInfo(metaEntry));
    }
    LOG.info("Loaded {} regions from in-memory state of AssignmentManager", regionStates.size());
  }

  private void loadRegionsFromRSReport() {
    int numRegions = 0;
    Map<ServerName, Set<byte[]>> rsReports = master.getAssignmentManager().getRSReports();
    for (Map.Entry<ServerName, Set<byte[]>> entry : rsReports.entrySet()) {
      ServerName serverName = entry.getKey();
      for (byte[] regionName : entry.getValue()) {
        String encodedRegionName = RegionInfo.encodeRegionName(regionName);
        HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
        if (hri == null) {
          orphanRegionsOnRS.put(RegionInfo.getRegionNameAsString(regionName), serverName);
          continue;
        }
        hri.addServer(hri.getMetaEntry(), serverName);
      }
      numRegions += entry.getValue().size();
    }
    LOG.info("Loaded {} regions from {} regionservers' reports and found {} orphan regions",
        numRegions, rsReports.size(), orphanRegionsOnRS.size());

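    // Cross-check the location recorded in meta against what the regionservers reported.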
    for (Map.Entry<String, HbckRegionInfo> entry : regionInfoMap.entrySet()) {
      HbckRegionInfo hri = entry.getValue();
      ServerName locationInMeta = hri.getMetaEntry().getRegionServer();
      if (hri.getDeployedOn().size() == 0) {
        if (locationInMeta == null) {
          continue;
        }
        // Skip offline regions which belong to a disabled table.
        if (disabledTableRegions.contains(hri.getRegionNameAsString())) {
          continue;
        }
        // Skip split parent regions.
        if (splitParentRegions.contains(hri.getRegionNameAsString())) {
          continue;
        }
        // Master thought this region opened, but no regionserver reported it.
        inconsistentRegions.put(hri.getRegionNameAsString(),
            new Pair<>(locationInMeta, new LinkedList<>()));
      } else if (hri.getDeployedOn().size() > 1) {
        // More than one regionserver reported this region as opened.
        inconsistentRegions.put(hri.getRegionNameAsString(),
            new Pair<>(locationInMeta, hri.getDeployedOn()));
      } else if (!hri.getDeployedOn().get(0).equals(locationInMeta)) {
        // Master thought this region opened on Server1, but a regionserver reported Server2.
        inconsistentRegions.put(hri.getRegionNameAsString(),
            new Pair<>(locationInMeta, hri.getDeployedOn()));
      }
    }
  }

  private void loadRegionsFromFS() throws IOException {
    Path rootDir = master.getMasterFileSystem().getRootDir();
    FileSystem fs = master.getMasterFileSystem().getFileSystem();

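    // Walk every table and region directory under the root dir and match each region directory
    // against the regions already loaded from the master's in-memory state.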
    int numRegions = 0;
    List<Path> tableDirs = FSUtils.getTableDirs(fs, rootDir);
    for (Path tableDir : tableDirs) {
      List<Path> regionDirs = FSUtils.getRegionDirs(fs, tableDir);
      for (Path regionDir : regionDirs) {
        String encodedRegionName = regionDir.getName();
        if (encodedRegionName == null) {
          LOG.warn("Failed to get the encoded name from {}", regionDir);
          continue;
        }
        HbckRegionInfo hri = regionInfoMap.get(encodedRegionName);
        if (hri == null) {
          orphanRegionsOnFS.put(encodedRegionName, regionDir);
          continue;
        }
        HbckRegionInfo.HdfsEntry hdfsEntry = new HbckRegionInfo.HdfsEntry(regionDir);
        hri.setHdfsEntry(hdfsEntry);
      }
      numRegions += regionDirs.size();
    }
    LOG.info("Loaded {} tables and {} regions from the filesystem and found {} orphan regions",
        tableDirs.size(), numRegions, orphanRegionsOnFS.size());
  }

  /**
   * Returns whether the chore is currently running. While it is running, the HBCK report may
   * still change.
   */
  public boolean isRunning() {
    return running;
  }

  /**
   * @return the regions that are open on RegionServers but have no region info in meta
   */
  public Map<String, ServerName> getOrphanRegionsOnRS() {
    // Need the read lock here, as this "snapshot" may be changed after checking.
    rwLock.readLock().lock();
    try {
      return this.orphanRegionsOnRSSnapshot;
    } finally {
      rwLock.readLock().unlock();
    }
  }

  /**
   * @return the regions that have a directory on the FileSystem but no region info in meta
   */
  public Map<String, Path> getOrphanRegionsOnFS() {
    // Need the read lock here, as this "snapshot" may be changed after checking.
    rwLock.readLock().lock();
    try {
      return this.orphanRegionsOnFSSnapshot;
    } finally {
      rwLock.readLock().unlock();
    }
  }

  /**
   * Gets the inconsistent regions. There are three cases:
   * case 1. Master thought this region opened, but no regionserver reported it.
   * case 2. Master thought this region opened on Server1, but a regionserver reported Server2.
   * case 3. More than one regionserver reported this region as opened.
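   * <p>
   * For example, a caller (such as the web UI) can tell the cases apart from the shape of the
   * value pair; the variable names below are illustrative only:
   * <pre>{@code
   * for (Map.Entry<String, Pair<ServerName, List<ServerName>>> e :
   *     hbckChore.getInconsistentRegions().entrySet()) {
   *   ServerName locationInMeta = e.getValue().getFirst();    // where the master thinks it is open
   *   List<ServerName> reportedOn = e.getValue().getSecond(); // servers that reported it open
   *   if (reportedOn.isEmpty()) {
   *     // case 1: no regionserver reported the region open
   *   } else if (reportedOn.size() > 1) {
   *     // case 3: multiple regionservers reported the region open
   *   } else {
   *     // case 2: reported on a different server than the location in meta
   *   }
   * }
   * }</pre>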
   *
   * @return the map of inconsistent regions. Key is the region name. Value is a pair of the
   *         location in meta and the regionservers which reported this region as opened.
   */
  public Map<String, Pair<ServerName, List<ServerName>>> getInconsistentRegions() {
    // Need the read lock here, as this "snapshot" may be changed after checking.
    rwLock.readLock().lock();
    try {
      return this.inconsistentRegionsSnapshot;
    } finally {
      rwLock.readLock().unlock();
    }
  }

  /**
   * Used by the web UI to show when the HBCK checking started.
   */
  public long getCheckingStartTimestamp() {
    return this.checkingStartTimestamp;
  }

  /**
   * Used by the web UI to show when the HBCK checking report was generated.
   */
  public long getCheckingEndTimestamp() {
    return this.checkingEndTimestamp;
  }
}