001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.chaos.actions;
019
020import java.util.Collections;
021import java.util.List;
022import java.util.Optional;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.hbase.ServerName;
025import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
026import org.apache.hadoop.hbase.zookeeper.ZKUtil;
027import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
028import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
029import org.apache.hadoop.hdfs.DFSUtil;
030import org.apache.hadoop.hdfs.DistributedFileSystem;
031import org.apache.hadoop.hdfs.HAUtil;
032import org.apache.hadoop.hdfs.server.namenode.ha.proto.HAZKInfoProtos.ActiveNodeInfo;
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035
036/**
037 * Action that tries to restart the active namenode.
038 */
039public class RestartActiveNameNodeAction extends RestartActionBaseAction {
040  private static final Logger LOG = LoggerFactory.getLogger(RestartActiveNameNodeAction.class);
041
042  // Value taken from org.apache.hadoop.ha.ActiveStandbyElector.java, variable :- LOCK_FILENAME
043  private static final String ACTIVE_NN_LOCK_NAME = "ActiveStandbyElectorLock";
044
045  // Value taken from org.apache.hadoop.ha.ZKFailoverController.java
046  // variable :- ZK_PARENT_ZNODE_DEFAULT and ZK_PARENT_ZNODE_KEY
047  private static final String ZK_PARENT_ZNODE_DEFAULT = "/hadoop-ha";
048  private static final String ZK_PARENT_ZNODE_KEY = "ha.zookeeper.parent-znode";
049
050  public RestartActiveNameNodeAction(long sleepTime) {
051    super(sleepTime);
052  }
053
054  @Override
055  protected Logger getLogger() {
056    return LOG;
057  }
058
059  @Override
060  public void perform() throws Exception {
061    getLogger().info("Performing action: Restart active namenode");
062
063    final String hadoopHAZkNode;
064    String activeNamenode = null;
065    int activeNamenodePort = -1;
066    try (final DistributedFileSystem dfs = HdfsActionUtils.createDfs(getConf())) {
067      final Configuration conf = dfs.getConf();
068      hadoopHAZkNode = conf.get(ZK_PARENT_ZNODE_KEY, ZK_PARENT_ZNODE_DEFAULT);
069      final String nameServiceID = DFSUtil.getNamenodeNameServiceId(conf);
070
071      if (!HAUtil.isHAEnabled(conf, nameServiceID)) {
072        getLogger().info("HA for HDFS is not enabled; skipping");
073        return;
074      }
075      try (final ZKWatcher zkw = new ZKWatcher(conf, "get-active-namenode", null)) {
076        final RecoverableZooKeeper rzk = zkw.getRecoverableZooKeeper();
077        // If hadoopHAZkNode == '/', pass '' instead because then joinZNode will return '//' as a
078        // prefix
079        // which zk doesn't like as a prefix on the path.
080        final String hadoopHAZkNodePath = ZNodePaths.joinZNode(
081          (hadoopHAZkNode != null && hadoopHAZkNode.equals("/")) ? "" : hadoopHAZkNode,
082          nameServiceID);
083        final List<String> subChildren =
084          Optional.ofNullable(ZKUtil.listChildrenNoWatch(zkw, hadoopHAZkNodePath))
085            .orElse(Collections.emptyList());
086        for (final String eachEntry : subChildren) {
087          if (!eachEntry.contains(ACTIVE_NN_LOCK_NAME)) {
088            continue;
089          }
090          byte[] data =
091            rzk.getData(ZNodePaths.joinZNode(hadoopHAZkNodePath, ACTIVE_NN_LOCK_NAME), false, null);
092          ActiveNodeInfo proto = ActiveNodeInfo.parseFrom(data);
093          activeNamenode = proto.getHostname();
094          activeNamenodePort = proto.getPort();
095        }
096      }
097    }
098
099    if (activeNamenode == null) {
100      getLogger().info("No active Name node found in zookeeper under '{}'", hadoopHAZkNode);
101      return;
102    }
103
104    getLogger().info("Found Active NameNode host: {}", activeNamenode);
105    final ServerName activeNNHost = ServerName.valueOf(activeNamenode, activeNamenodePort, -1L);
106    getLogger().info("Restarting Active NameNode: {}", activeNamenode);
107    restartNameNode(activeNNHost, this.sleepTime);
108  }
109}