001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.chaos.actions;
020
021import java.util.List;
022
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.hbase.ServerName;
025import org.apache.hadoop.hbase.util.FSUtils;
026import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
027import org.apache.hadoop.hbase.zookeeper.ZKUtil;
028import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
029import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
030import org.apache.hadoop.hdfs.DFSUtil;
031import org.apache.hadoop.hdfs.HAUtil;
032import org.apache.hadoop.hdfs.server.namenode.ha.proto.HAZKInfoProtos.ActiveNodeInfo;
033import org.slf4j.Logger;
034import org.slf4j.LoggerFactory;
035
036/**
037 * Action that tries to restart the active namenode.
038 */
039public class RestartActiveNameNodeAction extends RestartActionBaseAction {
040  private static final Logger LOG =
041      LoggerFactory.getLogger(RestartActiveNameNodeAction.class);
042
043  // Value taken from org.apache.hadoop.ha.ActiveStandbyElector.java, variable :- LOCK_FILENAME
044  private static final String ACTIVE_NN_LOCK_NAME = "ActiveStandbyElectorLock";
045
046  // Value taken from org.apache.hadoop.ha.ZKFailoverController.java
047  // variable :- ZK_PARENT_ZNODE_DEFAULT and ZK_PARENT_ZNODE_KEY
048  private static final String ZK_PARENT_ZNODE_DEFAULT = "/hadoop-ha";
049  private static final String ZK_PARENT_ZNODE_KEY = "ha.zookeeper.parent-znode";
050
051  public RestartActiveNameNodeAction(long sleepTime) {
052    super(sleepTime);
053  }
054
055  @Override
056  public void perform() throws Exception {
057    LOG.info("Performing action: Restart active namenode");
058    Configuration conf = FSUtils.getRootDir(getConf()).getFileSystem(getConf()).getConf();
059    String nameServiceID = DFSUtil.getNamenodeNameServiceId(conf);
060    if (!HAUtil.isHAEnabled(conf, nameServiceID)) {
061      throw new Exception("HA for namenode is not enabled");
062    }
063    ZKWatcher zkw = null;
064    RecoverableZooKeeper rzk = null;
065    String activeNamenode = null;
066    String hadoopHAZkNode = conf.get(ZK_PARENT_ZNODE_KEY, ZK_PARENT_ZNODE_DEFAULT);
067    try {
068      zkw = new ZKWatcher(conf, "get-active-namenode", null);
069      rzk = zkw.getRecoverableZooKeeper();
070      String hadoopHAZkNodePath = ZNodePaths.joinZNode(hadoopHAZkNode, nameServiceID);
071      List<String> subChildern = ZKUtil.listChildrenNoWatch(zkw, hadoopHAZkNodePath);
072      for (String eachEntry : subChildern) {
073        if (eachEntry.contains(ACTIVE_NN_LOCK_NAME)) {
074          byte[] data =
075              rzk.getData(ZNodePaths.joinZNode(hadoopHAZkNodePath, ACTIVE_NN_LOCK_NAME), false,
076                null);
077          ActiveNodeInfo proto = ActiveNodeInfo.parseFrom(data);
078          activeNamenode = proto.getHostname();
079        }
080      }
081    } finally {
082      if (zkw != null) {
083        zkw.close();
084      }
085    }
086    if (activeNamenode == null) {
087      throw new Exception("No active Name node found in zookeeper under " + hadoopHAZkNode);
088    }
089    LOG.info("Found active namenode host:" + activeNamenode);
090    ServerName activeNNHost = ServerName.valueOf(activeNamenode, -1, -1);
091    LOG.info("Restarting Active NameNode :" + activeNamenode);
092    restartNameNode(activeNNHost, sleepTime);
093  }
094}