001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019package org.apache.hadoop.hbase.chaos.actions;
020
021import java.util.List;
022import org.apache.hadoop.conf.Configuration;
023import org.apache.hadoop.hbase.ServerName;
024import org.apache.hadoop.hbase.util.CommonFSUtils;
025import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
026import org.apache.hadoop.hbase.zookeeper.ZKUtil;
027import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
028import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
029import org.apache.hadoop.hdfs.DFSUtil;
030import org.apache.hadoop.hdfs.HAUtil;
031import org.apache.hadoop.hdfs.server.namenode.ha.proto.HAZKInfoProtos.ActiveNodeInfo;
032import org.slf4j.Logger;
033import org.slf4j.LoggerFactory;
034
035/**
036 * Action that tries to restart the active namenode.
037 */
038public class RestartActiveNameNodeAction extends RestartActionBaseAction {
039  private static final Logger LOG =
040      LoggerFactory.getLogger(RestartActiveNameNodeAction.class);
041
042  // Value taken from org.apache.hadoop.ha.ActiveStandbyElector.java, variable :- LOCK_FILENAME
043  private static final String ACTIVE_NN_LOCK_NAME = "ActiveStandbyElectorLock";
044
045  // Value taken from org.apache.hadoop.ha.ZKFailoverController.java
046  // variable :- ZK_PARENT_ZNODE_DEFAULT and ZK_PARENT_ZNODE_KEY
047  private static final String ZK_PARENT_ZNODE_DEFAULT = "/hadoop-ha";
048  private static final String ZK_PARENT_ZNODE_KEY = "ha.zookeeper.parent-znode";
049
050  public RestartActiveNameNodeAction(long sleepTime) {
051    super(sleepTime);
052  }
053
054  @Override protected Logger getLogger() {
055    return LOG;
056  }
057
058  @Override
059  public void perform() throws Exception {
060    getLogger().info("Performing action: Restart active namenode");
061    Configuration conf = CommonFSUtils.getRootDir(getConf()).getFileSystem(getConf()).getConf();
062    String nameServiceID = DFSUtil.getNamenodeNameServiceId(conf);
063    if (!HAUtil.isHAEnabled(conf, nameServiceID)) {
064      throw new Exception("HA for namenode is not enabled");
065    }
066    ZKWatcher zkw = null;
067    RecoverableZooKeeper rzk = null;
068    String activeNamenode = null;
069    String hadoopHAZkNode = conf.get(ZK_PARENT_ZNODE_KEY, ZK_PARENT_ZNODE_DEFAULT);
070    try {
071      zkw = new ZKWatcher(conf, "get-active-namenode", null);
072      rzk = zkw.getRecoverableZooKeeper();
073      String hadoopHAZkNodePath = ZNodePaths.joinZNode(hadoopHAZkNode, nameServiceID);
074      List<String> subChildern = ZKUtil.listChildrenNoWatch(zkw, hadoopHAZkNodePath);
075      for (String eachEntry : subChildern) {
076        if (eachEntry.contains(ACTIVE_NN_LOCK_NAME)) {
077          byte[] data =
078              rzk.getData(ZNodePaths.joinZNode(hadoopHAZkNodePath, ACTIVE_NN_LOCK_NAME), false,
079                null);
080          ActiveNodeInfo proto = ActiveNodeInfo.parseFrom(data);
081          activeNamenode = proto.getHostname();
082        }
083      }
084    } finally {
085      if (zkw != null) {
086        zkw.close();
087      }
088    }
089    if (activeNamenode == null) {
090      throw new Exception("No active Name node found in zookeeper under " + hadoopHAZkNode);
091    }
092    getLogger().info("Found active namenode host:" + activeNamenode);
093    ServerName activeNNHost = ServerName.valueOf(activeNamenode, -1, -1);
094    getLogger().info("Restarting Active NameNode :" + activeNamenode);
095    restartNameNode(activeNNHost, sleepTime);
096  }
097}