001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.chaos.actions;
019
020import java.util.List;
021import org.apache.hadoop.conf.Configuration;
022import org.apache.hadoop.hbase.ServerName;
023import org.apache.hadoop.hbase.util.CommonFSUtils;
024import org.apache.hadoop.hbase.zookeeper.RecoverableZooKeeper;
025import org.apache.hadoop.hbase.zookeeper.ZKUtil;
026import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
027import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
028import org.apache.hadoop.hdfs.DFSUtil;
029import org.apache.hadoop.hdfs.HAUtil;
030import org.apache.hadoop.hdfs.server.namenode.ha.proto.HAZKInfoProtos.ActiveNodeInfo;
031import org.slf4j.Logger;
032import org.slf4j.LoggerFactory;
033
034/**
035 * Action that tries to restart the active namenode.
036 */
037public class RestartActiveNameNodeAction extends RestartActionBaseAction {
038  private static final Logger LOG = LoggerFactory.getLogger(RestartActiveNameNodeAction.class);
039
040  // Value taken from org.apache.hadoop.ha.ActiveStandbyElector.java, variable :- LOCK_FILENAME
041  private static final String ACTIVE_NN_LOCK_NAME = "ActiveStandbyElectorLock";
042
043  // Value taken from org.apache.hadoop.ha.ZKFailoverController.java
044  // variable :- ZK_PARENT_ZNODE_DEFAULT and ZK_PARENT_ZNODE_KEY
045  private static final String ZK_PARENT_ZNODE_DEFAULT = "/hadoop-ha";
046  private static final String ZK_PARENT_ZNODE_KEY = "ha.zookeeper.parent-znode";
047
048  public RestartActiveNameNodeAction(long sleepTime) {
049    super(sleepTime);
050  }
051
052  @Override
053  protected Logger getLogger() {
054    return LOG;
055  }
056
057  @Override
058  public void perform() throws Exception {
059    getLogger().info("Performing action: Restart active namenode");
060    Configuration conf = CommonFSUtils.getRootDir(getConf()).getFileSystem(getConf()).getConf();
061    String nameServiceID = DFSUtil.getNamenodeNameServiceId(conf);
062    if (!HAUtil.isHAEnabled(conf, nameServiceID)) {
063      throw new Exception("HA for namenode is not enabled");
064    }
065    ZKWatcher zkw = null;
066    RecoverableZooKeeper rzk = null;
067    String activeNamenode = null;
068    String hadoopHAZkNode = conf.get(ZK_PARENT_ZNODE_KEY, ZK_PARENT_ZNODE_DEFAULT);
069    try {
070      zkw = new ZKWatcher(conf, "get-active-namenode", null);
071      rzk = zkw.getRecoverableZooKeeper();
072      String hadoopHAZkNodePath = ZNodePaths.joinZNode(hadoopHAZkNode, nameServiceID);
073      List<String> subChildern = ZKUtil.listChildrenNoWatch(zkw, hadoopHAZkNodePath);
074      for (String eachEntry : subChildern) {
075        if (eachEntry.contains(ACTIVE_NN_LOCK_NAME)) {
076          byte[] data =
077            rzk.getData(ZNodePaths.joinZNode(hadoopHAZkNodePath, ACTIVE_NN_LOCK_NAME), false, null);
078          ActiveNodeInfo proto = ActiveNodeInfo.parseFrom(data);
079          activeNamenode = proto.getHostname();
080        }
081      }
082    } finally {
083      if (zkw != null) {
084        zkw.close();
085      }
086    }
087    if (activeNamenode == null) {
088      throw new Exception("No active Name node found in zookeeper under " + hadoopHAZkNode);
089    }
090    getLogger().info("Found active namenode host:" + activeNamenode);
091    ServerName activeNNHost = ServerName.valueOf(activeNamenode, -1, -1);
092    getLogger().info("Restarting Active NameNode :" + activeNamenode);
093    restartNameNode(activeNNHost, sleepTime);
094  }
095}