001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import java.io.BufferedReader;
021import java.io.BufferedWriter;
022import java.io.File;
023import java.io.FileNotFoundException;
024import java.io.FileReader;
025import java.io.FileWriter;
026import java.io.IOException;
027import org.apache.hadoop.conf.Configuration;
028import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
029import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
030import org.apache.hadoop.hbase.zookeeper.ZKUtil;
031import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
032import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
033import org.apache.yetus.audience.InterfaceAudience;
034import org.apache.zookeeper.KeeperException;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038/**
039 * <p>
040 * Contains a set of methods for the collaboration between the start/stop scripts and the servers.
041 * It allows to delete immediately the znode when the master or the regions server crashes. The
042 * region server / master writes a specific file when it starts / becomes main master. When they end
043 * properly, they delete the file.
044 * </p>
045 * <p>
046 * In the script, we check for the existence of these files when the program ends. If they still
047 * exist we conclude that the server crashed, likely without deleting their znode. To have a faster
048 * recovery we delete immediately the znode.
049 * </p>
050 * <p>
051 * The strategy depends on the server type. For a region server we store the znode path in the file,
052 * and use it to delete it. for a master, as the znode path constant whatever the server, we check
053 * its content to make sure that the backup server is not now in charge.
054 * </p>
055 */
056@InterfaceAudience.Private
057public final class ZNodeClearer {
058  private static final Logger LOG = LoggerFactory.getLogger(ZNodeClearer.class);
059
060  private ZNodeClearer() {
061  }
062
063  /**
064   * Logs the errors without failing on exception.
065   */
066  public static void writeMyEphemeralNodeOnDisk(String fileContent) {
067    String fileName = ZNodeClearer.getMyEphemeralNodeFileName();
068    if (fileName == null) {
069      LOG.warn("Environment variable HBASE_ZNODE_FILE not set; znodes will not be cleared "
070        + "on crash by start scripts (Longer MTTR!)");
071      return;
072    }
073
074    FileWriter fstream;
075    try {
076      fstream = new FileWriter(fileName);
077    } catch (IOException e) {
078      LOG.warn("Can't write znode file " + fileName, e);
079      return;
080    }
081
082    BufferedWriter out = new BufferedWriter(fstream);
083
084    try {
085      try {
086        out.write(fileContent + "\n");
087      } finally {
088        try {
089          out.close();
090        } finally {
091          fstream.close();
092        }
093      }
094    } catch (IOException e) {
095      LOG.warn("Can't write znode file " + fileName, e);
096    }
097  }
098
099  /**
100   * read the content of znode file, expects a single line.
101   */
102  public static String readMyEphemeralNodeOnDisk() throws IOException {
103    String fileName = getMyEphemeralNodeFileName();
104    if (fileName == null) {
105      throw new FileNotFoundException("No filename; set environment variable HBASE_ZNODE_FILE");
106    }
107    FileReader znodeFile = new FileReader(fileName);
108    BufferedReader br = null;
109    try {
110      br = new BufferedReader(znodeFile);
111      String file_content = br.readLine();
112      return file_content;
113    } finally {
114      if (br != null) br.close();
115    }
116  }
117
118  /**
119   * Get the name of the file used to store the znode contents
120   */
121  public static String getMyEphemeralNodeFileName() {
122    return System.getenv().get("HBASE_ZNODE_FILE");
123  }
124
125  /**
126   * delete the znode file
127   */
128  public static void deleteMyEphemeralNodeOnDisk() {
129    String fileName = getMyEphemeralNodeFileName();
130
131    if (fileName != null) {
132      new File(fileName).delete();
133    }
134  }
135
136  /**
137   * See HBASE-14861. We are extracting master ServerName from rsZnodePath example:
138   * "/hbase/rs/server.example.com,16020,1448266496481"
139   * @param rsZnodePath from HBASE_ZNODE_FILE
140   * @return String representation of ServerName or null if fails
141   */
142
143  public static String parseMasterServerName(String rsZnodePath) {
144    String masterServerName = null;
145    try {
146      String[] rsZnodeParts = rsZnodePath.split("/");
147      masterServerName = rsZnodeParts[rsZnodeParts.length - 1];
148    } catch (IndexOutOfBoundsException e) {
149      LOG.warn("String " + rsZnodePath + " has wrong format", e);
150    }
151    return masterServerName;
152  }
153
154  /**
155   * @return true if cluster is configured with master-rs collocation
156   * @deprecated since 2.4.0, will be removed in 3.0.0.
157   * @see <a href="https://issues.apache.org/jira/browse/HBASE-15549">HBASE-15549</a>
158   */
159  @Deprecated
160  private static boolean tablesOnMaster(Configuration conf) {
161    boolean tablesOnMaster = true;
162    String confValue = conf.get(BaseLoadBalancer.TABLES_ON_MASTER);
163    if (confValue != null && confValue.equalsIgnoreCase("none")) {
164      tablesOnMaster = false;
165    }
166    return tablesOnMaster;
167  }
168
169  /**
170   * Delete the master znode if its content (ServerName string) is the same as the one in the znode
171   * file. (env: HBASE_ZNODE_FILE). I case of master-rs colloaction we extract ServerName string
172   * from rsZnode path.(HBASE-14861)
173   * @return true on successful deletion, false otherwise.
174   */
175  public static boolean clear(Configuration conf) {
176    Configuration tempConf = new Configuration(conf);
177    tempConf.setInt("zookeeper.recovery.retry", 0);
178
179    ZKWatcher zkw;
180    try {
181      zkw = new ZKWatcher(tempConf, "clean znode for master", new Abortable() {
182        @Override
183        public void abort(String why, Throwable e) {
184        }
185
186        @Override
187        public boolean isAborted() {
188          return false;
189        }
190      });
191    } catch (IOException e) {
192      LOG.warn("Can't connect to zookeeper to read the master znode", e);
193      return false;
194    }
195
196    String znodeFileContent;
197    try {
198      znodeFileContent = ZNodeClearer.readMyEphemeralNodeOnDisk();
199      if (ZNodeClearer.tablesOnMaster(conf)) {
200        // In case of master crash also remove rsZnode since master is also regionserver
201        ZKUtil.deleteNodeFailSilent(zkw,
202          ZNodePaths.joinZNode(zkw.getZNodePaths().rsZNode, znodeFileContent));
203        return MasterAddressTracker.deleteIfEquals(zkw,
204          ZNodeClearer.parseMasterServerName(znodeFileContent));
205      } else {
206        return MasterAddressTracker.deleteIfEquals(zkw, znodeFileContent);
207      }
208    } catch (FileNotFoundException fnfe) {
209      // If no file, just keep going -- return success.
210      LOG.warn("Can't find the znode file; presume non-fatal", fnfe);
211      return true;
212    } catch (IOException e) {
213      LOG.warn("Can't read the content of the znode file", e);
214      return false;
215    } catch (KeeperException e) {
216      LOG.warn("ZooKeeper exception deleting znode", e);
217      return false;
218    } finally {
219      zkw.close();
220    }
221  }
222}