/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer;
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * <p>Contains a set of methods for the collaboration between the start/stop scripts and the
 * servers. It allows the znode to be deleted immediately when the master or the region server
 * crashes. The region server / master writes a specific file when it starts / becomes main
 * master. When they end properly, they delete the file.</p>
 * <p>In the script, we check for the existence of these files when the program ends. If they still
 * exist we conclude that the server crashed, likely without deleting their znode. To have a faster
 * recovery we delete immediately the znode.</p>
 * <p>The strategy depends on the server type. For a region server we store the znode path in the
 * file, and use it to delete it. For a master, as the znode path is constant whatever the server,
 * we check its content to make sure that the backup server is not now in charge.</p>
 */
@InterfaceAudience.Private
public final class ZNodeClearer {
  private static final Logger LOG = LoggerFactory.getLogger(ZNodeClearer.class);

  private ZNodeClearer() {}

  /**
   * Writes {@code fileContent}, followed by a newline, to the file named by the
   * HBASE_ZNODE_FILE environment variable. Logs the errors without failing on exception.
   * @param fileContent the single line to store in the znode file
   */
  public static void writeMyEphemeralNodeOnDisk(String fileContent) {
    String fileName = ZNodeClearer.getMyEphemeralNodeFileName();
    if (fileName == null) {
      LOG.warn("Environment variable HBASE_ZNODE_FILE not set; znodes will not be cleared " +
        "on crash by start scripts (Longer MTTR!)");
      return;
    }

    // Both writers are declared as resources so each is closed even if the other's close() throws.
    try (FileWriter fstream = new FileWriter(fileName);
        BufferedWriter out = new BufferedWriter(fstream)) {
      out.write(fileContent + "\n");
    } catch (IOException e) {
      LOG.warn("Can't write znode file {}", fileName, e);
    }
  }

  /**
   * Reads the content of the znode file; expects a single line.
   * @return the first line of the znode file, or null if the file is empty
   * @throws FileNotFoundException if HBASE_ZNODE_FILE is not set or the file cannot be opened
   * @throws IOException if reading the file fails
   */
  public static String readMyEphemeralNodeOnDisk() throws IOException {
    String fileName = getMyEphemeralNodeFileName();
    if (fileName == null) {
      throw new FileNotFoundException("No filename; set environment variable HBASE_ZNODE_FILE");
    }
    try (FileReader znodeFile = new FileReader(fileName);
        BufferedReader br = new BufferedReader(znodeFile)) {
      return br.readLine();
    }
  }

  /**
   * Get the name of the file used to store the znode contents.
   * @return the value of the HBASE_ZNODE_FILE environment variable, or null if it is not set
   */
  public static String getMyEphemeralNodeFileName() {
    return System.getenv("HBASE_ZNODE_FILE");
  }

  /**
   * Deletes the znode file, if HBASE_ZNODE_FILE is set. A failed deletion is logged, not thrown.
   */
  public static void deleteMyEphemeralNodeOnDisk() {
    String fileName = getMyEphemeralNodeFileName();
    if (fileName == null) {
      return;
    }

    File znodeFile = new File(fileName);
    // delete() also returns false when the file was never there; only warn if it is still present.
    if (!znodeFile.delete() && znodeFile.exists()) {
      LOG.warn("Can't delete znode file {}", fileName);
    }
  }

  /**
   * See HBASE-14861. We are extracting master ServerName from rsZnodePath
   * example: "/hbase/rs/server.example.com,16020,1448266496481"
   * @param rsZnodePath from HBASE_ZNODE_FILE
   * @return String representation of ServerName or null if fails
   */
  public static String parseMasterServerName(String rsZnodePath) {
    if (rsZnodePath == null) {
      LOG.warn("Can't parse master ServerName from a null znode path");
      return null;
    }
    // split() drops trailing empty strings, so a path made only of '/' yields an empty array.
    String[] rsZnodeParts = rsZnodePath.split("/");
    if (rsZnodeParts.length == 0) {
      LOG.warn("String " + rsZnodePath + " has wrong format");
      return null;
    }
    return rsZnodeParts[rsZnodeParts.length - 1];
  }

  /**
   * @return true if cluster is configured with master-rs collocation, i.e. unless the
   *   {@link BaseLoadBalancer#TABLES_ON_MASTER} setting is "none" (case-insensitive)
   */
  private static boolean tablesOnMaster(Configuration conf) {
    String confValue = conf.get(BaseLoadBalancer.TABLES_ON_MASTER);
    // Null-safe: an unset value counts as collocation enabled.
    return !"none".equalsIgnoreCase(confValue);
  }

  /**
   * Delete the master znode if its content (ServerName string) is the same
   * as the one in the znode file. (env: HBASE_ZNODE_FILE). In case of master-rs
   * collocation we extract ServerName string from rsZnode path. (HBASE-14861)
   * @param conf the configuration used to connect to ZooKeeper; it is copied, not modified
   * @return true on successful deletion (or when there is no znode file at all), false otherwise.
   */
  public static boolean clear(Configuration conf) {
    Configuration tempConf = new Configuration(conf);
    tempConf.setInt("zookeeper.recovery.retry", 0);

    ZKWatcher zkw;
    try {
      zkw = new ZKWatcher(tempConf, "clean znode for master",
        new Abortable() {
          @Override public void abort(String why, Throwable e) {}
          @Override public boolean isAborted() { return false; }
        });
    } catch (IOException e) {
      LOG.warn("Can't connect to zookeeper to read the master znode", e);
      return false;
    }

    try {
      String znodeFileContent = ZNodeClearer.readMyEphemeralNodeOnDisk();
      if (znodeFileContent == null) {
        // An empty file gives us nothing to compare against; bail out instead of passing null on.
        LOG.warn("The znode file is empty; can't determine which znode to clear");
        return false;
      }
      if (ZNodeClearer.tablesOnMaster(conf)) {
        // In case of master crash also remove rsZnode since master is also regionserver
        ZKUtil.deleteNodeFailSilent(zkw,
          ZNodePaths.joinZNode(zkw.znodePaths.rsZNode, znodeFileContent));
        return MasterAddressTracker.deleteIfEquals(zkw,
          ZNodeClearer.parseMasterServerName(znodeFileContent));
      } else {
        return MasterAddressTracker.deleteIfEquals(zkw, znodeFileContent);
      }
    } catch (FileNotFoundException fnfe) {
      // If no file, just keep going -- return success.
      LOG.warn("Can't find the znode file; presume non-fatal", fnfe);
      return true;
    } catch (IOException e) {
      LOG.warn("Can't read the content of the znode file", e);
      return false;
    } catch (KeeperException e) {
      LOG.warn("ZooKeeper exception deleting znode", e);
      return false;
    } finally {
      zkw.close();
    }
  }
}