001/** 002 * 003 * Licensed to the Apache Software Foundation (ASF) under one 004 * or more contributor license agreements. See the NOTICE file 005 * distributed with this work for additional information 006 * regarding copyright ownership. The ASF licenses this file 007 * to you under the Apache License, Version 2.0 (the 008 * "License"); you may not use this file except in compliance 009 * with the License. You may obtain a copy of the License at 010 * 011 * http://www.apache.org/licenses/LICENSE-2.0 012 * 013 * Unless required by applicable law or agreed to in writing, software 014 * distributed under the License is distributed on an "AS IS" BASIS, 015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 016 * See the License for the specific language governing permissions and 017 * limitations under the License. 018 */ 019package org.apache.hadoop.hbase.util; 020 021import java.io.IOException; 022import java.util.Collection; 023import java.util.EnumSet; 024import java.util.List; 025import java.util.Random; 026import org.apache.hadoop.conf.Configuration; 027import org.apache.hadoop.fs.Path; 028import org.apache.hadoop.hbase.ClusterMetrics.Option; 029import org.apache.hadoop.hbase.MetaTableAccessor; 030import org.apache.hadoop.hbase.ServerName; 031import org.apache.hadoop.hbase.TableName; 032import org.apache.hadoop.hbase.ZooKeeperConnectionException; 033import org.apache.hadoop.hbase.client.Admin; 034import org.apache.hadoop.hbase.client.ClusterConnection; 035import org.apache.hadoop.hbase.client.Connection; 036import org.apache.hadoop.hbase.client.ConnectionFactory; 037import org.apache.hadoop.hbase.client.Put; 038import org.apache.hadoop.hbase.client.RegionInfo; 039import org.apache.hadoop.hbase.client.Table; 040import org.apache.hadoop.hbase.client.TableDescriptor; 041import org.apache.hadoop.hbase.master.RegionState; 042import org.apache.hadoop.hbase.master.ServerManager; 043import org.apache.hadoop.hbase.regionserver.HRegion; 044import org.apache.yetus.audience.InterfaceAudience; 045import org.apache.zookeeper.KeeperException; 046import org.slf4j.Logger; 047import org.slf4j.LoggerFactory; 048 049/** 050 * This class contains helper methods that repair parts of hbase's filesystem 051 * contents. 052 */ 053@InterfaceAudience.Private 054public class HBaseFsckRepair { 055 private static final Logger LOG = LoggerFactory.getLogger(HBaseFsckRepair.class); 056 057 /** 058 * Fix multiple assignment by doing silent closes on each RS hosting the region 059 * and then force ZK unassigned node to OFFLINE to trigger assignment by 060 * master. 061 * 062 * @param connection HBase connection to the cluster 063 * @param region Region to undeploy 064 * @param servers list of Servers to undeploy from 065 */ 066 public static void fixMultiAssignment(Connection connection, RegionInfo region, 067 List<ServerName> servers) 068 throws IOException, KeeperException, InterruptedException { 069 // Close region on the servers silently 070 for(ServerName server : servers) { 071 closeRegionSilentlyAndWait(connection, server, region); 072 } 073 074 // Force ZK node to OFFLINE so master assigns 075 forceOfflineInZK(connection.getAdmin(), region); 076 } 077 078 /** 079 * Fix unassigned by creating/transition the unassigned ZK node for this 080 * region to OFFLINE state with a special flag to tell the master that this is 081 * a forced operation by HBCK. 082 * 083 * This assumes that info is in META. 084 * 085 * @param admin 086 * @param region 087 * @throws IOException 088 * @throws KeeperException 089 */ 090 public static void fixUnassigned(Admin admin, RegionInfo region) 091 throws IOException, KeeperException, InterruptedException { 092 // Force ZK node to OFFLINE so master assigns 093 forceOfflineInZK(admin, region); 094 } 095 096 /** 097 * In 0.90, this forces an HRI offline by setting the RegionTransitionData 098 * in ZK to have HBCK_CODE_NAME as the server. This is a special case in 099 * the AssignmentManager that attempts an assign call by the master. 100 * 101 * This doesn't seem to work properly in the updated version of 0.92+'s hbck 102 * so we use assign to force the region into transition. This has the 103 * side-effect of requiring a RegionInfo that considers regionId (timestamp) 104 * in comparators that is addressed by HBASE-5563. 105 */ 106 private static void forceOfflineInZK(Admin admin, final RegionInfo region) 107 throws ZooKeeperConnectionException, KeeperException, IOException, InterruptedException { 108 admin.assign(region.getRegionName()); 109 } 110 111 /* 112 * Should we check all assignments or just not in RIT? 113 */ 114 public static void waitUntilAssigned(Admin admin, 115 RegionInfo region) throws IOException, InterruptedException { 116 long timeout = admin.getConfiguration().getLong("hbase.hbck.assign.timeout", 120000); 117 long expiration = timeout + EnvironmentEdgeManager.currentTime(); 118 while (EnvironmentEdgeManager.currentTime() < expiration) { 119 try { 120 boolean inTransition = false; 121 for (RegionState rs : admin.getClusterMetrics(EnumSet.of(Option.REGIONS_IN_TRANSITION)) 122 .getRegionStatesInTransition()) { 123 if (RegionInfo.COMPARATOR.compare(rs.getRegion(), region) == 0) { 124 inTransition = true; 125 break; 126 } 127 } 128 if (!inTransition) { 129 // yay! no longer RIT 130 return; 131 } 132 // still in rit 133 LOG.info("Region still in transition, waiting for " 134 + "it to become assigned: " + region); 135 } catch (IOException e) { 136 LOG.warn("Exception when waiting for region to become assigned," 137 + " retrying", e); 138 } 139 Thread.sleep(1000); 140 } 141 throw new IOException("Region " + region + " failed to move out of " + 142 "transition within timeout " + timeout + "ms"); 143 } 144 145 /** 146 * Contacts a region server and waits up to hbase.hbck.close.timeout ms 147 * (default 120s) to close the region. This bypasses the active hmaster. 148 */ 149 @SuppressWarnings("deprecation") 150 public static void closeRegionSilentlyAndWait(Connection connection, 151 ServerName server, RegionInfo region) throws IOException, InterruptedException { 152 long timeout = connection.getConfiguration() 153 .getLong("hbase.hbck.close.timeout", 120000); 154 ServerManager.closeRegionSilentlyAndWait((ClusterConnection)connection, server, 155 region, timeout); 156 } 157 158 /** 159 * Puts the specified RegionInfo into META with replica related columns 160 */ 161 public static void fixMetaHoleOnlineAndAddReplicas(Configuration conf, 162 RegionInfo hri, Collection<ServerName> servers, int numReplicas) throws IOException { 163 Connection conn = ConnectionFactory.createConnection(conf); 164 Table meta = conn.getTable(TableName.META_TABLE_NAME); 165 Put put = MetaTableAccessor.makePutFromRegionInfo(hri, EnvironmentEdgeManager.currentTime()); 166 if (numReplicas > 1) { 167 Random r = new Random(); 168 ServerName[] serversArr = servers.toArray(new ServerName[servers.size()]); 169 for (int i = 1; i < numReplicas; i++) { 170 ServerName sn = serversArr[r.nextInt(serversArr.length)]; 171 // the column added here is just to make sure the master is able to 172 // see the additional replicas when it is asked to assign. The 173 // final value of these columns will be different and will be updated 174 // by the actual regionservers that start hosting the respective replicas 175 MetaTableAccessor.addLocation(put, sn, sn.getStartcode(), i); 176 } 177 } 178 meta.put(put); 179 meta.close(); 180 conn.close(); 181 } 182 183 /** 184 * Creates, flushes, and closes a new region. 185 */ 186 public static HRegion createHDFSRegionDir(Configuration conf, 187 RegionInfo hri, TableDescriptor htd) throws IOException { 188 // Create HRegion 189 Path root = FSUtils.getRootDir(conf); 190 HRegion region = HRegion.createHRegion(hri, root, conf, htd, null); 191 192 // Close the new region to flush to disk. Close log file too. 193 region.close(); 194 return region; 195 } 196 197 /* 198 * Remove parent 199 */ 200 public static void removeParentInMeta(Configuration conf, RegionInfo hri) throws IOException { 201 Connection conn = ConnectionFactory.createConnection(conf); 202 MetaTableAccessor.deleteRegionInfo(conn, hri); 203 } 204}