/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.procedure;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
import org.apache.hadoop.hbase.util.RetryCounter;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ReopenTableRegionsState;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ReopenTableRegionsStateData;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;

/**
 * Used for reopening the regions for a table.
 * <p>
 * The procedure walks three states: it first collects the regions of the table that should be
 * reopened, then schedules a {@link TransitRegionStateProcedure} child for each of them, and
 * finally confirms the result. The confirm step either finishes the procedure, loops back to the
 * scheduling step, or suspends the procedure with a backoff timeout before checking again.
 */
@InterfaceAudience.Private
public class ReopenTableRegionsProcedure
    extends AbstractStateMachineTableProcedure<ReopenTableRegionsState> {

  private static final Logger LOG = LoggerFactory.getLogger(ReopenTableRegionsProcedure.class);

  /** The table whose regions are being reopened. */
  private TableName tableName;

  /** The locations still tracked by this procedure; persisted as part of the state data. */
  private List<HRegionLocation> regions = Collections.emptyList();

  /** Backoff bookkeeping for the confirm step; {@code null} when no retry is in progress. */
  private RetryCounter retryCounter;

  /**
   * No-argument constructor; leaves the table name unset until
   * {@link #deserializeStateData(ProcedureStateSerializer)} fills it in.
   */
  public ReopenTableRegionsProcedure() {
  }

  /**
   * @param tableName the table whose regions should be reopened
   */
  public ReopenTableRegionsProcedure(TableName tableName) {
    this.tableName = tableName;
  }

  /**
   * @return the table this procedure operates on
   */
  @Override
  public TableName getTableName() {
    return tableName;
  }

  /**
   * @return always {@link TableOperationType#REGION_EDIT}
   */
  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_EDIT;
  }

  /**
   * Decides whether another scheduling round could make progress for the given location.
   * @param env the master procedure environment
   * @param loc the location of a region that is still tracked for reopen
   * @return {@code false} if the location has a negative sequence number; otherwise {@code true}
   *         when no region state node exists for the region or the node is not in transition
   */
  private boolean canSchedule(MasterProcedureEnv env, HRegionLocation loc) {
    if (loc.getSeqNum() < 0) {
      return false;
    }
    RegionStateNode node =
      env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
    if (node == null) {
      // Without a region node, the next round can at least drop this region, which is progress.
      return true;
    }
    // The normal case: if the region is not in transition we can schedule a TRSP for it.
    return !node.isInTransition();
  }

  /**
   * Runs the step that belongs to {@code state} by delegating to the matching helper.
   * @param env the master procedure environment
   * @param state the state to execute
   * @return whether the state machine has more states to run
   * @throws ProcedureSuspendedException when the confirm step decides to wait and retry later
   * @throws UnsupportedOperationException if {@code state} is not one of the handled states
   */
  @Override
  protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state)
      throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
    switch (state) {
      case REOPEN_TABLE_REGIONS_GET_REGIONS:
        return collectRegions(env);
      case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
        return scheduleReopens(env);
      case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED:
        return confirmReopened(env);
      default:
        throw new UnsupportedOperationException("unhandled state=" + state);
    }
  }

  /**
   * GET_REGIONS step: loads the regions to reopen, or gives up when the table is not enabled.
   * @param env the master procedure environment
   * @return {@link Flow#HAS_MORE_STATE} after loading the regions, or
   *         {@link Flow#NO_MORE_STATE} if the table is not enabled
   */
  private Flow collectRegions(MasterProcedureEnv env) {
    if (env.getAssignmentManager().isTableEnabled(tableName)) {
      regions =
        env.getAssignmentManager().getRegionStates().getRegionsOfTableForReopen(tableName);
      setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
      return Flow.HAS_MORE_STATE;
    }
    LOG.info("Table {} is disabled, give up reopening its regions", tableName);
    return Flow.NO_MORE_STATE;
  }

  /**
   * REOPEN_REGIONS step: adds a reopen child procedure for every tracked region that has a region
   * state node and no procedure attached to that node yet.
   * @param env the master procedure environment
   * @return always {@link Flow#HAS_MORE_STATE}; the next state is the confirm step
   */
  private Flow scheduleReopens(MasterProcedureEnv env) {
    for (HRegionLocation location : regions) {
      RegionStateNode node =
        env.getAssignmentManager().getRegionStates().getRegionStateNode(location.getRegion());
      if (node == null) {
        // This is possible: the region may already have been merged or split, see HBASE-20921.
        continue;
      }
      TransitRegionStateProcedure reopenProc;
      // The check for an attached procedure and the attach itself happen under the node lock.
      node.lock();
      try {
        if (node.getProcedure() != null) {
          // Another procedure is already attached to this region; leave it alone this round.
          continue;
        }
        reopenProc = TransitRegionStateProcedure.reopen(env, node.getRegionInfo());
        node.setProcedure(reopenProc);
      } finally {
        node.unlock();
      }
      addChildProcedure(reopenProc);
    }
    setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_CONFIRM_REOPENED);
    return Flow.HAS_MORE_STATE;
  }

  /**
   * CONFIRM_REOPENED step: replaces the tracked regions with the non-null results of
   * {@code checkReopened} and then decides how to continue.
   * @param env the master procedure environment
   * @return {@link Flow#NO_MORE_STATE} if nothing is left to track, or
   *         {@link Flow#HAS_MORE_STATE} if at least one remaining region can be scheduled
   * @throws ProcedureSuspendedException if none of the remaining regions can be scheduled right
   *           now; the procedure is put into WAITING_TIMEOUT with a backoff before that
   */
  private Flow confirmReopened(MasterProcedureEnv env) throws ProcedureSuspendedException {
    regions = regions.stream()
      .map(env.getAssignmentManager().getRegionStates()::checkReopened)
      .filter(remaining -> remaining != null)
      .collect(Collectors.toList());
    if (regions.isEmpty()) {
      return Flow.NO_MORE_STATE;
    }

    // Stop at the first region for which a new round can make progress.
    boolean anySchedulable = false;
    for (HRegionLocation remaining : regions) {
      if (canSchedule(env, remaining)) {
        anySchedulable = true;
        break;
      }
    }
    if (anySchedulable) {
      // Progress is possible again, so any previous backoff sequence is discarded.
      retryCounter = null;
      setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
      return Flow.HAS_MORE_STATE;
    }

    // None of the regions that still need a reopen can get a TRSP scheduled right now, so wait
    // for a while and retry.
    if (retryCounter == null) {
      retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
    }
    long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
    LOG.info(
      "There are still {} region(s) which need to be reopened for table {} are in " +
        "OPENING state, suspend {}secs and try again later",
      regions.size(), tableName, backoff / 1000);
    setTimeout(Math.toIntExact(backoff));
    setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
    skipPersistence();
    throw new ProcedureSuspendedException();
  }

  /**
   * At end of timeout, wake ourselves up so we run again: the procedure is marked RUNNABLE and
   * put at the front of the procedure scheduler.
   * @param env the master procedure environment
   * @return always {@code false}
   */
  @Override
  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
    setState(ProcedureProtos.ProcedureState.RUNNABLE);
    env.getProcedureScheduler().addFront(this);
    // 'false' means that this procedure handled the timeout
    return false;
  }

  /**
   * Rollback is not supported by this procedure.
   * @throws UnsupportedOperationException always
   */
  @Override
  protected void rollbackState(MasterProcedureEnv env, ReopenTableRegionsState state)
      throws IOException, InterruptedException {
    throw new UnsupportedOperationException("unhandled state=" + state);
  }

  /**
   * @param stateId the numeric id of a state
   * @return the result of {@link ReopenTableRegionsState#forNumber(int)} for that id
   */
  @Override
  protected ReopenTableRegionsState getState(int stateId) {
    return ReopenTableRegionsState.forNumber(stateId);
  }

  /**
   * @param state a state of this procedure
   * @return the number of the given state
   */
  @Override
  protected int getStateId(ReopenTableRegionsState state) {
    return state.getNumber();
  }

  /**
   * @return the state the procedure starts in, the GET_REGIONS step
   */
  @Override
  protected ReopenTableRegionsState getInitialState() {
    return ReopenTableRegionsState.REOPEN_TABLE_REGIONS_GET_REGIONS;
  }

  /**
   * Writes the parent state followed by the table name and the tracked region locations, in list
   * order.
   * @param serializer the sink for the state data
   * @throws IOException if serialization fails
   */
  @Override
  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.serializeStateData(serializer);
    ReopenTableRegionsStateData.Builder stateData = ReopenTableRegionsStateData.newBuilder();
    stateData.setTableName(ProtobufUtil.toProtoTableName(tableName));
    for (HRegionLocation location : regions) {
      stateData.addRegion(ProtobufUtil.toRegionLocation(location));
    }
    serializer.serialize(stateData.build());
  }

  /**
   * Reads the parent state and then restores the table name and the tracked region locations.
   * @param serializer the source of the state data
   * @throws IOException if deserialization fails
   */
  @Override
  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
    super.deserializeStateData(serializer);
    ReopenTableRegionsStateData stateData =
      serializer.deserialize(ReopenTableRegionsStateData.class);
    tableName = ProtobufUtil.toTableName(stateData.getTableName());
    regions = stateData.getRegionList().stream()
      .map(ProtobufUtil::toRegionLocation)
      .collect(Collectors.toList());
  }
}