001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Collections;
023import java.util.List;
024import java.util.stream.Collectors;
025
026import org.apache.hadoop.hbase.HRegionLocation;
027import org.apache.hadoop.hbase.TableName;
028import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
029import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
030import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
031import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
032import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
033import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
034import org.apache.hadoop.hbase.util.Bytes;
035import org.apache.hadoop.hbase.util.RetryCounter;
036import org.apache.yetus.audience.InterfaceAudience;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;
041
042import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
043import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ReopenTableRegionsState;
044import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ReopenTableRegionsStateData;
045import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
046
047/**
048 * Used for reopening the regions for a table.
049 */
050@InterfaceAudience.Private
051public class ReopenTableRegionsProcedure
052    extends AbstractStateMachineTableProcedure<ReopenTableRegionsState> {
053
054  private static final Logger LOG = LoggerFactory.getLogger(ReopenTableRegionsProcedure.class);
055
056  private TableName tableName;
057
058  // Specify specific regions of a table to reopen.
059  // if specified null, all regions of the table will be reopened.
060  private final List<byte[]> regionNames;
061
062  private List<HRegionLocation> regions = Collections.emptyList();
063
064  private RetryCounter retryCounter;
065
066  public ReopenTableRegionsProcedure() {
067    regionNames = null;
068  }
069
070  public ReopenTableRegionsProcedure(TableName tableName) {
071    this.tableName = tableName;
072    this.regionNames = null;
073  }
074
075  public ReopenTableRegionsProcedure(final TableName tableName,
076      final List<byte[]> regionNames) {
077    this.tableName = tableName;
078    this.regionNames = regionNames;
079  }
080
081  @Override
082  public TableName getTableName() {
083    return tableName;
084  }
085
086  @Override
087  public TableOperationType getTableOperationType() {
088    return TableOperationType.REGION_EDIT;
089  }
090
091  private boolean canSchedule(MasterProcedureEnv env, HRegionLocation loc) {
092    if (loc.getSeqNum() < 0) {
093      return false;
094    }
095    RegionStateNode regionNode =
096      env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
097    // If the region node is null, then at least in the next round we can remove this region to make
098    // progress. And the second condition is a normal one, if there are no TRSP with it then we can
099    // schedule one to make progress.
100    return regionNode == null || !regionNode.isInTransition();
101  }
102
103  @Override
104  protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state)
105      throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
106    switch (state) {
107      case REOPEN_TABLE_REGIONS_GET_REGIONS:
108        if (!env.getAssignmentManager().isTableEnabled(tableName)) {
109          LOG.info("Table {} is disabled, give up reopening its regions", tableName);
110          return Flow.NO_MORE_STATE;
111        }
112        List<HRegionLocation> tableRegions = env.getAssignmentManager()
113          .getRegionStates().getRegionsOfTableForReopen(tableName);
114        regions = getRegionLocationsForReopen(tableRegions);
115        setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
116        return Flow.HAS_MORE_STATE;
117      case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
118        for (HRegionLocation loc : regions) {
119          RegionStateNode regionNode =
120            env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
121          // this possible, maybe the region has already been merged or split, see HBASE-20921
122          if (regionNode == null) {
123            continue;
124          }
125          TransitRegionStateProcedure proc;
126          regionNode.lock();
127          try {
128            if (regionNode.getProcedure() != null) {
129              continue;
130            }
131            proc = TransitRegionStateProcedure.reopen(env, regionNode.getRegionInfo());
132            regionNode.setProcedure(proc);
133          } finally {
134            regionNode.unlock();
135          }
136          addChildProcedure(proc);
137        }
138        setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_CONFIRM_REOPENED);
139        return Flow.HAS_MORE_STATE;
140      case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED:
141        regions = regions.stream().map(env.getAssignmentManager().getRegionStates()::checkReopened)
142          .filter(l -> l != null).collect(Collectors.toList());
143        if (regions.isEmpty()) {
144          return Flow.NO_MORE_STATE;
145        }
146        if (regions.stream().anyMatch(loc -> canSchedule(env, loc))) {
147          retryCounter = null;
148          setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
149          return Flow.HAS_MORE_STATE;
150        }
151        // We can not schedule TRSP for all the regions need to reopen, wait for a while and retry
152        // again.
153        if (retryCounter == null) {
154          retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
155        }
156        long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
157        LOG.info(
158          "There are still {} region(s) which need to be reopened for table {} are in " +
159            "OPENING state, suspend {}secs and try again later",
160          regions.size(), tableName, backoff / 1000);
161        setTimeout(Math.toIntExact(backoff));
162        setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
163        skipPersistence();
164        throw new ProcedureSuspendedException();
165      default:
166        throw new UnsupportedOperationException("unhandled state=" + state);
167    }
168  }
169
170  private List<HRegionLocation> getRegionLocationsForReopen(
171      List<HRegionLocation> tableRegionsForReopen) {
172
173    List<HRegionLocation> regionsToReopen = new ArrayList<>();
174    if (CollectionUtils.isNotEmpty(regionNames) &&
175      CollectionUtils.isNotEmpty(tableRegionsForReopen)) {
176      for (byte[] regionName : regionNames) {
177        for (HRegionLocation hRegionLocation : tableRegionsForReopen) {
178          if (Bytes.equals(regionName, hRegionLocation.getRegion().getRegionName())) {
179            regionsToReopen.add(hRegionLocation);
180            break;
181          }
182        }
183      }
184    } else {
185      regionsToReopen = tableRegionsForReopen;
186    }
187    return regionsToReopen;
188  }
189
190  /**
191   * At end of timeout, wake ourselves up so we run again.
192   */
193  @Override
194  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
195    setState(ProcedureProtos.ProcedureState.RUNNABLE);
196    env.getProcedureScheduler().addFront(this);
197    return false; // 'false' means that this procedure handled the timeout
198  }
199
200  @Override
201  protected void rollbackState(MasterProcedureEnv env, ReopenTableRegionsState state)
202      throws IOException, InterruptedException {
203    throw new UnsupportedOperationException("unhandled state=" + state);
204  }
205
206  @Override
207  protected ReopenTableRegionsState getState(int stateId) {
208    return ReopenTableRegionsState.forNumber(stateId);
209  }
210
211  @Override
212  protected int getStateId(ReopenTableRegionsState state) {
213    return state.getNumber();
214  }
215
216  @Override
217  protected ReopenTableRegionsState getInitialState() {
218    return ReopenTableRegionsState.REOPEN_TABLE_REGIONS_GET_REGIONS;
219  }
220
221  @Override
222  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
223    super.serializeStateData(serializer);
224    ReopenTableRegionsStateData.Builder builder = ReopenTableRegionsStateData.newBuilder()
225      .setTableName(ProtobufUtil.toProtoTableName(tableName));
226    regions.stream().map(ProtobufUtil::toRegionLocation).forEachOrdered(builder::addRegion);
227    serializer.serialize(builder.build());
228  }
229
230  @Override
231  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
232    super.deserializeStateData(serializer);
233    ReopenTableRegionsStateData data = serializer.deserialize(ReopenTableRegionsStateData.class);
234    tableName = ProtobufUtil.toTableName(data.getTableName());
235    regions = data.getRegionList().stream().map(ProtobufUtil::toRegionLocation)
236      .collect(Collectors.toList());
237  }
238}