001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Collections;
023import java.util.List;
024import java.util.stream.Collectors;
025import org.apache.hadoop.hbase.HRegionLocation;
026import org.apache.hadoop.hbase.TableName;
027import org.apache.hadoop.hbase.master.assignment.RegionStateNode;
028import org.apache.hadoop.hbase.master.assignment.TransitRegionStateProcedure;
029import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
030import org.apache.hadoop.hbase.procedure2.ProcedureSuspendedException;
031import org.apache.hadoop.hbase.procedure2.ProcedureUtil;
032import org.apache.hadoop.hbase.procedure2.ProcedureYieldException;
033import org.apache.hadoop.hbase.util.Bytes;
034import org.apache.hadoop.hbase.util.RetryCounter;
035import org.apache.yetus.audience.InterfaceAudience;
036import org.slf4j.Logger;
037import org.slf4j.LoggerFactory;
038
039import org.apache.hbase.thirdparty.com.google.protobuf.ByteString;
040import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils;
041
042import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
043import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ReopenTableRegionsState;
044import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.ReopenTableRegionsStateData;
045import org.apache.hadoop.hbase.shaded.protobuf.generated.ProcedureProtos;
046
047/**
048 * Used for reopening the regions for a table.
049 */
050@InterfaceAudience.Private
051public class ReopenTableRegionsProcedure
052  extends AbstractStateMachineTableProcedure<ReopenTableRegionsState> {
053
  private static final Logger LOG = LoggerFactory.getLogger(ReopenTableRegionsProcedure.class);

  // Table whose regions are reopened. Written to and restored from the procedure state data.
  private TableName tableName;

  // Names of the specific regions of the table to reopen.
  // An empty list means that all regions of the table will be reopened. A null value, which the
  // two-argument constructor does not reject, is treated the same way as an empty list.
  private List<byte[]> regionNames;

  // Locations of the regions that are still to be reopened. Filled in by the GET_REGIONS state,
  // narrowed down by the CONFIRM_REOPENED state, and written to the procedure state data.
  private List<HRegionLocation> regions = Collections.emptyList();

  // Backoff counter used while no region can be scheduled. Created lazily, reset to null as soon
  // as a region can be scheduled again, and not part of the serialized state data.
  private RetryCounter retryCounter;
065
  /**
   * Creates a procedure with no table name and an empty region name list.
   * <p>
   * NOTE(review): presumably this constructor exists so that the procedure framework can
   * instantiate the class before calling {@code deserializeStateData}, which populates the table
   * name, the regions and the region names -- confirm against the procedure framework.
   */
  public ReopenTableRegionsProcedure() {
    regionNames = Collections.emptyList();
  }
069
070  public ReopenTableRegionsProcedure(TableName tableName) {
071    this.tableName = tableName;
072    this.regionNames = Collections.emptyList();
073  }
074
075  public ReopenTableRegionsProcedure(final TableName tableName, final List<byte[]> regionNames) {
076    this.tableName = tableName;
077    this.regionNames = regionNames;
078  }
079
  /**
   * @return the table whose regions this procedure reopens
   */
  @Override
  public TableName getTableName() {
    return tableName;
  }
084
  /**
   * @return {@link TableOperationType#REGION_EDIT}, the operation type reported for this procedure
   */
  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.REGION_EDIT;
  }
089
090  private boolean canSchedule(MasterProcedureEnv env, HRegionLocation loc) {
091    if (loc.getSeqNum() < 0) {
092      return false;
093    }
094    RegionStateNode regionNode =
095      env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
096    // If the region node is null, then at least in the next round we can remove this region to make
097    // progress. And the second condition is a normal one, if there are no TRSP with it then we can
098    // schedule one to make progress.
099    return regionNode == null || !regionNode.isInTransition();
100  }
101
102  @Override
103  protected Flow executeFromState(MasterProcedureEnv env, ReopenTableRegionsState state)
104    throws ProcedureSuspendedException, ProcedureYieldException, InterruptedException {
105    switch (state) {
106      case REOPEN_TABLE_REGIONS_GET_REGIONS:
107        if (!isTableEnabled(env)) {
108          LOG.info("Table {} is disabled, give up reopening its regions", tableName);
109          return Flow.NO_MORE_STATE;
110        }
111        List<HRegionLocation> tableRegions =
112          env.getAssignmentManager().getRegionStates().getRegionsOfTableForReopen(tableName);
113        regions = getRegionLocationsForReopen(tableRegions);
114        setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
115        return Flow.HAS_MORE_STATE;
116      case REOPEN_TABLE_REGIONS_REOPEN_REGIONS:
117        for (HRegionLocation loc : regions) {
118          RegionStateNode regionNode =
119            env.getAssignmentManager().getRegionStates().getRegionStateNode(loc.getRegion());
120          // this possible, maybe the region has already been merged or split, see HBASE-20921
121          if (regionNode == null) {
122            continue;
123          }
124          TransitRegionStateProcedure proc;
125          regionNode.lock();
126          try {
127            if (regionNode.getProcedure() != null) {
128              continue;
129            }
130            proc = TransitRegionStateProcedure.reopen(env, regionNode.getRegionInfo());
131            regionNode.setProcedure(proc);
132          } finally {
133            regionNode.unlock();
134          }
135          addChildProcedure(proc);
136        }
137        setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_CONFIRM_REOPENED);
138        return Flow.HAS_MORE_STATE;
139      case REOPEN_TABLE_REGIONS_CONFIRM_REOPENED:
140        regions = regions.stream().map(env.getAssignmentManager().getRegionStates()::checkReopened)
141          .filter(l -> l != null).collect(Collectors.toList());
142        if (regions.isEmpty()) {
143          return Flow.NO_MORE_STATE;
144        }
145        if (regions.stream().anyMatch(loc -> canSchedule(env, loc))) {
146          retryCounter = null;
147          setNextState(ReopenTableRegionsState.REOPEN_TABLE_REGIONS_REOPEN_REGIONS);
148          return Flow.HAS_MORE_STATE;
149        }
150        // We can not schedule TRSP for all the regions need to reopen, wait for a while and retry
151        // again.
152        if (retryCounter == null) {
153          retryCounter = ProcedureUtil.createRetryCounter(env.getMasterConfiguration());
154        }
155        long backoff = retryCounter.getBackoffTimeAndIncrementAttempts();
156        LOG.info(
157          "There are still {} region(s) which need to be reopened for table {} are in "
158            + "OPENING state, suspend {}secs and try again later",
159          regions.size(), tableName, backoff / 1000);
160        setTimeout(Math.toIntExact(backoff));
161        setState(ProcedureProtos.ProcedureState.WAITING_TIMEOUT);
162        skipPersistence();
163        throw new ProcedureSuspendedException();
164      default:
165        throw new UnsupportedOperationException("unhandled state=" + state);
166    }
167  }
168
169  private List<HRegionLocation>
170    getRegionLocationsForReopen(List<HRegionLocation> tableRegionsForReopen) {
171
172    List<HRegionLocation> regionsToReopen = new ArrayList<>();
173    if (
174      CollectionUtils.isNotEmpty(regionNames) && CollectionUtils.isNotEmpty(tableRegionsForReopen)
175    ) {
176      for (byte[] regionName : regionNames) {
177        for (HRegionLocation hRegionLocation : tableRegionsForReopen) {
178          if (Bytes.equals(regionName, hRegionLocation.getRegion().getRegionName())) {
179            regionsToReopen.add(hRegionLocation);
180            break;
181          }
182        }
183      }
184    } else {
185      regionsToReopen = tableRegionsForReopen;
186    }
187    return regionsToReopen;
188  }
189
  /**
   * At end of timeout, wake ourselves up so we run again: the procedure is marked as runnable and
   * put at the front of the procedure scheduler.
   * @param env the master procedure environment
   * @return always {@code false}
   */
  @Override
  protected synchronized boolean setTimeoutFailure(MasterProcedureEnv env) {
    setState(ProcedureProtos.ProcedureState.RUNNABLE);
    env.getProcedureScheduler().addFront(this);
    return false; // 'false' means that this procedure handled the timeout
  }
199
  /**
   * Rollback is not implemented for this procedure, every call fails.
   * @throws UnsupportedOperationException always
   */
  @Override
  protected void rollbackState(MasterProcedureEnv env, ReopenTableRegionsState state)
    throws IOException, InterruptedException {
    throw new UnsupportedOperationException("unhandled state=" + state);
  }
205
  /**
   * Converts a numeric state id into a {@link ReopenTableRegionsState} using
   * {@code ReopenTableRegionsState.forNumber}.
   */
  @Override
  protected ReopenTableRegionsState getState(int stateId) {
    return ReopenTableRegionsState.forNumber(stateId);
  }
210
  /**
   * Converts a {@link ReopenTableRegionsState} into its numeric id, as returned by
   * {@code getNumber()}.
   */
  @Override
  protected int getStateId(ReopenTableRegionsState state) {
    return state.getNumber();
  }
215
  /**
   * @return {@code REOPEN_TABLE_REGIONS_GET_REGIONS}, the state this procedure starts in
   */
  @Override
  protected ReopenTableRegionsState getInitialState() {
    return ReopenTableRegionsState.REOPEN_TABLE_REGIONS_GET_REGIONS;
  }
220
221  @Override
222  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
223    super.serializeStateData(serializer);
224    ReopenTableRegionsStateData.Builder builder = ReopenTableRegionsStateData.newBuilder()
225      .setTableName(ProtobufUtil.toProtoTableName(tableName));
226    regions.stream().map(ProtobufUtil::toRegionLocation).forEachOrdered(builder::addRegion);
227    if (CollectionUtils.isNotEmpty(regionNames)) {
228      // As of this writing, wrapping this statement withing if condition is only required
229      // for backward compatibility as we used to have 'regionNames' as null for cases
230      // where all regions of given table should be reopened. Now, we have kept emptyList()
231      // for 'regionNames' to indicate all regions of given table should be reopened unless
232      // 'regionNames' contains at least one specific region, in which case only list of regions
233      // that 'regionNames' contain should be reopened, not all regions of given table.
234      // Now, we don't need this check since we are not dealing with null 'regionNames' and hence,
235      // guarding by this if condition can be removed in HBase 4.0.0.
236      regionNames.stream().map(ByteString::copyFrom).forEachOrdered(builder::addRegionNames);
237    }
238    serializer.serialize(builder.build());
239  }
240
241  @Override
242  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
243    super.deserializeStateData(serializer);
244    ReopenTableRegionsStateData data = serializer.deserialize(ReopenTableRegionsStateData.class);
245    tableName = ProtobufUtil.toTableName(data.getTableName());
246    regions = data.getRegionList().stream().map(ProtobufUtil::toRegionLocation)
247      .collect(Collectors.toList());
248    if (CollectionUtils.isNotEmpty(data.getRegionNamesList())) {
249      regionNames = data.getRegionNamesList().stream().map(ByteString::toByteArray)
250        .collect(Collectors.toList());
251    } else {
252      regionNames = Collections.emptyList();
253    }
254  }
255}