001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver.handler;
019
020import static org.apache.hadoop.hbase.regionserver.CompactSplit.HBASE_REGION_SERVER_ENABLE_COMPACTION;
021
022import java.io.IOException;
023import java.util.concurrent.atomic.AtomicBoolean;
024import org.apache.hadoop.hbase.HConstants;
025import org.apache.hadoop.hbase.Server;
026import org.apache.hadoop.hbase.client.RegionInfo;
027import org.apache.hadoop.hbase.client.TableDescriptor;
028import org.apache.hadoop.hbase.executor.EventHandler;
029import org.apache.hadoop.hbase.executor.EventType;
030import org.apache.hadoop.hbase.procedure2.Procedure;
031import org.apache.hadoop.hbase.regionserver.HRegion;
032import org.apache.hadoop.hbase.regionserver.HRegionServer;
033import org.apache.hadoop.hbase.regionserver.RegionServerServices;
034import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext;
035import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext;
036import org.apache.hadoop.hbase.util.CancelableProgressable;
037import org.apache.yetus.audience.InterfaceAudience;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
042
043/**
044 * Handles opening of a region on a region server.
045 * <p>
046 * This is executed after receiving an OPEN RPC from the master or client.
 * @deprecated Kept here only for compatibility.
048 * @see AssignRegionHandler
049 */
050@Deprecated
051@InterfaceAudience.Private
052public class OpenRegionHandler extends EventHandler {
  /** Logger shared by the handler and its nested worker thread. */
  private static final Logger LOG = LoggerFactory.getLogger(OpenRegionHandler.class);

  /**
   * Region server services. In this class they are used to look up online regions, to read and
   * clear the regions-in-transition map, to register the opened region, to run the post-open
   * deploy tasks and to report state transitions.
   */
  protected final RegionServerServices rsServices;

  /** The region this handler opens. */
  private final RegionInfo regionInfo;
  /** Table descriptor handed to {@code HRegion.openHRegion} when the region is opened. */
  private final TableDescriptor htd;
  /** Passed through unchanged to {@link PostOpenDeployContext}; not interpreted here. */
  private final long masterSystemTime;
060
  /**
   * Creates a handler that is processed as an {@link EventType#M_RS_OPEN_REGION} event.
   * @param server           the server this handler runs on
   * @param rsServices       region server services used while opening the region
   * @param regionInfo       the region to open
   * @param htd              descriptor of the table the region belongs to
   * @param masterSystemTime value forwarded to the post-open deploy tasks
   */
  public OpenRegionHandler(final Server server, final RegionServerServices rsServices,
    RegionInfo regionInfo, TableDescriptor htd, long masterSystemTime) {
    this(server, rsServices, regionInfo, htd, masterSystemTime, EventType.M_RS_OPEN_REGION);
  }
065
066  protected OpenRegionHandler(final Server server, final RegionServerServices rsServices,
067    final RegionInfo regionInfo, final TableDescriptor htd, long masterSystemTime,
068    EventType eventType) {
069    super(server, eventType);
070    this.rsServices = rsServices;
071    this.regionInfo = regionInfo;
072    this.htd = htd;
073    this.masterSystemTime = masterSystemTime;
074  }
075
076  public RegionInfo getRegionInfo() {
077    return regionInfo;
078  }
079
080  @Override
081  public void process() throws IOException {
082    boolean openSuccessful = false;
083    final String regionName = regionInfo.getRegionNameAsString();
084    HRegion region = null;
085
086    try {
087      if (this.server.isStopped() || this.rsServices.isStopping()) {
088        return;
089      }
090      final String encodedName = regionInfo.getEncodedName();
091
092      // 2 different difficult situations can occur
093      // 1) The opening was cancelled. This is an expected situation
094      // 2) The region is now marked as online while we're suppose to open. This would be a bug.
095
096      // Check that this region is not already online
097      if (this.rsServices.getRegion(encodedName) != null) {
098        LOG.error(
099          "Region " + encodedName + " was already online when we started processing the opening. "
100            + "Marking this new attempt as failed");
101        return;
102      }
103
104      // Check that we're still supposed to open the region.
105      // If fails, just return. Someone stole the region from under us.
106      if (!isRegionStillOpening()) {
107        LOG.error("Region " + encodedName + " opening cancelled");
108        return;
109      }
110
111      // Open region. After a successful open, failures in subsequent
112      // processing needs to do a close as part of cleanup.
113      region = openRegion();
114      if (region == null) {
115        return;
116      }
117
118      if (
119        !updateMeta(region, masterSystemTime) || this.server.isStopped()
120          || this.rsServices.isStopping()
121      ) {
122        return;
123      }
124
125      if (!isRegionStillOpening()) {
126        return;
127      }
128
129      // Successful region open, and add it to MutableOnlineRegions
130      this.rsServices.addRegion(region);
131      openSuccessful = true;
132
133      // Done! Successful region open
134      LOG.debug("Opened " + regionName + " on " + this.server.getServerName());
135    } finally {
136      // Do all clean up here
137      if (!openSuccessful) {
138        doCleanUpOnFailedOpen(region);
139      }
140      final Boolean current = this.rsServices.getRegionsInTransitionInRS()
141        .remove(this.regionInfo.getEncodedNameAsBytes());
142
143      // Let's check if we have met a race condition on open cancellation....
144      // A better solution would be to not have any race condition.
145      // this.rsServices.getRegionsInTransitionInRS().remove(
146      // this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
147      // would help.
148      if (openSuccessful) {
149        if (current == null) { // Should NEVER happen, but let's be paranoid.
150          LOG.error("Bad state: we've just opened a region that was NOT in transition. Region="
151            + regionName);
152        } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're
153                                                    // really unlucky.
154          LOG.error("Race condition: we've finished to open a region, while a close was requested "
155            + " on region=" + regionName + ". It can be a critical error, as a region that"
156            + " should be closed is now opened. Closing it now");
157          cleanupFailedOpen(region);
158        }
159      }
160    }
161  }
162
163  private void doCleanUpOnFailedOpen(HRegion region) throws IOException {
164    try {
165      if (region != null) {
166        cleanupFailedOpen(region);
167      }
168    } finally {
169      rsServices
170        .reportRegionStateTransition(new RegionStateTransitionContext(TransitionCode.FAILED_OPEN,
171          HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo, -1));
172    }
173  }
174
  /**
   * Runs the post-open deploy tasks for {@code r} on a separate
   * {@link PostOpenDeployTasksThread} and waits for that thread to finish.
   * <p>
   * The tasks can take a while, for example if hbase:meta is not available (the server hosting
   * hbase:meta crashed and we are waiting on it to come back), which is why they run in their
   * own thread. While waiting, this method wakes up at least every 10 seconds to re-check
   * whether the server is stopped or stopping and whether the region is still flagged as
   * opening; if the wait ends before the worker has signalled completion, the worker is
   * interrupted and then joined.
   * <p>
   * NOTE(review): earlier documentation mentioned updating znode state while waiting; nothing in
   * this method does that.
   * <p>
   * Caller must cleanup region if this fails.
   * @param r                the freshly opened region
   * @param masterSystemTime value forwarded to the worker thread
   * @return true if the worker recorded no exception and the calling thread was not interrupted;
   *         false otherwise, including when the server was already stopped or stopping on entry
   */
  private boolean updateMeta(final HRegion r, long masterSystemTime) {
    if (this.server.isStopped() || this.rsServices.isStopping()) {
      return false;
    }
    // Object we do wait/notify on. Make it boolean. If set, we're done.
    // Else, wait.
    final AtomicBoolean signaller = new AtomicBoolean(false);
    PostOpenDeployTasksThread t =
      new PostOpenDeployTasksThread(r, this.server, this.rsServices, signaller, masterSystemTime);
    t.start();
    // Post open deploy task:
    // meta => update meta location in ZK
    // other region => update meta
    // Keep waiting only while the worker is unfinished and alive, the server is running and
    // the region is still flagged as opening.
    while (
      !signaller.get() && t.isAlive() && !this.server.isStopped() && !this.rsServices.isStopping()
        && isRegionStillOpening()
    ) {
      synchronized (signaller) {
        try {
          // Wait for 10 seconds, so that server shutdown
          // won't take too long if this thread happens to run.
          // The flag is re-checked under the monitor so a notify sent just before is not missed.
          if (!signaller.get()) signaller.wait(10000);
        } catch (InterruptedException e) {
          // Go to the loop check. Note that wait() clears the interrupt status when it throws,
          // so the interrupt itself is not remembered here.
        }
      }
    }
    // Is thread still alive? We may have left the loop above because the server is
    // stopping or the open was cancelled. If so, interrupt it.
    if (t.isAlive()) {
      if (!signaller.get()) {
        // Thread still running; interrupt
        LOG.debug("Interrupting thread " + t);
        t.interrupt();
      }
      try {
        t.join();
      } catch (InterruptedException ie) {
        LOG.warn("Interrupted joining " + r.getRegionInfo().getRegionNameAsString(), ie);
        // Restore the flag so the check in the return statement below sees the interrupt.
        Thread.currentThread().interrupt();
      }
    }

    // Was there an exception running the post-open tasks? This should trigger on
    // InterruptedException too. If so, we failed.
    // Thread.interrupted() also CLEARS the calling thread's interrupt status, so the interrupt
    // restored above does not survive this method.
    return (!Thread.interrupted() && t.getException() == null);
  }
228
229  /**
230   * Thread to run region post open tasks. Call {@link #getException()} after the thread finishes to
231   * check for exceptions running
232   * {@link RegionServerServices#postOpenDeployTasks(PostOpenDeployContext)}
233   */
234  static class PostOpenDeployTasksThread extends Thread {
235    private Throwable exception = null;
236    private final Server server;
237    private final RegionServerServices services;
238    private final HRegion region;
239    private final AtomicBoolean signaller;
240    private final long masterSystemTime;
241
242    PostOpenDeployTasksThread(final HRegion region, final Server server,
243      final RegionServerServices services, final AtomicBoolean signaller, long masterSystemTime) {
244      super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
245      this.setDaemon(true);
246      this.server = server;
247      this.services = services;
248      this.region = region;
249      this.signaller = signaller;
250      this.masterSystemTime = masterSystemTime;
251    }
252
253    @Override
254    public void run() {
255      try {
256        this.services.postOpenDeployTasks(
257          new PostOpenDeployContext(region, Procedure.NO_PROC_ID, masterSystemTime, -1));
258      } catch (Throwable e) {
259        String msg = "Exception running postOpenDeployTasks; region="
260          + this.region.getRegionInfo().getEncodedName();
261        this.exception = e;
262        if (e instanceof IOException && isRegionStillOpening(region.getRegionInfo(), services)) {
263          server.abort(msg, e);
264        } else {
265          LOG.warn(msg, e);
266        }
267      }
268      // We're done. Set flag then wake up anyone waiting on thread to complete.
269      this.signaller.set(true);
270      synchronized (this.signaller) {
271        this.signaller.notify();
272      }
273    }
274
275    /** Returns Null or the run exception; call this method after thread is done. */
276    Throwable getException() {
277      return this.exception;
278    }
279  }
280
281  /** Returns Instance of HRegion if successful open else null. */
282  private HRegion openRegion() {
283    HRegion region = null;
284    boolean compactionEnabled =
285      ((HRegionServer) server).getCompactSplitThread().isCompactionsEnabled();
286    this.server.getConfiguration().setBoolean(HBASE_REGION_SERVER_ENABLE_COMPACTION,
287      compactionEnabled);
288    try {
289      // Instantiate the region. This also periodically tickles OPENING
290      // state so master doesn't timeout this region in transition.
291      region =
292        HRegion.openHRegion(this.regionInfo, this.htd, this.rsServices.getWAL(this.regionInfo),
293          this.server.getConfiguration(), this.rsServices, new CancelableProgressable() {
294            @Override
295            public boolean progress() {
296              if (!isRegionStillOpening()) {
297                LOG.warn("Open region aborted since it isn't opening any more");
298                return false;
299              }
300              return true;
301            }
302          });
303    } catch (Throwable t) {
304      // We failed open. Our caller will see the 'null' return value
305      // and transition the node back to FAILED_OPEN. If that fails,
306      // we rely on the Timeout Monitor in the master to reassign.
307      LOG.error("Failed open of region=" + this.regionInfo.getRegionNameAsString(), t);
308    }
309    return region;
310  }
311
312  private void cleanupFailedOpen(final HRegion region) throws IOException {
313    if (region != null) {
314      this.rsServices.removeRegion(region, null);
315      region.close();
316    }
317  }
318
319  private static boolean isRegionStillOpening(RegionInfo regionInfo,
320    RegionServerServices rsServices) {
321    byte[] encodedName = regionInfo.getEncodedNameAsBytes();
322    Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
323    return Boolean.TRUE.equals(action); // true means opening for RIT
324  }
325
326  private boolean isRegionStillOpening() {
327    return isRegionStillOpening(regionInfo, rsServices);
328  }
329}