001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver.handler;
019
020import java.io.IOException;
021import java.util.concurrent.atomic.AtomicBoolean;
022import org.apache.hadoop.hbase.HConstants;
023import org.apache.hadoop.hbase.Server;
024import org.apache.hadoop.hbase.client.RegionInfo;
025import org.apache.hadoop.hbase.client.TableDescriptor;
026import org.apache.hadoop.hbase.executor.EventHandler;
027import org.apache.hadoop.hbase.executor.EventType;
028import org.apache.hadoop.hbase.procedure2.Procedure;
029import org.apache.hadoop.hbase.regionserver.HRegion;
030import org.apache.hadoop.hbase.regionserver.RegionServerServices;
031import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext;
032import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext;
033import org.apache.hadoop.hbase.util.CancelableProgressable;
034import org.apache.yetus.audience.InterfaceAudience;
035import org.slf4j.Logger;
036import org.slf4j.LoggerFactory;
037
038import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
039
040/**
041 * Handles opening of a region on a region server.
042 * <p>
043 * This is executed after receiving an OPEN RPC from the master or client.
044 * @deprecated Keep it here only for compatible
045 * @see AssignRegionHandler
046 */
047@Deprecated
048@InterfaceAudience.Private
049public class OpenRegionHandler extends EventHandler {
050  private static final Logger LOG = LoggerFactory.getLogger(OpenRegionHandler.class);
051
052  protected final RegionServerServices rsServices;
053
054  private final RegionInfo regionInfo;
055  private final TableDescriptor htd;
056  private final long masterSystemTime;
057
058  public OpenRegionHandler(final Server server, final RegionServerServices rsServices,
059    RegionInfo regionInfo, TableDescriptor htd, long masterSystemTime) {
060    this(server, rsServices, regionInfo, htd, masterSystemTime, EventType.M_RS_OPEN_REGION);
061  }
062
063  protected OpenRegionHandler(final Server server, final RegionServerServices rsServices,
064    final RegionInfo regionInfo, final TableDescriptor htd, long masterSystemTime,
065    EventType eventType) {
066    super(server, eventType);
067    this.rsServices = rsServices;
068    this.regionInfo = regionInfo;
069    this.htd = htd;
070    this.masterSystemTime = masterSystemTime;
071  }
072
073  public RegionInfo getRegionInfo() {
074    return regionInfo;
075  }
076
077  @Override
078  public void process() throws IOException {
079    boolean openSuccessful = false;
080    final String regionName = regionInfo.getRegionNameAsString();
081    HRegion region = null;
082
083    try {
084      if (this.server.isStopped() || this.rsServices.isStopping()) {
085        return;
086      }
087      final String encodedName = regionInfo.getEncodedName();
088
089      // 2 different difficult situations can occur
090      // 1) The opening was cancelled. This is an expected situation
091      // 2) The region is now marked as online while we're suppose to open. This would be a bug.
092
093      // Check that this region is not already online
094      if (this.rsServices.getRegion(encodedName) != null) {
095        LOG.error(
096          "Region " + encodedName + " was already online when we started processing the opening. "
097            + "Marking this new attempt as failed");
098        return;
099      }
100
101      // Check that we're still supposed to open the region.
102      // If fails, just return. Someone stole the region from under us.
103      if (!isRegionStillOpening()) {
104        LOG.error("Region " + encodedName + " opening cancelled");
105        return;
106      }
107
108      // Open region. After a successful open, failures in subsequent
109      // processing needs to do a close as part of cleanup.
110      region = openRegion();
111      if (region == null) {
112        return;
113      }
114
115      if (
116        !updateMeta(region, masterSystemTime) || this.server.isStopped()
117          || this.rsServices.isStopping()
118      ) {
119        return;
120      }
121
122      if (!isRegionStillOpening()) {
123        return;
124      }
125
126      // Successful region open, and add it to MutableOnlineRegions
127      this.rsServices.addRegion(region);
128      openSuccessful = true;
129
130      // Done! Successful region open
131      LOG.debug("Opened " + regionName + " on " + this.server.getServerName());
132    } finally {
133      // Do all clean up here
134      if (!openSuccessful) {
135        doCleanUpOnFailedOpen(region);
136      }
137      final Boolean current = this.rsServices.getRegionsInTransitionInRS()
138        .remove(this.regionInfo.getEncodedNameAsBytes());
139
140      // Let's check if we have met a race condition on open cancellation....
141      // A better solution would be to not have any race condition.
142      // this.rsServices.getRegionsInTransitionInRS().remove(
143      // this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
144      // would help.
145      if (openSuccessful) {
146        if (current == null) { // Should NEVER happen, but let's be paranoid.
147          LOG.error("Bad state: we've just opened a region that was NOT in transition. Region="
148            + regionName);
149        } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're
150                                                    // really unlucky.
151          LOG.error("Race condition: we've finished to open a region, while a close was requested "
152            + " on region=" + regionName + ". It can be a critical error, as a region that"
153            + " should be closed is now opened. Closing it now");
154          cleanupFailedOpen(region);
155        }
156      }
157    }
158  }
159
160  private void doCleanUpOnFailedOpen(HRegion region) throws IOException {
161    try {
162      if (region != null) {
163        cleanupFailedOpen(region);
164      }
165    } finally {
166      rsServices.reportRegionStateTransition(new RegionStateTransitionContext(
167        TransitionCode.FAILED_OPEN, HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo));
168    }
169  }
170
171  /**
172   * Update ZK or META. This can take a while if for example the hbase:meta is not available -- if
173   * server hosting hbase:meta crashed and we are waiting on it to come back -- so run in a thread
174   * and keep updating znode state meantime so master doesn't timeout our region-in-transition.
175   * Caller must cleanup region if this fails.
176   */
177  private boolean updateMeta(final HRegion r, long masterSystemTime) {
178    if (this.server.isStopped() || this.rsServices.isStopping()) {
179      return false;
180    }
181    // Object we do wait/notify on. Make it boolean. If set, we're done.
182    // Else, wait.
183    final AtomicBoolean signaller = new AtomicBoolean(false);
184    PostOpenDeployTasksThread t =
185      new PostOpenDeployTasksThread(r, this.server, this.rsServices, signaller, masterSystemTime);
186    t.start();
187    // Post open deploy task:
188    // meta => update meta location in ZK
189    // other region => update meta
190    while (
191      !signaller.get() && t.isAlive() && !this.server.isStopped() && !this.rsServices.isStopping()
192        && isRegionStillOpening()
193    ) {
194      synchronized (signaller) {
195        try {
196          // Wait for 10 seconds, so that server shutdown
197          // won't take too long if this thread happens to run.
198          if (!signaller.get()) signaller.wait(10000);
199        } catch (InterruptedException e) {
200          // Go to the loop check.
201        }
202      }
203    }
204    // Is thread still alive? We may have left above loop because server is
205    // stopping or we timed out the edit. Is so, interrupt it.
206    if (t.isAlive()) {
207      if (!signaller.get()) {
208        // Thread still running; interrupt
209        LOG.debug("Interrupting thread " + t);
210        t.interrupt();
211      }
212      try {
213        t.join();
214      } catch (InterruptedException ie) {
215        LOG.warn("Interrupted joining " + r.getRegionInfo().getRegionNameAsString(), ie);
216        Thread.currentThread().interrupt();
217      }
218    }
219
220    // Was there an exception opening the region? This should trigger on
221    // InterruptedException too. If so, we failed.
222    return (!Thread.interrupted() && t.getException() == null);
223  }
224
225  /**
226   * Thread to run region post open tasks. Call {@link #getException()} after the thread finishes to
227   * check for exceptions running
228   * {@link RegionServerServices#postOpenDeployTasks(PostOpenDeployContext)}
229   */
230  static class PostOpenDeployTasksThread extends Thread {
231    private Throwable exception = null;
232    private final Server server;
233    private final RegionServerServices services;
234    private final HRegion region;
235    private final AtomicBoolean signaller;
236    private final long masterSystemTime;
237
238    PostOpenDeployTasksThread(final HRegion region, final Server server,
239      final RegionServerServices services, final AtomicBoolean signaller, long masterSystemTime) {
240      super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
241      this.setDaemon(true);
242      this.server = server;
243      this.services = services;
244      this.region = region;
245      this.signaller = signaller;
246      this.masterSystemTime = masterSystemTime;
247    }
248
249    @Override
250    public void run() {
251      try {
252        this.services.postOpenDeployTasks(
253          new PostOpenDeployContext(region, Procedure.NO_PROC_ID, masterSystemTime));
254      } catch (Throwable e) {
255        String msg = "Exception running postOpenDeployTasks; region="
256          + this.region.getRegionInfo().getEncodedName();
257        this.exception = e;
258        if (e instanceof IOException && isRegionStillOpening(region.getRegionInfo(), services)) {
259          server.abort(msg, e);
260        } else {
261          LOG.warn(msg, e);
262        }
263      }
264      // We're done. Set flag then wake up anyone waiting on thread to complete.
265      this.signaller.set(true);
266      synchronized (this.signaller) {
267        this.signaller.notify();
268      }
269    }
270
271    /** Returns Null or the run exception; call this method after thread is done. */
272    Throwable getException() {
273      return this.exception;
274    }
275  }
276
277  /** Returns Instance of HRegion if successful open else null. */
278  private HRegion openRegion() {
279    HRegion region = null;
280    try {
281      // Instantiate the region. This also periodically tickles OPENING
282      // state so master doesn't timeout this region in transition.
283      region =
284        HRegion.openHRegion(this.regionInfo, this.htd, this.rsServices.getWAL(this.regionInfo),
285          this.server.getConfiguration(), this.rsServices, new CancelableProgressable() {
286            @Override
287            public boolean progress() {
288              if (!isRegionStillOpening()) {
289                LOG.warn("Open region aborted since it isn't opening any more");
290                return false;
291              }
292              return true;
293            }
294          });
295    } catch (Throwable t) {
296      // We failed open. Our caller will see the 'null' return value
297      // and transition the node back to FAILED_OPEN. If that fails,
298      // we rely on the Timeout Monitor in the master to reassign.
299      LOG.error("Failed open of region=" + this.regionInfo.getRegionNameAsString(), t);
300    }
301    return region;
302  }
303
304  private void cleanupFailedOpen(final HRegion region) throws IOException {
305    if (region != null) {
306      this.rsServices.removeRegion(region, null);
307      region.close();
308    }
309  }
310
311  private static boolean isRegionStillOpening(RegionInfo regionInfo,
312    RegionServerServices rsServices) {
313    byte[] encodedName = regionInfo.getEncodedNameAsBytes();
314    Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
315    return Boolean.TRUE.equals(action); // true means opening for RIT
316  }
317
318  private boolean isRegionStillOpening() {
319    return isRegionStillOpening(regionInfo, rsServices);
320  }
321}