001/**
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one
004 * or more contributor license agreements.  See the NOTICE file
005 * distributed with this work for additional information
006 * regarding copyright ownership.  The ASF licenses this file
007 * to you under the Apache License, Version 2.0 (the
008 * "License"); you may not use this file except in compliance
009 * with the License.  You may obtain a copy of the License at
010 *
011 *     http://www.apache.org/licenses/LICENSE-2.0
012 *
013 * Unless required by applicable law or agreed to in writing, software
014 * distributed under the License is distributed on an "AS IS" BASIS,
015 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016 * See the License for the specific language governing permissions and
017 * limitations under the License.
018 */
019package org.apache.hadoop.hbase.regionserver.handler;
020
021import java.io.IOException;
022import java.util.concurrent.atomic.AtomicBoolean;
023
024import org.apache.hadoop.hbase.HConstants;
025import org.apache.hadoop.hbase.Server;
026import org.apache.hadoop.hbase.client.RegionInfo;
027import org.apache.hadoop.hbase.client.TableDescriptor;
028import org.apache.hadoop.hbase.executor.EventHandler;
029import org.apache.hadoop.hbase.executor.EventType;
030import org.apache.hadoop.hbase.procedure2.Procedure;
031import org.apache.hadoop.hbase.regionserver.HRegion;
032import org.apache.hadoop.hbase.regionserver.RegionServerServices;
033import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext;
034import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext;
035import org.apache.hadoop.hbase.util.CancelableProgressable;
036import org.apache.yetus.audience.InterfaceAudience;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
040/**
041 * Handles opening of a region on a region server.
042 * <p>
043 * This is executed after receiving an OPEN RPC from the master or client.
044 * @deprecated Keep it here only for compatible
045 * @see AssignRegionHandler
046 */
047@Deprecated
048@InterfaceAudience.Private
049public class OpenRegionHandler extends EventHandler {
050  private static final Logger LOG = LoggerFactory.getLogger(OpenRegionHandler.class);
051
052  protected final RegionServerServices rsServices;
053
054  private final RegionInfo regionInfo;
055  private final TableDescriptor htd;
056  private final long masterSystemTime;
057
058  public OpenRegionHandler(final Server server,
059      final RegionServerServices rsServices, RegionInfo regionInfo,
060      TableDescriptor htd, long masterSystemTime) {
061    this(server, rsServices, regionInfo, htd, masterSystemTime, EventType.M_RS_OPEN_REGION);
062  }
063
064  protected OpenRegionHandler(final Server server,
065                              final RegionServerServices rsServices, final RegionInfo regionInfo,
066                              final TableDescriptor htd, long masterSystemTime, EventType eventType) {
067    super(server, eventType);
068    this.rsServices = rsServices;
069    this.regionInfo = regionInfo;
070    this.htd = htd;
071    this.masterSystemTime = masterSystemTime;
072  }
073
074  public RegionInfo getRegionInfo() {
075    return regionInfo;
076  }
077
078  @Override
079  public void process() throws IOException {
080    boolean openSuccessful = false;
081    final String regionName = regionInfo.getRegionNameAsString();
082    HRegion region = null;
083
084    try {
085      if (this.server.isStopped() || this.rsServices.isStopping()) {
086        return;
087      }
088      final String encodedName = regionInfo.getEncodedName();
089
090      // 2 different difficult situations can occur
091      // 1) The opening was cancelled. This is an expected situation
092      // 2) The region is now marked as online while we're suppose to open. This would be a bug.
093
094      // Check that this region is not already online
095      if (this.rsServices.getRegion(encodedName) != null) {
096        LOG.error("Region " + encodedName +
097            " was already online when we started processing the opening. " +
098            "Marking this new attempt as failed");
099        return;
100      }
101
102      // Check that we're still supposed to open the region.
103      // If fails, just return.  Someone stole the region from under us.
104      if (!isRegionStillOpening()){
105        LOG.error("Region " + encodedName + " opening cancelled");
106        return;
107      }
108
109      // Open region.  After a successful open, failures in subsequent
110      // processing needs to do a close as part of cleanup.
111      region = openRegion();
112      if (region == null) {
113        return;
114      }
115
116      if (!updateMeta(region, masterSystemTime) || this.server.isStopped() ||
117          this.rsServices.isStopping()) {
118        return;
119      }
120
121      if (!isRegionStillOpening()) {
122        return;
123      }
124
125      // Successful region open, and add it to MutableOnlineRegions
126      this.rsServices.addRegion(region);
127      openSuccessful = true;
128
129      // Done!  Successful region open
130      LOG.debug("Opened " + regionName + " on " + this.server.getServerName());
131    } finally {
132      // Do all clean up here
133      if (!openSuccessful) {
134        doCleanUpOnFailedOpen(region);
135      }
136      final Boolean current = this.rsServices.getRegionsInTransitionInRS().
137          remove(this.regionInfo.getEncodedNameAsBytes());
138
139      // Let's check if we have met a race condition on open cancellation....
140      // A better solution would be to not have any race condition.
141      // this.rsServices.getRegionsInTransitionInRS().remove(
142      //  this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
143      // would help.
144      if (openSuccessful) {
145        if (current == null) { // Should NEVER happen, but let's be paranoid.
146          LOG.error("Bad state: we've just opened a region that was NOT in transition. Region="
147              + regionName);
148        } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're
149                                                    // really unlucky.
150          LOG.error("Race condition: we've finished to open a region, while a close was requested "
151              + " on region=" + regionName + ". It can be a critical error, as a region that"
152              + " should be closed is now opened. Closing it now");
153          cleanupFailedOpen(region);
154        }
155      }
156    }
157  }
158
159  private void doCleanUpOnFailedOpen(HRegion region) throws IOException {
160    try {
161      if (region != null) {
162        cleanupFailedOpen(region);
163      }
164    } finally {
165      rsServices.reportRegionStateTransition(new RegionStateTransitionContext(
166        TransitionCode.FAILED_OPEN, HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo));
167    }
168  }
169
170  /**
171   * Update ZK or META.  This can take a while if for example the
172   * hbase:meta is not available -- if server hosting hbase:meta crashed and we are
173   * waiting on it to come back -- so run in a thread and keep updating znode
174   * state meantime so master doesn't timeout our region-in-transition.
175   * Caller must cleanup region if this fails.
176   */
177  private boolean updateMeta(final HRegion r, long masterSystemTime) {
178    if (this.server.isStopped() || this.rsServices.isStopping()) {
179      return false;
180    }
181    // Object we do wait/notify on.  Make it boolean.  If set, we're done.
182    // Else, wait.
183    final AtomicBoolean signaller = new AtomicBoolean(false);
184    PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(r,
185      this.server, this.rsServices, signaller, masterSystemTime);
186    t.start();
187    // Post open deploy task:
188    //   meta => update meta location in ZK
189    //   other region => update meta
190    while (!signaller.get() && t.isAlive() && !this.server.isStopped() &&
191        !this.rsServices.isStopping() && isRegionStillOpening()) {
192      synchronized (signaller) {
193        try {
194          // Wait for 10 seconds, so that server shutdown
195          // won't take too long if this thread happens to run.
196          if (!signaller.get()) signaller.wait(10000);
197        } catch (InterruptedException e) {
198          // Go to the loop check.
199        }
200      }
201    }
202    // Is thread still alive?  We may have left above loop because server is
203    // stopping or we timed out the edit.  Is so, interrupt it.
204    if (t.isAlive()) {
205      if (!signaller.get()) {
206        // Thread still running; interrupt
207        LOG.debug("Interrupting thread " + t);
208        t.interrupt();
209      }
210      try {
211        t.join();
212      } catch (InterruptedException ie) {
213        LOG.warn("Interrupted joining " +
214          r.getRegionInfo().getRegionNameAsString(), ie);
215        Thread.currentThread().interrupt();
216      }
217    }
218
219    // Was there an exception opening the region?  This should trigger on
220    // InterruptedException too.  If so, we failed.
221    return (!Thread.interrupted() && t.getException() == null);
222  }
223
224  /**
225   * Thread to run region post open tasks. Call {@link #getException()} after the thread finishes
226   * to check for exceptions running
227   * {@link RegionServerServices#postOpenDeployTasks(PostOpenDeployContext)}
228   */
229  static class PostOpenDeployTasksThread extends Thread {
230    private Throwable exception = null;
231    private final Server server;
232    private final RegionServerServices services;
233    private final HRegion region;
234    private final AtomicBoolean signaller;
235    private final long masterSystemTime;
236
237    PostOpenDeployTasksThread(final HRegion region, final Server server,
238        final RegionServerServices services, final AtomicBoolean signaller, long masterSystemTime) {
239      super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
240      this.setDaemon(true);
241      this.server = server;
242      this.services = services;
243      this.region = region;
244      this.signaller = signaller;
245      this.masterSystemTime = masterSystemTime;
246    }
247
248    @Override
249    public void run() {
250      try {
251        this.services.postOpenDeployTasks(
252          new PostOpenDeployContext(region, Procedure.NO_PROC_ID, masterSystemTime));
253      } catch (Throwable e) {
254        String msg = "Exception running postOpenDeployTasks; region=" +
255          this.region.getRegionInfo().getEncodedName();
256        this.exception = e;
257        if (e instanceof IOException && isRegionStillOpening(region.getRegionInfo(), services)) {
258          server.abort(msg, e);
259        } else {
260          LOG.warn(msg, e);
261        }
262      }
263      // We're done. Set flag then wake up anyone waiting on thread to complete.
264      this.signaller.set(true);
265      synchronized (this.signaller) {
266        this.signaller.notify();
267      }
268    }
269
270    /**
271     * @return Null or the run exception; call this method after thread is done.
272     */
273    Throwable getException() {
274      return this.exception;
275    }
276  }
277
278  /**
279   * @return Instance of HRegion if successful open else null.
280   */
281  private HRegion openRegion() {
282    HRegion region = null;
283    try {
284      // Instantiate the region.  This also periodically tickles OPENING
285      // state so master doesn't timeout this region in transition.
286      region = HRegion.openHRegion(this.regionInfo, this.htd,
287        this.rsServices.getWAL(this.regionInfo),
288        this.server.getConfiguration(),
289        this.rsServices,
290        new CancelableProgressable() {
291          @Override
292          public boolean progress() {
293            if (!isRegionStillOpening()) {
294              LOG.warn("Open region aborted since it isn't opening any more");
295              return false;
296            }
297            return true;
298          }
299        });
300    } catch (Throwable t) {
301      // We failed open. Our caller will see the 'null' return value
302      // and transition the node back to FAILED_OPEN. If that fails,
303      // we rely on the Timeout Monitor in the master to reassign.
304      LOG.error(
305          "Failed open of region=" + this.regionInfo.getRegionNameAsString(), t);
306    }
307    return region;
308  }
309
310  private void cleanupFailedOpen(final HRegion region) throws IOException {
311    if (region != null) {
312      this.rsServices.removeRegion(region, null);
313      region.close();
314    }
315  }
316
317  private static boolean isRegionStillOpening(
318      RegionInfo regionInfo, RegionServerServices rsServices) {
319    byte[] encodedName = regionInfo.getEncodedNameAsBytes();
320    Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
321    return Boolean.TRUE.equals(action); // true means opening for RIT
322  }
323
324  private boolean isRegionStillOpening() {
325    return isRegionStillOpening(regionInfo, rsServices);
326  }
327}