001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.regionserver.handler;
019
020import static org.apache.hadoop.hbase.regionserver.CompactSplit.HBASE_REGION_SERVER_ENABLE_COMPACTION;
021
022import java.io.IOException;
023import java.util.concurrent.atomic.AtomicBoolean;
024import org.apache.hadoop.hbase.HConstants;
025import org.apache.hadoop.hbase.Server;
026import org.apache.hadoop.hbase.client.RegionInfo;
027import org.apache.hadoop.hbase.client.TableDescriptor;
028import org.apache.hadoop.hbase.executor.EventHandler;
029import org.apache.hadoop.hbase.executor.EventType;
030import org.apache.hadoop.hbase.procedure2.Procedure;
031import org.apache.hadoop.hbase.regionserver.HRegion;
032import org.apache.hadoop.hbase.regionserver.HRegionServer;
033import org.apache.hadoop.hbase.regionserver.RegionServerServices;
034import org.apache.hadoop.hbase.regionserver.RegionServerServices.PostOpenDeployContext;
035import org.apache.hadoop.hbase.regionserver.RegionServerServices.RegionStateTransitionContext;
036import org.apache.hadoop.hbase.util.CancelableProgressable;
037import org.apache.yetus.audience.InterfaceAudience;
038import org.slf4j.Logger;
039import org.slf4j.LoggerFactory;
040
041import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionStateTransition.TransitionCode;
042
043/**
044 * Handles opening of a region on a region server.
045 * <p>
046 * This is executed after receiving an OPEN RPC from the master or client.
047 * @deprecated Keep it here only for compatible
048 * @see AssignRegionHandler
049 */
050@Deprecated
051@InterfaceAudience.Private
052public class OpenRegionHandler extends EventHandler {
053  private static final Logger LOG = LoggerFactory.getLogger(OpenRegionHandler.class);
054
055  protected final RegionServerServices rsServices;
056
057  private final RegionInfo regionInfo;
058  private final TableDescriptor htd;
059  private final long masterSystemTime;
060
061  public OpenRegionHandler(final Server server, final RegionServerServices rsServices,
062    RegionInfo regionInfo, TableDescriptor htd, long masterSystemTime) {
063    this(server, rsServices, regionInfo, htd, masterSystemTime, EventType.M_RS_OPEN_REGION);
064  }
065
066  protected OpenRegionHandler(final Server server, final RegionServerServices rsServices,
067    final RegionInfo regionInfo, final TableDescriptor htd, long masterSystemTime,
068    EventType eventType) {
069    super(server, eventType);
070    this.rsServices = rsServices;
071    this.regionInfo = regionInfo;
072    this.htd = htd;
073    this.masterSystemTime = masterSystemTime;
074  }
075
076  public RegionInfo getRegionInfo() {
077    return regionInfo;
078  }
079
080  @Override
081  public void process() throws IOException {
082    boolean openSuccessful = false;
083    final String regionName = regionInfo.getRegionNameAsString();
084    HRegion region = null;
085
086    try {
087      if (this.server.isStopped() || this.rsServices.isStopping()) {
088        return;
089      }
090      final String encodedName = regionInfo.getEncodedName();
091
092      // 2 different difficult situations can occur
093      // 1) The opening was cancelled. This is an expected situation
094      // 2) The region is now marked as online while we're suppose to open. This would be a bug.
095
096      // Check that this region is not already online
097      if (this.rsServices.getRegion(encodedName) != null) {
098        LOG.error(
099          "Region " + encodedName + " was already online when we started processing the opening. "
100            + "Marking this new attempt as failed");
101        return;
102      }
103
104      // Check that we're still supposed to open the region.
105      // If fails, just return. Someone stole the region from under us.
106      if (!isRegionStillOpening()) {
107        LOG.error("Region " + encodedName + " opening cancelled");
108        return;
109      }
110
111      // Open region. After a successful open, failures in subsequent
112      // processing needs to do a close as part of cleanup.
113      region = openRegion();
114      if (region == null) {
115        return;
116      }
117
118      if (
119        !updateMeta(region, masterSystemTime) || this.server.isStopped()
120          || this.rsServices.isStopping()
121      ) {
122        return;
123      }
124
125      if (!isRegionStillOpening()) {
126        return;
127      }
128
129      // Successful region open, and add it to MutableOnlineRegions
130      this.rsServices.addRegion(region);
131      openSuccessful = true;
132
133      // Done! Successful region open
134      LOG.debug("Opened " + regionName + " on " + this.server.getServerName());
135    } finally {
136      // Do all clean up here
137      if (!openSuccessful) {
138        doCleanUpOnFailedOpen(region);
139      }
140      final Boolean current = this.rsServices.getRegionsInTransitionInRS()
141        .remove(this.regionInfo.getEncodedNameAsBytes());
142
143      // Let's check if we have met a race condition on open cancellation....
144      // A better solution would be to not have any race condition.
145      // this.rsServices.getRegionsInTransitionInRS().remove(
146      // this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
147      // would help.
148      if (openSuccessful) {
149        if (current == null) { // Should NEVER happen, but let's be paranoid.
150          LOG.error("Bad state: we've just opened a region that was NOT in transition. Region="
151            + regionName);
152        } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're
153                                                    // really unlucky.
154          LOG.error("Race condition: we've finished to open a region, while a close was requested "
155            + " on region=" + regionName + ". It can be a critical error, as a region that"
156            + " should be closed is now opened. Closing it now");
157          cleanupFailedOpen(region);
158        }
159      }
160    }
161  }
162
163  private void doCleanUpOnFailedOpen(HRegion region) throws IOException {
164    try {
165      if (region != null) {
166        cleanupFailedOpen(region);
167      }
168    } finally {
169      rsServices.reportRegionStateTransition(new RegionStateTransitionContext(
170        TransitionCode.FAILED_OPEN, HConstants.NO_SEQNUM, Procedure.NO_PROC_ID, -1, regionInfo));
171    }
172  }
173
174  /**
175   * Update ZK or META. This can take a while if for example the hbase:meta is not available -- if
176   * server hosting hbase:meta crashed and we are waiting on it to come back -- so run in a thread
177   * and keep updating znode state meantime so master doesn't timeout our region-in-transition.
178   * Caller must cleanup region if this fails.
179   */
180  private boolean updateMeta(final HRegion r, long masterSystemTime) {
181    if (this.server.isStopped() || this.rsServices.isStopping()) {
182      return false;
183    }
184    // Object we do wait/notify on. Make it boolean. If set, we're done.
185    // Else, wait.
186    final AtomicBoolean signaller = new AtomicBoolean(false);
187    PostOpenDeployTasksThread t =
188      new PostOpenDeployTasksThread(r, this.server, this.rsServices, signaller, masterSystemTime);
189    t.start();
190    // Post open deploy task:
191    // meta => update meta location in ZK
192    // other region => update meta
193    while (
194      !signaller.get() && t.isAlive() && !this.server.isStopped() && !this.rsServices.isStopping()
195        && isRegionStillOpening()
196    ) {
197      synchronized (signaller) {
198        try {
199          // Wait for 10 seconds, so that server shutdown
200          // won't take too long if this thread happens to run.
201          if (!signaller.get()) signaller.wait(10000);
202        } catch (InterruptedException e) {
203          // Go to the loop check.
204        }
205      }
206    }
207    // Is thread still alive? We may have left above loop because server is
208    // stopping or we timed out the edit. Is so, interrupt it.
209    if (t.isAlive()) {
210      if (!signaller.get()) {
211        // Thread still running; interrupt
212        LOG.debug("Interrupting thread " + t);
213        t.interrupt();
214      }
215      try {
216        t.join();
217      } catch (InterruptedException ie) {
218        LOG.warn("Interrupted joining " + r.getRegionInfo().getRegionNameAsString(), ie);
219        Thread.currentThread().interrupt();
220      }
221    }
222
223    // Was there an exception opening the region? This should trigger on
224    // InterruptedException too. If so, we failed.
225    return (!Thread.interrupted() && t.getException() == null);
226  }
227
228  /**
229   * Thread to run region post open tasks. Call {@link #getException()} after the thread finishes to
230   * check for exceptions running
231   * {@link RegionServerServices#postOpenDeployTasks(PostOpenDeployContext)}
232   */
233  static class PostOpenDeployTasksThread extends Thread {
234    private Throwable exception = null;
235    private final Server server;
236    private final RegionServerServices services;
237    private final HRegion region;
238    private final AtomicBoolean signaller;
239    private final long masterSystemTime;
240
241    PostOpenDeployTasksThread(final HRegion region, final Server server,
242      final RegionServerServices services, final AtomicBoolean signaller, long masterSystemTime) {
243      super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
244      this.setDaemon(true);
245      this.server = server;
246      this.services = services;
247      this.region = region;
248      this.signaller = signaller;
249      this.masterSystemTime = masterSystemTime;
250    }
251
252    @Override
253    public void run() {
254      try {
255        this.services.postOpenDeployTasks(
256          new PostOpenDeployContext(region, Procedure.NO_PROC_ID, masterSystemTime));
257      } catch (Throwable e) {
258        String msg = "Exception running postOpenDeployTasks; region="
259          + this.region.getRegionInfo().getEncodedName();
260        this.exception = e;
261        if (e instanceof IOException && isRegionStillOpening(region.getRegionInfo(), services)) {
262          server.abort(msg, e);
263        } else {
264          LOG.warn(msg, e);
265        }
266      }
267      // We're done. Set flag then wake up anyone waiting on thread to complete.
268      this.signaller.set(true);
269      synchronized (this.signaller) {
270        this.signaller.notify();
271      }
272    }
273
274    /** Returns Null or the run exception; call this method after thread is done. */
275    Throwable getException() {
276      return this.exception;
277    }
278  }
279
280  /** Returns Instance of HRegion if successful open else null. */
281  private HRegion openRegion() {
282    HRegion region = null;
283    boolean compactionEnabled =
284      ((HRegionServer) server).getCompactSplitThread().isCompactionsEnabled();
285    this.server.getConfiguration().setBoolean(HBASE_REGION_SERVER_ENABLE_COMPACTION,
286      compactionEnabled);
287    try {
288      // Instantiate the region. This also periodically tickles OPENING
289      // state so master doesn't timeout this region in transition.
290      region =
291        HRegion.openHRegion(this.regionInfo, this.htd, this.rsServices.getWAL(this.regionInfo),
292          this.server.getConfiguration(), this.rsServices, new CancelableProgressable() {
293            @Override
294            public boolean progress() {
295              if (!isRegionStillOpening()) {
296                LOG.warn("Open region aborted since it isn't opening any more");
297                return false;
298              }
299              return true;
300            }
301          });
302    } catch (Throwable t) {
303      // We failed open. Our caller will see the 'null' return value
304      // and transition the node back to FAILED_OPEN. If that fails,
305      // we rely on the Timeout Monitor in the master to reassign.
306      LOG.error("Failed open of region=" + this.regionInfo.getRegionNameAsString(), t);
307    }
308    return region;
309  }
310
311  private void cleanupFailedOpen(final HRegion region) throws IOException {
312    if (region != null) {
313      this.rsServices.removeRegion(region, null);
314      region.close();
315    }
316  }
317
318  private static boolean isRegionStillOpening(RegionInfo regionInfo,
319    RegionServerServices rsServices) {
320    byte[] encodedName = regionInfo.getEncodedNameAsBytes();
321    Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
322    return Boolean.TRUE.equals(action); // true means opening for RIT
323  }
324
325  private boolean isRegionStillOpening() {
326    return isRegionStillOpening(regionInfo, rsServices);
327  }
328}