View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.regionserver.handler;
20  
21  import java.io.IOException;
22  import java.util.concurrent.atomic.AtomicBoolean;
23  
24  import org.apache.commons.logging.Log;
25  import org.apache.commons.logging.LogFactory;
26  import org.apache.hadoop.classification.InterfaceAudience;
27  import org.apache.hadoop.hbase.HRegionInfo;
28  import org.apache.hadoop.hbase.HTableDescriptor;
29  import org.apache.hadoop.hbase.Server;
30  import org.apache.hadoop.hbase.executor.EventHandler;
31  import org.apache.hadoop.hbase.executor.EventType;
32  import org.apache.hadoop.hbase.master.AssignmentManager;
33  import org.apache.hadoop.hbase.regionserver.HRegion;
34  import org.apache.hadoop.hbase.regionserver.RegionServerAccounting;
35  import org.apache.hadoop.hbase.regionserver.RegionServerServices;
36  import org.apache.hadoop.hbase.util.CancelableProgressable;
37  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
38  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
39  import org.apache.zookeeper.KeeperException;
40  /**
41   * Handles opening of a region on a region server.
42   * <p>
43   * This is executed after receiving an OPEN RPC from the master or client.
44   */
45  @InterfaceAudience.Private
46  public class OpenRegionHandler extends EventHandler {
47    private static final Log LOG = LogFactory.getLog(OpenRegionHandler.class);
48  
49    protected final RegionServerServices rsServices;
50  
51    private final HRegionInfo regionInfo;
52    private final HTableDescriptor htd;
53  
54    private boolean tomActivated;
55    private int assignmentTimeout;
56  
57    // We get version of our znode at start of open process and monitor it across
58    // the total open. We'll fail the open if someone hijacks our znode; we can
59    // tell this has happened if version is not as expected.
60    private volatile int version = -1;
61    //version of the offline node that was set by the master
62    private volatile int versionOfOfflineNode = -1;
63  
64    public OpenRegionHandler(final Server server,
65        final RegionServerServices rsServices, HRegionInfo regionInfo,
66        HTableDescriptor htd) {
67      this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION, -1);
68    }
69    public OpenRegionHandler(final Server server,
70        final RegionServerServices rsServices, HRegionInfo regionInfo,
71        HTableDescriptor htd, int versionOfOfflineNode) {
72      this(server, rsServices, regionInfo, htd, EventType.M_RS_OPEN_REGION,
73          versionOfOfflineNode);
74    }
75  
76    protected OpenRegionHandler(final Server server,
77        final RegionServerServices rsServices, final HRegionInfo regionInfo,
78        final HTableDescriptor htd, EventType eventType,
79        final int versionOfOfflineNode) {
80      super(server, eventType);
81      this.rsServices = rsServices;
82      this.regionInfo = regionInfo;
83      this.htd = htd;
84      this.versionOfOfflineNode = versionOfOfflineNode;
85      tomActivated = this.server.getConfiguration().
86        getBoolean(AssignmentManager.ASSIGNMENT_TIMEOUT_MANAGEMENT,
87          AssignmentManager.DEFAULT_ASSIGNMENT_TIMEOUT_MANAGEMENT);
88      assignmentTimeout = this.server.getConfiguration().
89        getInt(AssignmentManager.ASSIGNMENT_TIMEOUT,
90          AssignmentManager.DEFAULT_ASSIGNMENT_TIMEOUT_DEFAULT);
91    }
92  
93    public HRegionInfo getRegionInfo() {
94      return regionInfo;
95    }
96  
97    @Override
98    public void process() throws IOException {
99      boolean openSuccessful = false;
100     boolean transitionedToOpening = false;
101     final String regionName = regionInfo.getRegionNameAsString();
102     HRegion region = null;
103 
104     try {
105       if (this.server.isStopped() || this.rsServices.isStopping()) {
106         return;
107       }
108       final String encodedName = regionInfo.getEncodedName();
109 
110       // 3 different difficult situations can occur
111       // 1) The opening was cancelled. This is an expected situation
112       // 2) The region was hijacked, we no longer have the znode
113       // 3) The region is now marked as online while we're suppose to open. This would be a bug.
114 
115       // Check that this region is not already online
116       if (this.rsServices.getFromOnlineRegions(encodedName) != null) {
117         LOG.error("Region " + encodedName +
118             " was already online when we started processing the opening. " +
119             "Marking this new attempt as failed");
120         return;
121       }
122 
123       // Check that we're still supposed to open the region and transition.
124       // If fails, just return.  Someone stole the region from under us.
125       // Calling transitionZookeeperOfflineToOpening initializes this.version.
126       if (!isRegionStillOpening()){
127         LOG.error("Region " + encodedName + " opening cancelled");
128         return;
129       }
130 
131       if (!transitionZookeeperOfflineToOpening(encodedName, versionOfOfflineNode)) {
132         LOG.warn("Region was hijacked? Opening cancelled for encodedName=" + encodedName);
133         // This is a desperate attempt: the znode is unlikely to be ours. But we can't do more.
134         return;
135       }
136       transitionedToOpening = true;
137       // Open region.  After a successful open, failures in subsequent
138       // processing needs to do a close as part of cleanup.
139       region = openRegion();
140       if (region == null) {
141         return;
142       }
143 
144       boolean failed = true;
145       if (tickleOpening("post_region_open")) {
146         if (updateMeta(region)) {
147           failed = false;
148         }
149       }
150       if (failed || this.server.isStopped() ||
151           this.rsServices.isStopping()) {
152         return;
153       }
154 
155 
156       if (!isRegionStillOpening() || !transitionToOpened(region)) {
157         // If we fail to transition to opened, it's because of one of two cases:
158         //    (a) we lost our ZK lease
159         // OR (b) someone else opened the region before us
160         // OR (c) someone cancelled the open
161         // In all cases, we try to transition to failed_open to be safe.
162         return;
163       }
164 
165       // We have a znode in the opened state now. We can't really delete it as the master job.
166       // Transitioning to failed open would create a race condition if the master has already
167       // acted the transition to opened.
168       // Cancelling the open is dangerous, because we would have a state where the master thinks
169       // the region is opened while the region is actually closed. It is a dangerous state
170       // to be in. For this reason, from now on, we're not going back. There is a message in the
171       // finally close to let the admin knows where we stand.
172 
173 
174       // Successful region open, and add it to OnlineRegions
175       this.rsServices.addToOnlineRegions(region);
176       openSuccessful = true;
177 
178       // Done!  Successful region open
179       LOG.debug("Opened " + regionName + " on " +
180         this.server.getServerName());
181 
182 
183     } finally {
184       // Do all clean up here
185       if (!openSuccessful) {
186         doCleanUpOnFailedOpen(region, transitionedToOpening);
187       }
188       final Boolean current = this.rsServices.getRegionsInTransitionInRS().
189           remove(this.regionInfo.getEncodedNameAsBytes());
190 
191       // Let's check if we have met a race condition on open cancellation....
192       // A better solution would be to not have any race condition.
193       // this.rsServices.getRegionsInTransitionInRS().remove(
194       //  this.regionInfo.getEncodedNameAsBytes(), Boolean.TRUE);
195       // would help, but we would still have a consistency issue to manage with
196       // 1) this.rsServices.addToOnlineRegions(region);
197       // 2) the ZK state.
198       if (openSuccessful) {
199         if (current == null) { // Should NEVER happen, but let's be paranoid.
200           LOG.error("Bad state: we've just opened a region that was NOT in transition. Region="
201               + regionName);
202         } else if (Boolean.FALSE.equals(current)) { // Can happen, if we're
203                                                     // really unlucky.
204           LOG.error("Race condition: we've finished to open a region, while a close was requested "
205               + " on region=" + regionName + ". It can be a critical error, as a region that"
206               + " should be closed is now opened. Closing it now");
207           cleanupFailedOpen(region);
208         }
209       }
210     }
211   }
212 
213   private void doCleanUpOnFailedOpen(HRegion region, boolean transitionedToOpening)
214       throws IOException {
215     if (transitionedToOpening) {
216       try {
217         if (region != null) {
218           cleanupFailedOpen(region);
219         }
220       } finally {
221         // Even if cleanupFailed open fails we need to do this transition
222         // See HBASE-7698
223         tryTransitionFromOpeningToFailedOpen(regionInfo);
224       }
225     } else {
226       // If still transition to OPENING is not done, we need to transition znode
227       // to FAILED_OPEN
228       tryTransitionFromOfflineToFailedOpen(this.rsServices, regionInfo, versionOfOfflineNode);
229     }
230   }
231 
232   /**
233    * Update ZK or META.  This can take a while if for example the
234    * hbase:meta is not available -- if server hosting hbase:meta crashed and we are
235    * waiting on it to come back -- so run in a thread and keep updating znode
236    * state meantime so master doesn't timeout our region-in-transition.
237    * Caller must cleanup region if this fails.
238    */
239   boolean updateMeta(final HRegion r) {
240     if (this.server.isStopped() || this.rsServices.isStopping()) {
241       return false;
242     }
243     // Object we do wait/notify on.  Make it boolean.  If set, we're done.
244     // Else, wait.
245     final AtomicBoolean signaller = new AtomicBoolean(false);
246     PostOpenDeployTasksThread t = new PostOpenDeployTasksThread(r,
247       this.server, this.rsServices, signaller);
248     t.start();
249     // Total timeout for meta edit.  If we fail adding the edit then close out
250     // the region and let it be assigned elsewhere.
251     long timeout = assignmentTimeout * 10;
252     long now = System.currentTimeMillis();
253     long endTime = now + timeout;
254     // Let our period at which we update OPENING state to be be 1/3rd of the
255     // regions-in-transition timeout period.
256     long period = Math.max(1, assignmentTimeout/ 3);
257     long lastUpdate = now;
258     boolean tickleOpening = true;
259     while (!signaller.get() && t.isAlive() && !this.server.isStopped() &&
260         !this.rsServices.isStopping() && (endTime > now)) {
261       long elapsed = now - lastUpdate;
262       if (elapsed > period) {
263         // Only tickle OPENING if postOpenDeployTasks is taking some time.
264         lastUpdate = now;
265         tickleOpening = tickleOpening("post_open_deploy");
266       }
267       synchronized (signaller) {
268         try {
269           signaller.wait(period);
270         } catch (InterruptedException e) {
271           // Go to the loop check.
272         }
273       }
274       now = System.currentTimeMillis();
275     }
276     // Is thread still alive?  We may have left above loop because server is
277     // stopping or we timed out the edit.  Is so, interrupt it.
278     if (t.isAlive()) {
279       if (!signaller.get()) {
280         // Thread still running; interrupt
281         LOG.debug("Interrupting thread " + t);
282         t.interrupt();
283       }
284       try {
285         t.join();
286       } catch (InterruptedException ie) {
287         LOG.warn("Interrupted joining " +
288           r.getRegionInfo().getRegionNameAsString(), ie);
289         Thread.currentThread().interrupt();
290       }
291     }
292 
293     // Was there an exception opening the region?  This should trigger on
294     // InterruptedException too.  If so, we failed.  Even if tickle opening fails
295     // then it is a failure.
296     return ((!Thread.interrupted() && t.getException() == null) && tickleOpening);
297   }
298 
299   /**
300    * Thread to run region post open tasks. Call {@link #getException()} after
301    * the thread finishes to check for exceptions running
302    * {@link RegionServerServices#postOpenDeployTasks(
303    * HRegion, org.apache.hadoop.hbase.catalog.CatalogTracker)}
304    * .
305    */
306   static class PostOpenDeployTasksThread extends Thread {
307     private Exception exception = null;
308     private final Server server;
309     private final RegionServerServices services;
310     private final HRegion region;
311     private final AtomicBoolean signaller;
312 
313     PostOpenDeployTasksThread(final HRegion region, final Server server,
314         final RegionServerServices services, final AtomicBoolean signaller) {
315       super("PostOpenDeployTasks:" + region.getRegionInfo().getEncodedName());
316       this.setDaemon(true);
317       this.server = server;
318       this.services = services;
319       this.region = region;
320       this.signaller = signaller;
321     }
322 
323     public void run() {
324       try {
325         this.services.postOpenDeployTasks(this.region,
326           this.server.getCatalogTracker());
327       } catch (KeeperException e) {
328         server.abort("Exception running postOpenDeployTasks; region=" +
329             this.region.getRegionInfo().getEncodedName(), e);
330       } catch (Exception e) {
331         LOG.warn("Exception running postOpenDeployTasks; region=" +
332           this.region.getRegionInfo().getEncodedName(), e);
333         this.exception = e;
334       }
335       // We're done.  Set flag then wake up anyone waiting on thread to complete.
336       this.signaller.set(true);
337       synchronized (this.signaller) {
338         this.signaller.notify();
339       }
340     }
341 
342     /**
343      * @return Null or the run exception; call this method after thread is done.
344      */
345     Exception getException() {
346       return this.exception;
347     }
348   }
349 
350 
351   /**
352    * @param r Region we're working on.
353    * @return whether znode is successfully transitioned to OPENED state.
354    * @throws IOException
355    */
356   boolean transitionToOpened(final HRegion r) throws IOException {
357     boolean result = false;
358     HRegionInfo hri = r.getRegionInfo();
359     final String name = hri.getRegionNameAsString();
360     // Finally, Transition ZK node to OPENED
361     try {
362       if (ZKAssign.transitionNodeOpened(this.server.getZooKeeper(), hri,
363           this.server.getServerName(), this.version) == -1) {
364         String warnMsg = "Completed the OPEN of region " + name +
365           " but when transitioning from " + " OPENING to OPENED ";
366         try {
367           String node = ZKAssign.getNodeName(this.server.getZooKeeper(), hri.getEncodedName());
368           if (ZKUtil.checkExists(this.server.getZooKeeper(), node) < 0) {
369             // if the znode 
370             rsServices.abort(warnMsg + "the znode disappeared", null);
371           } else {
372             LOG.warn(warnMsg + "got a version mismatch, someone else clashed; " +
373           "so now unassigning -- closing region on server: " + this.server.getServerName());
374           }
375         } catch (KeeperException ke) {
376           rsServices.abort(warnMsg, ke);
377         }
378       } else {
379         LOG.debug("Transitioned " + r.getRegionInfo().getEncodedName() +
380           " to OPENED in zk on " + this.server.getServerName());
381         result = true;
382       }
383     } catch (KeeperException e) {
384       LOG.error("Failed transitioning node " + name +
385         " from OPENING to OPENED -- closing region", e);
386     }
387     return result;
388   }
389 
390   /**
391    * This is not guaranteed to succeed, we just do our best.
392    * @param hri Region we're working on.
393    * @return whether znode is successfully transitioned to FAILED_OPEN state.
394    */
395   private boolean tryTransitionFromOpeningToFailedOpen(final HRegionInfo hri) {
396     boolean result = false;
397     final String name = hri.getRegionNameAsString();
398     try {
399       LOG.info("Opening of region " + hri + " failed, transitioning" +
400           " from OPENING to FAILED_OPEN in ZK, expecting version " + this.version);
401       if (ZKAssign.transitionNode(
402           this.server.getZooKeeper(), hri,
403           this.server.getServerName(),
404           EventType.RS_ZK_REGION_OPENING,
405           EventType.RS_ZK_REGION_FAILED_OPEN,
406           this.version) == -1) {
407         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
408             "It's likely that the master already timed out this open " +
409             "attempt, and thus another RS already has the region.");
410       } else {
411         result = true;
412       }
413     } catch (KeeperException e) {
414       LOG.error("Failed transitioning node " + name +
415         " from OPENING to FAILED_OPEN", e);
416     }
417     return result;
418   }
419 
420   /**
421    * Try to transition to open. This function is static to make it usable before creating the
422    *  handler.
423    *
424    * This is not guaranteed to succeed, we just do our best.
425    *
426    * @param rsServices
427    * @param hri Region we're working on.
428    * @param versionOfOfflineNode version to checked.
429    * @return whether znode is successfully transitioned to FAILED_OPEN state.
430    */
431   public static boolean tryTransitionFromOfflineToFailedOpen(RegionServerServices rsServices,
432        final HRegionInfo hri, final int versionOfOfflineNode) {
433     boolean result = false;
434     final String name = hri.getRegionNameAsString();
435     try {
436       LOG.info("Opening of region " + hri + " failed, transitioning" +
437           " from OFFLINE to FAILED_OPEN in ZK, expecting version " + versionOfOfflineNode);
438       if (ZKAssign.transitionNode(
439           rsServices.getZooKeeper(), hri,
440           rsServices.getServerName(),
441           EventType.M_ZK_REGION_OFFLINE,
442           EventType.RS_ZK_REGION_FAILED_OPEN,
443           versionOfOfflineNode) == -1) {
444         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
445             "It's likely that the master already timed out this open " +
446             "attempt, and thus another RS already has the region.");
447       } else {
448         result = true;
449       }
450     } catch (KeeperException e) {
451       LOG.error("Failed transitioning node " + name + " from OFFLINE to FAILED_OPEN", e);
452     }
453     return result;
454   }
455 
456 
457   /**
458    * @return Instance of HRegion if successful open else null.
459    */
460   HRegion openRegion() {
461     HRegion region = null;
462     try {
463       // Instantiate the region.  This also periodically tickles our zk OPENING
464       // state so master doesn't timeout this region in transition.
465       region = HRegion.openHRegion(this.regionInfo, this.htd,
466           this.rsServices.getWAL(this.regionInfo),
467           this.server.getConfiguration(),
468           this.rsServices,
469         new CancelableProgressable() {
470           public boolean progress() {
471             // We may lose the znode ownership during the open.  Currently its
472             // too hard interrupting ongoing region open.  Just let it complete
473             // and check we still have the znode after region open.
474             return tickleOpening("open_region_progress");
475           }
476         });
477     } catch (Throwable t) {
478       // We failed open. Our caller will see the 'null' return value
479       // and transition the node back to FAILED_OPEN. If that fails,
480       // we rely on the Timeout Monitor in the master to reassign.
481       LOG.error(
482           "Failed open of region=" + this.regionInfo.getRegionNameAsString()
483               + ", starting to roll back the global memstore size.", t);
484       // Decrease the global memstore size.
485       if (this.rsServices != null) {
486         RegionServerAccounting rsAccounting =
487           this.rsServices.getRegionServerAccounting();
488         if (rsAccounting != null) {
489           rsAccounting.rollbackRegionReplayEditsSize(this.regionInfo.getRegionName());
490         }
491       }
492     }
493     return region;
494   }
495 
496   void cleanupFailedOpen(final HRegion region) throws IOException {
497     if (region != null) {
498       this.rsServices.removeFromOnlineRegions(region, null);
499       region.close();
500     }
501   }
502 
503   private boolean isRegionStillOpening() {
504     byte[] encodedName = regionInfo.getEncodedNameAsBytes();
505     Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
506     return Boolean.TRUE.equals(action); // true means opening for RIT
507   }
508 
509   /**
510    * Transition ZK node from OFFLINE to OPENING.
511    * @param encodedName Name of the znode file (Region encodedName is the znode
512    * name).
513    * @param versionOfOfflineNode - version Of OfflineNode that needs to be compared
514    * before changing the node's state from OFFLINE
515    * @return True if successful transition.
516    */
517   boolean transitionZookeeperOfflineToOpening(final String encodedName,
518       int versionOfOfflineNode) {
519     // TODO: should also handle transition from CLOSED?
520     try {
521       // Initialize the znode version.
522       this.version = ZKAssign.transitionNode(server.getZooKeeper(), regionInfo,
523           server.getServerName(), EventType.M_ZK_REGION_OFFLINE,
524           EventType.RS_ZK_REGION_OPENING, versionOfOfflineNode);
525     } catch (KeeperException e) {
526       LOG.error("Error transition from OFFLINE to OPENING for region=" +
527         encodedName, e);
528       this.version = -1;
529       return false;
530     }
531     boolean b = isGoodVersion();
532     if (!b) {
533       LOG.warn("Failed transition from OFFLINE to OPENING for region=" +
534         encodedName);
535     }
536     return b;
537   }
538 
539   /**
540    * Update our OPENING state in zookeeper.
541    * Do this so master doesn't timeout this region-in-transition.
542    * @param context Some context to add to logs if failure
543    * @return True if successful transition.
544    */
545   boolean tickleOpening(final String context) {
546     if (!isRegionStillOpening()) {
547       LOG.warn("Open region aborted since it isn't opening any more");
548       return false;
549     }
550     // If previous checks failed... do not try again.
551     if (!isGoodVersion()) return false;
552     String encodedName = this.regionInfo.getEncodedName();
553     try {
554       this.version =
555         ZKAssign.retransitionNodeOpening(server.getZooKeeper(),
556           this.regionInfo, this.server.getServerName(), this.version, tomActivated);
557     } catch (KeeperException e) {
558       server.abort("Exception refreshing OPENING; region=" + encodedName +
559         ", context=" + context, e);
560       this.version = -1;
561       return false;
562     }
563     boolean b = isGoodVersion();
564     if (!b) {
565       LOG.warn("Failed refreshing OPENING; region=" + encodedName +
566         ", context=" + context);
567     }
568     return b;
569   }
570 
571   private boolean isGoodVersion() {
572     return this.version != -1;
573   }
574 }