View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.coordination;
19  
20  import org.apache.commons.logging.Log;
21  import org.apache.commons.logging.LogFactory;
22  import org.apache.hadoop.hbase.classification.InterfaceAudience;
23  import org.apache.hadoop.hbase.CoordinatedStateManager;
24  import org.apache.hadoop.hbase.HRegionInfo;
25  import org.apache.hadoop.hbase.ServerName;
26  import org.apache.hadoop.hbase.executor.EventType;
27  import org.apache.hadoop.hbase.master.AssignmentManager;
28  import org.apache.hadoop.hbase.master.RegionState;
29  import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
30  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
31  import org.apache.hadoop.hbase.regionserver.HRegion;
32  import org.apache.hadoop.hbase.regionserver.RegionServerServices;
33  import org.apache.hadoop.hbase.zookeeper.ZKAssign;
34  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
35  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
36  import org.apache.zookeeper.KeeperException;
37  
38  import java.io.IOException;
39  
40  /**
41   * ZK-based implementation of {@link OpenRegionCoordination}.
42   */
43  @InterfaceAudience.Private
44  public class ZkOpenRegionCoordination implements OpenRegionCoordination {
45    private static final Log LOG = LogFactory.getLog(ZkOpenRegionCoordination.class);
46  
47    private CoordinatedStateManager coordination;
48    private final ZooKeeperWatcher watcher;
49  
50    public ZkOpenRegionCoordination(CoordinatedStateManager coordination,
51                                    ZooKeeperWatcher watcher) {
52      this.coordination = coordination;
53      this.watcher = watcher;
54    }
55  
56    //-------------------------------
57    // Region Server-side operations
58    //-------------------------------
59  
60    /**
61     * @param r Region we're working on.
62     * @return whether znode is successfully transitioned to OPENED state.
63     * @throws java.io.IOException
64     */
65    @Override
66    public boolean transitionToOpened(final HRegion r, OpenRegionDetails ord) throws IOException {
67      ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
68  
69      boolean result = false;
70      HRegionInfo hri = r.getRegionInfo();
71      final String name = hri.getRegionNameAsString();
72      // Finally, Transition ZK node to OPENED
73      try {
74        if (ZKAssign.transitionNodeOpened(watcher, hri,
75          zkOrd.getServerName(), zkOrd.getVersion()) == -1) {
76          String warnMsg = "Completed the OPEN of region " + name +
77            " but when transitioning from " + " OPENING to OPENED ";
78          try {
79            String node = ZKAssign.getNodeName(watcher, hri.getEncodedName());
80            if (ZKUtil.checkExists(watcher, node) < 0) {
81              // if the znode
82              coordination.getServer().abort(warnMsg + "the znode disappeared", null);
83            } else {
84              LOG.warn(warnMsg + "got a version mismatch, someone else clashed; " +
85                "so now unassigning -- closing region on server: " + zkOrd.getServerName());
86            }
87          } catch (KeeperException ke) {
88            coordination.getServer().abort(warnMsg, ke);
89          }
90        } else {
91          LOG.debug("Transitioned " + r.getRegionInfo().getEncodedName() +
92            " to OPENED in zk on " + zkOrd.getServerName());
93          result = true;
94        }
95      } catch (KeeperException e) {
96        LOG.error("Failed transitioning node " + name +
97          " from OPENING to OPENED -- closing region", e);
98      }
99      return result;
100   }
101 
102   /**
103    * Transition ZK node from OFFLINE to OPENING.
104    * @param regionInfo region info instance
105    * @param ord - instance of open region details, for ZK implementation
106    *   will include version Of OfflineNode that needs to be compared
107    *   before changing the node's state from OFFLINE
108    * @return True if successful transition.
109    */
110   @Override
111   public boolean transitionFromOfflineToOpening(HRegionInfo regionInfo,
112                                                 OpenRegionDetails ord) {
113     ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
114 
115     // encoded name is used as znode encoded name in ZK
116     final String encodedName = regionInfo.getEncodedName();
117 
118     // TODO: should also handle transition from CLOSED?
119     try {
120       // Initialize the znode version.
121       zkOrd.setVersion(ZKAssign.transitionNode(watcher, regionInfo,
122         zkOrd.getServerName(), EventType.M_ZK_REGION_OFFLINE,
123         EventType.RS_ZK_REGION_OPENING, zkOrd.getVersionOfOfflineNode()));
124     } catch (KeeperException e) {
125       LOG.error("Error transition from OFFLINE to OPENING for region=" +
126         encodedName, e);
127       zkOrd.setVersion(-1);
128       return false;
129     }
130     boolean b = isGoodVersion(zkOrd);
131     if (!b) {
132       LOG.warn("Failed transition from OFFLINE to OPENING for region=" +
133         encodedName);
134     }
135     return b;
136   }
137 
138   /**
139    * Update our OPENING state in zookeeper.
140    * Do this so master doesn't timeout this region-in-transition.
141    * We may lose the znode ownership during the open.  Currently its
142    * too hard interrupting ongoing region open.  Just let it complete
143    * and check we still have the znode after region open.
144    *
145    * @param context Some context to add to logs if failure
146    * @return True if successful transition.
147    */
148   @Override
149   public boolean tickleOpening(OpenRegionDetails ord, HRegionInfo regionInfo,
150                                RegionServerServices rsServices, final String context) {
151     ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
152     if (!isRegionStillOpening(regionInfo, rsServices)) {
153       LOG.warn("Open region aborted since it isn't opening any more");
154       return false;
155     }
156     // If previous checks failed... do not try again.
157     if (!isGoodVersion(zkOrd)) return false;
158     String encodedName = regionInfo.getEncodedName();
159     try {
160       zkOrd.setVersion(ZKAssign.confirmNodeOpening(watcher,
161           regionInfo, zkOrd.getServerName(), zkOrd.getVersion()));
162     } catch (KeeperException e) {
163       coordination.getServer().abort("Exception refreshing OPENING; region=" + encodedName +
164         ", context=" + context, e);
165       zkOrd.setVersion(-1);
166       return false;
167     }
168     boolean b = isGoodVersion(zkOrd);
169     if (!b) {
170       LOG.warn("Failed refreshing OPENING; region=" + encodedName +
171         ", context=" + context);
172     }
173     return b;
174   }
175 
176   /**
177    * Try to transition to open.
178    *
179    * This is not guaranteed to succeed, we just do our best.
180    *
181    * @param rsServices
182    * @param hri Region we're working on.
183    * @param ord Details about region open task
184    * @return whether znode is successfully transitioned to FAILED_OPEN state.
185    */
186   @Override
187   public boolean tryTransitionFromOfflineToFailedOpen(RegionServerServices rsServices,
188                                                       final HRegionInfo hri,
189                                                       OpenRegionDetails ord) {
190     ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
191     boolean result = false;
192     final String name = hri.getRegionNameAsString();
193     try {
194       LOG.info("Opening of region " + hri + " failed, transitioning" +
195         " from OFFLINE to FAILED_OPEN in ZK, expecting version " +
196         zkOrd.getVersionOfOfflineNode());
197       if (ZKAssign.transitionNode(
198         rsServices.getZooKeeper(), hri,
199         rsServices.getServerName(),
200         EventType.M_ZK_REGION_OFFLINE,
201         EventType.RS_ZK_REGION_FAILED_OPEN,
202         zkOrd.getVersionOfOfflineNode()) == -1) {
203         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
204           "It's likely that the master already timed out this open " +
205           "attempt, and thus another RS already has the region.");
206       } else {
207         result = true;
208       }
209     } catch (KeeperException e) {
210       LOG.error("Failed transitioning node " + name + " from OFFLINE to FAILED_OPEN", e);
211     }
212     return result;
213   }
214 
215   private boolean isGoodVersion(ZkOpenRegionDetails zkOrd) {
216     return zkOrd.getVersion() != -1;
217   }
218 
219   /**
220    * This is not guaranteed to succeed, we just do our best.
221    * @param hri Region we're working on.
222    * @return whether znode is successfully transitioned to FAILED_OPEN state.
223    */
224   @Override
225   public boolean tryTransitionFromOpeningToFailedOpen(final HRegionInfo hri,
226                                                       OpenRegionDetails ord) {
227     ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
228     boolean result = false;
229     final String name = hri.getRegionNameAsString();
230     try {
231       LOG.info("Opening of region " + hri + " failed, transitioning" +
232         " from OPENING to FAILED_OPEN in ZK, expecting version " + zkOrd.getVersion());
233       if (ZKAssign.transitionNode(
234         watcher, hri,
235         zkOrd.getServerName(),
236         EventType.RS_ZK_REGION_OPENING,
237         EventType.RS_ZK_REGION_FAILED_OPEN,
238         zkOrd.getVersion()) == -1) {
239         LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
240           "It's likely that the master already timed out this open " +
241           "attempt, and thus another RS already has the region.");
242       } else {
243         result = true;
244       }
245     } catch (KeeperException e) {
246       LOG.error("Failed transitioning node " + name +
247         " from OPENING to FAILED_OPEN", e);
248     }
249     return result;
250   }
251 
252   /**
253    * Parse ZK-related fields from request.
254    */
255   @Override
256   public OpenRegionCoordination.OpenRegionDetails parseFromProtoRequest(
257       AdminProtos.OpenRegionRequest.RegionOpenInfo regionOpenInfo) {
258     ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
259       new ZkOpenRegionCoordination.ZkOpenRegionDetails();
260 
261     int versionOfOfflineNode = -1;
262     if (regionOpenInfo.hasVersionOfOfflineNode()) {
263       versionOfOfflineNode = regionOpenInfo.getVersionOfOfflineNode();
264     }
265     zkCrd.setVersionOfOfflineNode(versionOfOfflineNode);
266     zkCrd.setServerName(coordination.getServer().getServerName());
267 
268     return zkCrd;
269   }
270 
271   /**
272    * No ZK tracking will be performed for that case.
273    * This method should be used when we want to construct CloseRegionDetails,
274    * but don't want any coordination on that (when it's initiated by regionserver),
275    * so no znode state transitions will be performed.
276    */
277   @Override
278   public OpenRegionCoordination.OpenRegionDetails getDetailsForNonCoordinatedOpening() {
279     ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
280       new ZkOpenRegionCoordination.ZkOpenRegionDetails();
281     zkCrd.setVersionOfOfflineNode(-1);
282     zkCrd.setServerName(coordination.getServer().getServerName());
283 
284     return zkCrd;
285   }
286 
287   //--------------------------
288   // HMaster-side operations
289   //--------------------------
290   @Override
291   public boolean commitOpenOnMasterSide(AssignmentManager assignmentManager,
292                                         HRegionInfo regionInfo,
293                                         OpenRegionDetails ord) {
294     boolean committedSuccessfully = true;
295 
296     // Code to defend against case where we get SPLIT before region open
297     // processing completes; temporary till we make SPLITs go via zk -- 0.92.
298     RegionState regionState = assignmentManager.getRegionStates()
299       .getRegionTransitionState(regionInfo.getEncodedName());
300     boolean openedNodeDeleted = false;
301     if (regionState != null && regionState.isOpened()) {
302       openedNodeDeleted = deleteOpenedNode(regionInfo, ord);
303       if (!openedNodeDeleted) {
304         LOG.error("Znode of region " + regionInfo.getShortNameToLog() + " could not be deleted.");
305       }
306     } else {
307       LOG.warn("Skipping the onlining of " + regionInfo.getShortNameToLog() +
308         " because regions is NOT in RIT -- presuming this is because it SPLIT");
309     }
310     if (!openedNodeDeleted) {
311       if (assignmentManager.getTableStateManager().isTableState(regionInfo.getTable(),
312           ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING)) {
313         debugLog(regionInfo, "Opened region "
314           + regionInfo.getShortNameToLog() + " but "
315           + "this table is disabled, triggering close of region");
316         committedSuccessfully = false;
317       }
318     }
319 
320     return committedSuccessfully;
321   }
322 
323   private boolean deleteOpenedNode(HRegionInfo regionInfo, OpenRegionDetails ord) {
324     ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
325     int expectedVersion = zkOrd.getVersion();
326 
327     debugLog(regionInfo, "Handling OPENED of " +
328       regionInfo.getShortNameToLog() + " from " + zkOrd.getServerName().toString() +
329       "; deleting unassigned node");
330     try {
331       // delete the opened znode only if the version matches.
332       return ZKAssign.deleteNode(this.coordination.getServer().getZooKeeper(),
333         regionInfo.getEncodedName(), EventType.RS_ZK_REGION_OPENED, expectedVersion);
334     } catch(KeeperException.NoNodeException e){
335       // Getting no node exception here means that already the region has been opened.
336       LOG.warn("The znode of the region " + regionInfo.getShortNameToLog() +
337         " would have already been deleted");
338       return false;
339     } catch (KeeperException e) {
340       this.coordination.getServer().abort("Error deleting OPENED node in ZK (" +
341         regionInfo.getRegionNameAsString() + ")", e);
342     }
343     return false;
344   }
345 
346   private void debugLog(HRegionInfo region, String string) {
347     if (region.isMetaTable()) {
348       LOG.info(string);
349     } else {
350       LOG.debug(string);
351     }
352   }
353 
354   // Additional classes and helper methods
355 
356   /**
357    * ZK-based implementation. Has details about whether the state transition should be
358    * reflected in ZK, as well as expected version of znode.
359    */
360   public static class ZkOpenRegionDetails implements OpenRegionCoordination.OpenRegionDetails {
361 
362     // We get version of our znode at start of open process and monitor it across
363     // the total open. We'll fail the open if someone hijacks our znode; we can
364     // tell this has happened if version is not as expected.
365     private volatile int version = -1;
366 
367     //version of the offline node that was set by the master
368     private volatile int versionOfOfflineNode = -1;
369 
370     /**
371      * Server name the handler is running on.
372      */
373     private ServerName serverName;
374 
375     public ZkOpenRegionDetails() {
376     }
377 
378     public ZkOpenRegionDetails(int versionOfOfflineNode) {
379       this.versionOfOfflineNode = versionOfOfflineNode;
380     }
381 
382     public int getVersionOfOfflineNode() {
383       return versionOfOfflineNode;
384     }
385 
386     public void setVersionOfOfflineNode(int versionOfOfflineNode) {
387       this.versionOfOfflineNode = versionOfOfflineNode;
388     }
389 
390     public int getVersion() {
391       return version;
392     }
393 
394     public void setVersion(int version) {
395       this.version = version;
396     }
397 
398     @Override
399     public ServerName getServerName() {
400       return serverName;
401     }
402 
403     @Override
404     public void setServerName(ServerName serverName) {
405       this.serverName = serverName;
406     }
407   }
408 
409   private boolean isRegionStillOpening(HRegionInfo regionInfo, RegionServerServices rsServices) {
410     byte[] encodedName = regionInfo.getEncodedNameAsBytes();
411     Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
412     return Boolean.TRUE.equals(action); // true means opening for RIT
413   }
414 }