1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18 package org.apache.hadoop.hbase.coordination;
19
20 import org.apache.commons.logging.Log;
21 import org.apache.commons.logging.LogFactory;
22 import org.apache.hadoop.hbase.classification.InterfaceAudience;
23 import org.apache.hadoop.hbase.CoordinatedStateManager;
24 import org.apache.hadoop.hbase.HRegionInfo;
25 import org.apache.hadoop.hbase.ServerName;
26 import org.apache.hadoop.hbase.executor.EventType;
27 import org.apache.hadoop.hbase.master.AssignmentManager;
28 import org.apache.hadoop.hbase.master.RegionState;
29 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
30 import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
31 import org.apache.hadoop.hbase.regionserver.HRegion;
32 import org.apache.hadoop.hbase.regionserver.RegionServerServices;
33 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
34 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
35 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
36 import org.apache.zookeeper.KeeperException;
37
38 import java.io.IOException;
39
40
41
42
43 @InterfaceAudience.Private
44 public class ZkOpenRegionCoordination implements OpenRegionCoordination {
45 private static final Log LOG = LogFactory.getLog(ZkOpenRegionCoordination.class);
46
47 private CoordinatedStateManager coordination;
48 private final ZooKeeperWatcher watcher;
49
/**
 * Builds a coordination instance that performs region-open transitions through ZooKeeper.
 *
 * @param coordination coordinated state manager; its server is consulted elsewhere in this
 *     class for the local server name and for aborting on unrecoverable errors
 * @param watcher ZooKeeper watcher handed to the {@code ZKAssign}/{@code ZKUtil} calls
 */
public ZkOpenRegionCoordination(CoordinatedStateManager coordination,
    ZooKeeperWatcher watcher) {
  this.watcher = watcher;
  this.coordination = coordination;
}
55
56
57
58
59
60
61
62
63
64
65 @Override
66 public boolean transitionToOpened(final HRegion r, OpenRegionDetails ord) throws IOException {
67 ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
68
69 boolean result = false;
70 HRegionInfo hri = r.getRegionInfo();
71 final String name = hri.getRegionNameAsString();
72
73 try {
74 if (ZKAssign.transitionNodeOpened(watcher, hri,
75 zkOrd.getServerName(), zkOrd.getVersion()) == -1) {
76 String warnMsg = "Completed the OPEN of region " + name +
77 " but when transitioning from " + " OPENING to OPENED ";
78 try {
79 String node = ZKAssign.getNodeName(watcher, hri.getEncodedName());
80 if (ZKUtil.checkExists(watcher, node) < 0) {
81
82 coordination.getServer().abort(warnMsg + "the znode disappeared", null);
83 } else {
84 LOG.warn(warnMsg + "got a version mismatch, someone else clashed; " +
85 "so now unassigning -- closing region on server: " + zkOrd.getServerName());
86 }
87 } catch (KeeperException ke) {
88 coordination.getServer().abort(warnMsg, ke);
89 }
90 } else {
91 LOG.debug("Transitioned " + r.getRegionInfo().getEncodedName() +
92 " to OPENED in zk on " + zkOrd.getServerName());
93 result = true;
94 }
95 } catch (KeeperException e) {
96 LOG.error("Failed transitioning node " + name +
97 " from OPENING to OPENED -- closing region", e);
98 }
99 return result;
100 }
101
102
103
104
105
106
107
108
109
110 @Override
111 public boolean transitionFromOfflineToOpening(HRegionInfo regionInfo,
112 OpenRegionDetails ord) {
113 ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
114
115
116 final String encodedName = regionInfo.getEncodedName();
117
118
119 try {
120
121 zkOrd.setVersion(ZKAssign.transitionNode(watcher, regionInfo,
122 zkOrd.getServerName(), EventType.M_ZK_REGION_OFFLINE,
123 EventType.RS_ZK_REGION_OPENING, zkOrd.getVersionOfOfflineNode()));
124 } catch (KeeperException e) {
125 LOG.error("Error transition from OFFLINE to OPENING for region=" +
126 encodedName, e);
127 zkOrd.setVersion(-1);
128 return false;
129 }
130 boolean b = isGoodVersion(zkOrd);
131 if (!b) {
132 LOG.warn("Failed transition from OFFLINE to OPENING for region=" +
133 encodedName);
134 }
135 return b;
136 }
137
138
139
140
141
142
143
144
145
146
147
148 @Override
149 public boolean tickleOpening(OpenRegionDetails ord, HRegionInfo regionInfo,
150 RegionServerServices rsServices, final String context) {
151 ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
152 if (!isRegionStillOpening(regionInfo, rsServices)) {
153 LOG.warn("Open region aborted since it isn't opening any more");
154 return false;
155 }
156
157 if (!isGoodVersion(zkOrd)) return false;
158 String encodedName = regionInfo.getEncodedName();
159 try {
160 zkOrd.setVersion(ZKAssign.confirmNodeOpening(watcher,
161 regionInfo, zkOrd.getServerName(), zkOrd.getVersion()));
162 } catch (KeeperException e) {
163 coordination.getServer().abort("Exception refreshing OPENING; region=" + encodedName +
164 ", context=" + context, e);
165 zkOrd.setVersion(-1);
166 return false;
167 }
168 boolean b = isGoodVersion(zkOrd);
169 if (!b) {
170 LOG.warn("Failed refreshing OPENING; region=" + encodedName +
171 ", context=" + context);
172 }
173 return b;
174 }
175
176
177
178
179
180
181
182
183
184
185
186 @Override
187 public boolean tryTransitionFromOfflineToFailedOpen(RegionServerServices rsServices,
188 final HRegionInfo hri,
189 OpenRegionDetails ord) {
190 ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
191 boolean result = false;
192 final String name = hri.getRegionNameAsString();
193 try {
194 LOG.info("Opening of region " + hri + " failed, transitioning" +
195 " from OFFLINE to FAILED_OPEN in ZK, expecting version " +
196 zkOrd.getVersionOfOfflineNode());
197 if (ZKAssign.transitionNode(
198 rsServices.getZooKeeper(), hri,
199 rsServices.getServerName(),
200 EventType.M_ZK_REGION_OFFLINE,
201 EventType.RS_ZK_REGION_FAILED_OPEN,
202 zkOrd.getVersionOfOfflineNode()) == -1) {
203 LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
204 "It's likely that the master already timed out this open " +
205 "attempt, and thus another RS already has the region.");
206 } else {
207 result = true;
208 }
209 } catch (KeeperException e) {
210 LOG.error("Failed transitioning node " + name + " from OFFLINE to FAILED_OPEN", e);
211 }
212 return result;
213 }
214
215 private boolean isGoodVersion(ZkOpenRegionDetails zkOrd) {
216 return zkOrd.getVersion() != -1;
217 }
218
219
220
221
222
223
224 @Override
225 public boolean tryTransitionFromOpeningToFailedOpen(final HRegionInfo hri,
226 OpenRegionDetails ord) {
227 ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
228 boolean result = false;
229 final String name = hri.getRegionNameAsString();
230 try {
231 LOG.info("Opening of region " + hri + " failed, transitioning" +
232 " from OPENING to FAILED_OPEN in ZK, expecting version " + zkOrd.getVersion());
233 if (ZKAssign.transitionNode(
234 watcher, hri,
235 zkOrd.getServerName(),
236 EventType.RS_ZK_REGION_OPENING,
237 EventType.RS_ZK_REGION_FAILED_OPEN,
238 zkOrd.getVersion()) == -1) {
239 LOG.warn("Unable to mark region " + hri + " as FAILED_OPEN. " +
240 "It's likely that the master already timed out this open " +
241 "attempt, and thus another RS already has the region.");
242 } else {
243 result = true;
244 }
245 } catch (KeeperException e) {
246 LOG.error("Failed transitioning node " + name +
247 " from OPENING to FAILED_OPEN", e);
248 }
249 return result;
250 }
251
252
253
254
255 @Override
256 public OpenRegionCoordination.OpenRegionDetails parseFromProtoRequest(
257 AdminProtos.OpenRegionRequest.RegionOpenInfo regionOpenInfo) {
258 ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
259 new ZkOpenRegionCoordination.ZkOpenRegionDetails();
260
261 int versionOfOfflineNode = -1;
262 if (regionOpenInfo.hasVersionOfOfflineNode()) {
263 versionOfOfflineNode = regionOpenInfo.getVersionOfOfflineNode();
264 }
265 zkCrd.setVersionOfOfflineNode(versionOfOfflineNode);
266 zkCrd.setServerName(coordination.getServer().getServerName());
267
268 return zkCrd;
269 }
270
271
272
273
274
275
276
277 @Override
278 public OpenRegionCoordination.OpenRegionDetails getDetailsForNonCoordinatedOpening() {
279 ZkOpenRegionCoordination.ZkOpenRegionDetails zkCrd =
280 new ZkOpenRegionCoordination.ZkOpenRegionDetails();
281 zkCrd.setVersionOfOfflineNode(-1);
282 zkCrd.setServerName(coordination.getServer().getServerName());
283
284 return zkCrd;
285 }
286
287
288
289
290 @Override
291 public boolean commitOpenOnMasterSide(AssignmentManager assignmentManager,
292 HRegionInfo regionInfo,
293 OpenRegionDetails ord) {
294 boolean committedSuccessfully = true;
295
296
297
298 RegionState regionState = assignmentManager.getRegionStates()
299 .getRegionTransitionState(regionInfo.getEncodedName());
300 boolean openedNodeDeleted = false;
301 if (regionState != null && regionState.isOpened()) {
302 openedNodeDeleted = deleteOpenedNode(regionInfo, ord);
303 if (!openedNodeDeleted) {
304 LOG.error("Znode of region " + regionInfo.getShortNameToLog() + " could not be deleted.");
305 }
306 } else {
307 LOG.warn("Skipping the onlining of " + regionInfo.getShortNameToLog() +
308 " because regions is NOT in RIT -- presuming this is because it SPLIT");
309 }
310 if (!openedNodeDeleted) {
311 if (assignmentManager.getTableStateManager().isTableState(regionInfo.getTable(),
312 ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING)) {
313 debugLog(regionInfo, "Opened region "
314 + regionInfo.getShortNameToLog() + " but "
315 + "this table is disabled, triggering close of region");
316 committedSuccessfully = false;
317 }
318 }
319
320 return committedSuccessfully;
321 }
322
323 private boolean deleteOpenedNode(HRegionInfo regionInfo, OpenRegionDetails ord) {
324 ZkOpenRegionDetails zkOrd = (ZkOpenRegionDetails) ord;
325 int expectedVersion = zkOrd.getVersion();
326
327 debugLog(regionInfo, "Handling OPENED of " +
328 regionInfo.getShortNameToLog() + " from " + zkOrd.getServerName().toString() +
329 "; deleting unassigned node");
330 try {
331
332 return ZKAssign.deleteNode(this.coordination.getServer().getZooKeeper(),
333 regionInfo.getEncodedName(), EventType.RS_ZK_REGION_OPENED, expectedVersion);
334 } catch(KeeperException.NoNodeException e){
335
336 LOG.warn("The znode of the region " + regionInfo.getShortNameToLog() +
337 " would have already been deleted");
338 return false;
339 } catch (KeeperException e) {
340 this.coordination.getServer().abort("Error deleting OPENED node in ZK (" +
341 regionInfo.getRegionNameAsString() + ")", e);
342 }
343 return false;
344 }
345
346 private void debugLog(HRegionInfo region, String string) {
347 if (region.isMetaTable()) {
348 LOG.info(string);
349 } else {
350 LOG.debug(string);
351 }
352 }
353
354
355
356
357
358
359
360 public static class ZkOpenRegionDetails implements OpenRegionCoordination.OpenRegionDetails {
361
362
363
364
365 private volatile int version = -1;
366
367
368 private volatile int versionOfOfflineNode = -1;
369
370
371
372
373 private ServerName serverName;
374
375 public ZkOpenRegionDetails() {
376 }
377
378 public ZkOpenRegionDetails(int versionOfOfflineNode) {
379 this.versionOfOfflineNode = versionOfOfflineNode;
380 }
381
382 public int getVersionOfOfflineNode() {
383 return versionOfOfflineNode;
384 }
385
386 public void setVersionOfOfflineNode(int versionOfOfflineNode) {
387 this.versionOfOfflineNode = versionOfOfflineNode;
388 }
389
390 public int getVersion() {
391 return version;
392 }
393
394 public void setVersion(int version) {
395 this.version = version;
396 }
397
398 @Override
399 public ServerName getServerName() {
400 return serverName;
401 }
402
403 @Override
404 public void setServerName(ServerName serverName) {
405 this.serverName = serverName;
406 }
407 }
408
409 private boolean isRegionStillOpening(HRegionInfo regionInfo, RegionServerServices rsServices) {
410 byte[] encodedName = regionInfo.getEncodedNameAsBytes();
411 Boolean action = rsServices.getRegionsInTransitionInRS().get(encodedName);
412 return Boolean.TRUE.equals(action);
413 }
414 }