View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.replication;
20  
21  import java.io.IOException;
22  import java.util.ArrayList;
23  import java.util.Collections;
24  import java.util.List;
25  import java.util.UUID;
26  
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.hbase.Abortable;
31  import org.apache.hadoop.hbase.ServerName;
32  import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
33  import org.apache.hadoop.hbase.zookeeper.ZKUtil;
34  import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
35  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
36  import org.apache.zookeeper.KeeperException;
37  import org.apache.zookeeper.KeeperException.AuthFailedException;
38  import org.apache.zookeeper.KeeperException.ConnectionLossException;
39  import org.apache.zookeeper.KeeperException.SessionExpiredException;
40  
41  /**
42   * A {@link BaseReplicationEndpoint} for replication endpoints whose
43   * target cluster is an HBase cluster.
44   */
45  @InterfaceAudience.Private
46  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="MT_CORRECTNESS",
47    justification="Thinks zkw needs to be synchronized access but should be fine as is.")
48  public abstract class HBaseReplicationEndpoint extends BaseReplicationEndpoint
49    implements Abortable {
50  
51    private static final Log LOG = LogFactory.getLog(HBaseReplicationEndpoint.class);
52  
53    private ZooKeeperWatcher zkw = null; // FindBugs: MT_CORRECTNESS
54  
55    private List<ServerName> regionServers = new ArrayList<ServerName>(0);
56    private long lastRegionServerUpdate;
57  
58    protected void disconnect() {
59      if (zkw != null) {
60        zkw.close();
61      }
62    }
63  
64    /**
65     * A private method used to re-establish a zookeeper session with a peer cluster.
66     * @param ke
67     */
68    protected void reconnect(KeeperException ke) {
69      if (ke instanceof ConnectionLossException || ke instanceof SessionExpiredException
70          || ke instanceof AuthFailedException) {
71        String clusterKey = ctx.getPeerConfig().getClusterKey();
72        LOG.warn("Lost the ZooKeeper connection for peer " + clusterKey, ke);
73        try {
74          reloadZkWatcher();
75        } catch (IOException io) {
76          LOG.warn("Creation of ZookeeperWatcher failed for peer " + clusterKey, io);
77        }
78      }
79    }
80  
81    @Override
82    protected void doStart() {
83      try {
84        reloadZkWatcher();
85        notifyStarted();
86      } catch (IOException e) {
87        notifyFailed(e);
88      }
89    }
90  
91    @Override
92    protected void doStop() {
93      disconnect();
94      notifyStopped();
95    }
96  
97    @Override
98    // Synchronize peer cluster connection attempts to avoid races and rate
99    // limit connections when multiple replication sources try to connect to
100   // the peer cluster. If the peer cluster is down we can get out of control
101   // over time.
102   public synchronized UUID getPeerUUID() {
103     UUID peerUUID = null;
104     try {
105       peerUUID = ZKClusterId.getUUIDForCluster(zkw);
106     } catch (KeeperException ke) {
107       reconnect(ke);
108     }
109     return peerUUID;
110   }
111 
112   /**
113    * Get the ZK connection to this peer
114    * @return zk connection
115    */
116   protected ZooKeeperWatcher getZkw() {
117     return zkw;
118   }
119 
120   /**
121    * Closes the current ZKW (if not null) and creates a new one
122    * @throws IOException If anything goes wrong connecting
123    */
124   void reloadZkWatcher() throws IOException {
125     if (zkw != null) zkw.close();
126     zkw = new ZooKeeperWatcher(ctx.getConfiguration(),
127         "connection to cluster: " + ctx.getPeerId(), this);
128     getZkw().registerListener(new PeerRegionServerListener(this));
129   }
130 
131   @Override
132   public void abort(String why, Throwable e) {
133     LOG.error("The HBaseReplicationEndpoint corresponding to peer " + ctx.getPeerId()
134         + " was aborted for the following reason(s):" + why, e);
135   }
136 
137   @Override
138   public boolean isAborted() {
139     // Currently this is never "Aborted", we just log when the abort method is called.
140     return false;
141   }
142 
143   /**
144    * Get the list of all the region servers from the specified peer
145    * @param zkw zk connection to use
146    * @return list of region server addresses or an empty list if the slave is unavailable
147    */
148   protected static List<ServerName> fetchSlavesAddresses(ZooKeeperWatcher zkw)
149       throws KeeperException {
150     List<String> children = ZKUtil.listChildrenAndWatchForNewChildren(zkw, zkw.rsZNode);
151     if (children == null) {
152       return Collections.emptyList();
153     }
154     List<ServerName> addresses = new ArrayList<ServerName>(children.size());
155     for (String child : children) {
156       addresses.add(ServerName.parseServerName(child));
157     }
158     return addresses;
159   }
160 
161   /**
162    * Get a list of all the addresses of all the region servers
163    * for this peer cluster
164    * @return list of addresses
165    */
166   // Synchronize peer cluster connection attempts to avoid races and rate
167   // limit connections when multiple replication sources try to connect to
168   // the peer cluster. If the peer cluster is down we can get out of control
169   // over time.
170   public synchronized List<ServerName> getRegionServers() {
171     try {
172       setRegionServers(fetchSlavesAddresses(this.getZkw()));
173     } catch (KeeperException ke) {
174       if (LOG.isDebugEnabled()) {
175         LOG.debug("Fetch slaves addresses failed", ke);
176       }
177       reconnect(ke);
178     }
179     return regionServers;
180   }
181 
182   /**
183    * Set the list of region servers for that peer
184    * @param regionServers list of addresses for the region servers
185    */
186   public synchronized void setRegionServers(List<ServerName> regionServers) {
187     this.regionServers = regionServers;
188     lastRegionServerUpdate = System.currentTimeMillis();
189   }
190 
191   /**
192    * Get the timestamp at which the last change occurred to the list of region servers to replicate
193    * to.
194    * @return The System.currentTimeMillis at the last time the list of peer region servers changed.
195    */
196   public long getLastRegionServerUpdate() {
197     return lastRegionServerUpdate;
198   }
199 
200   /**
201    * Tracks changes to the list of region servers in a peer's cluster.
202    */
203   public static class PeerRegionServerListener extends ZooKeeperListener {
204 
205     private final HBaseReplicationEndpoint replicationEndpoint;
206     private final String regionServerListNode;
207 
208     public PeerRegionServerListener(HBaseReplicationEndpoint replicationPeer) {
209       super(replicationPeer.getZkw());
210       this.replicationEndpoint = replicationPeer;
211       this.regionServerListNode = replicationEndpoint.getZkw().rsZNode;
212     }
213 
214     @Override
215     public synchronized void nodeChildrenChanged(String path) {
216       if (path.equals(regionServerListNode)) {
217         try {
218           LOG.info("Detected change to peer region servers, fetching updated list");
219           replicationEndpoint.setRegionServers(fetchSlavesAddresses(replicationEndpoint.getZkw()));
220         } catch (KeeperException e) {
221           LOG.error("Error reading slave addresses", e);
222         }
223       }
224     }
225   }
226 }