View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  package org.apache.hadoop.hbase.coordination;
21  
22  import java.io.IOException;
23  import java.io.InterruptedIOException;
24  import java.util.Set;
25  import java.util.concurrent.ConcurrentMap;
26  
27  import org.apache.hadoop.hbase.classification.InterfaceAudience;
28  import org.apache.hadoop.hbase.HRegionInfo;
29  import org.apache.hadoop.hbase.ServerName;
30  import org.apache.hadoop.hbase.master.MasterServices;
31  import org.apache.hadoop.hbase.master.SplitLogManager.ResubmitDirective;
32  import org.apache.hadoop.hbase.master.SplitLogManager.Task;
33  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode;
34  
35  import com.google.common.annotations.VisibleForTesting;
36  
37  /**
38   * Coordination for SplitLogManager. It creates and works with tasks for split log operations<BR>
39   * Manager prepares task by calling {@link #prepareTask} and submit it by
40   * {@link #submitTask(String)}. After that it periodically check the number of remaining tasks by
41   * {@link #remainingTasksInCoordination()} and waits until it become zero.
42   * <P>
43   * Methods required for task life circle: <BR>
44   * {@link #markRegionsRecovering(ServerName, Set)} mark regions for log replaying. Used by
45   * {@link org.apache.hadoop.hbase.master.MasterFileSystem} <BR>
46   * {@link #removeRecoveringRegions(Set, Boolean)} make regions cleanup that previous were marked as
47   * recovering. Called after all tasks processed <BR>
48   * {@link #removeStaleRecoveringRegions(Set)} remove stale recovering. called by
49   * {@link org.apache.hadoop.hbase.master.MasterFileSystem} after Active Master is initialized <BR>
50   * {@link #getLastRecoveryTime()} required for garbage collector and should indicate when the last
51   * recovery has been made<BR>
52   * {@link #checkTaskStillAvailable(String)} Check that task is still there <BR>
53   * {@link #checkTasks()} check for unassigned tasks and resubmit them
54   */
55  @InterfaceAudience.Private
56  public interface SplitLogManagerCoordination {
57  
58    /**
59     * Detail class that shares data between coordination and split log manager
60     */
61    public static class SplitLogManagerDetails {
62      final private ConcurrentMap<String, Task> tasks;
63      final private MasterServices master;
64      final private Set<String> failedDeletions;
65      final private ServerName serverName;
66  
67      public SplitLogManagerDetails(ConcurrentMap<String, Task> tasks, MasterServices master,
68          Set<String> failedDeletions, ServerName serverName) {
69        this.tasks = tasks;
70        this.master = master;
71        this.failedDeletions = failedDeletions;
72        this.serverName = serverName;
73      }
74  
75      /**
76       * @return the master value
77       */
78      public MasterServices getMaster() {
79        return master;
80      }
81  
82      /**
83       * @return map of tasks
84       */
85      public ConcurrentMap<String, Task> getTasks() {
86        return tasks;
87      }
88  
89      /**
90       * @return a set of failed deletions
91       */
92      public Set<String> getFailedDeletions() {
93        return failedDeletions;
94      }
95  
96      /**
97       * @return server name
98       */
99      public ServerName getServerName() {
100       return serverName;
101     }
102   }
103 
104   /**
105    * Provide the configuration from the SplitLogManager
106    */
107   void setDetails(SplitLogManagerDetails details);
108 
109   /**
110    * Returns the configuration that was provided previously
111    */
112   SplitLogManagerDetails getDetails();
113 
114   /**
115    * Prepare the new task
116    * @param taskName name of the task
117    * @return the task id
118    */
119   String prepareTask(String taskName);
120 
121   /**
122    * Mark regions in recovering state for distributed log replay
123    * @param serverName server name
124    * @param userRegions set of regions to be marked
125    * @throws IOException in case of failure
126    * @throws InterruptedIOException
127    */
128   void markRegionsRecovering(final ServerName serverName, Set<HRegionInfo> userRegions)
129       throws IOException, InterruptedIOException;
130 
131   /**
132    * tells Coordination that it should check for new tasks
133    */
134   void checkTasks();
135 
136   /**
137    * It removes recovering regions from Coordination
138    * @param serverNames servers which are just recovered
139    * @param isMetaRecovery whether current recovery is for the meta region on
140    *          <code>serverNames</code>
141    */
142   void removeRecoveringRegions(Set<String> serverNames, Boolean isMetaRecovery) throws IOException;
143 
144   /**
145    * Return the number of remaining tasks
146    */
147   int remainingTasksInCoordination();
148 
149   /**
150    * Check that the task is still there
151    * @param task node to check
152    */
153   void checkTaskStillAvailable(String task);
154 
155   /**
156    * Change the recovery mode.
157    * @param b the recovery mode state
158    * @throws InterruptedIOException
159    * @throws IOException in case of failure
160    */
161   void setRecoveryMode(boolean b) throws InterruptedIOException, IOException;
162 
163   /**
164    * Removes known stale servers
165    * @param knownServers set of previously failed servers
166    * @throws IOException in case of failure
167    * @throws InterruptedIOException
168    */
169   void removeStaleRecoveringRegions(Set<String> knownServers) throws IOException,
170       InterruptedIOException;
171 
172   /**
173    * Resubmit the task in case if found unassigned or failed
174    * @param taskName path related to task
175    * @param task to resubmit
176    * @param force whether it should be forced
177    * @return whether it was successful
178    */
179 
180   boolean resubmitTask(String taskName, Task task, ResubmitDirective force);
181 
182   /**
183    * @param taskName to be submitted
184    */
185   void submitTask(String taskName);
186 
187   /**
188    * @param taskName to be removed
189    */
190   void deleteTask(String taskName);
191 
192   /**
193    * @return shows whether the log recovery mode is in replaying state
194    */
195   boolean isReplaying();
196 
197   /**
198    * @return shows whether the log recovery mode is in splitting state
199    */
200   boolean isSplitting();
201 
202   /**
203    * @return the time of last attempt to recover
204    */
205   long getLastRecoveryTime();
206 
207   /**
208    * Temporary function, mostly for UTs. In the regular code isReplaying or isSplitting should be
209    * used.
210    * @return the current log recovery mode.
211    */
212   RecoveryMode getRecoveryMode();
213 
214   /**
215    * Support method to init constants such as timeout. Mostly required for UTs.
216    * @throws IOException
217    */
218   @VisibleForTesting
219   void init() throws IOException;
220 }