View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.security.PrivilegedExceptionAction;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.concurrent.atomic.AtomicBoolean;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.MetaTableAccessor;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.TableNotDisabledException;
38  import org.apache.hadoop.hbase.TableNotFoundException;
39  import org.apache.hadoop.hbase.TableStateManager;
40  import org.apache.hadoop.hbase.classification.InterfaceAudience;
41  import org.apache.hadoop.hbase.exceptions.HBaseException;
42  import org.apache.hadoop.hbase.executor.EventType;
43  import org.apache.hadoop.hbase.master.AssignmentManager;
44  import org.apache.hadoop.hbase.master.BulkAssigner;
45  import org.apache.hadoop.hbase.master.GeneralBulkAssigner;
46  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
47  import org.apache.hadoop.hbase.master.MasterServices;
48  import org.apache.hadoop.hbase.master.RegionStates;
49  import org.apache.hadoop.hbase.master.ServerManager;
50  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
51  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
52  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
53  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.EnableTableState;
54  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
55  import org.apache.hadoop.hbase.util.Pair;
56  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
57  import org.apache.hadoop.security.UserGroupInformation;
58  
59  @InterfaceAudience.Private
60  public class EnableTableProcedure
61      extends StateMachineProcedure<MasterProcedureEnv, EnableTableState>
62      implements TableProcedureInterface {
63    private static final Log LOG = LogFactory.getLog(EnableTableProcedure.class);
64  
65    private final AtomicBoolean aborted = new AtomicBoolean(false);
66  
67    // This is for back compatible with 1.0 asynchronized operations.
68    private final ProcedurePrepareLatch syncLatch;
69  
70    private TableName tableName;
71    private boolean skipTableStateCheck;
72    private UserGroupInformation user;
73  
74    private Boolean traceEnabled = null;
75  
76    public EnableTableProcedure() {
77      syncLatch = null;
78    }
79  
80    /**
81     * Constructor
82     * @param env MasterProcedureEnv
83     * @param tableName the table to operate on
84     * @param skipTableStateCheck whether to check table state
85     */
86    public EnableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
87        final boolean skipTableStateCheck) {
88      this(env, tableName, skipTableStateCheck, null);
89    }
90  
91    /**
92     * Constructor
93     * @param env MasterProcedureEnv
94     * @param tableName the table to operate on
95     * @param skipTableStateCheck whether to check table state
96     */
97    public EnableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
98        final boolean skipTableStateCheck, final ProcedurePrepareLatch syncLatch) {
99      this.tableName = tableName;
100     this.skipTableStateCheck = skipTableStateCheck;
101     this.user = env.getRequestUser().getUGI();
102     this.setOwner(this.user.getShortUserName());
103 
104     // Compatible with 1.0: We use latch to make sure that this procedure implementation is
105     // compatible with 1.0 asynchronized operations. We need to lock the table and check
106     // whether the Enable operation could be performed (table exists and offline; table state
107     // is DISABLED). Once it is done, we are good to release the latch and the client can
108     // start asynchronously wait for the operation.
109     //
110     // Note: the member syncLatch could be null if we are in failover or recovery scenario.
111     // This is ok for backward compatible, as 1.0 client would not able to peek at procedure.
112     this.syncLatch = syncLatch;
113   }
114 
115   @Override
116   protected Flow executeFromState(final MasterProcedureEnv env, final EnableTableState state)
117       throws InterruptedException {
118     if (isTraceEnabled()) {
119       LOG.trace(this + " execute state=" + state);
120     }
121 
122     try {
123       switch (state) {
124       case ENABLE_TABLE_PREPARE:
125         if (prepareEnable(env)) {
126           setNextState(EnableTableState.ENABLE_TABLE_PRE_OPERATION);
127         } else {
128           assert isFailed() : "enable should have an exception here";
129           return Flow.NO_MORE_STATE;
130         }
131         break;
132       case ENABLE_TABLE_PRE_OPERATION:
133         preEnable(env, state);
134         setNextState(EnableTableState.ENABLE_TABLE_SET_ENABLING_TABLE_STATE);
135         break;
136       case ENABLE_TABLE_SET_ENABLING_TABLE_STATE:
137         setTableStateToEnabling(env, tableName);
138         setNextState(EnableTableState.ENABLE_TABLE_MARK_REGIONS_ONLINE);
139         break;
140       case ENABLE_TABLE_MARK_REGIONS_ONLINE:
141         markRegionsOnline(env, tableName, true);
142         setNextState(EnableTableState.ENABLE_TABLE_SET_ENABLED_TABLE_STATE);
143         break;
144       case ENABLE_TABLE_SET_ENABLED_TABLE_STATE:
145         setTableStateToEnabled(env, tableName);
146         setNextState(EnableTableState.ENABLE_TABLE_POST_OPERATION);
147         break;
148       case ENABLE_TABLE_POST_OPERATION:
149         postEnable(env, state);
150         return Flow.NO_MORE_STATE;
151       default:
152         throw new UnsupportedOperationException("unhandled state=" + state);
153       }
154     } catch (HBaseException|IOException e) {
155       LOG.error("Error trying to enable table=" + tableName + " state=" + state, e);
156       setFailure("master-enable-table", e);
157     }
158     return Flow.HAS_MORE_STATE;
159   }
160 
161   @Override
162   protected void rollbackState(final MasterProcedureEnv env, final EnableTableState state)
163       throws IOException {
164     if (isTraceEnabled()) {
165       LOG.trace(this + " rollback state=" + state);
166     }
167     try {
168       switch (state) {
169       case ENABLE_TABLE_POST_OPERATION:
170         // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.preDisable())?
171         break;
172       case ENABLE_TABLE_SET_ENABLED_TABLE_STATE:
173         DisableTableProcedure.setTableStateToDisabling(env, tableName);
174         break;
175       case ENABLE_TABLE_MARK_REGIONS_ONLINE:
176         markRegionsOfflineDuringRecovery(env);
177         break;
178       case ENABLE_TABLE_SET_ENABLING_TABLE_STATE:
179         DisableTableProcedure.setTableStateToDisabled(env, tableName);
180         break;
181       case ENABLE_TABLE_PRE_OPERATION:
182         // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.postDisable())?
183         break;
184       case ENABLE_TABLE_PREPARE:
185         // Nothing to undo for this state.
186         // We do need to count down the latch count so that we don't stuck.
187         ProcedurePrepareLatch.releaseLatch(syncLatch, this);
188         break;
189       default:
190         throw new UnsupportedOperationException("unhandled state=" + state);
191       }
192     } catch (HBaseException e) {
193       LOG.warn("Failed enable table rollback attempt step=" + state + " table=" + tableName, e);
194       throw new IOException(e);
195     } catch (IOException e) {
196       // This will be retried. Unless there is a bug in the code,
197       // this should be just a "temporary error" (e.g. network down)
198       LOG.warn("Failed enable table rollback attempt step=" + state + " table=" + tableName, e);
199       throw e;
200     }
201   }
202 
203   @Override
204   protected EnableTableState getState(final int stateId) {
205     return EnableTableState.valueOf(stateId);
206   }
207 
208   @Override
209   protected int getStateId(final EnableTableState state) {
210     return state.getNumber();
211   }
212 
213   @Override
214   protected EnableTableState getInitialState() {
215     return EnableTableState.ENABLE_TABLE_PREPARE;
216   }
217 
218   @Override
219   protected void setNextState(final EnableTableState state) {
220     if (aborted.get()) {
221       setAbortFailure("Enable-table", "abort requested");
222     } else {
223       super.setNextState(state);
224     }
225   }
226 
227   @Override
228   public boolean abort(final MasterProcedureEnv env) {
229     aborted.set(true);
230     return true;
231   }
232 
233   @Override
234   protected boolean acquireLock(final MasterProcedureEnv env) {
235     if (env.waitInitialized(this)) return false;
236     return env.getProcedureQueue().tryAcquireTableExclusiveLock(this, tableName);
237   }
238 
239   @Override
240   protected void releaseLock(final MasterProcedureEnv env) {
241     env.getProcedureQueue().releaseTableExclusiveLock(this, tableName);
242   }
243 
244   @Override
245   public void serializeStateData(final OutputStream stream) throws IOException {
246     super.serializeStateData(stream);
247 
248     MasterProcedureProtos.EnableTableStateData.Builder enableTableMsg =
249         MasterProcedureProtos.EnableTableStateData.newBuilder()
250             .setUserInfo(MasterProcedureUtil.toProtoUserInfo(user))
251             .setTableName(ProtobufUtil.toProtoTableName(tableName))
252             .setSkipTableStateCheck(skipTableStateCheck);
253 
254     enableTableMsg.build().writeDelimitedTo(stream);
255   }
256 
257   @Override
258   public void deserializeStateData(final InputStream stream) throws IOException {
259     super.deserializeStateData(stream);
260 
261     MasterProcedureProtos.EnableTableStateData enableTableMsg =
262         MasterProcedureProtos.EnableTableStateData.parseDelimitedFrom(stream);
263     user = MasterProcedureUtil.toUserInfo(enableTableMsg.getUserInfo());
264     tableName = ProtobufUtil.toTableName(enableTableMsg.getTableName());
265     skipTableStateCheck = enableTableMsg.getSkipTableStateCheck();
266   }
267 
268   @Override
269   public void toStringClassDetails(StringBuilder sb) {
270     sb.append(getClass().getSimpleName());
271     sb.append(" (table=");
272     sb.append(tableName);
273     sb.append(")");
274   }
275 
276   @Override
277   public TableName getTableName() {
278     return tableName;
279   }
280 
281   @Override
282   public TableOperationType getTableOperationType() {
283     return TableOperationType.ENABLE;
284   }
285 
286 
287   /**
288    * Action before any real action of enabling table. Set the exception in the procedure instead
289    * of throwing it.  This approach is to deal with backward compatible with 1.0.
290    * @param env MasterProcedureEnv
291    * @return whether the table passes the necessary checks
292    * @throws IOException
293    */
294   private boolean prepareEnable(final MasterProcedureEnv env) throws IOException {
295     boolean canTableBeEnabled = true;
296 
297     // Check whether table exists
298     if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
299       setFailure("master-enable-table", new TableNotFoundException(tableName));
300       canTableBeEnabled = false;
301     } else if (!skipTableStateCheck) {
302       // There could be multiple client requests trying to disable or enable
303       // the table at the same time. Ensure only the first request is honored
304       // After that, no other requests can be accepted until the table reaches
305       // DISABLED or ENABLED.
306       //
307       // Note: in 1.0 release, we called TableStateManager.setTableStateIfInStates() to set
308       // the state to ENABLING from DISABLED. The implementation was done before table lock
309       // was implemented. With table lock, there is no need to set the state here (it will
310       // set the state later on). A quick state check should be enough for us to move forward.
311       TableStateManager tsm = env.getMasterServices().getAssignmentManager().getTableStateManager();
312       if (!tsm.isTableState(tableName, ZooKeeperProtos.Table.State.DISABLED)) {
313         LOG.info("Table " + tableName + " isn't disabled; skipping enable");
314         setFailure("master-enable-table", new TableNotDisabledException(this.tableName));
315         canTableBeEnabled = false;
316       }
317     }
318 
319     // We are done the check. Future actions in this procedure could be done asynchronously.
320     ProcedurePrepareLatch.releaseLatch(syncLatch, this);
321 
322     return canTableBeEnabled;
323   }
324 
325   /**
326    * Action before enabling table.
327    * @param env MasterProcedureEnv
328    * @param state the procedure state
329    * @throws IOException
330    * @throws InterruptedException
331    */
332   private void preEnable(final MasterProcedureEnv env, final EnableTableState state)
333       throws IOException, InterruptedException {
334     runCoprocessorAction(env, state);
335   }
336 
337   /**
338    * Mark table state to Enabling
339    * @param env MasterProcedureEnv
340    * @param tableName the target table
341    * @throws IOException
342    */
343   protected static void setTableStateToEnabling(
344       final MasterProcedureEnv env,
345       final TableName tableName) throws HBaseException, IOException {
346     // Set table disabling flag up in zk.
347     LOG.info("Attempting to enable the table " + tableName);
348     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
349       tableName,
350       ZooKeeperProtos.Table.State.ENABLING);
351   }
352 
353   /**
354    * Mark offline regions of the table online with retry
355    * @param env MasterProcedureEnv
356    * @param tableName the target table
357    * @param retryRequired whether to retry if the first run failed
358    * @throws IOException
359    */
360   protected static void markRegionsOnline(
361       final MasterProcedureEnv env,
362       final TableName tableName,
363       final Boolean retryRequired) throws IOException {
364     // This is best effort approach to make all regions of a table online.  If we fail to do
365     // that, it is ok that the table has some offline regions; user can fix it manually.
366 
367     // Dev consideration: add a config to control max number of retry. For now, it is hard coded.
368     int maxTry = (retryRequired ? 10 : 1);
369     boolean done = false;
370 
371     do {
372       try {
373         done = markRegionsOnline(env, tableName);
374         if (done) {
375           break;
376         }
377         maxTry--;
378       } catch (Exception e) {
379         LOG.warn("Received exception while marking regions online. tries left: " + maxTry, e);
380         maxTry--;
381         if (maxTry > 0) {
382           continue; // we still have some retry left, try again.
383         }
384         throw e;
385       }
386     } while (maxTry > 0);
387 
388     if (!done) {
389       LOG.warn("Some or all regions of the Table '" + tableName + "' were offline");
390     }
391   }
392 
393   /**
394    * Mark offline regions of the table online
395    * @param env MasterProcedureEnv
396    * @param tableName the target table
397    * @return whether the operation is fully completed or being interrupted.
398    * @throws IOException
399    */
400   private static boolean markRegionsOnline(final MasterProcedureEnv env, final TableName tableName)
401       throws IOException {
402     final AssignmentManager assignmentManager = env.getMasterServices().getAssignmentManager();
403     final MasterServices masterServices = env.getMasterServices();
404     final ServerManager serverManager = masterServices.getServerManager();
405     boolean done = false;
406     // Get the regions of this table. We're done when all listed
407     // tables are onlined.
408     List<Pair<HRegionInfo, ServerName>> tableRegionsAndLocations;
409 
410     if (TableName.META_TABLE_NAME.equals(tableName)) {
411       tableRegionsAndLocations =
412           new MetaTableLocator().getMetaRegionsAndLocations(masterServices.getZooKeeper());
413     } else {
414       tableRegionsAndLocations =
415           MetaTableAccessor.getTableRegionsAndLocations(
416             masterServices.getZooKeeper(), masterServices.getConnection(), tableName, true);
417     }
418 
419     int countOfRegionsInTable = tableRegionsAndLocations.size();
420     Map<HRegionInfo, ServerName> regionsToAssign =
421         regionsToAssignWithServerName(env, tableRegionsAndLocations);
422 
423     // need to potentially create some regions for the replicas
424     List<HRegionInfo> unrecordedReplicas =
425         AssignmentManager.replicaRegionsNotRecordedInMeta(new HashSet<HRegionInfo>(
426             regionsToAssign.keySet()), masterServices);
427     Map<ServerName, List<HRegionInfo>> srvToUnassignedRegs =
428         assignmentManager.getBalancer().roundRobinAssignment(unrecordedReplicas,
429           serverManager.getOnlineServersList());
430     if (srvToUnassignedRegs != null) {
431       for (Map.Entry<ServerName, List<HRegionInfo>> entry : srvToUnassignedRegs.entrySet()) {
432         for (HRegionInfo h : entry.getValue()) {
433           regionsToAssign.put(h, entry.getKey());
434         }
435       }
436     }
437 
438     int offlineRegionsCount = regionsToAssign.size();
439 
440     LOG.info("Table '" + tableName + "' has " + countOfRegionsInTable + " regions, of which "
441         + offlineRegionsCount + " are offline.");
442     if (offlineRegionsCount == 0) {
443       return true;
444     }
445 
446     List<ServerName> onlineServers = serverManager.createDestinationServersList();
447     Map<ServerName, List<HRegionInfo>> bulkPlan =
448         env.getMasterServices().getAssignmentManager().getBalancer()
449             .retainAssignment(regionsToAssign, onlineServers);
450     if (bulkPlan != null) {
451       LOG.info("Bulk assigning " + offlineRegionsCount + " region(s) across " + bulkPlan.size()
452           + " server(s), retainAssignment=true");
453 
454       BulkAssigner ba = new GeneralBulkAssigner(masterServices, bulkPlan, assignmentManager, true);
455       try {
456         if (ba.bulkAssign()) {
457           done = true;
458         }
459       } catch (InterruptedException e) {
460         LOG.warn("Enable operation was interrupted when enabling table '" + tableName + "'");
461         // Preserve the interrupt.
462         Thread.currentThread().interrupt();
463       }
464     } else {
465       LOG.info("Balancer was unable to find suitable servers for table " + tableName
466           + ", leaving unassigned");
467     }
468     return done;
469   }
470 
471   /**
472    * Mark regions of the table offline during recovery
473    * @param env MasterProcedureEnv
474    */
475   private void markRegionsOfflineDuringRecovery(final MasterProcedureEnv env) {
476     try {
477       // This is a best effort attempt. We will move on even it does not succeed. We will retry
478       // several times until we giving up.
479       DisableTableProcedure.markRegionsOffline(env, tableName, true);
480     } catch (Exception e) {
481       LOG.debug("Failed to offline all regions of table " + tableName + ". Ignoring", e);
482     }
483   }
484 
485   /**
486    * Mark table state to Enabled
487    * @param env MasterProcedureEnv
488    * @throws IOException
489    */
490   protected static void setTableStateToEnabled(
491       final MasterProcedureEnv env,
492       final TableName tableName) throws HBaseException, IOException {
493     // Flip the table to Enabled
494     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
495       tableName,
496       ZooKeeperProtos.Table.State.ENABLED);
497     LOG.info("Table '" + tableName + "' was successfully enabled.");
498   }
499 
500   /**
501    * Action after enabling table.
502    * @param env MasterProcedureEnv
503    * @param state the procedure state
504    * @throws IOException
505    * @throws InterruptedException
506    */
507   private void postEnable(final MasterProcedureEnv env, final EnableTableState state)
508       throws IOException, InterruptedException {
509     runCoprocessorAction(env, state);
510   }
511 
512   /**
513    * The procedure could be restarted from a different machine. If the variable is null, we need to
514    * retrieve it.
515    * @return traceEnabled
516    */
517   private Boolean isTraceEnabled() {
518     if (traceEnabled == null) {
519       traceEnabled = LOG.isTraceEnabled();
520     }
521     return traceEnabled;
522   }
523 
524   /**
525    * @param regionsInMeta
526    * @return List of regions neither in transition nor assigned.
527    * @throws IOException
528    */
529   private static Map<HRegionInfo, ServerName> regionsToAssignWithServerName(
530       final MasterProcedureEnv env,
531       final List<Pair<HRegionInfo, ServerName>> regionsInMeta) throws IOException {
532     Map<HRegionInfo, ServerName> regionsToAssign =
533         new HashMap<HRegionInfo, ServerName>(regionsInMeta.size());
534     RegionStates regionStates = env.getMasterServices().getAssignmentManager().getRegionStates();
535     for (Pair<HRegionInfo, ServerName> regionLocation : regionsInMeta) {
536       HRegionInfo hri = regionLocation.getFirst();
537       ServerName sn = regionLocation.getSecond();
538       if (regionStates.isRegionOffline(hri)) {
539         regionsToAssign.put(hri, sn);
540       } else {
541         if (LOG.isDebugEnabled()) {
542           LOG.debug("Skipping assign for the region " + hri + " during enable table "
543               + hri.getTable() + " because its already in tranition or assigned.");
544         }
545       }
546     }
547     return regionsToAssign;
548   }
549 
550   /**
551    * Coprocessor Action.
552    * @param env MasterProcedureEnv
553    * @param state the procedure state
554    * @throws IOException
555    * @throws InterruptedException
556    */
557   private void runCoprocessorAction(final MasterProcedureEnv env, final EnableTableState state)
558       throws IOException, InterruptedException {
559     final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
560     if (cpHost != null) {
561       user.doAs(new PrivilegedExceptionAction<Void>() {
562         @Override
563         public Void run() throws Exception {
564           switch (state) {
565           case ENABLE_TABLE_PRE_OPERATION:
566             cpHost.preEnableTableHandler(getTableName());
567             break;
568           case ENABLE_TABLE_POST_OPERATION:
569             cpHost.postEnableTableHandler(getTableName());
570             break;
571           default:
572             throw new UnsupportedOperationException(this + " unhandled state=" + state);
573           }
574           return null;
575         }
576       });
577     }
578   }
579 }