View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.security.PrivilegedExceptionAction;
25  import java.util.List;
26  import java.util.concurrent.ExecutorService;
27  import java.util.concurrent.atomic.AtomicBoolean;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.hbase.HRegionInfo;
32  import org.apache.hadoop.hbase.MetaTableAccessor;
33  import org.apache.hadoop.hbase.TableName;
34  import org.apache.hadoop.hbase.TableNotEnabledException;
35  import org.apache.hadoop.hbase.TableNotFoundException;
36  import org.apache.hadoop.hbase.TableStateManager;
37  import org.apache.hadoop.hbase.classification.InterfaceAudience;
38  import org.apache.hadoop.hbase.constraint.ConstraintException;
39  import org.apache.hadoop.hbase.exceptions.HBaseException;
40  import org.apache.hadoop.hbase.master.AssignmentManager;
41  import org.apache.hadoop.hbase.master.BulkAssigner;
42  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
43  import org.apache.hadoop.hbase.master.RegionState;
44  import org.apache.hadoop.hbase.master.RegionStates;
45  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
46  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
47  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
48  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
49  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
50  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
51  import org.apache.hadoop.security.UserGroupInformation;
52  import org.apache.htrace.Trace;
53  
54  @InterfaceAudience.Private
55  public class DisableTableProcedure
56      extends StateMachineProcedure<MasterProcedureEnv, DisableTableState>
57      implements TableProcedureInterface {
58    private static final Log LOG = LogFactory.getLog(DisableTableProcedure.class);
59  
60    private final AtomicBoolean aborted = new AtomicBoolean(false);
61  
62    // This is for back compatible with 1.0 asynchronized operations.
63    private final ProcedurePrepareLatch syncLatch;
64  
65    private TableName tableName;
66    private boolean skipTableStateCheck;
67    private UserGroupInformation user;
68  
69    private Boolean traceEnabled = null;
70  
71    enum MarkRegionOfflineOpResult {
72      MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL,
73      BULK_ASSIGN_REGIONS_FAILED,
74      MARK_ALL_REGIONS_OFFLINE_INTERRUPTED,
75    }
76  
77    public DisableTableProcedure() {
78      syncLatch = null;
79    }
80  
81    /**
82     * Constructor
83     * @param env MasterProcedureEnv
84     * @param tableName the table to operate on
85     * @param skipTableStateCheck whether to check table state
86     */
87    public DisableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
88        final boolean skipTableStateCheck) {
89      this(env, tableName, skipTableStateCheck, null);
90    }
91  
92    /**
93     * Constructor
94     * @param env MasterProcedureEnv
95     * @param tableName the table to operate on
96     * @param skipTableStateCheck whether to check table state
97     */
98    public DisableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
99        final boolean skipTableStateCheck, final ProcedurePrepareLatch syncLatch) {
100     this.tableName = tableName;
101     this.skipTableStateCheck = skipTableStateCheck;
102     this.user = env.getRequestUser().getUGI();
103     this.setOwner(this.user.getShortUserName());
104 
105     // Compatible with 1.0: We use latch to make sure that this procedure implementation is
106     // compatible with 1.0 asynchronized operations. We need to lock the table and check
107     // whether the Disable operation could be performed (table exists and online; table state
108     // is ENABLED). Once it is done, we are good to release the latch and the client can
109     // start asynchronously wait for the operation.
110     //
111     // Note: the member syncLatch could be null if we are in failover or recovery scenario.
112     // This is ok for backward compatible, as 1.0 client would not able to peek at procedure.
113     this.syncLatch = syncLatch;
114   }
115 
116   @Override
117   protected Flow executeFromState(final MasterProcedureEnv env, final DisableTableState state)
118       throws InterruptedException {
119     if (isTraceEnabled()) {
120       LOG.trace(this + " execute state=" + state);
121     }
122 
123     try {
124       switch (state) {
125       case DISABLE_TABLE_PREPARE:
126         if (prepareDisable(env)) {
127           setNextState(DisableTableState.DISABLE_TABLE_PRE_OPERATION);
128         } else {
129           assert isFailed() : "disable should have an exception here";
130           return Flow.NO_MORE_STATE;
131         }
132         break;
133       case DISABLE_TABLE_PRE_OPERATION:
134         preDisable(env, state);
135         setNextState(DisableTableState.DISABLE_TABLE_SET_DISABLING_TABLE_STATE);
136         break;
137       case DISABLE_TABLE_SET_DISABLING_TABLE_STATE:
138         setTableStateToDisabling(env, tableName);
139         setNextState(DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE);
140         break;
141       case DISABLE_TABLE_MARK_REGIONS_OFFLINE:
142         if (markRegionsOffline(env, tableName, true) ==
143             MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
144           setNextState(DisableTableState.DISABLE_TABLE_SET_DISABLED_TABLE_STATE);
145         } else {
146           LOG.trace("Retrying later to disable the missing regions");
147         }
148         break;
149       case DISABLE_TABLE_SET_DISABLED_TABLE_STATE:
150         setTableStateToDisabled(env, tableName);
151         setNextState(DisableTableState.DISABLE_TABLE_POST_OPERATION);
152         break;
153       case DISABLE_TABLE_POST_OPERATION:
154         postDisable(env, state);
155         return Flow.NO_MORE_STATE;
156       default:
157         throw new UnsupportedOperationException("unhandled state=" + state);
158       }
159     } catch (HBaseException|IOException e) {
160       LOG.warn("Retriable error trying to disable table=" + tableName + " state=" + state, e);
161     }
162     return Flow.HAS_MORE_STATE;
163   }
164 
165   @Override
166   protected void rollbackState(final MasterProcedureEnv env, final DisableTableState state)
167       throws IOException {
168     if (state == DisableTableState.DISABLE_TABLE_PREPARE) {
169       undoTableStateChange(env);
170       ProcedurePrepareLatch.releaseLatch(syncLatch, this);
171       return;
172     }
173 
174     // The delete doesn't have a rollback. The execution will succeed, at some point.
175     throw new UnsupportedOperationException("unhandled state=" + state);
176   }
177 
178   @Override
179   protected DisableTableState getState(final int stateId) {
180     return DisableTableState.valueOf(stateId);
181   }
182 
183   @Override
184   protected int getStateId(final DisableTableState state) {
185     return state.getNumber();
186   }
187 
188   @Override
189   protected DisableTableState getInitialState() {
190     return DisableTableState.DISABLE_TABLE_PREPARE;
191   }
192 
193   @Override
194   protected void setNextState(final DisableTableState state) {
195     if (aborted.get()) {
196       setAbortFailure("disable-table", "abort requested");
197     } else {
198       super.setNextState(state);
199     }
200   }
201 
202   @Override
203   public boolean abort(final MasterProcedureEnv env) {
204     aborted.set(true);
205     return true;
206   }
207 
208   @Override
209   protected boolean acquireLock(final MasterProcedureEnv env) {
210     if (env.waitInitialized(this)) return false;
211     return env.getProcedureQueue().tryAcquireTableExclusiveLock(this, tableName);
212   }
213 
214   @Override
215   protected void releaseLock(final MasterProcedureEnv env) {
216     env.getProcedureQueue().releaseTableExclusiveLock(this, tableName);
217   }
218 
219   @Override
220   public void serializeStateData(final OutputStream stream) throws IOException {
221     super.serializeStateData(stream);
222 
223     MasterProcedureProtos.DisableTableStateData.Builder disableTableMsg =
224         MasterProcedureProtos.DisableTableStateData.newBuilder()
225             .setUserInfo(MasterProcedureUtil.toProtoUserInfo(user))
226             .setTableName(ProtobufUtil.toProtoTableName(tableName))
227             .setSkipTableStateCheck(skipTableStateCheck);
228 
229     disableTableMsg.build().writeDelimitedTo(stream);
230   }
231 
232   @Override
233   public void deserializeStateData(final InputStream stream) throws IOException {
234     super.deserializeStateData(stream);
235 
236     MasterProcedureProtos.DisableTableStateData disableTableMsg =
237         MasterProcedureProtos.DisableTableStateData.parseDelimitedFrom(stream);
238     user = MasterProcedureUtil.toUserInfo(disableTableMsg.getUserInfo());
239     tableName = ProtobufUtil.toTableName(disableTableMsg.getTableName());
240     skipTableStateCheck = disableTableMsg.getSkipTableStateCheck();
241   }
242 
243   @Override
244   public void toStringClassDetails(StringBuilder sb) {
245     sb.append(getClass().getSimpleName());
246     sb.append(" (table=");
247     sb.append(tableName);
248     sb.append(")");
249   }
250 
251   @Override
252   public TableName getTableName() {
253     return tableName;
254   }
255 
256   @Override
257   public TableOperationType getTableOperationType() {
258     return TableOperationType.DISABLE;
259   }
260 
261   /**
262    * Action before any real action of disabling table. Set the exception in the procedure instead
263    * of throwing it.  This approach is to deal with backward compatible with 1.0.
264    * @param env MasterProcedureEnv
265    * @throws HBaseException
266    * @throws IOException
267    */
268   private boolean prepareDisable(final MasterProcedureEnv env) throws HBaseException, IOException {
269     boolean canTableBeDisabled = true;
270     if (tableName.equals(TableName.META_TABLE_NAME)) {
271       setFailure("master-disable-table", new ConstraintException("Cannot disable catalog table"));
272       canTableBeDisabled = false;
273     } else if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
274       setFailure("master-disable-table", new TableNotFoundException(tableName));
275       canTableBeDisabled = false;
276     } else if (!skipTableStateCheck) {
277       // There could be multiple client requests trying to disable or enable
278       // the table at the same time. Ensure only the first request is honored
279       // After that, no other requests can be accepted until the table reaches
280       // DISABLED or ENABLED.
281       //
282       // Note: A quick state check should be enough for us to move forward. However, instead of
283       // calling TableStateManager.isTableState() to just check the state, we called
284       // TableStateManager.setTableStateIfInStates() to set the state to DISABLING from ENABLED.
285       // This is because we treat empty state as enabled from 0.92-clusters. See
286       // ZKTableStateManager.setTableStateIfInStates() that has a hack solution to work around
287       // this issue.
288       TableStateManager tsm =
289         env.getMasterServices().getAssignmentManager().getTableStateManager();
290       if (!tsm.setTableStateIfInStates(tableName, ZooKeeperProtos.Table.State.DISABLING,
291             ZooKeeperProtos.Table.State.DISABLING, ZooKeeperProtos.Table.State.ENABLED)) {
292         LOG.info("Table " + tableName + " isn't enabled; skipping disable");
293         setFailure("master-disable-table", new TableNotEnabledException(tableName));
294         canTableBeDisabled = false;
295       }
296     }
297 
298     // We are done the check. Future actions in this procedure could be done asynchronously.
299     ProcedurePrepareLatch.releaseLatch(syncLatch, this);
300 
301     return canTableBeDisabled;
302   }
303 
304   /**
305    * Rollback of table state change in prepareDisable()
306    * @param env MasterProcedureEnv
307    */
308   @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="REC_CATCH_EXCEPTION",
309       justification="Intended")
310   private void undoTableStateChange(final MasterProcedureEnv env) {
311     if (!skipTableStateCheck) {
312       try {
313         // If the state was changed, undo it.
314         if (env.getMasterServices().getAssignmentManager().getTableStateManager().isTableState(
315             tableName, ZooKeeperProtos.Table.State.DISABLING)) {
316           EnableTableProcedure.setTableStateToEnabled(env, tableName);
317         }
318       } catch (Exception e) {
319         // Ignore exception.
320         LOG.trace(e.getMessage());
321       }
322     }
323   }
324 
325   /**
326    * Action before disabling table.
327    * @param env MasterProcedureEnv
328    * @param state the procedure state
329    * @throws IOException
330    * @throws InterruptedException
331    */
332   protected void preDisable(final MasterProcedureEnv env, final DisableTableState state)
333       throws IOException, InterruptedException {
334     runCoprocessorAction(env, state);
335   }
336 
337   /**
338    * Mark table state to Disabling
339    * @param env MasterProcedureEnv
340    * @throws IOException
341    */
342   protected static void setTableStateToDisabling(
343       final MasterProcedureEnv env,
344       final TableName tableName) throws HBaseException, IOException {
345     // Set table disabling flag up in zk.
346     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
347       tableName,
348       ZooKeeperProtos.Table.State.DISABLING);
349   }
350 
351   /**
352    * Mark regions of the table offline with retries
353    * @param env MasterProcedureEnv
354    * @param tableName the target table
355    * @param retryRequired whether to retry if the first run failed
356    * @return whether the operation is fully completed or being interrupted.
357    * @throws IOException
358    */
359   protected static MarkRegionOfflineOpResult markRegionsOffline(
360       final MasterProcedureEnv env,
361       final TableName tableName,
362       final Boolean retryRequired) throws IOException {
363     // Dev consideration: add a config to control max number of retry. For now, it is hard coded.
364     int maxTry = (retryRequired ? 10 : 1);
365     MarkRegionOfflineOpResult operationResult =
366         MarkRegionOfflineOpResult.BULK_ASSIGN_REGIONS_FAILED;
367     do {
368       try {
369         operationResult = markRegionsOffline(env, tableName);
370         if (operationResult == MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
371           break;
372         }
373         maxTry--;
374       } catch (Exception e) {
375         LOG.warn("Received exception while marking regions online. tries left: " + maxTry, e);
376         maxTry--;
377         if (maxTry > 0) {
378           continue; // we still have some retry left, try again.
379         }
380         throw e;
381       }
382     } while (maxTry > 0);
383 
384     if (operationResult != MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
385       LOG.warn("Some or all regions of the Table '" + tableName + "' were still online");
386     }
387 
388     return operationResult;
389   }
390 
391   /**
392    * Mark regions of the table offline
393    * @param env MasterProcedureEnv
394    * @param tableName the target table
395    * @return whether the operation is fully completed or being interrupted.
396    * @throws IOException
397    */
398   private static MarkRegionOfflineOpResult markRegionsOffline(
399       final MasterProcedureEnv env,
400       final TableName tableName) throws IOException {
401     // Get list of online regions that are of this table.  Regions that are
402     // already closed will not be included in this list; i.e. the returned
403     // list is not ALL regions in a table, its all online regions according
404     // to the in-memory state on this master.
405     MarkRegionOfflineOpResult operationResult =
406         MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL;
407     final List<HRegionInfo> regions =
408         env.getMasterServices().getAssignmentManager().getRegionStates()
409             .getRegionsOfTable(tableName);
410     if (regions.size() > 0) {
411       LOG.info("Offlining " + regions.size() + " regions.");
412 
413       BulkDisabler bd = new BulkDisabler(env, tableName, regions);
414       try {
415         if (!bd.bulkAssign()) {
416           operationResult = MarkRegionOfflineOpResult.BULK_ASSIGN_REGIONS_FAILED;
417         }
418       } catch (InterruptedException e) {
419         LOG.warn("Disable was interrupted");
420         // Preserve the interrupt.
421         Thread.currentThread().interrupt();
422         operationResult = MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_INTERRUPTED;
423       }
424     }
425     return operationResult;
426   }
427 
428   /**
429    * Mark table state to Disabled
430    * @param env MasterProcedureEnv
431    * @throws IOException
432    */
433   protected static void setTableStateToDisabled(
434       final MasterProcedureEnv env,
435       final TableName tableName) throws HBaseException, IOException {
436     // Flip the table to disabled
437     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
438       tableName,
439       ZooKeeperProtos.Table.State.DISABLED);
440     LOG.info("Disabled table, " + tableName + ", is completed.");
441   }
442 
443   /**
444    * Action after disabling table.
445    * @param env MasterProcedureEnv
446    * @param state the procedure state
447    * @throws IOException
448    * @throws InterruptedException
449    */
450   protected void postDisable(final MasterProcedureEnv env, final DisableTableState state)
451       throws IOException, InterruptedException {
452     runCoprocessorAction(env, state);
453   }
454 
455   /**
456    * The procedure could be restarted from a different machine. If the variable is null, we need to
457    * retrieve it.
458    * @return traceEnabled
459    */
460   private Boolean isTraceEnabled() {
461     if (traceEnabled == null) {
462       traceEnabled = LOG.isTraceEnabled();
463     }
464     return traceEnabled;
465   }
466 
467   /**
468    * Coprocessor Action.
469    * @param env MasterProcedureEnv
470    * @param state the procedure state
471    * @throws IOException
472    * @throws InterruptedException
473    */
474   private void runCoprocessorAction(final MasterProcedureEnv env, final DisableTableState state)
475       throws IOException, InterruptedException {
476     final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
477     if (cpHost != null) {
478       user.doAs(new PrivilegedExceptionAction<Void>() {
479         @Override
480         public Void run() throws Exception {
481           switch (state) {
482           case DISABLE_TABLE_PRE_OPERATION:
483             cpHost.preDisableTableHandler(tableName);
484             break;
485           case DISABLE_TABLE_POST_OPERATION:
486             cpHost.postDisableTableHandler(tableName);
487             break;
488           default:
489             throw new UnsupportedOperationException(this + " unhandled state=" + state);
490           }
491           return null;
492         }
493       });
494     }
495   }
496 
497   /**
498    * Run bulk disable.
499    */
500   private static class BulkDisabler extends BulkAssigner {
501     private final AssignmentManager assignmentManager;
502     private final List<HRegionInfo> regions;
503     private final TableName tableName;
504     private final int waitingTimeForEvents;
505 
506     public BulkDisabler(final MasterProcedureEnv env, final TableName tableName,
507         final List<HRegionInfo> regions) {
508       super(env.getMasterServices());
509       this.assignmentManager = env.getMasterServices().getAssignmentManager();
510       this.tableName = tableName;
511       this.regions = regions;
512       this.waitingTimeForEvents =
513           env.getMasterServices().getConfiguration()
514               .getInt("hbase.master.event.waiting.time", 1000);
515     }
516 
517     @Override
518     protected void populatePool(ExecutorService pool) {
519       RegionStates regionStates = assignmentManager.getRegionStates();
520       for (final HRegionInfo region : regions) {
521         if (regionStates.isRegionInTransition(region)
522             && !regionStates.isRegionInState(region, RegionState.State.FAILED_CLOSE)) {
523           continue;
524         }
525         pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler", new Runnable() {
526           @Override
527           public void run() {
528             assignmentManager.unassign(region);
529           }
530         }));
531       }
532     }
533 
534     @Override
535     protected boolean waitUntilDone(long timeout) throws InterruptedException {
536       long startTime = EnvironmentEdgeManager.currentTime();
537       long remaining = timeout;
538       List<HRegionInfo> regions = null;
539       long lastLogTime = startTime;
540       while (!server.isStopped() && remaining > 0) {
541         Thread.sleep(waitingTimeForEvents);
542         regions = assignmentManager.getRegionStates().getRegionsOfTable(tableName);
543         long now = EnvironmentEdgeManager.currentTime();
544         // Don't log more than once every ten seconds. Its obnoxious. And only log table regions
545         // if we are waiting a while for them to go down...
546         if (LOG.isDebugEnabled() && ((now - lastLogTime) > 10000)) {
547           lastLogTime = now;
548           LOG.debug("Disable waiting until done; " + remaining + " ms remaining; " + regions);
549         }
550         if (regions.isEmpty()) break;
551         remaining = timeout - (now - startTime);
552       }
553       return regions != null && regions.isEmpty();
554     }
555   }
556 }