View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.security.PrivilegedExceptionAction;
25  import java.util.List;
26  import java.util.concurrent.ExecutorService;
27  import java.util.concurrent.atomic.AtomicBoolean;
28  
29  import org.apache.commons.logging.Log;
30  import org.apache.commons.logging.LogFactory;
31  import org.apache.hadoop.hbase.HRegionInfo;
32  import org.apache.hadoop.hbase.MetaTableAccessor;
33  import org.apache.hadoop.hbase.TableName;
34  import org.apache.hadoop.hbase.TableNotEnabledException;
35  import org.apache.hadoop.hbase.TableNotFoundException;
36  import org.apache.hadoop.hbase.TableStateManager;
37  import org.apache.hadoop.hbase.classification.InterfaceAudience;
38  import org.apache.hadoop.hbase.constraint.ConstraintException;
39  import org.apache.hadoop.hbase.executor.EventType;
40  import org.apache.hadoop.hbase.exceptions.HBaseException;
41  import org.apache.hadoop.hbase.master.AssignmentManager;
42  import org.apache.hadoop.hbase.master.BulkAssigner;
43  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
44  import org.apache.hadoop.hbase.master.RegionState;
45  import org.apache.hadoop.hbase.master.RegionStates;
46  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
47  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
48  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
49  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.DisableTableState;
50  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
51  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
52  import org.apache.hadoop.security.UserGroupInformation;
53  import org.apache.htrace.Trace;
54  
55  @InterfaceAudience.Private
56  public class DisableTableProcedure
57      extends StateMachineProcedure<MasterProcedureEnv, DisableTableState>
58      implements TableProcedureInterface {
  private static final Log LOG = LogFactory.getLog(DisableTableProcedure.class);

  // Set by abort(); consulted by setNextState() to fail the procedure instead of advancing.
  private final AtomicBoolean aborted = new AtomicBoolean(false);

  // Kept for backward compatibility with 1.0 asynchronous operations. May be null.
  private final ProcedurePrepareLatch syncLatch;

  // Table being disabled; written to and restored from the procedure state data.
  private TableName tableName;
  // When true, prepareDisable() does not check (or flip) the table state.
  private boolean skipTableStateCheck;
  // User on whose behalf the procedure runs; coprocessor hooks are invoked as this user.
  private UserGroupInformation user;

  // Lazily cached result of LOG.isTraceEnabled(); null until isTraceEnabled() is first called.
  private Boolean traceEnabled = null;
71  
  /**
   * Outcome of one attempt to take the regions of the table offline.
   */
  enum MarkRegionOfflineOpResult {
    // Either no region of the table was online, or the bulk unassign reported completion.
    MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL,
    // The bulk unassign reported that it did not complete.
    BULK_ASSIGN_REGIONS_FAILED,
    // The calling thread was interrupted while the bulk unassign was running.
    MARK_ALL_REGIONS_OFFLINE_INTERRUPTED,
  }
77  
78    public DisableTableProcedure() {
79      syncLatch = null;
80    }
81  
  /**
   * Constructor for callers that do not need a prepare latch; delegates to the four-argument
   * constructor with a null latch.
   * @param env MasterProcedureEnv
   * @param tableName the table to operate on
   * @param skipTableStateCheck whether to skip the table state check during the prepare step
   */
  public DisableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
      final boolean skipTableStateCheck) {
    this(env, tableName, skipTableStateCheck, null);
  }
92  
93    /**
94     * Constructor
95     * @param env MasterProcedureEnv
96     * @param tableName the table to operate on
97     * @param skipTableStateCheck whether to check table state
98     */
99    public DisableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
100       final boolean skipTableStateCheck, final ProcedurePrepareLatch syncLatch) {
101     this.tableName = tableName;
102     this.skipTableStateCheck = skipTableStateCheck;
103     this.user = env.getRequestUser().getUGI();
104     this.setOwner(this.user.getShortUserName());
105 
106     // Compatible with 1.0: We use latch to make sure that this procedure implementation is
107     // compatible with 1.0 asynchronized operations. We need to lock the table and check
108     // whether the Disable operation could be performed (table exists and online; table state
109     // is ENABLED). Once it is done, we are good to release the latch and the client can
110     // start asynchronously wait for the operation.
111     //
112     // Note: the member syncLatch could be null if we are in failover or recovery scenario.
113     // This is ok for backward compatible, as 1.0 client would not able to peek at procedure.
114     this.syncLatch = syncLatch;
115   }
116 
117   @Override
118   protected Flow executeFromState(final MasterProcedureEnv env, final DisableTableState state) {
119     if (isTraceEnabled()) {
120       LOG.trace(this + " execute state=" + state);
121     }
122 
123     try {
124       switch (state) {
125       case DISABLE_TABLE_PREPARE:
126         if (prepareDisable(env)) {
127           setNextState(DisableTableState.DISABLE_TABLE_PRE_OPERATION);
128         } else {
129           assert isFailed() : "disable should have an exception here";
130           return Flow.NO_MORE_STATE;
131         }
132         break;
133       case DISABLE_TABLE_PRE_OPERATION:
134         preDisable(env, state);
135         setNextState(DisableTableState.DISABLE_TABLE_SET_DISABLING_TABLE_STATE);
136         break;
137       case DISABLE_TABLE_SET_DISABLING_TABLE_STATE:
138         setTableStateToDisabling(env, tableName);
139         setNextState(DisableTableState.DISABLE_TABLE_MARK_REGIONS_OFFLINE);
140         break;
141       case DISABLE_TABLE_MARK_REGIONS_OFFLINE:
142         if (markRegionsOffline(env, tableName, true) ==
143             MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
144           setNextState(DisableTableState.DISABLE_TABLE_SET_DISABLED_TABLE_STATE);
145         } else {
146           LOG.trace("Retrying later to disable the missing regions");
147         }
148         break;
149       case DISABLE_TABLE_SET_DISABLED_TABLE_STATE:
150         setTableStateToDisabled(env, tableName);
151         setNextState(DisableTableState.DISABLE_TABLE_POST_OPERATION);
152         break;
153       case DISABLE_TABLE_POST_OPERATION:
154         postDisable(env, state);
155         return Flow.NO_MORE_STATE;
156       default:
157         throw new UnsupportedOperationException("unhandled state=" + state);
158       }
159     } catch (InterruptedException|HBaseException|IOException e) {
160       LOG.warn("Retriable error trying to disable table=" + tableName + " state=" + state, e);
161     }
162     return Flow.HAS_MORE_STATE;
163   }
164 
165   @Override
166   protected void rollbackState(final MasterProcedureEnv env, final DisableTableState state)
167       throws IOException {
168     if (state == DisableTableState.DISABLE_TABLE_PREPARE) {
169       undoTableStateChange(env);
170       ProcedurePrepareLatch.releaseLatch(syncLatch, this);
171       return;
172     }
173 
174     // The delete doesn't have a rollback. The execution will succeed, at some point.
175     throw new UnsupportedOperationException("unhandled state=" + state);
176   }
177 
  /**
   * Maps a numeric state id back to its {@link DisableTableState} value.
   * @param stateId the numeric id, as produced by {@link #getStateId}
   * @return the result of {@code DisableTableState.valueOf(stateId)}
   */
  @Override
  protected DisableTableState getState(final int stateId) {
    return DisableTableState.valueOf(stateId);
  }
182 
  /**
   * Maps a {@link DisableTableState} value to its numeric id.
   * @param state the state to convert
   * @return the number reported by {@code state.getNumber()}
   */
  @Override
  protected int getStateId(final DisableTableState state) {
    return state.getNumber();
  }
187 
  /**
   * @return the state the procedure starts in, {@code DISABLE_TABLE_PREPARE}
   */
  @Override
  protected DisableTableState getInitialState() {
    return DisableTableState.DISABLE_TABLE_PREPARE;
  }
192 
193   @Override
194   protected void setNextState(final DisableTableState state) {
195     if (aborted.get()) {
196       setAbortFailure("disable-table", "abort requested");
197     } else {
198       super.setNextState(state);
199     }
200   }
201 
  /**
   * Records an abort request. The flag is acted upon the next time {@link #setNextState} is
   * called.
   * @param env MasterProcedureEnv (not used)
   * @return always true
   */
  @Override
  public boolean abort(final MasterProcedureEnv env) {
    aborted.set(true);
    return true;
  }
207 
208   @Override
209   protected boolean acquireLock(final MasterProcedureEnv env) {
210     if (!env.isInitialized()) return false;
211     return env.getProcedureQueue().tryAcquireTableWrite(
212       tableName,
213       EventType.C_M_DISABLE_TABLE.toString());
214   }
215 
  /**
   * Releases the write lock of the table on the procedure queue.
   * @param env MasterProcedureEnv
   */
  @Override
  protected void releaseLock(final MasterProcedureEnv env) {
    env.getProcedureQueue().releaseTableWrite(tableName);
  }
220 
221   @Override
222   public void serializeStateData(final OutputStream stream) throws IOException {
223     super.serializeStateData(stream);
224 
225     MasterProcedureProtos.DisableTableStateData.Builder disableTableMsg =
226         MasterProcedureProtos.DisableTableStateData.newBuilder()
227             .setUserInfo(MasterProcedureUtil.toProtoUserInfo(user))
228             .setTableName(ProtobufUtil.toProtoTableName(tableName))
229             .setSkipTableStateCheck(skipTableStateCheck);
230 
231     disableTableMsg.build().writeDelimitedTo(stream);
232   }
233 
234   @Override
235   public void deserializeStateData(final InputStream stream) throws IOException {
236     super.deserializeStateData(stream);
237 
238     MasterProcedureProtos.DisableTableStateData disableTableMsg =
239         MasterProcedureProtos.DisableTableStateData.parseDelimitedFrom(stream);
240     user = MasterProcedureUtil.toUserInfo(disableTableMsg.getUserInfo());
241     tableName = ProtobufUtil.toTableName(disableTableMsg.getTableName());
242     skipTableStateCheck = disableTableMsg.getSkipTableStateCheck();
243   }
244 
245   @Override
246   public void toStringClassDetails(StringBuilder sb) {
247     sb.append(getClass().getSimpleName());
248     sb.append(" (table=");
249     sb.append(tableName);
250     sb.append(")");
251   }
252 
  /**
   * @return the table this procedure operates on
   */
  @Override
  public TableName getTableName() {
    return tableName;
  }
257 
  /**
   * @return the kind of table operation this procedure performs, always
   *         {@code TableOperationType.DISABLE}
   */
  @Override
  public TableOperationType getTableOperationType() {
    return TableOperationType.DISABLE;
  }
262 
263   /**
264    * Action before any real action of disabling table. Set the exception in the procedure instead
265    * of throwing it.  This approach is to deal with backward compatible with 1.0.
266    * @param env MasterProcedureEnv
267    * @throws HBaseException
268    * @throws IOException
269    */
270   private boolean prepareDisable(final MasterProcedureEnv env) throws HBaseException, IOException {
271     boolean canTableBeDisabled = true;
272     if (tableName.equals(TableName.META_TABLE_NAME)) {
273       setFailure("master-disable-table", new ConstraintException("Cannot disable catalog table"));
274       canTableBeDisabled = false;
275     } else if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
276       setFailure("master-disable-table", new TableNotFoundException(tableName));
277       canTableBeDisabled = false;
278     } else if (!skipTableStateCheck) {
279       // There could be multiple client requests trying to disable or enable
280       // the table at the same time. Ensure only the first request is honored
281       // After that, no other requests can be accepted until the table reaches
282       // DISABLED or ENABLED.
283       //
284       // Note: A quick state check should be enough for us to move forward. However, instead of
285       // calling TableStateManager.isTableState() to just check the state, we called
286       // TableStateManager.setTableStateIfInStates() to set the state to DISABLING from ENABLED.
287       // This is because we treat empty state as enabled from 0.92-clusters. See
288       // ZKTableStateManager.setTableStateIfInStates() that has a hack solution to work around
289       // this issue.
290       TableStateManager tsm =
291         env.getMasterServices().getAssignmentManager().getTableStateManager();
292       if (!tsm.setTableStateIfInStates(tableName, ZooKeeperProtos.Table.State.DISABLING,
293             ZooKeeperProtos.Table.State.DISABLING, ZooKeeperProtos.Table.State.ENABLED)) {
294         LOG.info("Table " + tableName + " isn't enabled; skipping disable");
295         setFailure("master-disable-table", new TableNotEnabledException(tableName));
296         canTableBeDisabled = false;
297       }
298     }
299 
300     // We are done the check. Future actions in this procedure could be done asynchronously.
301     ProcedurePrepareLatch.releaseLatch(syncLatch, this);
302 
303     return canTableBeDisabled;
304   }
305 
306   /**
307    * Rollback of table state change in prepareDisable()
308    * @param env MasterProcedureEnv
309    */
310   private void undoTableStateChange(final MasterProcedureEnv env) {
311     if (!skipTableStateCheck) {
312       try {
313         // If the state was changed, undo it.
314         if (env.getMasterServices().getAssignmentManager().getTableStateManager().isTableState(
315             tableName, ZooKeeperProtos.Table.State.DISABLING)) {
316           EnableTableProcedure.setTableStateToEnabled(env, tableName);
317         }
318       } catch (Exception e) {
319         // Ignore exception.
320       }
321     }
322   }
323 
  /**
   * Action before disabling table: runs the coprocessor action that corresponds to
   * {@code state} (see runCoprocessorAction()).
   * @param env MasterProcedureEnv
   * @param state the procedure state
   * @throws IOException
   * @throws InterruptedException
   */
  protected void preDisable(final MasterProcedureEnv env, final DisableTableState state)
      throws IOException, InterruptedException {
    runCoprocessorAction(env, state);
  }
335 
336   /**
337    * Mark table state to Disabling
338    * @param env MasterProcedureEnv
339    * @throws IOException
340    */
341   protected static void setTableStateToDisabling(
342       final MasterProcedureEnv env,
343       final TableName tableName) throws HBaseException, IOException {
344     // Set table disabling flag up in zk.
345     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
346       tableName,
347       ZooKeeperProtos.Table.State.DISABLING);
348   }
349 
350   /**
351    * Mark regions of the table offline with retries
352    * @param env MasterProcedureEnv
353    * @param tableName the target table
354    * @param retryRequired whether to retry if the first run failed
355    * @return whether the operation is fully completed or being interrupted.
356    * @throws IOException
357    */
358   protected static MarkRegionOfflineOpResult markRegionsOffline(
359       final MasterProcedureEnv env,
360       final TableName tableName,
361       final Boolean retryRequired) throws IOException {
362     // Dev consideration: add a config to control max number of retry. For now, it is hard coded.
363     int maxTry = (retryRequired ? 10 : 1);
364     MarkRegionOfflineOpResult operationResult =
365         MarkRegionOfflineOpResult.BULK_ASSIGN_REGIONS_FAILED;
366     do {
367       try {
368         operationResult = markRegionsOffline(env, tableName);
369         if (operationResult == MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
370           break;
371         }
372         maxTry--;
373       } catch (Exception e) {
374         LOG.warn("Received exception while marking regions online. tries left: " + maxTry, e);
375         maxTry--;
376         if (maxTry > 0) {
377           continue; // we still have some retry left, try again.
378         }
379         throw e;
380       }
381     } while (maxTry > 0);
382 
383     if (operationResult != MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL) {
384       LOG.warn("Some or all regions of the Table '" + tableName + "' were still online");
385     }
386 
387     return operationResult;
388   }
389 
390   /**
391    * Mark regions of the table offline
392    * @param env MasterProcedureEnv
393    * @param tableName the target table
394    * @return whether the operation is fully completed or being interrupted.
395    * @throws IOException
396    */
397   private static MarkRegionOfflineOpResult markRegionsOffline(
398       final MasterProcedureEnv env,
399       final TableName tableName) throws IOException {
400     // Get list of online regions that are of this table.  Regions that are
401     // already closed will not be included in this list; i.e. the returned
402     // list is not ALL regions in a table, its all online regions according
403     // to the in-memory state on this master.
404     MarkRegionOfflineOpResult operationResult =
405         MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_SUCCESSFUL;
406     final List<HRegionInfo> regions =
407         env.getMasterServices().getAssignmentManager().getRegionStates()
408             .getRegionsOfTable(tableName);
409     if (regions.size() > 0) {
410       LOG.info("Offlining " + regions.size() + " regions.");
411 
412       BulkDisabler bd = new BulkDisabler(env, tableName, regions);
413       try {
414         if (!bd.bulkAssign()) {
415           operationResult = MarkRegionOfflineOpResult.BULK_ASSIGN_REGIONS_FAILED;
416         }
417       } catch (InterruptedException e) {
418         LOG.warn("Disable was interrupted");
419         // Preserve the interrupt.
420         Thread.currentThread().interrupt();
421         operationResult = MarkRegionOfflineOpResult.MARK_ALL_REGIONS_OFFLINE_INTERRUPTED;
422       }
423     }
424     return operationResult;
425   }
426 
427   /**
428    * Mark table state to Disabled
429    * @param env MasterProcedureEnv
430    * @throws IOException
431    */
432   protected static void setTableStateToDisabled(
433       final MasterProcedureEnv env,
434       final TableName tableName) throws HBaseException, IOException {
435     // Flip the table to disabled
436     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
437       tableName,
438       ZooKeeperProtos.Table.State.DISABLED);
439     LOG.info("Disabled table, " + tableName + ", is completed.");
440   }
441 
  /**
   * Action after disabling table: runs the coprocessor action that corresponds to
   * {@code state} (see runCoprocessorAction()).
   * @param env MasterProcedureEnv
   * @param state the procedure state
   * @throws IOException
   * @throws InterruptedException
   */
  protected void postDisable(final MasterProcedureEnv env, final DisableTableState state)
      throws IOException, InterruptedException {
    runCoprocessorAction(env, state);
  }
453 
454   /**
455    * The procedure could be restarted from a different machine. If the variable is null, we need to
456    * retrieve it.
457    * @return traceEnabled
458    */
459   private Boolean isTraceEnabled() {
460     if (traceEnabled == null) {
461       traceEnabled = LOG.isTraceEnabled();
462     }
463     return traceEnabled;
464   }
465 
466   /**
467    * Coprocessor Action.
468    * @param env MasterProcedureEnv
469    * @param state the procedure state
470    * @throws IOException
471    * @throws InterruptedException
472    */
473   private void runCoprocessorAction(final MasterProcedureEnv env, final DisableTableState state)
474       throws IOException, InterruptedException {
475     final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
476     if (cpHost != null) {
477       user.doAs(new PrivilegedExceptionAction<Void>() {
478         @Override
479         public Void run() throws Exception {
480           switch (state) {
481           case DISABLE_TABLE_PRE_OPERATION:
482             cpHost.preDisableTableHandler(tableName);
483             break;
484           case DISABLE_TABLE_POST_OPERATION:
485             cpHost.postDisableTableHandler(tableName);
486             break;
487           default:
488             throw new UnsupportedOperationException(this + " unhandled state=" + state);
489           }
490           return null;
491         }
492       });
493     }
494   }
495 
496   /**
497    * Run bulk disable.
498    */
499   private static class BulkDisabler extends BulkAssigner {
500     private final AssignmentManager assignmentManager;
501     private final List<HRegionInfo> regions;
502     private final TableName tableName;
503     private final int waitingTimeForEvents;
504 
505     public BulkDisabler(final MasterProcedureEnv env, final TableName tableName,
506         final List<HRegionInfo> regions) {
507       super(env.getMasterServices());
508       this.assignmentManager = env.getMasterServices().getAssignmentManager();
509       this.tableName = tableName;
510       this.regions = regions;
511       this.waitingTimeForEvents =
512           env.getMasterServices().getConfiguration()
513               .getInt("hbase.master.event.waiting.time", 1000);
514     }
515 
516     @Override
517     protected void populatePool(ExecutorService pool) {
518       RegionStates regionStates = assignmentManager.getRegionStates();
519       for (final HRegionInfo region : regions) {
520         if (regionStates.isRegionInTransition(region)
521             && !regionStates.isRegionInState(region, RegionState.State.FAILED_CLOSE)) {
522           continue;
523         }
524         pool.execute(Trace.wrap("DisableTableHandler.BulkDisabler", new Runnable() {
525           @Override
526           public void run() {
527             assignmentManager.unassign(region);
528           }
529         }));
530       }
531     }
532 
533     @Override
534     protected boolean waitUntilDone(long timeout) throws InterruptedException {
535       long startTime = EnvironmentEdgeManager.currentTime();
536       long remaining = timeout;
537       List<HRegionInfo> regions = null;
538       long lastLogTime = startTime;
539       while (!server.isStopped() && remaining > 0) {
540         Thread.sleep(waitingTimeForEvents);
541         regions = assignmentManager.getRegionStates().getRegionsOfTable(tableName);
542         long now = EnvironmentEdgeManager.currentTime();
543         // Don't log more than once every ten seconds. Its obnoxious. And only log table regions
544         // if we are waiting a while for them to go down...
545         if (LOG.isDebugEnabled() && ((now - lastLogTime) > 10000)) {
546           lastLogTime = now;
547           LOG.debug("Disable waiting until done; " + remaining + " ms remaining; " + regions);
548         }
549         if (regions.isEmpty()) break;
550         remaining = timeout - (now - startTime);
551       }
552       return regions != null && regions.isEmpty();
553     }
554   }
555 }