View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  package org.apache.hadoop.hbase.master.procedure;
20  
21  import java.io.IOException;
22  import java.io.InputStream;
23  import java.io.OutputStream;
24  import java.security.PrivilegedExceptionAction;
25  import java.util.HashMap;
26  import java.util.HashSet;
27  import java.util.List;
28  import java.util.Map;
29  import java.util.concurrent.atomic.AtomicBoolean;
30  
31  import org.apache.commons.logging.Log;
32  import org.apache.commons.logging.LogFactory;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.MetaTableAccessor;
35  import org.apache.hadoop.hbase.ServerName;
36  import org.apache.hadoop.hbase.TableName;
37  import org.apache.hadoop.hbase.TableNotDisabledException;
38  import org.apache.hadoop.hbase.TableNotFoundException;
39  import org.apache.hadoop.hbase.TableStateManager;
40  import org.apache.hadoop.hbase.classification.InterfaceAudience;
41  import org.apache.hadoop.hbase.executor.EventType;
42  import org.apache.hadoop.hbase.exceptions.HBaseException;
43  import org.apache.hadoop.hbase.master.AssignmentManager;
44  import org.apache.hadoop.hbase.master.BulkAssigner;
45  import org.apache.hadoop.hbase.master.GeneralBulkAssigner;
46  import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
47  import org.apache.hadoop.hbase.master.MasterServices;
48  import org.apache.hadoop.hbase.master.RegionStates;
49  import org.apache.hadoop.hbase.master.ServerManager;
50  import org.apache.hadoop.hbase.procedure2.StateMachineProcedure;
51  import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
52  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos;
53  import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.EnableTableState;
54  import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos;
55  import org.apache.hadoop.hbase.util.Pair;
56  import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
57  import org.apache.hadoop.security.UserGroupInformation;
58  
59  @InterfaceAudience.Private
60  public class EnableTableProcedure
61      extends StateMachineProcedure<MasterProcedureEnv, EnableTableState>
62      implements TableProcedureInterface {
63    private static final Log LOG = LogFactory.getLog(EnableTableProcedure.class);
64  
65    private final AtomicBoolean aborted = new AtomicBoolean(false);
66  
67    // This is for back compatible with 1.0 asynchronized operations.
68    private final ProcedurePrepareLatch syncLatch;
69  
70    private TableName tableName;
71    private boolean skipTableStateCheck;
72    private UserGroupInformation user;
73  
74    private Boolean traceEnabled = null;
75  
76    public EnableTableProcedure() {
77      syncLatch = null;
78    }
79  
80    /**
81     * Constructor
82     * @param env MasterProcedureEnv
83     * @param tableName the table to operate on
84     * @param skipTableStateCheck whether to check table state
85     */
86    public EnableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
87        final boolean skipTableStateCheck) {
88      this(env, tableName, skipTableStateCheck, null);
89    }
90  
91    /**
92     * Constructor
93     * @param env MasterProcedureEnv
94     * @throws IOException
95     * @param tableName the table to operate on
96     * @param skipTableStateCheck whether to check table state
97     */
98    public EnableTableProcedure(final MasterProcedureEnv env, final TableName tableName,
99        final boolean skipTableStateCheck, final ProcedurePrepareLatch syncLatch) {
100     this.tableName = tableName;
101     this.skipTableStateCheck = skipTableStateCheck;
102     this.user = env.getRequestUser().getUGI();
103     this.setOwner(this.user.getShortUserName());
104 
105     // Compatible with 1.0: We use latch to make sure that this procedure implementation is
106     // compatible with 1.0 asynchronized operations. We need to lock the table and check
107     // whether the Enable operation could be performed (table exists and offline; table state
108     // is DISABLED). Once it is done, we are good to release the latch and the client can
109     // start asynchronously wait for the operation.
110     //
111     // Note: the member syncLatch could be null if we are in failover or recovery scenario.
112     // This is ok for backward compatible, as 1.0 client would not able to peek at procedure.
113     this.syncLatch = syncLatch;
114   }
115 
116   @Override
117   protected Flow executeFromState(final MasterProcedureEnv env, final EnableTableState state) {
118     if (isTraceEnabled()) {
119       LOG.trace(this + " execute state=" + state);
120     }
121 
122     try {
123       switch (state) {
124       case ENABLE_TABLE_PREPARE:
125         if (prepareEnable(env)) {
126           setNextState(EnableTableState.ENABLE_TABLE_PRE_OPERATION);
127         } else {
128           assert isFailed() : "enable should have an exception here";
129           return Flow.NO_MORE_STATE;
130         }
131         break;
132       case ENABLE_TABLE_PRE_OPERATION:
133         preEnable(env, state);
134         setNextState(EnableTableState.ENABLE_TABLE_SET_ENABLING_TABLE_STATE);
135         break;
136       case ENABLE_TABLE_SET_ENABLING_TABLE_STATE:
137         setTableStateToEnabling(env, tableName);
138         setNextState(EnableTableState.ENABLE_TABLE_MARK_REGIONS_ONLINE);
139         break;
140       case ENABLE_TABLE_MARK_REGIONS_ONLINE:
141         markRegionsOnline(env, tableName, true);
142         setNextState(EnableTableState.ENABLE_TABLE_SET_ENABLED_TABLE_STATE);
143         break;
144       case ENABLE_TABLE_SET_ENABLED_TABLE_STATE:
145         setTableStateToEnabled(env, tableName);
146         setNextState(EnableTableState.ENABLE_TABLE_POST_OPERATION);
147         break;
148       case ENABLE_TABLE_POST_OPERATION:
149         postEnable(env, state);
150         return Flow.NO_MORE_STATE;
151       default:
152         throw new UnsupportedOperationException("unhandled state=" + state);
153       }
154     } catch (InterruptedException|HBaseException|IOException e) {
155       LOG.error("Error trying to enable table=" + tableName + " state=" + state, e);
156       setFailure("master-enable-table", e);
157     }
158     return Flow.HAS_MORE_STATE;
159   }
160 
161   @Override
162   protected void rollbackState(final MasterProcedureEnv env, final EnableTableState state)
163       throws IOException {
164     if (isTraceEnabled()) {
165       LOG.trace(this + " rollback state=" + state);
166     }
167     try {
168       switch (state) {
169       case ENABLE_TABLE_POST_OPERATION:
170         // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.preDisable())?
171         break;
172       case ENABLE_TABLE_SET_ENABLED_TABLE_STATE:
173         DisableTableProcedure.setTableStateToDisabling(env, tableName);
174         break;
175       case ENABLE_TABLE_MARK_REGIONS_ONLINE:
176         markRegionsOfflineDuringRecovery(env);
177         break;
178       case ENABLE_TABLE_SET_ENABLING_TABLE_STATE:
179         DisableTableProcedure.setTableStateToDisabled(env, tableName);
180         break;
181       case ENABLE_TABLE_PRE_OPERATION:
182         // TODO-MAYBE: call the coprocessor event to undo (eg. DisableTableProcedure.postDisable())?
183         break;
184       case ENABLE_TABLE_PREPARE:
185         // Nothing to undo for this state.
186         // We do need to count down the latch count so that we don't stuck.
187         ProcedurePrepareLatch.releaseLatch(syncLatch, this);
188         break;
189       default:
190         throw new UnsupportedOperationException("unhandled state=" + state);
191       }
192     } catch (HBaseException e) {
193       LOG.warn("Failed enable table rollback attempt step=" + state + " table=" + tableName, e);
194       throw new IOException(e);
195     } catch (IOException e) {
196       // This will be retried. Unless there is a bug in the code,
197       // this should be just a "temporary error" (e.g. network down)
198       LOG.warn("Failed enable table rollback attempt step=" + state + " table=" + tableName, e);
199       throw e;
200     }
201   }
202 
203   @Override
204   protected EnableTableState getState(final int stateId) {
205     return EnableTableState.valueOf(stateId);
206   }
207 
208   @Override
209   protected int getStateId(final EnableTableState state) {
210     return state.getNumber();
211   }
212 
213   @Override
214   protected EnableTableState getInitialState() {
215     return EnableTableState.ENABLE_TABLE_PREPARE;
216   }
217 
218   @Override
219   protected void setNextState(final EnableTableState state) {
220     if (aborted.get()) {
221       setAbortFailure("Enable-table", "abort requested");
222     } else {
223       super.setNextState(state);
224     }
225   }
226 
227   @Override
228   public boolean abort(final MasterProcedureEnv env) {
229     aborted.set(true);
230     return true;
231   }
232 
233   @Override
234   protected boolean acquireLock(final MasterProcedureEnv env) {
235     if (!env.isInitialized()) return false;
236     return env.getProcedureQueue().tryAcquireTableWrite(
237       tableName,
238       EventType.C_M_ENABLE_TABLE.toString());
239   }
240 
241   @Override
242   protected void releaseLock(final MasterProcedureEnv env) {
243     env.getProcedureQueue().releaseTableWrite(tableName);
244   }
245 
246   @Override
247   public void serializeStateData(final OutputStream stream) throws IOException {
248     super.serializeStateData(stream);
249 
250     MasterProcedureProtos.EnableTableStateData.Builder enableTableMsg =
251         MasterProcedureProtos.EnableTableStateData.newBuilder()
252             .setUserInfo(MasterProcedureUtil.toProtoUserInfo(user))
253             .setTableName(ProtobufUtil.toProtoTableName(tableName))
254             .setSkipTableStateCheck(skipTableStateCheck);
255 
256     enableTableMsg.build().writeDelimitedTo(stream);
257   }
258 
259   @Override
260   public void deserializeStateData(final InputStream stream) throws IOException {
261     super.deserializeStateData(stream);
262 
263     MasterProcedureProtos.EnableTableStateData enableTableMsg =
264         MasterProcedureProtos.EnableTableStateData.parseDelimitedFrom(stream);
265     user = MasterProcedureUtil.toUserInfo(enableTableMsg.getUserInfo());
266     tableName = ProtobufUtil.toTableName(enableTableMsg.getTableName());
267     skipTableStateCheck = enableTableMsg.getSkipTableStateCheck();
268   }
269 
270   @Override
271   public void toStringClassDetails(StringBuilder sb) {
272     sb.append(getClass().getSimpleName());
273     sb.append(" (table=");
274     sb.append(tableName);
275     sb.append(")");
276   }
277 
278   @Override
279   public TableName getTableName() {
280     return tableName;
281   }
282 
283   @Override
284   public TableOperationType getTableOperationType() {
285     return TableOperationType.ENABLE;
286   }
287 
288 
289   /**
290    * Action before any real action of enabling table. Set the exception in the procedure instead
291    * of throwing it.  This approach is to deal with backward compatible with 1.0.
292    * @param env MasterProcedureEnv
293    * @return whether the table passes the necessary checks
294    * @throws IOException
295    */
296   private boolean prepareEnable(final MasterProcedureEnv env) throws IOException {
297     boolean canTableBeEnabled = true;
298 
299     // Check whether table exists
300     if (!MetaTableAccessor.tableExists(env.getMasterServices().getConnection(), tableName)) {
301       setFailure("master-enable-table", new TableNotFoundException(tableName));
302       canTableBeEnabled = false;
303     } else if (!skipTableStateCheck) {
304       // There could be multiple client requests trying to disable or enable
305       // the table at the same time. Ensure only the first request is honored
306       // After that, no other requests can be accepted until the table reaches
307       // DISABLED or ENABLED.
308       //
309       // Note: in 1.0 release, we called TableStateManager.setTableStateIfInStates() to set
310       // the state to ENABLING from DISABLED. The implementation was done before table lock
311       // was implemented. With table lock, there is no need to set the state here (it will
312       // set the state later on). A quick state check should be enough for us to move forward.
313       TableStateManager tsm = env.getMasterServices().getAssignmentManager().getTableStateManager();
314       if (!tsm.isTableState(tableName, ZooKeeperProtos.Table.State.DISABLED)) {
315         LOG.info("Table " + tableName + " isn't disabled; skipping enable");
316         setFailure("master-enable-table", new TableNotDisabledException(this.tableName));
317         canTableBeEnabled = false;
318       }
319     }
320 
321     // We are done the check. Future actions in this procedure could be done asynchronously.
322     ProcedurePrepareLatch.releaseLatch(syncLatch, this);
323 
324     return canTableBeEnabled;
325   }
326 
327   /**
328    * Action before enabling table.
329    * @param env MasterProcedureEnv
330    * @param state the procedure state
331    * @throws IOException
332    * @throws InterruptedException
333    */
334   private void preEnable(final MasterProcedureEnv env, final EnableTableState state)
335       throws IOException, InterruptedException {
336     runCoprocessorAction(env, state);
337   }
338 
339   /**
340    * Mark table state to Enabling
341    * @param env MasterProcedureEnv
342    * @param tableName the target table
343    * @throws IOException
344    */
345   protected static void setTableStateToEnabling(
346       final MasterProcedureEnv env,
347       final TableName tableName) throws HBaseException, IOException {
348     // Set table disabling flag up in zk.
349     LOG.info("Attempting to enable the table " + tableName);
350     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
351       tableName,
352       ZooKeeperProtos.Table.State.ENABLING);
353   }
354 
355   /**
356    * Mark offline regions of the table online with retry
357    * @param env MasterProcedureEnv
358    * @param tableName the target table
359    * @param retryRequired whether to retry if the first run failed
360    * @throws IOException
361    */
362   protected static void markRegionsOnline(
363       final MasterProcedureEnv env,
364       final TableName tableName,
365       final Boolean retryRequired) throws IOException {
366     // This is best effort approach to make all regions of a table online.  If we fail to do
367     // that, it is ok that the table has some offline regions; user can fix it manually.
368 
369     // Dev consideration: add a config to control max number of retry. For now, it is hard coded.
370     int maxTry = (retryRequired ? 10 : 1);
371     boolean done = false;
372 
373     do {
374       try {
375         done = markRegionsOnline(env, tableName);
376         if (done) {
377           break;
378         }
379         maxTry--;
380       } catch (Exception e) {
381         LOG.warn("Received exception while marking regions online. tries left: " + maxTry, e);
382         maxTry--;
383         if (maxTry > 0) {
384           continue; // we still have some retry left, try again.
385         }
386         throw e;
387       }
388     } while (maxTry > 0);
389 
390     if (!done) {
391       LOG.warn("Some or all regions of the Table '" + tableName + "' were offline");
392     }
393   }
394 
395   /**
396    * Mark offline regions of the table online
397    * @param env MasterProcedureEnv
398    * @param tableName the target table
399    * @return whether the operation is fully completed or being interrupted.
400    * @throws IOException
401    */
402   private static boolean markRegionsOnline(final MasterProcedureEnv env, final TableName tableName)
403       throws IOException {
404     final AssignmentManager assignmentManager = env.getMasterServices().getAssignmentManager();
405     final MasterServices masterServices = env.getMasterServices();
406     final ServerManager serverManager = masterServices.getServerManager();
407     boolean done = false;
408     // Get the regions of this table. We're done when all listed
409     // tables are onlined.
410     List<Pair<HRegionInfo, ServerName>> tableRegionsAndLocations;
411 
412     if (TableName.META_TABLE_NAME.equals(tableName)) {
413       tableRegionsAndLocations =
414           new MetaTableLocator().getMetaRegionsAndLocations(masterServices.getZooKeeper());
415     } else {
416       tableRegionsAndLocations =
417           MetaTableAccessor.getTableRegionsAndLocations(
418             masterServices.getZooKeeper(), masterServices.getConnection(), tableName, true);
419     }
420 
421     int countOfRegionsInTable = tableRegionsAndLocations.size();
422     Map<HRegionInfo, ServerName> regionsToAssign =
423         regionsToAssignWithServerName(env, tableRegionsAndLocations);
424 
425     // need to potentially create some regions for the replicas
426     List<HRegionInfo> unrecordedReplicas =
427         AssignmentManager.replicaRegionsNotRecordedInMeta(new HashSet<HRegionInfo>(
428             regionsToAssign.keySet()), masterServices);
429     Map<ServerName, List<HRegionInfo>> srvToUnassignedRegs =
430         assignmentManager.getBalancer().roundRobinAssignment(unrecordedReplicas,
431           serverManager.getOnlineServersList());
432     if (srvToUnassignedRegs != null) {
433       for (Map.Entry<ServerName, List<HRegionInfo>> entry : srvToUnassignedRegs.entrySet()) {
434         for (HRegionInfo h : entry.getValue()) {
435           regionsToAssign.put(h, entry.getKey());
436         }
437       }
438     }
439 
440     int offlineRegionsCount = regionsToAssign.size();
441 
442     LOG.info("Table '" + tableName + "' has " + countOfRegionsInTable + " regions, of which "
443         + offlineRegionsCount + " are offline.");
444     if (offlineRegionsCount == 0) {
445       return true;
446     }
447 
448     List<ServerName> onlineServers = serverManager.createDestinationServersList();
449     Map<ServerName, List<HRegionInfo>> bulkPlan =
450         env.getMasterServices().getAssignmentManager().getBalancer()
451             .retainAssignment(regionsToAssign, onlineServers);
452     if (bulkPlan != null) {
453       LOG.info("Bulk assigning " + offlineRegionsCount + " region(s) across " + bulkPlan.size()
454           + " server(s), retainAssignment=true");
455 
456       BulkAssigner ba = new GeneralBulkAssigner(masterServices, bulkPlan, assignmentManager, true);
457       try {
458         if (ba.bulkAssign()) {
459           done = true;
460         }
461       } catch (InterruptedException e) {
462         LOG.warn("Enable operation was interrupted when enabling table '" + tableName + "'");
463         // Preserve the interrupt.
464         Thread.currentThread().interrupt();
465       }
466     } else {
467       LOG.info("Balancer was unable to find suitable servers for table " + tableName
468           + ", leaving unassigned");
469     }
470     return done;
471   }
472 
473   /**
474    * Mark regions of the table offline during recovery
475    * @param env MasterProcedureEnv
476    */
477   private void markRegionsOfflineDuringRecovery(final MasterProcedureEnv env) {
478     try {
479       // This is a best effort attempt. We will move on even it does not succeed. We will retry
480       // several times until we giving up.
481       DisableTableProcedure.markRegionsOffline(env, tableName, true);
482     } catch (Exception e) {
483       LOG.debug("Failed to offline all regions of table " + tableName + ". Ignoring", e);
484     }
485   }
486 
487   /**
488    * Mark table state to Enabled
489    * @param env MasterProcedureEnv
490    * @throws IOException
491    */
492   protected static void setTableStateToEnabled(
493       final MasterProcedureEnv env,
494       final TableName tableName) throws HBaseException, IOException {
495     // Flip the table to Enabled
496     env.getMasterServices().getAssignmentManager().getTableStateManager().setTableState(
497       tableName,
498       ZooKeeperProtos.Table.State.ENABLED);
499     LOG.info("Table '" + tableName + "' was successfully enabled.");
500   }
501 
502   /**
503    * Action after enabling table.
504    * @param env MasterProcedureEnv
505    * @param state the procedure state
506    * @throws IOException
507    * @throws InterruptedException
508    */
509   private void postEnable(final MasterProcedureEnv env, final EnableTableState state)
510       throws IOException, InterruptedException {
511     runCoprocessorAction(env, state);
512   }
513 
514   /**
515    * The procedure could be restarted from a different machine. If the variable is null, we need to
516    * retrieve it.
517    * @return traceEnabled
518    */
519   private Boolean isTraceEnabled() {
520     if (traceEnabled == null) {
521       traceEnabled = LOG.isTraceEnabled();
522     }
523     return traceEnabled;
524   }
525 
526   /**
527    * @param regionsInMeta
528    * @return List of regions neither in transition nor assigned.
529    * @throws IOException
530    */
531   private static Map<HRegionInfo, ServerName> regionsToAssignWithServerName(
532       final MasterProcedureEnv env,
533       final List<Pair<HRegionInfo, ServerName>> regionsInMeta) throws IOException {
534     Map<HRegionInfo, ServerName> regionsToAssign =
535         new HashMap<HRegionInfo, ServerName>(regionsInMeta.size());
536     RegionStates regionStates = env.getMasterServices().getAssignmentManager().getRegionStates();
537     for (Pair<HRegionInfo, ServerName> regionLocation : regionsInMeta) {
538       HRegionInfo hri = regionLocation.getFirst();
539       ServerName sn = regionLocation.getSecond();
540       if (regionStates.isRegionOffline(hri)) {
541         regionsToAssign.put(hri, sn);
542       } else {
543         if (LOG.isDebugEnabled()) {
544           LOG.debug("Skipping assign for the region " + hri + " during enable table "
545               + hri.getTable() + " because its already in tranition or assigned.");
546         }
547       }
548     }
549     return regionsToAssign;
550   }
551 
552   /**
553    * Coprocessor Action.
554    * @param env MasterProcedureEnv
555    * @param state the procedure state
556    * @throws IOException
557    * @throws InterruptedException
558    */
559   private void runCoprocessorAction(final MasterProcedureEnv env, final EnableTableState state)
560       throws IOException, InterruptedException {
561     final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
562     if (cpHost != null) {
563       user.doAs(new PrivilegedExceptionAction<Void>() {
564         @Override
565         public Void run() throws Exception {
566           switch (state) {
567           case ENABLE_TABLE_PRE_OPERATION:
568             cpHost.preEnableTableHandler(getTableName());
569             break;
570           case ENABLE_TABLE_POST_OPERATION:
571             cpHost.postEnableTableHandler(getTableName());
572             break;
573           default:
574             throw new UnsupportedOperationException(this + " unhandled state=" + state);
575           }
576           return null;
577         }
578       });
579     }
580   }
581 }