001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.procedure;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.List;
023import java.util.function.Supplier;
024import org.apache.hadoop.fs.Path;
025import org.apache.hadoop.hbase.DoNotRetryIOException;
026import org.apache.hadoop.hbase.MetaTableAccessor;
027import org.apache.hadoop.hbase.NamespaceDescriptor;
028import org.apache.hadoop.hbase.TableExistsException;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.client.RegionInfo;
031import org.apache.hadoop.hbase.client.RegionReplicaUtil;
032import org.apache.hadoop.hbase.client.TableDescriptor;
033import org.apache.hadoop.hbase.client.TableState;
034import org.apache.hadoop.hbase.fs.ErasureCodingUtils;
035import org.apache.hadoop.hbase.master.MasterCoprocessorHost;
036import org.apache.hadoop.hbase.master.MasterFileSystem;
037import org.apache.hadoop.hbase.procedure2.ProcedureStateSerializer;
038import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
039import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerValidationUtils;
040import org.apache.hadoop.hbase.rsgroup.RSGroupInfo;
041import org.apache.hadoop.hbase.util.CommonFSUtils;
042import org.apache.hadoop.hbase.util.FSTableDescriptors;
043import org.apache.hadoop.hbase.util.ModifyRegionUtils;
044import org.apache.yetus.audience.InterfaceAudience;
045import org.slf4j.Logger;
046import org.slf4j.LoggerFactory;
047
048import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
049
050import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
051import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
052import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos;
053import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProcedureProtos.CreateTableState;
054
055@InterfaceAudience.Private
056public class CreateTableProcedure extends AbstractStateMachineTableProcedure<CreateTableState> {
057  private static final Logger LOG = LoggerFactory.getLogger(CreateTableProcedure.class);
058
059  private static final int MAX_REGION_REPLICATION = 0x10000;
060
061  private TableDescriptor tableDescriptor;
062  private List<RegionInfo> newRegions;
063
064  public CreateTableProcedure() {
065    // Required by the Procedure framework to create the procedure on replay
066    super();
067  }
068
069  public CreateTableProcedure(final MasterProcedureEnv env, final TableDescriptor tableDescriptor,
070    final RegionInfo[] newRegions) {
071    this(env, tableDescriptor, newRegions, null);
072  }
073
074  public CreateTableProcedure(final MasterProcedureEnv env, final TableDescriptor tableDescriptor,
075    final RegionInfo[] newRegions, final ProcedurePrepareLatch syncLatch) {
076    super(env, syncLatch);
077    this.tableDescriptor = tableDescriptor;
078    this.newRegions = newRegions != null ? Lists.newArrayList(newRegions) : null;
079  }
080
081  @Override
082  protected Flow executeFromState(final MasterProcedureEnv env, final CreateTableState state)
083    throws InterruptedException {
084    LOG.info("{} execute state={}", this, state);
085    try {
086      switch (state) {
087        case CREATE_TABLE_PRE_OPERATION:
088          // Verify if we can create the table
089          boolean success = prepareCreate(env);
090          releaseSyncLatch();
091
092          if (!success) {
093            assert isFailed() : "the delete should have an exception here";
094            return Flow.NO_MORE_STATE;
095          }
096
097          preCreate(env);
098          setNextState(CreateTableState.CREATE_TABLE_WRITE_FS_LAYOUT);
099          break;
100        case CREATE_TABLE_WRITE_FS_LAYOUT:
101          DeleteTableProcedure.deleteFromFs(env, getTableName(), newRegions, true);
102          newRegions = createFsLayout(env, tableDescriptor, newRegions);
103          env.getMasterServices().getTableDescriptors().update(tableDescriptor, true);
104          if (tableDescriptor.getErasureCodingPolicy() != null) {
105            setNextState(CreateTableState.CREATE_TABLE_SET_ERASURE_CODING_POLICY);
106          } else {
107            setNextState(CreateTableState.CREATE_TABLE_ADD_TO_META);
108          }
109          break;
110        case CREATE_TABLE_SET_ERASURE_CODING_POLICY:
111          ErasureCodingUtils.setPolicy(env.getMasterFileSystem().getFileSystem(),
112            env.getMasterFileSystem().getRootDir(), getTableName(),
113            tableDescriptor.getErasureCodingPolicy());
114          setNextState(CreateTableState.CREATE_TABLE_ADD_TO_META);
115          break;
116        case CREATE_TABLE_ADD_TO_META:
117          newRegions = addTableToMeta(env, tableDescriptor, newRegions);
118          setNextState(CreateTableState.CREATE_TABLE_ASSIGN_REGIONS);
119          break;
120        case CREATE_TABLE_ASSIGN_REGIONS:
121          setEnablingState(env, getTableName());
122          addChildProcedure(
123            env.getAssignmentManager().createRoundRobinAssignProcedures(newRegions));
124          setNextState(CreateTableState.CREATE_TABLE_UPDATE_DESC_CACHE);
125          break;
126        case CREATE_TABLE_UPDATE_DESC_CACHE:
127          // XXX: this stage should be named as set table enabled, as now we will cache the
128          // descriptor after writing fs layout.
129          setEnabledState(env, getTableName());
130          setNextState(CreateTableState.CREATE_TABLE_POST_OPERATION);
131          break;
132        case CREATE_TABLE_POST_OPERATION:
133          postCreate(env);
134          return Flow.NO_MORE_STATE;
135        default:
136          throw new UnsupportedOperationException("unhandled state=" + state);
137      }
138    } catch (IOException e) {
139      if (isRollbackSupported(state)) {
140        setFailure("master-create-table", e);
141      } else {
142        LOG.warn("Retriable error trying to create table=" + getTableName() + " state=" + state, e);
143      }
144    }
145    return Flow.HAS_MORE_STATE;
146  }
147
148  @Override
149  protected void rollbackState(final MasterProcedureEnv env, final CreateTableState state)
150    throws IOException {
151    if (state == CreateTableState.CREATE_TABLE_PRE_OPERATION) {
152      // nothing to rollback, pre-create is just table-state checks.
153      // We can fail if the table does exist or the descriptor is malformed.
154      // TODO: coprocessor rollback semantic is still undefined.
155      if (
156        hasException()
157          /* avoid NPE */ && getException().getCause().getClass() != TableExistsException.class
158      ) {
159        DeleteTableProcedure.deleteTableStates(env, getTableName());
160
161        final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
162        if (cpHost != null) {
163          cpHost.postDeleteTable(getTableName());
164        }
165      }
166
167      releaseSyncLatch();
168      return;
169    }
170
171    // The procedure doesn't have a rollback. The execution will succeed, at some point.
172    throw new UnsupportedOperationException("unhandled state=" + state);
173  }
174
175  @Override
176  protected boolean isRollbackSupported(final CreateTableState state) {
177    switch (state) {
178      case CREATE_TABLE_PRE_OPERATION:
179        return true;
180      default:
181        return false;
182    }
183  }
184
185  @Override
186  protected CreateTableState getState(final int stateId) {
187    return CreateTableState.forNumber(stateId);
188  }
189
190  @Override
191  protected int getStateId(final CreateTableState state) {
192    return state.getNumber();
193  }
194
195  @Override
196  protected CreateTableState getInitialState() {
197    return CreateTableState.CREATE_TABLE_PRE_OPERATION;
198  }
199
200  @Override
201  public TableName getTableName() {
202    return tableDescriptor.getTableName();
203  }
204
205  @Override
206  public TableOperationType getTableOperationType() {
207    return TableOperationType.CREATE;
208  }
209
210  @Override
211  protected void serializeStateData(ProcedureStateSerializer serializer) throws IOException {
212    super.serializeStateData(serializer);
213
214    MasterProcedureProtos.CreateTableStateData.Builder state =
215      MasterProcedureProtos.CreateTableStateData.newBuilder()
216        .setUserInfo(MasterProcedureUtil.toProtoUserInfo(getUser()))
217        .setTableSchema(ProtobufUtil.toTableSchema(tableDescriptor));
218    if (newRegions != null) {
219      for (RegionInfo hri : newRegions) {
220        state.addRegionInfo(ProtobufUtil.toRegionInfo(hri));
221      }
222    }
223    serializer.serialize(state.build());
224  }
225
226  @Override
227  protected void deserializeStateData(ProcedureStateSerializer serializer) throws IOException {
228    super.deserializeStateData(serializer);
229
230    MasterProcedureProtos.CreateTableStateData state =
231      serializer.deserialize(MasterProcedureProtos.CreateTableStateData.class);
232    setUser(MasterProcedureUtil.toUserInfo(state.getUserInfo()));
233    tableDescriptor = ProtobufUtil.toTableDescriptor(state.getTableSchema());
234    if (state.getRegionInfoCount() == 0) {
235      newRegions = null;
236    } else {
237      newRegions = new ArrayList<>(state.getRegionInfoCount());
238      for (HBaseProtos.RegionInfo hri : state.getRegionInfoList()) {
239        newRegions.add(ProtobufUtil.toRegionInfo(hri));
240      }
241    }
242  }
243
244  @Override
245  protected boolean waitInitialized(MasterProcedureEnv env) {
246    if (getTableName().isSystemTable()) {
247      // Creating system table is part of the initialization, so only wait for meta loaded instead
248      // of waiting for master fully initialized.
249      return env.getAssignmentManager().waitMetaLoaded(this);
250    }
251    return super.waitInitialized(env);
252  }
253
254  private boolean prepareCreate(final MasterProcedureEnv env) throws IOException {
255    final TableName tableName = getTableName();
256    if (env.getMasterServices().getTableDescriptors().exists(tableName)) {
257      setFailure("master-create-table", new TableExistsException(getTableName()));
258      return false;
259    }
260
261    // check that we have at least 1 CF
262    if (tableDescriptor.getColumnFamilyCount() == 0) {
263      setFailure("master-create-table", new DoNotRetryIOException(
264        "Table " + getTableName().toString() + " should have at least one column family."));
265      return false;
266    }
267
268    int regionReplicationCount = tableDescriptor.getRegionReplication();
269    if (regionReplicationCount > MAX_REGION_REPLICATION) {
270      setFailure("master-create-table", new IllegalArgumentException(
271        "Region Replication cannot exceed " + MAX_REGION_REPLICATION + "."));
272      return false;
273    }
274
275    if (!tableName.isSystemTable()) {
276      // do not check rs group for system tables as we may block the bootstrap.
277      Supplier<String> forWhom = () -> "table " + tableName;
278      RSGroupInfo rsGroupInfo = MasterProcedureUtil.checkGroupExists(
279        env.getMasterServices().getRSGroupInfoManager()::getRSGroup,
280        tableDescriptor.getRegionServerGroup(), forWhom);
281      if (rsGroupInfo == null) {
282        // we do not set rs group info on table, check if we have one on namespace
283        String namespace = tableName.getNamespaceAsString();
284        NamespaceDescriptor nd = env.getMasterServices().getClusterSchema().getNamespace(namespace);
285        forWhom = () -> "table " + tableName + "(inherit from namespace)";
286        rsGroupInfo = MasterProcedureUtil.checkGroupExists(
287          env.getMasterServices().getRSGroupInfoManager()::getRSGroup,
288          MasterProcedureUtil.getNamespaceGroup(nd), forWhom);
289      }
290      MasterProcedureUtil.checkGroupNotEmpty(rsGroupInfo, forWhom);
291    }
292
293    // check for store file tracker configurations
294    StoreFileTrackerValidationUtils.checkForCreateTable(env.getMasterConfiguration(),
295      tableDescriptor);
296
297    return true;
298  }
299
300  private void preCreate(final MasterProcedureEnv env) throws IOException, InterruptedException {
301    if (!getTableName().isSystemTable()) {
302      ProcedureSyncWait.getMasterQuotaManager(env).checkNamespaceTableAndRegionQuota(getTableName(),
303        (newRegions != null ? newRegions.size() : 0));
304    }
305
306    tableDescriptor = StoreFileTrackerFactory.updateWithTrackerConfigs(env.getMasterConfiguration(),
307      tableDescriptor);
308
309    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
310    if (cpHost != null) {
311      final RegionInfo[] regions =
312        newRegions == null ? null : newRegions.toArray(new RegionInfo[newRegions.size()]);
313      cpHost.preCreateTableAction(tableDescriptor, regions, getUser());
314    }
315  }
316
317  private void postCreate(final MasterProcedureEnv env) throws IOException, InterruptedException {
318    final MasterCoprocessorHost cpHost = env.getMasterCoprocessorHost();
319    if (cpHost != null) {
320      final RegionInfo[] regions =
321        (newRegions == null) ? null : newRegions.toArray(new RegionInfo[newRegions.size()]);
322      cpHost.postCompletedCreateTableAction(tableDescriptor, regions, getUser());
323    }
324  }
325
326  protected interface CreateHdfsRegions {
327    List<RegionInfo> createHdfsRegions(final MasterProcedureEnv env, final Path tableRootDir,
328      final TableName tableName, final List<RegionInfo> newRegions) throws IOException;
329  }
330
331  protected static List<RegionInfo> createFsLayout(final MasterProcedureEnv env,
332    final TableDescriptor tableDescriptor, final List<RegionInfo> newRegions) throws IOException {
333    return createFsLayout(env, tableDescriptor, newRegions, new CreateHdfsRegions() {
334      @Override
335      public List<RegionInfo> createHdfsRegions(final MasterProcedureEnv env,
336        final Path tableRootDir, final TableName tableName, final List<RegionInfo> newRegions)
337        throws IOException {
338        RegionInfo[] regions =
339          newRegions != null ? newRegions.toArray(new RegionInfo[newRegions.size()]) : null;
340        return ModifyRegionUtils.createRegions(env.getMasterConfiguration(), tableRootDir,
341          tableDescriptor, regions, null);
342      }
343    });
344  }
345
346  protected static List<RegionInfo> createFsLayout(final MasterProcedureEnv env,
347    final TableDescriptor tableDescriptor, List<RegionInfo> newRegions,
348    final CreateHdfsRegions hdfsRegionHandler) throws IOException {
349    final MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem();
350
351    // 1. Create Table Descriptor
352    // using a copy of descriptor, table will be created enabling first
353    final Path tableDir =
354      CommonFSUtils.getTableDir(mfs.getRootDir(), tableDescriptor.getTableName());
355    ((FSTableDescriptors) (env.getMasterServices().getTableDescriptors()))
356      .createTableDescriptorForTableDirectory(tableDir, tableDescriptor, false);
357
358    // 2. Create Regions
359    newRegions = hdfsRegionHandler.createHdfsRegions(env, mfs.getRootDir(),
360      tableDescriptor.getTableName(), newRegions);
361
362    return newRegions;
363  }
364
365  protected static List<RegionInfo> addTableToMeta(final MasterProcedureEnv env,
366    final TableDescriptor tableDescriptor, final List<RegionInfo> regions) throws IOException {
367    assert (regions != null && regions.size() > 0) : "expected at least 1 region, got " + regions;
368
369    ProcedureSyncWait.waitMetaRegions(env);
370
371    // Add replicas if needed
372    // we need to create regions with replicaIds starting from 1
373    List<RegionInfo> newRegions =
374      RegionReplicaUtil.addReplicas(regions, 1, tableDescriptor.getRegionReplication());
375
376    // Add regions to META
377    addRegionsToMeta(env, tableDescriptor, newRegions);
378
379    return newRegions;
380  }
381
382  protected static void setEnablingState(final MasterProcedureEnv env, final TableName tableName)
383    throws IOException {
384    // Mark the table as Enabling
385    env.getMasterServices().getTableStateManager().setTableState(tableName,
386      TableState.State.ENABLING);
387  }
388
389  protected static void setEnabledState(final MasterProcedureEnv env, final TableName tableName)
390    throws IOException {
391    // Enable table
392    env.getMasterServices().getTableStateManager().setTableState(tableName,
393      TableState.State.ENABLED);
394  }
395
396  /**
397   * Add the specified set of regions to the hbase:meta table.
398   */
399  private static void addRegionsToMeta(final MasterProcedureEnv env,
400    final TableDescriptor tableDescriptor, final List<RegionInfo> regionInfos) throws IOException {
401    MetaTableAccessor.addRegionsToMeta(env.getMasterServices().getConnection(), regionInfos,
402      tableDescriptor.getRegionReplication());
403  }
404
405  @Override
406  protected boolean shouldWaitClientAck(MasterProcedureEnv env) {
407    // system tables are created on bootstrap internally by the system
408    // the client does not know about this procedures.
409    return !getTableName().isSystemTable();
410  }
411
412  RegionInfo getFirstRegionInfo() {
413    if (newRegions == null || newRegions.isEmpty()) {
414      return null;
415    }
416    return newRegions.get(0);
417  }
418}