001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Collection;
024import java.util.Collections;
025import java.util.List;
026import java.util.concurrent.CompletableFuture;
027import java.util.stream.Collectors;
028import org.apache.hadoop.hbase.CatalogFamilyFormat;
029import org.apache.hadoop.hbase.Cell;
030import org.apache.hadoop.hbase.Cell.Type;
031import org.apache.hadoop.hbase.CellBuilderFactory;
032import org.apache.hadoop.hbase.CellBuilderType;
033import org.apache.hadoop.hbase.CellUtil;
034import org.apache.hadoop.hbase.ClientMetaTableAccessor;
035import org.apache.hadoop.hbase.DoNotRetryIOException;
036import org.apache.hadoop.hbase.HConstants;
037import org.apache.hadoop.hbase.HRegionLocation;
038import org.apache.hadoop.hbase.MetaTableAccessor;
039import org.apache.hadoop.hbase.RegionLocations;
040import org.apache.hadoop.hbase.ServerName;
041import org.apache.hadoop.hbase.TableName;
042import org.apache.hadoop.hbase.client.AsyncTable;
043import org.apache.hadoop.hbase.client.Delete;
044import org.apache.hadoop.hbase.client.Get;
045import org.apache.hadoop.hbase.client.Mutation;
046import org.apache.hadoop.hbase.client.Put;
047import org.apache.hadoop.hbase.client.RegionInfo;
048import org.apache.hadoop.hbase.client.RegionInfoBuilder;
049import org.apache.hadoop.hbase.client.RegionReplicaUtil;
050import org.apache.hadoop.hbase.client.Result;
051import org.apache.hadoop.hbase.client.ResultScanner;
052import org.apache.hadoop.hbase.client.Scan;
053import org.apache.hadoop.hbase.client.Table;
054import org.apache.hadoop.hbase.client.TableDescriptor;
055import org.apache.hadoop.hbase.master.MasterFileSystem;
056import org.apache.hadoop.hbase.master.MasterServices;
057import org.apache.hadoop.hbase.master.RegionState;
058import org.apache.hadoop.hbase.master.RegionState.State;
059import org.apache.hadoop.hbase.master.region.MasterRegion;
060import org.apache.hadoop.hbase.procedure2.Procedure;
061import org.apache.hadoop.hbase.procedure2.util.StringUtils;
062import org.apache.hadoop.hbase.replication.ReplicationBarrierFamilyFormat;
063import org.apache.hadoop.hbase.util.Bytes;
064import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
065import org.apache.hadoop.hbase.util.FutureUtils;
066import org.apache.hadoop.hbase.wal.WALSplitUtil;
067import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
068import org.apache.yetus.audience.InterfaceAudience;
069import org.apache.zookeeper.KeeperException;
070import org.slf4j.Logger;
071import org.slf4j.LoggerFactory;
072
073import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
074
075import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
076import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos;
077import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MultiRowMutationService;
078import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest;
079import org.apache.hadoop.hbase.shaded.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse;
080
081/**
082 * Store Region State to hbase:meta table.
083 */
084@InterfaceAudience.Private
085public class RegionStateStore {
086  private static final Logger LOG = LoggerFactory.getLogger(RegionStateStore.class);
087  private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META");
088
089  /** The delimiter for meta columns for replicaIds > 0 */
090  protected static final char META_REPLICA_ID_DELIMITER = '_';
091
092  private final MasterServices master;
093
094  private final MasterRegion masterRegion;
095
096  public RegionStateStore(MasterServices master, MasterRegion masterRegion) {
097    this.master = master;
098    this.masterRegion = masterRegion;
099  }
100
101  @FunctionalInterface
102  public interface RegionStateVisitor {
103    void visitRegionState(Result result, RegionInfo regionInfo, State state,
104      ServerName regionLocation, ServerName lastHost, long openSeqNum);
105  }
106
107  public void visitMeta(final RegionStateVisitor visitor) throws IOException {
108    MetaTableAccessor.fullScanRegions(master.getConnection(),
109      new ClientMetaTableAccessor.Visitor() {
110        final boolean isDebugEnabled = LOG.isDebugEnabled();
111
112        @Override
113        public boolean visit(final Result r) throws IOException {
114          if (r != null && !r.isEmpty()) {
115            long st = 0;
116            if (LOG.isTraceEnabled()) {
117              st = EnvironmentEdgeManager.currentTime();
118            }
119            visitMetaEntry(visitor, r);
120            if (LOG.isTraceEnabled()) {
121              long et = EnvironmentEdgeManager.currentTime();
122              LOG.trace("[T] LOAD META PERF " + StringUtils.humanTimeDiff(et - st));
123            }
124          } else if (isDebugEnabled) {
125            LOG.debug("NULL result from meta - ignoring but this is strange.");
126          }
127          return true;
128        }
129      });
130  }
131
132  /**
133   * Queries META table for the passed region encoded name, delegating action upon results to the
134   * {@code RegionStateVisitor} passed as second parameter.
135   * @param regionEncodedName encoded name for the Region we want to query META for.
136   * @param visitor           The {@code RegionStateVisitor} instance to react over the query
137   *                          results.
138   * @throws IOException If some error occurs while querying META or parsing results.
139   */
140  public void visitMetaForRegion(final String regionEncodedName, final RegionStateVisitor visitor)
141    throws IOException {
142    Result result =
143      MetaTableAccessor.scanByRegionEncodedName(master.getConnection(), regionEncodedName);
144    if (result != null) {
145      visitMetaEntry(visitor, result);
146    }
147  }
148
149  public static void visitMetaEntry(final RegionStateVisitor visitor, final Result result)
150    throws IOException {
151    final RegionLocations rl = CatalogFamilyFormat.getRegionLocations(result);
152    if (rl == null) return;
153
154    final HRegionLocation[] locations = rl.getRegionLocations();
155    if (locations == null) return;
156
157    for (int i = 0; i < locations.length; ++i) {
158      final HRegionLocation hrl = locations[i];
159      if (hrl == null) continue;
160
161      final RegionInfo regionInfo = hrl.getRegion();
162      if (regionInfo == null) continue;
163
164      final int replicaId = regionInfo.getReplicaId();
165      final State state = getRegionState(result, regionInfo);
166
167      final ServerName lastHost = hrl.getServerName();
168      ServerName regionLocation = MetaTableAccessor.getTargetServerName(result, replicaId);
169      final long openSeqNum = hrl.getSeqNum();
170
171      LOG.debug(
172        "Load {} entry region={}, regionState={}, lastHost={}, "
173          + "regionLocation={}, openSeqNum={}",
174        TableName.META_TABLE_NAME, regionInfo.getEncodedName(), state, lastHost, regionLocation,
175        openSeqNum);
176      visitor.visitRegionState(result, regionInfo, state, regionLocation, lastHost, openSeqNum);
177    }
178  }
179
180  private Put generateUpdateRegionLocationPut(RegionStateNode regionStateNode) throws IOException {
181    long time = EnvironmentEdgeManager.currentTime();
182    long openSeqNum = regionStateNode.getState() == State.OPEN
183      ? regionStateNode.getOpenSeqNum()
184      : HConstants.NO_SEQNUM;
185    RegionInfo regionInfo = regionStateNode.getRegionInfo();
186    State state = regionStateNode.getState();
187    ServerName regionLocation = regionStateNode.getRegionLocation();
188    TransitRegionStateProcedure rit = regionStateNode.getProcedure();
189    long pid = rit != null ? rit.getProcId() : Procedure.NO_PROC_ID;
190    final int replicaId = regionInfo.getReplicaId();
191    final Put put = new Put(CatalogFamilyFormat.getMetaKeyForRegion(regionInfo), time);
192    MetaTableAccessor.addRegionInfo(put, regionInfo);
193    final StringBuilder info =
194      new StringBuilder("pid=").append(pid).append(" updating ").append(TableName.META_TABLE_NAME)
195        .append(" row=").append(regionInfo.getEncodedName()).append(", regionState=").append(state);
196    if (openSeqNum >= 0) {
197      Preconditions.checkArgument(state == State.OPEN && regionLocation != null,
198        "Open region should be on a server");
199      MetaTableAccessor.addLocation(put, regionLocation, openSeqNum, replicaId);
200      // only update replication barrier for default replica
201      if (
202        regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID
203          && hasGlobalReplicationScope(regionInfo.getTable())
204      ) {
205        ReplicationBarrierFamilyFormat.addReplicationBarrier(put, openSeqNum);
206        info.append(", repBarrier=").append(openSeqNum);
207      }
208      info.append(", openSeqNum=").append(openSeqNum);
209      info.append(", regionLocation=").append(regionLocation);
210    } else if (regionLocation != null) {
211      // Ideally, if no regionLocation, write null to the hbase:meta but this will confuse clients
212      // currently; they want a server to hit. TODO: Make clients wait if no location.
213      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
214        .setFamily(HConstants.CATALOG_FAMILY)
215        .setQualifier(CatalogFamilyFormat.getServerNameColumn(replicaId))
216        .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
217        .setValue(Bytes.toBytes(regionLocation.getServerName())).build());
218      info.append(", regionLocation=").append(regionLocation);
219    }
220    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
221      .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getStateColumn(replicaId))
222      .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(state.name()))
223      .build());
224    LOG.info(info.toString());
225    return put;
226  }
227
228  CompletableFuture<Void> updateRegionLocation(RegionStateNode regionStateNode) {
229    Put put;
230    try {
231      put = generateUpdateRegionLocationPut(regionStateNode);
232    } catch (IOException e) {
233      return FutureUtils.failedFuture(e);
234    }
235    RegionInfo regionInfo = regionStateNode.getRegionInfo();
236    State state = regionStateNode.getState();
237    CompletableFuture<Void> future = updateRegionLocation(regionInfo, state, put);
238    if (regionInfo.isMetaRegion() && regionInfo.isFirst()) {
239      // mirror the meta location to zookeeper
240      // we store meta location in master local region which means the above method is
241      // synchronous(we just wrap the result with a CompletableFuture to make it look like
242      // asynchronous), so it is OK to just call this method directly here
243      assert future.isDone();
244      if (!future.isCompletedExceptionally()) {
245        try {
246          mirrorMetaLocation(regionInfo, regionStateNode.getRegionLocation(), state);
247        } catch (IOException e) {
248          return FutureUtils.failedFuture(e);
249        }
250      }
251    }
252    return future;
253  }
254
255  private void mirrorMetaLocation(RegionInfo regionInfo, ServerName serverName, State state)
256    throws IOException {
257    try {
258      MetaTableLocator.setMetaLocation(master.getZooKeeper(), serverName, regionInfo.getReplicaId(),
259        state);
260    } catch (KeeperException e) {
261      throw new IOException(e);
262    }
263  }
264
265  private void removeMirrorMetaLocation(int oldReplicaCount, int newReplicaCount)
266    throws IOException {
267    try {
268      for (int i = newReplicaCount; i < oldReplicaCount; i++) {
269        MetaTableLocator.deleteMetaLocation(master.getZooKeeper(), i);
270      }
271    } catch (KeeperException e) {
272      throw new IOException(e);
273    }
274  }
275
276  private CompletableFuture<Void> updateRegionLocation(RegionInfo regionInfo, State state,
277    Put put) {
278    CompletableFuture<Void> future;
279    if (regionInfo.isMetaRegion()) {
280      try {
281        masterRegion.update(r -> r.put(put));
282        future = CompletableFuture.completedFuture(null);
283      } catch (Exception e) {
284        future = FutureUtils.failedFuture(e);
285      }
286    } else {
287      AsyncTable<?> table = master.getAsyncConnection().getTable(TableName.META_TABLE_NAME);
288      future = table.put(put);
289    }
290    FutureUtils.addListener(future, (r, e) -> {
291      if (e != null) {
292        // TODO: Revist!!!! Means that if a server is loaded, then we will abort our host!
293        // In tests we abort the Master!
294        String msg = String.format("FAILED persisting region=%s state=%s",
295          regionInfo.getShortNameToLog(), state);
296        LOG.error(msg, e);
297        master.abort(msg, e);
298      }
299    });
300    return future;
301  }
302
303  private long getOpenSeqNumForParentRegion(RegionInfo region) throws IOException {
304    MasterFileSystem fs = master.getMasterFileSystem();
305    long maxSeqId = WALSplitUtil.getMaxRegionSequenceId(master.getConfiguration(), region,
306      fs::getFileSystem, fs::getWALFileSystem);
307    return maxSeqId > 0 ? maxSeqId + 1 : HConstants.NO_SEQNUM;
308  }
309
310  /**
311   * Performs an atomic multi-mutate operation against the given table. Used by the likes of merge
312   * and split as these want to make atomic mutations across multiple rows.
313   */
314  private void multiMutate(RegionInfo ri, List<Mutation> mutations) throws IOException {
315    debugLogMutations(mutations);
316    byte[] row =
317      Bytes.toBytes(RegionReplicaUtil.getRegionInfoForDefaultReplica(ri).getRegionNameAsString()
318        + HConstants.DELIMITER);
319    MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder();
320    for (Mutation mutation : mutations) {
321      if (mutation instanceof Put) {
322        builder.addMutationRequest(
323          ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.PUT, mutation));
324      } else if (mutation instanceof Delete) {
325        builder.addMutationRequest(
326          ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.DELETE, mutation));
327      } else {
328        throw new DoNotRetryIOException(
329          "multi in MetaEditor doesn't support " + mutation.getClass().getName());
330      }
331    }
332    MutateRowsRequest request = builder.build();
333    AsyncTable<?> table =
334      master.getConnection().toAsyncConnection().getTable(TableName.META_TABLE_NAME);
335    CompletableFuture<MutateRowsResponse> future = table.<MultiRowMutationService,
336      MutateRowsResponse> coprocessorService(MultiRowMutationService::newStub,
337        (stub, controller, done) -> stub.mutateRows(controller, request, done), row);
338    FutureUtils.get(future);
339  }
340
341  private Table getMetaTable() throws IOException {
342    return master.getConnection().getTable(TableName.META_TABLE_NAME);
343  }
344
345  private Result getRegionCatalogResult(RegionInfo region) throws IOException {
346    Get get =
347      new Get(CatalogFamilyFormat.getMetaKeyForRegion(region)).addFamily(HConstants.CATALOG_FAMILY);
348    try (Table table = getMetaTable()) {
349      return table.get(get);
350    }
351  }
352
353  private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException {
354    return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow())
355      .setFamily(HConstants.CATALOG_FAMILY)
356      .setQualifier(CatalogFamilyFormat.getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp())
357      .setType(Type.Put).setValue(Bytes.toBytes(openSeqNum)).build());
358  }
359
360  // ============================================================================================
361  // Update Region Splitting State helpers
362  // ============================================================================================
363  /**
364   * Splits the region into two in an atomic operation. Offlines the parent region with the
365   * information that it is split into two, and also adds the daughter regions. Does not add the
366   * location information to the daughter regions since they are not open yet.
367   */
368  public void splitRegion(RegionInfo parent, RegionInfo splitA, RegionInfo splitB,
369    ServerName serverName, TableDescriptor htd) throws IOException {
370    long parentOpenSeqNum = HConstants.NO_SEQNUM;
371    if (htd.hasGlobalReplicationScope()) {
372      parentOpenSeqNum = getOpenSeqNumForParentRegion(parent);
373    }
374    long time = EnvironmentEdgeManager.currentTime();
375    // Put for parent
376    Put putParent = MetaTableAccessor.makePutFromRegionInfo(
377      RegionInfoBuilder.newBuilder(parent).setOffline(true).setSplit(true).build(), time);
378    MetaTableAccessor.addDaughtersToPut(putParent, splitA, splitB);
379
380    // Puts for daughters
381    Put putA = MetaTableAccessor.makePutFromRegionInfo(splitA, time);
382    Put putB = MetaTableAccessor.makePutFromRegionInfo(splitB, time);
383    if (parentOpenSeqNum > 0) {
384      ReplicationBarrierFamilyFormat.addReplicationBarrier(putParent, parentOpenSeqNum);
385      ReplicationBarrierFamilyFormat.addReplicationParent(putA, Collections.singletonList(parent));
386      ReplicationBarrierFamilyFormat.addReplicationParent(putB, Collections.singletonList(parent));
387    }
388    // Set initial state to CLOSED
389    // NOTE: If initial state is not set to CLOSED then daughter regions get added with the
390    // default OFFLINE state. If Master gets restarted after this step, start up sequence of
391    // master tries to assign these offline regions. This is followed by re-assignments of the
392    // daughter regions from resumed {@link SplitTableRegionProcedure}
393    MetaTableAccessor.addRegionStateToPut(putA, RegionInfo.DEFAULT_REPLICA_ID,
394      RegionState.State.CLOSED);
395    MetaTableAccessor.addRegionStateToPut(putB, RegionInfo.DEFAULT_REPLICA_ID,
396      RegionState.State.CLOSED);
397
398    // new regions, openSeqNum = 1 is fine.
399    addSequenceNum(putA, 1, splitA.getReplicaId());
400    addSequenceNum(putB, 1, splitB.getReplicaId());
401
402    // Add empty locations for region replicas of daughters so that number of replicas can be
403    // cached whenever the primary region is looked up from meta
404    int regionReplication = getRegionReplication(htd);
405    for (int i = 1; i < regionReplication; i++) {
406      MetaTableAccessor.addEmptyLocation(putA, i);
407      MetaTableAccessor.addEmptyLocation(putB, i);
408    }
409
410    multiMutate(parent, Arrays.asList(putParent, putA, putB));
411  }
412
413  // ============================================================================================
414  // Update Region Merging State helpers
415  // ============================================================================================
416  public void mergeRegions(RegionInfo child, RegionInfo[] parents, ServerName serverName,
417    TableDescriptor htd) throws IOException {
418    boolean globalScope = htd.hasGlobalReplicationScope();
419    long time = EnvironmentEdgeManager.currentTime();
420    List<Mutation> mutations = new ArrayList<>();
421    List<RegionInfo> replicationParents = new ArrayList<>();
422    for (RegionInfo ri : parents) {
423      long seqNum = globalScope ? getOpenSeqNumForParentRegion(ri) : -1;
424      // Deletes for merging regions
425      mutations.add(MetaTableAccessor.makeDeleteFromRegionInfo(ri, time));
426      if (seqNum > 0) {
427        mutations
428          .add(ReplicationBarrierFamilyFormat.makePutForReplicationBarrier(ri, seqNum, time));
429        replicationParents.add(ri);
430      }
431    }
432    // Put for parent
433    Put putOfMerged = MetaTableAccessor.makePutFromRegionInfo(child, time);
434    putOfMerged = addMergeRegions(putOfMerged, Arrays.asList(parents));
435    // Set initial state to CLOSED.
436    // NOTE: If initial state is not set to CLOSED then merged region gets added with the
437    // default OFFLINE state. If Master gets restarted after this step, start up sequence of
438    // master tries to assign this offline region. This is followed by re-assignments of the
439    // merged region from resumed {@link MergeTableRegionsProcedure}
440    MetaTableAccessor.addRegionStateToPut(putOfMerged, RegionInfo.DEFAULT_REPLICA_ID,
441      RegionState.State.CLOSED);
442    mutations.add(putOfMerged);
443    // The merged is a new region, openSeqNum = 1 is fine. ServerName may be null
444    // if crash after merge happened but before we got to here.. means in-memory
445    // locations of offlined merged, now-closed, regions is lost. Should be ok. We
446    // assign the merged region later.
447    if (serverName != null) {
448      MetaTableAccessor.addLocation(putOfMerged, serverName, 1, child.getReplicaId());
449    }
450
451    // Add empty locations for region replicas of the merged region so that number of replicas
452    // can be cached whenever the primary region is looked up from meta
453    int regionReplication = getRegionReplication(htd);
454    for (int i = 1; i < regionReplication; i++) {
455      MetaTableAccessor.addEmptyLocation(putOfMerged, i);
456    }
457    // add parent reference for serial replication
458    if (!replicationParents.isEmpty()) {
459      ReplicationBarrierFamilyFormat.addReplicationParent(putOfMerged, replicationParents);
460    }
461    multiMutate(child, mutations);
462  }
463
464  /**
465   * Check whether the given {@code region} has any 'info:merge*' columns.
466   */
467  public boolean hasMergeRegions(RegionInfo region) throws IOException {
468    return CatalogFamilyFormat.hasMergeRegions(getRegionCatalogResult(region).rawCells());
469  }
470
471  /**
472   * Returns Return all regioninfos listed in the 'info:merge*' columns of the given {@code region}.
473   */
474  public List<RegionInfo> getMergeRegions(RegionInfo region) throws IOException {
475    return CatalogFamilyFormat.getMergeRegions(getRegionCatalogResult(region).rawCells());
476  }
477
478  /**
479   * Deletes merge qualifiers for the specified merge region.
480   * @param connection  connection we're using
481   * @param mergeRegion the merged region
482   */
483  public void deleteMergeQualifiers(RegionInfo mergeRegion) throws IOException {
484    // NOTE: We are doing a new hbase:meta read here.
485    Cell[] cells = getRegionCatalogResult(mergeRegion).rawCells();
486    if (cells == null || cells.length == 0) {
487      return;
488    }
489    Delete delete = new Delete(mergeRegion.getRegionName());
490    List<byte[]> qualifiers = new ArrayList<>();
491    for (Cell cell : cells) {
492      if (!CatalogFamilyFormat.isMergeQualifierPrefix(cell)) {
493        continue;
494      }
495      byte[] qualifier = CellUtil.cloneQualifier(cell);
496      qualifiers.add(qualifier);
497      delete.addColumns(HConstants.CATALOG_FAMILY, qualifier, HConstants.LATEST_TIMESTAMP);
498    }
499
500    // There will be race condition that a GCMultipleMergedRegionsProcedure is scheduled while
501    // the previous GCMultipleMergedRegionsProcedure is still going on, in this case, the second
502    // GCMultipleMergedRegionsProcedure could delete the merged region by accident!
503    if (qualifiers.isEmpty()) {
504      LOG.info("No merged qualifiers for region " + mergeRegion.getRegionNameAsString()
505        + " in meta table, they are cleaned up already, Skip.");
506      return;
507    }
508    try (Table table = master.getConnection().getTable(TableName.META_TABLE_NAME)) {
509      table.delete(delete);
510    }
511    LOG.info(
512      "Deleted merge references in " + mergeRegion.getRegionNameAsString() + ", deleted qualifiers "
513        + qualifiers.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")));
514  }
515
516  static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException {
517    int limit = 10000; // Arbitrary limit. No room in our formatted 'task0000' below for more.
518    int max = mergeRegions.size();
519    if (max > limit) {
520      // Should never happen!!!!! But just in case.
521      throw new RuntimeException(
522        "Can't merge " + max + " regions in one go; " + limit + " is upper-limit.");
523    }
524    int counter = 0;
525    for (RegionInfo ri : mergeRegions) {
526      String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++);
527      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
528        .setFamily(HConstants.CATALOG_FAMILY).setQualifier(Bytes.toBytes(qualifier))
529        .setTimestamp(put.getTimestamp()).setType(Type.Put).setValue(RegionInfo.toByteArray(ri))
530        .build());
531    }
532    return put;
533  }
534
535  // ============================================================================================
536  // Delete Region State helpers
537  // ============================================================================================
538  /**
539   * Deletes the specified region.
540   */
541  public void deleteRegion(final RegionInfo regionInfo) throws IOException {
542    deleteRegions(Collections.singletonList(regionInfo));
543  }
544
545  /**
546   * Deletes the specified regions.
547   */
548  public void deleteRegions(final List<RegionInfo> regions) throws IOException {
549    deleteRegions(regions, EnvironmentEdgeManager.currentTime());
550  }
551
552  private void deleteRegions(List<RegionInfo> regions, long ts) throws IOException {
553    List<Delete> deletes = new ArrayList<>(regions.size());
554    for (RegionInfo hri : regions) {
555      Delete e = new Delete(hri.getRegionName());
556      e.addFamily(HConstants.CATALOG_FAMILY, ts);
557      deletes.add(e);
558    }
559    try (Table table = getMetaTable()) {
560      debugLogMutations(deletes);
561      table.delete(deletes);
562    }
563    LOG.info("Deleted {} regions from META", regions.size());
564    LOG.debug("Deleted regions: {}", regions);
565  }
566
567  /**
568   * Overwrites the specified regions from hbase:meta. Deletes old rows for the given regions and
569   * adds new ones. Regions added back have state CLOSED.
570   * @param connection  connection we're using
571   * @param regionInfos list of regions to be added to META
572   */
573  public void overwriteRegions(List<RegionInfo> regionInfos, int regionReplication)
574    throws IOException {
575    // use master time for delete marker and the Put
576    long now = EnvironmentEdgeManager.currentTime();
577    deleteRegions(regionInfos, now);
578    // Why sleep? This is the easiest way to ensure that the previous deletes does not
579    // eclipse the following puts, that might happen in the same ts from the server.
580    // See HBASE-9906, and HBASE-9879. Once either HBASE-9879, HBASE-8770 is fixed,
581    // or HBASE-9905 is fixed and meta uses seqIds, we do not need the sleep.
582    //
583    // HBASE-13875 uses master timestamp for the mutations. The 20ms sleep is not needed
584    MetaTableAccessor.addRegionsToMeta(master.getConnection(), regionInfos, regionReplication,
585      now + 1);
586    LOG.info("Overwritten " + regionInfos.size() + " regions to Meta");
587    LOG.debug("Overwritten regions: {} ", regionInfos);
588  }
589
590  private Scan getScanForUpdateRegionReplicas(TableName tableName) {
591    Scan scan;
592    if (TableName.isMetaTableName(tableName)) {
593      // Notice that, we do not use MetaCellComparator for master local region, so we can not use
594      // the same logic to set start key and end key for scanning meta table when locating entries
595      // in master local region. And since there is only one table in master local region(the record
596      // for meta table), so we do not need set start key and end key.
597      scan = new Scan();
598    } else {
599      scan = MetaTableAccessor.getScanForTableName(master.getConfiguration(), tableName);
600    }
601    return scan.addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
602  }
603
604  private List<Delete> deleteRegionReplicas(ResultScanner scanner, int oldReplicaCount,
605    int newReplicaCount, long now) throws IOException {
606    List<Delete> deletes = new ArrayList<>();
607    for (;;) {
608      Result result = scanner.next();
609      if (result == null) {
610        break;
611      }
612      RegionInfo primaryRegionInfo = CatalogFamilyFormat.getRegionInfo(result);
613      if (primaryRegionInfo == null || primaryRegionInfo.isSplit()) {
614        continue;
615      }
616      Delete delete = new Delete(result.getRow());
617      for (int i = newReplicaCount; i < oldReplicaCount; i++) {
618        delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerColumn(i), now);
619        delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getSeqNumColumn(i), now);
620        delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getStartCodeColumn(i),
621          now);
622        delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getServerNameColumn(i),
623          now);
624        delete.addColumns(HConstants.CATALOG_FAMILY, CatalogFamilyFormat.getRegionStateColumn(i),
625          now);
626      }
627      deletes.add(delete);
628    }
629    return deletes;
630  }
631
632  public void removeRegionReplicas(TableName tableName, int oldReplicaCount, int newReplicaCount)
633    throws IOException {
634    Scan scan = getScanForUpdateRegionReplicas(tableName);
635    long now = EnvironmentEdgeManager.currentTime();
636    if (TableName.isMetaTableName(tableName)) {
637      List<Delete> deletes;
638      try (ResultScanner scanner = masterRegion.getScanner(scan)) {
639        deletes = deleteRegionReplicas(scanner, oldReplicaCount, newReplicaCount, now);
640      }
641      debugLogMutations(deletes);
642      masterRegion.update(r -> {
643        for (Delete d : deletes) {
644          r.delete(d);
645        }
646      });
647      // also delete the mirrored location on zk
648      removeMirrorMetaLocation(oldReplicaCount, newReplicaCount);
649    } else {
650      try (Table metaTable = getMetaTable(); ResultScanner scanner = metaTable.getScanner(scan)) {
651        List<Delete> deletes = deleteRegionReplicas(scanner, oldReplicaCount, newReplicaCount, now);
652        debugLogMutations(deletes);
653        metaTable.delete(deletes);
654      }
655    }
656  }
657
658  // ==========================================================================
659  // Table Descriptors helpers
660  // ==========================================================================
661  private boolean hasGlobalReplicationScope(TableName tableName) throws IOException {
662    return hasGlobalReplicationScope(getDescriptor(tableName));
663  }
664
665  private boolean hasGlobalReplicationScope(TableDescriptor htd) {
666    return htd != null ? htd.hasGlobalReplicationScope() : false;
667  }
668
669  private int getRegionReplication(TableDescriptor htd) {
670    return htd != null ? htd.getRegionReplication() : 1;
671  }
672
673  private TableDescriptor getDescriptor(TableName tableName) throws IOException {
674    return master.getTableDescriptors().get(tableName);
675  }
676
677  // ==========================================================================
678  // Region State
679  // ==========================================================================
680
681  /**
682   * Pull the region state from a catalog table {@link Result}.
683   * @return the region state, or null if unknown.
684   */
685  public static State getRegionState(final Result r, RegionInfo regionInfo) {
686    Cell cell =
687      r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getStateColumn(regionInfo.getReplicaId()));
688    if (cell == null || cell.getValueLength() == 0) {
689      return null;
690    }
691
692    String state =
693      Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
694    try {
695      return State.valueOf(state);
696    } catch (IllegalArgumentException e) {
697      LOG.warn(
698        "BAD value {} in " + TableName.META_TABLE_NAME + " info:state column for region {} , "
699          + "Consider using HBCK2 setRegionState ENCODED_REGION_NAME STATE",
700        state, regionInfo.getEncodedName());
701      return null;
702    }
703  }
704
705  public static byte[] getStateColumn(int replicaId) {
706    return replicaId == 0
707      ? HConstants.STATE_QUALIFIER
708      : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
709        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
710  }
711
712  private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException {
713    if (!METALOG.isDebugEnabled()) {
714      return;
715    }
716    // Logging each mutation in separate line makes it easier to see diff between them visually
717    // because of common starting indentation.
718    for (Mutation mutation : mutations) {
719      debugLogMutation(mutation);
720    }
721  }
722
723  private static void debugLogMutation(Mutation p) throws IOException {
724    METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON());
725  }
726}