001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import edu.umd.cs.findbugs.annotations.NonNull;
021import edu.umd.cs.findbugs.annotations.Nullable;
022import java.io.ByteArrayOutputStream;
023import java.io.Closeable;
024import java.io.IOException;
025import java.util.ArrayList;
026import java.util.Arrays;
027import java.util.Collection;
028import java.util.Collections;
029import java.util.Iterator;
030import java.util.LinkedHashMap;
031import java.util.List;
032import java.util.Map;
033import java.util.NavigableMap;
034import java.util.SortedMap;
035import java.util.TreeMap;
036import java.util.concurrent.ThreadLocalRandom;
037import java.util.regex.Matcher;
038import java.util.regex.Pattern;
039import java.util.stream.Collectors;
040import org.apache.hadoop.conf.Configuration;
041import org.apache.hadoop.hbase.client.Connection;
042import org.apache.hadoop.hbase.client.Consistency;
043import org.apache.hadoop.hbase.client.Delete;
044import org.apache.hadoop.hbase.client.Get;
045import org.apache.hadoop.hbase.client.Mutation;
046import org.apache.hadoop.hbase.client.Put;
047import org.apache.hadoop.hbase.client.RegionInfo;
048import org.apache.hadoop.hbase.client.RegionInfoBuilder;
049import org.apache.hadoop.hbase.client.RegionReplicaUtil;
050import org.apache.hadoop.hbase.client.Result;
051import org.apache.hadoop.hbase.client.ResultScanner;
052import org.apache.hadoop.hbase.client.Scan;
053import org.apache.hadoop.hbase.client.Table;
054import org.apache.hadoop.hbase.client.TableState;
055import org.apache.hadoop.hbase.client.coprocessor.Batch;
056import org.apache.hadoop.hbase.exceptions.DeserializationException;
057import org.apache.hadoop.hbase.filter.Filter;
058import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
059import org.apache.hadoop.hbase.filter.RowFilter;
060import org.apache.hadoop.hbase.filter.SubstringComparator;
061import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils;
062import org.apache.hadoop.hbase.ipc.ServerRpcController;
063import org.apache.hadoop.hbase.master.RegionState;
064import org.apache.hadoop.hbase.master.RegionState.State;
065import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
066import org.apache.hadoop.hbase.protobuf.generated.ClientProtos;
067import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MultiRowMutationService;
068import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsRequest;
069import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos.MutateRowsResponse;
070import org.apache.hadoop.hbase.util.Bytes;
071import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
072import org.apache.hadoop.hbase.util.ExceptionUtil;
073import org.apache.hadoop.hbase.util.Pair;
074import org.apache.hadoop.hbase.util.PairOfSameType;
075import org.apache.yetus.audience.InterfaceAudience;
076import org.slf4j.Logger;
077import org.slf4j.LoggerFactory;
078
079import org.apache.hbase.thirdparty.com.google.common.base.Throwables;
080
081/**
082 * <p>
083 * Read/write operations on <code>hbase:meta</code> region as well as assignment information stored
084 * to <code>hbase:meta</code>.
085 * </p>
086 * <p>
087 * Some of the methods of this class take ZooKeeperWatcher as a param. The only reason for this is
088 * when this class is used on client-side (e.g. HBaseAdmin), we want to use short-lived connection
089 * (opened before each operation, closed right after), while when used on HM or HRS (like in
090 * AssignmentManager) we want permanent connection.
091 * </p>
092 * <p>
093 * HBASE-10070 adds a replicaId to HRI, meaning more than one HRI can be defined for the same table
094 * range (table, startKey, endKey). For every range, there will be at least one HRI defined which is
095 * called default replica.
096 * </p>
097 * <p>
098 * <h2>Meta layout</h2>
099 *
100 * <pre>
101 * For each table there is single row named for the table with a 'table' column family.
102 * The column family currently has one column in it, the 'state' column:
103 *
104 * table:state             => contains table state
105 *
106 * Then for each table range ('Region'), there is a single row, formatted as:
107 * &lt;tableName&gt;,&lt;startKey&gt;,&lt;regionId&gt;,&lt;encodedRegionName&gt;.
108 * This row is the serialized regionName of the default region replica.
109 * Columns are:
110 * info:regioninfo         => contains serialized HRI for the default region replica
111 * info:server             => contains hostname:port (in string form) for the server hosting
112 *                            the default regionInfo replica
 * info:server_&lt;replicaId&gt; => contains hostname:port (in string form) for the server hosting
 *                                 the regionInfo replica with replicaId
115 * info:serverstartcode    => contains server start code (in binary long form) for the server
116 *                            hosting the default regionInfo replica
117 * info:serverstartcode_&lt;replicaId&gt => contains server start code (in binary long form) for
118 *                                          the server hosting the regionInfo replica with
119 *                                          replicaId
120 * info:seqnumDuringOpen   => contains seqNum (in binary long form) for the region at the time
121 *                             the server opened the region with default replicaId
122 * info:seqnumDuringOpen_&lt;replicaId&gt => contains seqNum (in binary long form) for the region
123 *                                           at the time the server opened the region with
124 *                                           replicaId
125 * info:splitA             => contains a serialized HRI for the first daughter region if the
126 *                             region is split
127 * info:splitB             => contains a serialized HRI for the second daughter region if the
128 *                             region is split
 * info:merge*             => contains a serialized HRI for a merge parent region. There will be two
 *                             or more of these columns in a row. A row that has these columns is
 *                             undergoing a merge and is the result of the merge. Columns listed
 *                             in merge* columns are the parents of this merged region. Example
 *                             columns: info:merge0001, info:merge0002. You may also see 'mergeA',
 *                             and 'mergeB'. This is the old form, replaced by the new format that
 *                             allows for more than two parents to be merged at a time.
 * TODO: Add rep_barrier for serial replication explanation. See SerialReplicationChecker.
137 * </pre>
138 * </p>
139 * <p>
140 * The actual layout of meta should be encapsulated inside MetaTableAccessor methods, and should not
141 * leak out of it (through Result objects, etc)
142 * </p>
143 */
144@InterfaceAudience.Private
145public class MetaTableAccessor {
146
  /** Logger for this class. */
  private static final Logger LOG = LoggerFactory.getLogger(MetaTableAccessor.class);
  /** Additional logger registered under the fixed name "org.apache.hadoop.hbase.META". */
  private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META");

  /**
   * Bytes of the string "parent". NOTE(review): presumably a column qualifier used for
   * replication parent information; confirm against its usages.
   */
  public static final byte[] REPLICATION_PARENT_QUALIFIER = Bytes.toBytes("parent");

  // NOTE(review): ESCAPE_BYTE and SEPARATED_BYTE look like markers for a byte-level encoding
  // (escape 0xFF, separator 0x00); confirm against the code that uses them.
  private static final byte ESCAPE_BYTE = (byte) 0xFF;

  private static final byte SEPARATED_BYTE = 0x00;
155
156  @InterfaceAudience.Private
157  @SuppressWarnings("ImmutableEnumChecker")
158  public enum QueryType {
159    ALL(HConstants.TABLE_FAMILY, HConstants.CATALOG_FAMILY),
160    REGION(HConstants.CATALOG_FAMILY),
161    TABLE(HConstants.TABLE_FAMILY),
162    REPLICATION(HConstants.REPLICATION_BARRIER_FAMILY);
163
164    private final byte[][] families;
165
166    QueryType(byte[]... families) {
167      this.families = families;
168    }
169
170    byte[][] getFamilies() {
171      return this.families;
172    }
173  }
174
  /** The delimiter for meta columns for replicaIds &gt; 0 (for example, {@code server_0001}). */
  static final char META_REPLICA_ID_DELIMITER = '_';

  /**
   * A regex for parsing server columns from meta. See above javadoc for meta layout. Matches the
   * literal {@code server}, optionally followed by {@code _} and exactly four hexadecimal digits.
   */
  private static final Pattern SERVER_COLUMN_PATTERN =
    Pattern.compile("^server(_[0-9a-fA-F]{4})?$");
181
182  ////////////////////////
183  // Reading operations //
184  ////////////////////////
185
186  /**
187   * Performs a full scan of <code>hbase:meta</code> for regions.
188   * @param connection connection we're using
189   * @param visitor    Visitor invoked against each row in regions family.
190   */
191  public static void fullScanRegions(Connection connection, final Visitor visitor)
192    throws IOException {
193    scanMeta(connection, null, null, QueryType.REGION, visitor);
194  }
195
196  /**
197   * Performs a full scan of <code>hbase:meta</code> for regions.
198   * @param connection connection we're using
199   */
200  public static List<Result> fullScanRegions(Connection connection) throws IOException {
201    return fullScan(connection, QueryType.REGION);
202  }
203
204  /**
205   * Performs a full scan of <code>hbase:meta</code> for tables.
206   * @param connection connection we're using
207   * @param visitor    Visitor invoked against each row in tables family.
208   */
209  public static void fullScanTables(Connection connection, final Visitor visitor)
210    throws IOException {
211    scanMeta(connection, null, null, QueryType.TABLE, visitor);
212  }
213
214  /**
215   * Performs a full scan of <code>hbase:meta</code>.
216   * @param connection connection we're using
217   * @param type       scanned part of meta
218   * @return List of {@link Result}
219   */
220  private static List<Result> fullScan(Connection connection, QueryType type) throws IOException {
221    CollectAllVisitor v = new CollectAllVisitor();
222    scanMeta(connection, null, null, type, v);
223    return v.getResults();
224  }
225
226  /**
227   * Callers should call close on the returned {@link Table} instance.
228   * @param connection connection we're using to access Meta
229   * @return An {@link Table} for <code>hbase:meta</code>
230   */
231  public static Table getMetaHTable(final Connection connection) throws IOException {
232    // We used to pass whole CatalogTracker in here, now we just pass in Connection
233    if (connection == null) {
234      throw new NullPointerException("No connection");
235    } else if (connection.isClosed()) {
236      throw new IOException("connection is closed");
237    }
238    return connection.getTable(TableName.META_TABLE_NAME);
239  }
240
241  /**
242   * @param t Table to use (will be closed when done).
243   * @param g Get to run
244   */
245  private static Result get(final Table t, final Get g) throws IOException {
246    if (t == null) return null;
247    try {
248      return t.get(g);
249    } finally {
250      t.close();
251    }
252  }
253
254  /**
255   * Gets the region info and assignment for the specified region.
256   * @param connection connection we're using
257   * @param regionName Region to lookup.
258   * @return Location and RegionInfo for <code>regionName</code>
259   * @deprecated use {@link #getRegionLocation(Connection, byte[])} instead
260   */
261  @Deprecated
262  public static Pair<RegionInfo, ServerName> getRegion(Connection connection, byte[] regionName)
263    throws IOException {
264    HRegionLocation location = getRegionLocation(connection, regionName);
265    return location == null ? null : new Pair<>(location.getRegionInfo(), location.getServerName());
266  }
267
268  /**
269   * Returns the HRegionLocation from meta for the given region
270   * @param connection connection we're using
271   * @param regionName region we're looking for
272   * @return HRegionLocation for the given region
273   */
274  public static HRegionLocation getRegionLocation(Connection connection, byte[] regionName)
275    throws IOException {
276    byte[] row = regionName;
277    RegionInfo parsedInfo = null;
278    try {
279      parsedInfo = parseRegionInfoFromRegionName(regionName);
280      row = getMetaKeyForRegion(parsedInfo);
281    } catch (Exception parseEx) {
282      // If it is not a valid regionName(i.e, tableName), it needs to return null here
283      // as querying meta table wont help.
284      return null;
285    }
286    Get get = new Get(row);
287    get.addFamily(HConstants.CATALOG_FAMILY);
288    Result r = get(getMetaHTable(connection), get);
289    RegionLocations locations = getRegionLocations(r);
290    return locations == null
291      ? null
292      : locations.getRegionLocation(parsedInfo == null ? 0 : parsedInfo.getReplicaId());
293  }
294
295  /**
296   * Returns the HRegionLocation from meta for the given region
297   * @param connection connection we're using
298   * @param regionInfo region information
299   * @return HRegionLocation for the given region
300   */
301  public static HRegionLocation getRegionLocation(Connection connection, RegionInfo regionInfo)
302    throws IOException {
303    return getRegionLocation(getCatalogFamilyRow(connection, regionInfo), regionInfo,
304      regionInfo.getReplicaId());
305  }
306
307  /** Returns Return the {@link HConstants#CATALOG_FAMILY} row from hbase:meta. */
308  public static Result getCatalogFamilyRow(Connection connection, RegionInfo ri)
309    throws IOException {
310    Get get = new Get(getMetaKeyForRegion(ri));
311    get.addFamily(HConstants.CATALOG_FAMILY);
312    return get(getMetaHTable(connection), get);
313  }
314
315  /** Returns the row key to use for this regionInfo */
316  public static byte[] getMetaKeyForRegion(RegionInfo regionInfo) {
317    return RegionReplicaUtil.getRegionInfoForDefaultReplica(regionInfo).getRegionName();
318  }
319
320  /**
321   * Returns an HRI parsed from this regionName. Not all the fields of the HRI is stored in the
322   * name, so the returned object should only be used for the fields in the regionName.
323   */
324  // This should be moved to RegionInfo? TODO.
325  public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException {
326    byte[][] fields = RegionInfo.parseRegionName(regionName);
327    long regionId = Long.parseLong(Bytes.toString(fields[2]));
328    int replicaId = fields.length > 3 ? Integer.parseInt(Bytes.toString(fields[3]), 16) : 0;
329    return RegionInfoBuilder.newBuilder(TableName.valueOf(fields[0])).setStartKey(fields[1])
330      .setRegionId(regionId).setReplicaId(replicaId).build();
331  }
332
333  /**
334   * Gets the result in hbase:meta for the specified region.
335   * @param connection connection we're using
336   * @param regionName region we're looking for
337   * @return result of the specified region
338   */
339  public static Result getRegionResult(Connection connection, byte[] regionName)
340    throws IOException {
341    Get get = new Get(regionName);
342    get.addFamily(HConstants.CATALOG_FAMILY);
343    return get(getMetaHTable(connection), get);
344  }
345
346  /**
347   * Scans META table for a row whose key contains the specified <B>regionEncodedName</B>, returning
348   * a single related <code>Result</code> instance if any row is found, null otherwise.
349   * @param connection        the connection to query META table.
350   * @param regionEncodedName the region encoded name to look for at META.
351   * @return <code>Result</code> instance with the row related info in META, null otherwise.
352   * @throws IOException if any errors occur while querying META.
353   */
354  public static Result scanByRegionEncodedName(Connection connection, String regionEncodedName)
355    throws IOException {
356    RowFilter rowFilter =
357      new RowFilter(CompareOperator.EQUAL, new SubstringComparator(regionEncodedName));
358    Scan scan = getMetaScan(connection.getConfiguration(), 1);
359    scan.setFilter(rowFilter);
360    try (Table table = getMetaHTable(connection);
361      ResultScanner resultScanner = table.getScanner(scan)) {
362      return resultScanner.next();
363    }
364  }
365
366  /**
367   * Returns Return all regioninfos listed in the 'info:merge*' columns of the
368   * <code>regionName</code> row.
369   */
370  @Nullable
371  public static List<RegionInfo> getMergeRegions(Connection connection, byte[] regionName)
372    throws IOException {
373    return getMergeRegions(getRegionResult(connection, regionName).rawCells());
374  }
375
376  /**
377   * Check whether the given {@code regionName} has any 'info:merge*' columns.
378   */
379  public static boolean hasMergeRegions(Connection conn, byte[] regionName) throws IOException {
380    return hasMergeRegions(getRegionResult(conn, regionName).rawCells());
381  }
382
383  /**
384   * Returns Deserialized values of &lt;qualifier,regioninfo&gt; pairs taken from column values that
385   * match the regex 'info:merge.*' in array of <code>cells</code>.
386   */
387  @Nullable
388  public static Map<String, RegionInfo> getMergeRegionsWithName(Cell[] cells) {
389    if (cells == null) {
390      return null;
391    }
392    Map<String, RegionInfo> regionsToMerge = null;
393    for (Cell cell : cells) {
394      if (!isMergeQualifierPrefix(cell)) {
395        continue;
396      }
397      // Ok. This cell is that of a info:merge* column.
398      RegionInfo ri = RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(),
399        cell.getValueLength());
400      if (ri != null) {
401        if (regionsToMerge == null) {
402          regionsToMerge = new LinkedHashMap<>();
403        }
404        regionsToMerge.put(Bytes.toString(CellUtil.cloneQualifier(cell)), ri);
405      }
406    }
407    return regionsToMerge;
408  }
409
410  /**
411   * Returns Deserialized regioninfo values taken from column values that match the regex
412   * 'info:merge.*' in array of <code>cells</code>.
413   */
414  @Nullable
415  public static List<RegionInfo> getMergeRegions(Cell[] cells) {
416    Map<String, RegionInfo> mergeRegionsWithName = getMergeRegionsWithName(cells);
417    return (mergeRegionsWithName == null) ? null : new ArrayList<>(mergeRegionsWithName.values());
418  }
419
420  /**
421   * Returns True if any merge regions present in <code>cells</code>; i.e. the column in
422   * <code>cell</code> matches the regex 'info:merge.*'.
423   */
424  public static boolean hasMergeRegions(Cell[] cells) {
425    for (Cell cell : cells) {
426      if (!isMergeQualifierPrefix(cell)) {
427        continue;
428      }
429      return true;
430    }
431    return false;
432  }
433
434  /** Returns True if the column in <code>cell</code> matches the regex 'info:merge.*'. */
435  private static boolean isMergeQualifierPrefix(Cell cell) {
436    // Check to see if has family and that qualifier starts with the merge qualifier 'merge'
437    return CellUtil.matchingFamily(cell, HConstants.CATALOG_FAMILY)
438      && PrivateCellUtil.qualifierStartsWith(cell, HConstants.MERGE_QUALIFIER_PREFIX);
439  }
440
441  /**
442   * Lists all of the regions currently in META.
443   * @param connection                  to connect with
444   * @param excludeOfflinedSplitParents False if we are to include offlined/splitparents regions,
445   *                                    true and we'll leave out offlined regions from returned list
446   * @return List of all user-space regions.
447   */
448  public static List<RegionInfo> getAllRegions(Connection connection,
449    boolean excludeOfflinedSplitParents) throws IOException {
450    List<Pair<RegionInfo, ServerName>> result;
451
452    result = getTableRegionsAndLocations(connection, null, excludeOfflinedSplitParents);
453
454    return getListOfRegionInfos(result);
455
456  }
457
458  /**
459   * Gets all of the regions of the specified table. Do not use this method to get meta table
460   * regions, use methods in MetaTableLocator instead.
461   * @param connection connection we're using
462   * @param tableName  table we're looking for
463   * @return Ordered list of {@link RegionInfo}.
464   */
465  public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName)
466    throws IOException {
467    return getTableRegions(connection, tableName, false);
468  }
469
470  /**
471   * Gets all of the regions of the specified table. Do not use this method to get meta table
472   * regions, use methods in MetaTableLocator instead.
473   * @param connection                  connection we're using
474   * @param tableName                   table we're looking for
475   * @param excludeOfflinedSplitParents If true, do not include offlined split parents in the
476   *                                    return.
477   * @return Ordered list of {@link RegionInfo}.
478   */
479  public static List<RegionInfo> getTableRegions(Connection connection, TableName tableName,
480    final boolean excludeOfflinedSplitParents) throws IOException {
481    List<Pair<RegionInfo, ServerName>> result =
482      getTableRegionsAndLocations(connection, tableName, excludeOfflinedSplitParents);
483    return getListOfRegionInfos(result);
484  }
485
486  @SuppressWarnings("MixedMutabilityReturnType")
487  private static List<RegionInfo>
488    getListOfRegionInfos(final List<Pair<RegionInfo, ServerName>> pairs) {
489    if (pairs == null || pairs.isEmpty()) {
490      return Collections.emptyList();
491    }
492    List<RegionInfo> result = new ArrayList<>(pairs.size());
493    for (Pair<RegionInfo, ServerName> pair : pairs) {
494      result.add(pair.getFirst());
495    }
496    return result;
497  }
498
499  /**
500   * Returns start row for scanning META according to query type
501   */
502  public static byte[] getTableStartRowForMeta(TableName tableName, QueryType type) {
503    if (tableName == null) {
504      return null;
505    }
506    switch (type) {
507      case REGION:
508        byte[] startRow = new byte[tableName.getName().length + 2];
509        System.arraycopy(tableName.getName(), 0, startRow, 0, tableName.getName().length);
510        startRow[startRow.length - 2] = HConstants.DELIMITER;
511        startRow[startRow.length - 1] = HConstants.DELIMITER;
512        return startRow;
513      case ALL:
514      case TABLE:
515      default:
516        return tableName.getName();
517    }
518  }
519
520  /**
521   * Returns stop row for scanning META according to query type
522   */
523  public static byte[] getTableStopRowForMeta(TableName tableName, QueryType type) {
524    if (tableName == null) {
525      return null;
526    }
527    final byte[] stopRow;
528    switch (type) {
529      case REGION:
530        stopRow = new byte[tableName.getName().length + 3];
531        System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length);
532        stopRow[stopRow.length - 3] = ' ';
533        stopRow[stopRow.length - 2] = HConstants.DELIMITER;
534        stopRow[stopRow.length - 1] = HConstants.DELIMITER;
535        break;
536      case ALL:
537      case TABLE:
538      default:
539        stopRow = new byte[tableName.getName().length + 1];
540        System.arraycopy(tableName.getName(), 0, stopRow, 0, tableName.getName().length);
541        stopRow[stopRow.length - 1] = ' ';
542        break;
543    }
544    return stopRow;
545  }
546
547  /**
548   * This method creates a Scan object that will only scan catalog rows that belong to the specified
549   * table. It doesn't specify any columns. This is a better alternative to just using a start row
550   * and scan until it hits a new table since that requires parsing the HRI to get the table name.
551   * @param tableName bytes of table's name
552   * @return configured Scan object
553   */
554  public static Scan getScanForTableName(Configuration conf, TableName tableName) {
555    // Start key is just the table name with delimiters
556    byte[] startKey = getTableStartRowForMeta(tableName, QueryType.REGION);
557    // Stop key appends the smallest possible char to the table name
558    byte[] stopKey = getTableStopRowForMeta(tableName, QueryType.REGION);
559
560    Scan scan = getMetaScan(conf, -1);
561    scan.setStartRow(startKey);
562    scan.setStopRow(stopKey);
563    return scan;
564  }
565
566  private static Scan getMetaScan(Configuration conf, int rowUpperLimit) {
567    Scan scan = new Scan();
568    int scannerCaching = conf.getInt(HConstants.HBASE_META_SCANNER_CACHING,
569      HConstants.DEFAULT_HBASE_META_SCANNER_CACHING);
570    if (conf.getBoolean(HConstants.USE_META_REPLICAS, HConstants.DEFAULT_USE_META_REPLICAS)) {
571      scan.setConsistency(Consistency.TIMELINE);
572    }
573    if (rowUpperLimit > 0) {
574      scan.setLimit(rowUpperLimit);
575      scan.setReadType(Scan.ReadType.PREAD);
576    }
577    scan.setCaching(scannerCaching);
578    return scan;
579  }
580
581  /**
582   * Do not use this method to get meta table regions, use methods in MetaTableLocator instead.
583   * @param connection connection we're using
584   * @param tableName  table we're looking for
585   * @return Return list of regioninfos and server.
586   */
587  public static List<Pair<RegionInfo, ServerName>>
588    getTableRegionsAndLocations(Connection connection, TableName tableName) throws IOException {
589    return getTableRegionsAndLocations(connection, tableName, true);
590  }
591
592  /**
593   * Do not use this method to get meta table regions, use methods in MetaTableLocator instead.
594   * @param connection                  connection we're using
595   * @param tableName                   table to work with, can be null for getting all regions
596   * @param excludeOfflinedSplitParents don't return split parents
597   * @return Return list of regioninfos and server addresses.
598   */
599  // What happens here when 1M regions in hbase:meta? This won't scale?
600  public static List<Pair<RegionInfo, ServerName>> getTableRegionsAndLocations(
601    Connection connection, @Nullable final TableName tableName,
602    final boolean excludeOfflinedSplitParents) throws IOException {
603    if (tableName != null && tableName.equals(TableName.META_TABLE_NAME)) {
604      throw new IOException(
605        "This method can't be used to locate meta regions;" + " use MetaTableLocator instead");
606    }
607    // Make a version of CollectingVisitor that collects RegionInfo and ServerAddress
608    CollectingVisitor<Pair<RegionInfo, ServerName>> visitor =
609      new CollectingVisitor<Pair<RegionInfo, ServerName>>() {
610        private RegionLocations current = null;
611
612        @Override
613        public boolean visit(Result r) throws IOException {
614          current = getRegionLocations(r);
615          if (current == null || current.getRegionLocation().getRegion() == null) {
616            LOG.warn("No serialized RegionInfo in " + r);
617            return true;
618          }
619          RegionInfo hri = current.getRegionLocation().getRegion();
620          if (excludeOfflinedSplitParents && hri.isSplitParent()) return true;
621          // Else call super and add this Result to the collection.
622          return super.visit(r);
623        }
624
625        @Override
626        void add(Result r) {
627          if (current == null) {
628            return;
629          }
630          for (HRegionLocation loc : current.getRegionLocations()) {
631            if (loc != null) {
632              this.results.add(new Pair<>(loc.getRegion(), loc.getServerName()));
633            }
634          }
635        }
636      };
637    scanMeta(connection, getTableStartRowForMeta(tableName, QueryType.REGION),
638      getTableStopRowForMeta(tableName, QueryType.REGION), QueryType.REGION, visitor);
639    return visitor.getResults();
640  }
641
642  /**
643   * Get the user regions a given server is hosting.
644   * @param connection connection we're using
645   * @param serverName server whose regions we're interested in
646   * @return List of user regions installed on this server (does not include catalog regions).
647   */
648  public static NavigableMap<RegionInfo, Result> getServerUserRegions(Connection connection,
649    final ServerName serverName) throws IOException {
650    final NavigableMap<RegionInfo, Result> hris = new TreeMap<>();
651    // Fill the above hris map with entries from hbase:meta that have the passed
652    // servername.
653    CollectingVisitor<Result> v = new CollectingVisitor<Result>() {
654      @Override
655      void add(Result r) {
656        if (r == null || r.isEmpty()) return;
657        RegionLocations locations = getRegionLocations(r);
658        if (locations == null) return;
659        for (HRegionLocation loc : locations.getRegionLocations()) {
660          if (loc != null) {
661            if (loc.getServerName() != null && loc.getServerName().equals(serverName)) {
662              hris.put(loc.getRegion(), r);
663            }
664          }
665        }
666      }
667    };
668    scanMeta(connection, null, null, QueryType.REGION, v);
669    return hris;
670  }
671
672  public static void fullScanMetaAndPrint(Connection connection) throws IOException {
673    Visitor v = r -> {
674      if (r == null || r.isEmpty()) {
675        return true;
676      }
677      LOG.info("fullScanMetaAndPrint.Current Meta Row: " + r);
678      TableState state = getTableState(r);
679      if (state != null) {
680        LOG.info("fullScanMetaAndPrint.Table State={}" + state);
681      } else {
682        RegionLocations locations = getRegionLocations(r);
683        if (locations == null) {
684          return true;
685        }
686        for (HRegionLocation loc : locations.getRegionLocations()) {
687          if (loc != null) {
688            LOG.info("fullScanMetaAndPrint.HRI Print={}", loc.getRegion());
689          }
690        }
691      }
692      return true;
693    };
694    scanMeta(connection, null, null, QueryType.ALL, v);
695  }
696
697  public static void scanMetaForTableRegions(Connection connection, Visitor visitor,
698    TableName tableName, CatalogReplicaMode metaReplicaMode) throws IOException {
699    scanMeta(connection, tableName, QueryType.REGION, Integer.MAX_VALUE, visitor, metaReplicaMode);
700  }
701
702  public static void scanMetaForTableRegions(Connection connection, Visitor visitor,
703    TableName tableName) throws IOException {
704    scanMetaForTableRegions(connection, visitor, tableName, CatalogReplicaMode.NONE);
705  }
706
707  private static void scanMeta(Connection connection, TableName table, QueryType type, int maxRows,
708    final Visitor visitor, CatalogReplicaMode metaReplicaMode) throws IOException {
709    scanMeta(connection, getTableStartRowForMeta(table, type), getTableStopRowForMeta(table, type),
710      type, null, maxRows, visitor, metaReplicaMode);
711
712  }
713
714  private static void scanMeta(Connection connection, @Nullable final byte[] startRow,
715    @Nullable final byte[] stopRow, QueryType type, final Visitor visitor) throws IOException {
716    scanMeta(connection, startRow, stopRow, type, Integer.MAX_VALUE, visitor);
717  }
718
719  /**
720   * Performs a scan of META table for given table starting from given row.
721   * @param connection connection we're using
722   * @param visitor    visitor to call
723   * @param tableName  table withing we scan
724   * @param row        start scan from this row
725   * @param rowLimit   max number of rows to return
726   */
727  public static void scanMeta(Connection connection, final Visitor visitor,
728    final TableName tableName, final byte[] row, final int rowLimit) throws IOException {
729    byte[] startRow = null;
730    byte[] stopRow = null;
731    if (tableName != null) {
732      startRow = getTableStartRowForMeta(tableName, QueryType.REGION);
733      if (row != null) {
734        RegionInfo closestRi = getClosestRegionInfo(connection, tableName, row);
735        startRow =
736          RegionInfo.createRegionName(tableName, closestRi.getStartKey(), HConstants.ZEROES, false);
737      }
738      stopRow = getTableStopRowForMeta(tableName, QueryType.REGION);
739    }
740    scanMeta(connection, startRow, stopRow, QueryType.REGION, rowLimit, visitor);
741  }
742
743  /**
744   * Performs a scan of META table.
745   * @param connection connection we're using
746   * @param startRow   Where to start the scan. Pass null if want to begin scan at first row.
747   * @param stopRow    Where to stop the scan. Pass null if want to scan all rows from the start one
748   * @param type       scanned part of meta
749   * @param maxRows    maximum rows to return
750   * @param visitor    Visitor invoked against each row.
751   */
752  static void scanMeta(Connection connection, @Nullable final byte[] startRow,
753    @Nullable final byte[] stopRow, QueryType type, int maxRows, final Visitor visitor)
754    throws IOException {
755    scanMeta(connection, startRow, stopRow, type, null, maxRows, visitor, CatalogReplicaMode.NONE);
756  }
757
758  private static void scanMeta(Connection connection, @Nullable final byte[] startRow,
759    @Nullable final byte[] stopRow, QueryType type, @Nullable Filter filter, int maxRows,
760    final Visitor visitor, CatalogReplicaMode metaReplicaMode) throws IOException {
761    int rowUpperLimit = maxRows > 0 ? maxRows : Integer.MAX_VALUE;
762    Scan scan = getMetaScan(connection.getConfiguration(), rowUpperLimit);
763
764    for (byte[] family : type.getFamilies()) {
765      scan.addFamily(family);
766    }
767    if (startRow != null) {
768      scan.withStartRow(startRow);
769    }
770    if (stopRow != null) {
771      scan.withStopRow(stopRow);
772    }
773    if (filter != null) {
774      scan.setFilter(filter);
775    }
776
777    if (LOG.isTraceEnabled()) {
778      LOG.trace("Scanning META" + " starting at row=" + Bytes.toStringBinary(startRow)
779        + " stopping at row=" + Bytes.toStringBinary(stopRow) + " for max=" + rowUpperLimit
780        + " with caching=" + scan.getCaching());
781    }
782
783    int currentRow = 0;
784    try (Table metaTable = getMetaHTable(connection)) {
785      switch (metaReplicaMode) {
786        case LOAD_BALANCE:
787          int numOfReplicas = metaTable.getDescriptor().getRegionReplication();
788          if (numOfReplicas > 1) {
789            int replicaId = ThreadLocalRandom.current().nextInt(numOfReplicas);
790
791            // When the replicaId is 0, do not set to Consistency.TIMELINE
792            if (replicaId > 0) {
793              scan.setReplicaId(replicaId);
794              scan.setConsistency(Consistency.TIMELINE);
795            }
796          }
797          break;
798        case HEDGED_READ:
799          scan.setConsistency(Consistency.TIMELINE);
800          break;
801        default:
802          // Do nothing
803      }
804      try (ResultScanner scanner = metaTable.getScanner(scan)) {
805        Result data;
806        while ((data = scanner.next()) != null) {
807          if (data.isEmpty()) continue;
808          // Break if visit returns false.
809          if (!visitor.visit(data)) break;
810          if (++currentRow >= rowUpperLimit) break;
811        }
812      }
813    }
814    if (visitor instanceof Closeable) {
815      try {
816        ((Closeable) visitor).close();
817      } catch (Throwable t) {
818        ExceptionUtil.rethrowIfInterrupt(t);
819        LOG.debug("Got exception in closing the meta scanner visitor", t);
820      }
821    }
822  }
823
824  /** Returns Get closest metatable region row to passed <code>row</code> */
825  @NonNull
826  private static RegionInfo getClosestRegionInfo(Connection connection,
827    @NonNull final TableName tableName, @NonNull final byte[] row) throws IOException {
828    byte[] searchRow = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
829    Scan scan = getMetaScan(connection.getConfiguration(), 1);
830    scan.setReversed(true);
831    scan.withStartRow(searchRow);
832    try (ResultScanner resultScanner = getMetaHTable(connection).getScanner(scan)) {
833      Result result = resultScanner.next();
834      if (result == null) {
835        throw new TableNotFoundException("Cannot find row in META " + " for table: " + tableName
836          + ", row=" + Bytes.toStringBinary(row));
837      }
838      RegionInfo regionInfo = getRegionInfo(result);
839      if (regionInfo == null) {
840        throw new IOException("RegionInfo was null or empty in Meta for " + tableName + ", row="
841          + Bytes.toStringBinary(row));
842      }
843      return regionInfo;
844    }
845  }
846
847  /**
848   * Returns the column family used for meta columns.
849   * @return HConstants.CATALOG_FAMILY.
850   */
851  public static byte[] getCatalogFamily() {
852    return HConstants.CATALOG_FAMILY;
853  }
854
855  /**
856   * Returns the column family used for table columns.
857   * @return HConstants.TABLE_FAMILY.
858   */
859  private static byte[] getTableFamily() {
860    return HConstants.TABLE_FAMILY;
861  }
862
863  /**
864   * Returns the column qualifier for serialized region info
865   * @return HConstants.REGIONINFO_QUALIFIER
866   */
867  public static byte[] getRegionInfoColumn() {
868    return HConstants.REGIONINFO_QUALIFIER;
869  }
870
871  /**
872   * Returns the column qualifier for serialized table state
873   * @return HConstants.TABLE_STATE_QUALIFIER
874   */
875  private static byte[] getTableStateColumn() {
876    return HConstants.TABLE_STATE_QUALIFIER;
877  }
878
879  /**
880   * Returns the column qualifier for serialized region state
881   * @return HConstants.STATE_QUALIFIER
882   */
883  private static byte[] getRegionStateColumn() {
884    return HConstants.STATE_QUALIFIER;
885  }
886
887  /**
888   * Returns the column qualifier for serialized region state
889   * @param replicaId the replicaId of the region
890   * @return a byte[] for state qualifier
891   */
892  public static byte[] getRegionStateColumn(int replicaId) {
893    return replicaId == 0
894      ? HConstants.STATE_QUALIFIER
895      : Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
896        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
897  }
898
899  /**
900   * Returns the column qualifier for serialized region state
901   * @param replicaId the replicaId of the region
902   * @return a byte[] for sn column qualifier
903   */
904  public static byte[] getServerNameColumn(int replicaId) {
905    return replicaId == 0
906      ? HConstants.SERVERNAME_QUALIFIER
907      : Bytes.toBytes(HConstants.SERVERNAME_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
908        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
909  }
910
911  /**
912   * Returns the column qualifier for server column for replicaId
913   * @param replicaId the replicaId of the region
914   * @return a byte[] for server column qualifier
915   */
916  public static byte[] getServerColumn(int replicaId) {
917    return replicaId == 0
918      ? HConstants.SERVER_QUALIFIER
919      : Bytes.toBytes(HConstants.SERVER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
920        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
921  }
922
923  /**
924   * Returns the column qualifier for server start code column for replicaId
925   * @param replicaId the replicaId of the region
926   * @return a byte[] for server start code column qualifier
927   */
928  public static byte[] getStartCodeColumn(int replicaId) {
929    return replicaId == 0
930      ? HConstants.STARTCODE_QUALIFIER
931      : Bytes.toBytes(HConstants.STARTCODE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
932        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
933  }
934
935  /**
936   * Returns the column qualifier for seqNum column for replicaId
937   * @param replicaId the replicaId of the region
938   * @return a byte[] for seqNum column qualifier
939   */
940  public static byte[] getSeqNumColumn(int replicaId) {
941    return replicaId == 0
942      ? HConstants.SEQNUM_QUALIFIER
943      : Bytes.toBytes(HConstants.SEQNUM_QUALIFIER_STR + META_REPLICA_ID_DELIMITER
944        + String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
945  }
946
947  /**
948   * Parses the replicaId from the server column qualifier. See top of the class javadoc for the
949   * actual meta layout
950   * @param serverColumn the column qualifier
951   * @return an int for the replicaId
952   */
953  static int parseReplicaIdFromServerColumn(byte[] serverColumn) {
954    String serverStr = Bytes.toString(serverColumn);
955
956    Matcher matcher = SERVER_COLUMN_PATTERN.matcher(serverStr);
957    if (matcher.matches() && matcher.groupCount() > 0) {
958      String group = matcher.group(1);
959      if (group != null && group.length() > 0) {
960        return Integer.parseInt(group.substring(1), 16);
961      } else {
962        return 0;
963      }
964    }
965    return -1;
966  }
967
968  /**
969   * Returns a {@link ServerName} from catalog table {@link Result}.
970   * @param r Result to pull from
971   * @return A ServerName instance or null if necessary fields not found or empty.
972   */
973  @Nullable
974  @InterfaceAudience.Private // for use by HMaster#getTableRegionRow which is used for testing only
975  public static ServerName getServerName(final Result r, final int replicaId) {
976    byte[] serverColumn = getServerColumn(replicaId);
977    Cell cell = r.getColumnLatestCell(getCatalogFamily(), serverColumn);
978    if (cell == null || cell.getValueLength() == 0) return null;
979    String hostAndPort =
980      Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
981    byte[] startcodeColumn = getStartCodeColumn(replicaId);
982    cell = r.getColumnLatestCell(getCatalogFamily(), startcodeColumn);
983    if (cell == null || cell.getValueLength() == 0) return null;
984    try {
985      return ServerName.valueOf(hostAndPort,
986        Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
987    } catch (IllegalArgumentException e) {
988      LOG.error("Ignoring invalid region for server " + hostAndPort + "; cell=" + cell, e);
989      return null;
990    }
991  }
992
993  /**
994   * Returns the {@link ServerName} from catalog table {@link Result} where the region is
995   * transitioning on. It should be the same as {@link MetaTableAccessor#getServerName(Result,int)}
996   * if the server is at OPEN state.
997   * @param r Result to pull the transitioning server name from
998   * @return A ServerName instance or {@link MetaTableAccessor#getServerName(Result,int)} if
999   *         necessary fields not found or empty.
1000   */
1001  @Nullable
1002  public static ServerName getTargetServerName(final Result r, final int replicaId) {
1003    final Cell cell =
1004      r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getServerNameColumn(replicaId));
1005    if (cell == null || cell.getValueLength() == 0) {
1006      RegionLocations locations = MetaTableAccessor.getRegionLocations(r);
1007      if (locations != null) {
1008        HRegionLocation location = locations.getRegionLocation(replicaId);
1009        if (location != null) {
1010          return location.getServerName();
1011        }
1012      }
1013      return null;
1014    }
1015    return ServerName.parseServerName(
1016      Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
1017  }
1018
1019  /**
1020   * The latest seqnum that the server writing to meta observed when opening the region. E.g. the
1021   * seqNum when the result of {@link #getServerName(Result, int)} was written.
1022   * @param r Result to pull the seqNum from
1023   * @return SeqNum, or HConstants.NO_SEQNUM if there's no value written.
1024   */
1025  private static long getSeqNumDuringOpen(final Result r, final int replicaId) {
1026    Cell cell = r.getColumnLatestCell(getCatalogFamily(), getSeqNumColumn(replicaId));
1027    if (cell == null || cell.getValueLength() == 0) return HConstants.NO_SEQNUM;
1028    return Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
1029  }
1030
1031  /**
1032   * Returns the daughter regions by reading the corresponding columns of the catalog table Result.
1033   * @param data a Result object from the catalog table scan
1034   * @return pair of RegionInfo or PairOfSameType(null, null) if region is not a split parent
1035   */
1036  public static PairOfSameType<RegionInfo> getDaughterRegions(Result data) {
1037    RegionInfo splitA = getRegionInfo(data, HConstants.SPLITA_QUALIFIER);
1038    RegionInfo splitB = getRegionInfo(data, HConstants.SPLITB_QUALIFIER);
1039    return new PairOfSameType<>(splitA, splitB);
1040  }
1041
1042  /**
1043   * Returns an HRegionLocationList extracted from the result.
1044   * @return an HRegionLocationList containing all locations for the region range or null if we
1045   *         can't deserialize the result.
1046   */
1047  @Nullable
1048  public static RegionLocations getRegionLocations(final Result r) {
1049    if (r == null) return null;
1050    RegionInfo regionInfo = getRegionInfo(r, getRegionInfoColumn());
1051    if (regionInfo == null) return null;
1052
1053    List<HRegionLocation> locations = new ArrayList<>(1);
1054    NavigableMap<byte[], NavigableMap<byte[], byte[]>> familyMap = r.getNoVersionMap();
1055
1056    locations.add(getRegionLocation(r, regionInfo, 0));
1057
1058    NavigableMap<byte[], byte[]> infoMap = familyMap.get(getCatalogFamily());
1059    if (infoMap == null) return new RegionLocations(locations);
1060
1061    // iterate until all serverName columns are seen
1062    int replicaId = 0;
1063    byte[] serverColumn = getServerColumn(replicaId);
1064    SortedMap<byte[], byte[]> serverMap;
1065    serverMap = infoMap.tailMap(serverColumn, false);
1066
1067    if (serverMap.isEmpty()) return new RegionLocations(locations);
1068
1069    for (Map.Entry<byte[], byte[]> entry : serverMap.entrySet()) {
1070      replicaId = parseReplicaIdFromServerColumn(entry.getKey());
1071      if (replicaId < 0) {
1072        break;
1073      }
1074      HRegionLocation location = getRegionLocation(r, regionInfo, replicaId);
1075      // In case the region replica is newly created, it's location might be null. We usually do not
1076      // have HRL's in RegionLocations object with null ServerName. They are handled as null HRLs.
1077      if (location.getServerName() == null) {
1078        locations.add(null);
1079      } else {
1080        locations.add(location);
1081      }
1082    }
1083
1084    return new RegionLocations(locations);
1085  }
1086
1087  /**
1088   * Returns the HRegionLocation parsed from the given meta row Result for the given regionInfo and
1089   * replicaId. The regionInfo can be the default region info for the replica.
1090   * @param r          the meta row result
1091   * @param regionInfo RegionInfo for default replica
1092   * @param replicaId  the replicaId for the HRegionLocation
1093   * @return HRegionLocation parsed from the given meta row Result for the given replicaId
1094   */
1095  private static HRegionLocation getRegionLocation(final Result r, final RegionInfo regionInfo,
1096    final int replicaId) {
1097    ServerName serverName = getServerName(r, replicaId);
1098    long seqNum = getSeqNumDuringOpen(r, replicaId);
1099    RegionInfo replicaInfo = RegionReplicaUtil.getRegionInfoForReplica(regionInfo, replicaId);
1100    return new HRegionLocation(replicaInfo, serverName, seqNum);
1101  }
1102
1103  /**
1104   * Returns RegionInfo object from the column
1105   * HConstants.CATALOG_FAMILY:HConstants.REGIONINFO_QUALIFIER of the catalog table Result.
1106   * @param data a Result object from the catalog table scan
1107   * @return RegionInfo or null
1108   */
1109  public static RegionInfo getRegionInfo(Result data) {
1110    return getRegionInfo(data, HConstants.REGIONINFO_QUALIFIER);
1111  }
1112
1113  /**
1114   * Returns the RegionInfo object from the column {@link HConstants#CATALOG_FAMILY} and
1115   * <code>qualifier</code> of the catalog table result.
1116   * @param r         a Result object from the catalog table scan
1117   * @param qualifier Column family qualifier
1118   * @return An RegionInfo instance or null.
1119   */
1120  @Nullable
1121  public static RegionInfo getRegionInfo(final Result r, byte[] qualifier) {
1122    Cell cell = r.getColumnLatestCell(getCatalogFamily(), qualifier);
1123    if (cell == null) return null;
1124    return RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(),
1125      cell.getValueLength());
1126  }
1127
1128  /**
1129   * Fetch table state for given table from META table
1130   * @param conn      connection to use
1131   * @param tableName table to fetch state for
1132   */
1133  @Nullable
1134  public static TableState getTableState(Connection conn, TableName tableName) throws IOException {
1135    if (tableName.equals(TableName.META_TABLE_NAME)) {
1136      return new TableState(tableName, TableState.State.ENABLED);
1137    }
1138    Table metaHTable = getMetaHTable(conn);
1139    Get get = new Get(tableName.getName()).addColumn(getTableFamily(), getTableStateColumn());
1140    Result result = metaHTable.get(get);
1141    return getTableState(result);
1142  }
1143
1144  /**
1145   * Fetch table states from META table
1146   * @param conn connection to use
1147   * @return map {tableName -&gt; state}
1148   */
1149  public static Map<TableName, TableState> getTableStates(Connection conn) throws IOException {
1150    final Map<TableName, TableState> states = new LinkedHashMap<>();
1151    Visitor collector = r -> {
1152      TableState state = getTableState(r);
1153      if (state != null) {
1154        states.put(state.getTableName(), state);
1155      }
1156      return true;
1157    };
1158    fullScanTables(conn, collector);
1159    return states;
1160  }
1161
1162  /**
1163   * Updates state in META Do not use. For internal use only.
1164   * @param conn      connection to use
1165   * @param tableName table to look for
1166   */
1167  public static void updateTableState(Connection conn, TableName tableName, TableState.State actual)
1168    throws IOException {
1169    updateTableState(conn, new TableState(tableName, actual));
1170  }
1171
1172  /**
1173   * Decode table state from META Result. Should contain cell from HConstants.TABLE_FAMILY
1174   * @return null if not found
1175   */
1176  @Nullable
1177  public static TableState getTableState(Result r) throws IOException {
1178    Cell cell = r.getColumnLatestCell(getTableFamily(), getTableStateColumn());
1179    if (cell == null) {
1180      return null;
1181    }
1182    try {
1183      return TableState.parseFrom(TableName.valueOf(r.getRow()),
1184        Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(),
1185          cell.getValueOffset() + cell.getValueLength()));
1186    } catch (DeserializationException e) {
1187      throw new IOException(e);
1188    }
1189  }
1190
1191  /**
1192   * Implementations 'visit' a catalog table row.
1193   */
1194  public interface Visitor {
1195    /**
1196     * Visit the catalog table row.
1197     * @param r A row from catalog table
1198     * @return True if we are to proceed scanning the table, else false if we are to stop now.
1199     */
1200    boolean visit(final Result r) throws IOException;
1201  }
1202
1203  /**
1204   * Implementations 'visit' a catalog table row but with close() at the end.
1205   */
1206  public interface CloseableVisitor extends Visitor, Closeable {
1207  }
1208
1209  /**
1210   * A {@link Visitor} that collects content out of passed {@link Result}.
1211   */
1212  static abstract class CollectingVisitor<T> implements Visitor {
1213    final List<T> results = new ArrayList<>();
1214
1215    @Override
1216    public boolean visit(Result r) throws IOException {
1217      if (r != null && !r.isEmpty()) {
1218        add(r);
1219      }
1220      return true;
1221    }
1222
1223    abstract void add(Result r);
1224
1225    /** Returns Collected results; wait till visits complete to collect all possible results */
1226    List<T> getResults() {
1227      return this.results;
1228    }
1229  }
1230
1231  /**
1232   * Collects all returned.
1233   */
1234  static class CollectAllVisitor extends CollectingVisitor<Result> {
1235    @Override
1236    void add(Result r) {
1237      this.results.add(r);
1238    }
1239  }
1240
1241  /**
1242   * A Visitor that skips offline regions and split parents
1243   */
1244  public static abstract class DefaultVisitorBase implements Visitor {
1245
1246    DefaultVisitorBase() {
1247      super();
1248    }
1249
1250    public abstract boolean visitInternal(Result rowResult) throws IOException;
1251
1252    @Override
1253    public boolean visit(Result rowResult) throws IOException {
1254      RegionInfo info = getRegionInfo(rowResult);
1255      if (info == null) {
1256        return true;
1257      }
1258
1259      // skip over offline and split regions
1260      if (!(info.isOffline() || info.isSplit())) {
1261        return visitInternal(rowResult);
1262      }
1263      return true;
1264    }
1265  }
1266
1267  /**
1268   * A Visitor for a table. Provides a consistent view of the table's hbase:meta entries during
1269   * concurrent splits (see HBASE-5986 for details). This class does not guarantee ordered traversal
1270   * of meta entries, and can block until the hbase:meta entries for daughters are available during
1271   * splits.
1272   */
1273  public static abstract class TableVisitorBase extends DefaultVisitorBase {
1274    private TableName tableName;
1275
1276    public TableVisitorBase(TableName tableName) {
1277      super();
1278      this.tableName = tableName;
1279    }
1280
1281    @Override
1282    public final boolean visit(Result rowResult) throws IOException {
1283      RegionInfo info = getRegionInfo(rowResult);
1284      if (info == null) {
1285        return true;
1286      }
1287      if (!info.getTable().equals(tableName)) {
1288        return false;
1289      }
1290      return super.visit(rowResult);
1291    }
1292  }
1293
1294  ////////////////////////
1295  // Editing operations //
1296  ////////////////////////
1297  /**
1298   * Generates and returns a Put containing the region into for the catalog table
1299   */
1300  public static Put makePutFromRegionInfo(RegionInfo regionInfo, long ts) throws IOException {
1301    return addRegionInfo(new Put(regionInfo.getRegionName(), ts), regionInfo);
1302  }
1303
1304  /**
1305   * Generates and returns a Delete containing the region info for the catalog table
1306   */
1307  public static Delete makeDeleteFromRegionInfo(RegionInfo regionInfo, long ts) {
1308    if (regionInfo == null) {
1309      throw new IllegalArgumentException("Can't make a delete for null region");
1310    }
1311    Delete delete = new Delete(regionInfo.getRegionName());
1312    delete.addFamily(getCatalogFamily(), ts);
1313    return delete;
1314  }
1315
1316  /**
1317   * Adds split daughters to the Put
1318   */
1319  private static Put addDaughtersToPut(Put put, RegionInfo splitA, RegionInfo splitB)
1320    throws IOException {
1321    if (splitA != null) {
1322      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1323        .setFamily(HConstants.CATALOG_FAMILY).setQualifier(HConstants.SPLITA_QUALIFIER)
1324        .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
1325        .setValue(RegionInfo.toByteArray(splitA)).build());
1326    }
1327    if (splitB != null) {
1328      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1329        .setFamily(HConstants.CATALOG_FAMILY).setQualifier(HConstants.SPLITB_QUALIFIER)
1330        .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
1331        .setValue(RegionInfo.toByteArray(splitB)).build());
1332    }
1333    return put;
1334  }
1335
1336  /**
1337   * Put the passed <code>p</code> to the <code>hbase:meta</code> table.
1338   * @param connection connection we're using
1339   * @param p          Put to add to hbase:meta
1340   */
1341  private static void putToMetaTable(Connection connection, Put p) throws IOException {
1342    try (Table table = getMetaHTable(connection)) {
1343      put(table, p);
1344    }
1345  }
1346
1347  /**
1348   * @param t Table to use
1349   * @param p put to make
1350   */
1351  private static void put(Table t, Put p) throws IOException {
1352    debugLogMutation(p);
1353    t.put(p);
1354  }
1355
1356  /**
1357   * Put the passed <code>ps</code> to the <code>hbase:meta</code> table.
1358   * @param connection connection we're using
1359   * @param ps         Put to add to hbase:meta
1360   */
1361  public static void putsToMetaTable(final Connection connection, final List<Put> ps)
1362    throws IOException {
1363    if (ps.isEmpty()) {
1364      return;
1365    }
1366    try (Table t = getMetaHTable(connection)) {
1367      debugLogMutations(ps);
1368      // the implementation for putting a single Put is much simpler so here we do a check first.
1369      if (ps.size() == 1) {
1370        t.put(ps.get(0));
1371      } else {
1372        t.put(ps);
1373      }
1374    }
1375  }
1376
1377  /**
1378   * Delete the passed <code>d</code> from the <code>hbase:meta</code> table.
1379   * @param connection connection we're using
1380   * @param d          Delete to add to hbase:meta
1381   */
1382  private static void deleteFromMetaTable(final Connection connection, final Delete d)
1383    throws IOException {
1384    List<Delete> dels = new ArrayList<>(1);
1385    dels.add(d);
1386    deleteFromMetaTable(connection, dels);
1387  }
1388
1389  /**
1390   * Delete the passed <code>deletes</code> from the <code>hbase:meta</code> table.
1391   * @param connection connection we're using
1392   * @param deletes    Deletes to add to hbase:meta This list should support #remove.
1393   */
1394  private static void deleteFromMetaTable(final Connection connection, final List<Delete> deletes)
1395    throws IOException {
1396    try (Table t = getMetaHTable(connection)) {
1397      debugLogMutations(deletes);
1398      t.delete(deletes);
1399    }
1400  }
1401
1402  private static Put addRegionStateToPut(Put put, RegionState.State state) throws IOException {
1403    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1404      .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getRegionStateColumn())
1405      .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(state.name()))
1406      .build());
1407    return put;
1408  }
1409
1410  /**
1411   * Update state column in hbase:meta.
1412   */
1413  public static void updateRegionState(Connection connection, RegionInfo ri,
1414    RegionState.State state) throws IOException {
1415    Put put = new Put(RegionReplicaUtil.getRegionInfoForDefaultReplica(ri).getRegionName());
1416    MetaTableAccessor.putsToMetaTable(connection,
1417      Collections.singletonList(addRegionStateToPut(put, state)));
1418  }
1419
1420  /**
1421   * Adds daughter region infos to hbase:meta row for the specified region. Note that this does not
1422   * add its daughter's as different rows, but adds information about the daughters in the same row
1423   * as the parent. Use
1424   * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} if
1425   * you want to do that.
1426   * @param connection connection we're using
1427   * @param regionInfo RegionInfo of parent region
1428   * @param splitA     first split daughter of the parent regionInfo
1429   * @param splitB     second split daughter of the parent regionInfo
1430   * @throws IOException if problem connecting or updating meta
1431   */
1432  public static void addSplitsToParent(Connection connection, RegionInfo regionInfo,
1433    RegionInfo splitA, RegionInfo splitB) throws IOException {
1434    try (Table meta = getMetaHTable(connection)) {
1435      Put put = makePutFromRegionInfo(regionInfo, EnvironmentEdgeManager.currentTime());
1436      addDaughtersToPut(put, splitA, splitB);
1437      meta.put(put);
1438      debugLogMutation(put);
1439      LOG.debug("Added region {}", regionInfo.getRegionNameAsString());
1440    }
1441  }
1442
1443  /**
1444   * Adds a (single) hbase:meta row for the specified new region and its daughters. Note that this
1445   * does not add its daughter's as different rows, but adds information about the daughters in the
1446   * same row as the parent. Use
1447   * {@link #splitRegion(Connection, RegionInfo, long, RegionInfo, RegionInfo, ServerName, int)} if
1448   * you want to do that.
1449   * @param connection connection we're using
1450   * @param regionInfo region information
1451   * @throws IOException if problem connecting or updating meta
1452   */
1453  public static void addRegionToMeta(Connection connection, RegionInfo regionInfo)
1454    throws IOException {
1455    addRegionsToMeta(connection, Collections.singletonList(regionInfo), 1);
1456  }
1457
1458  /**
1459   * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions is
1460   * CLOSED.
1461   * @param connection  connection we're using
1462   * @param regionInfos region information list
1463   * @throws IOException if problem connecting or updating meta
1464   */
1465  public static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos,
1466    int regionReplication) throws IOException {
1467    addRegionsToMeta(connection, regionInfos, regionReplication,
1468      EnvironmentEdgeManager.currentTime());
1469  }
1470
1471  /**
1472   * Adds a hbase:meta row for each of the specified new regions. Initial state for new regions is
1473   * CLOSED.
1474   * @param connection  connection we're using
1475   * @param regionInfos region information list
1476   * @param ts          desired timestamp
1477   * @throws IOException if problem connecting or updating meta
1478   */
1479  private static void addRegionsToMeta(Connection connection, List<RegionInfo> regionInfos,
1480    int regionReplication, long ts) throws IOException {
1481    List<Put> puts = new ArrayList<>();
1482    for (RegionInfo regionInfo : regionInfos) {
1483      if (RegionReplicaUtil.isDefaultReplica(regionInfo)) {
1484        Put put = makePutFromRegionInfo(regionInfo, ts);
1485        // New regions are added with initial state of CLOSED.
1486        addRegionStateToPut(put, RegionState.State.CLOSED);
1487        // Add empty locations for region replicas so that number of replicas can be cached
1488        // whenever the primary region is looked up from meta
1489        for (int i = 1; i < regionReplication; i++) {
1490          addEmptyLocation(put, i);
1491        }
1492        puts.add(put);
1493      }
1494    }
1495    putsToMetaTable(connection, puts);
1496    LOG.info("Added {} regions to meta.", puts.size());
1497  }
1498
1499  static Put addMergeRegions(Put put, Collection<RegionInfo> mergeRegions) throws IOException {
1500    int limit = 10000; // Arbitrary limit. No room in our formatted 'task0000' below for more.
1501    int max = mergeRegions.size();
1502    if (max > limit) {
1503      // Should never happen!!!!! But just in case.
1504      throw new RuntimeException(
1505        "Can't merge " + max + " regions in one go; " + limit + " is upper-limit.");
1506    }
1507    int counter = 0;
1508    for (RegionInfo ri : mergeRegions) {
1509      String qualifier = String.format(HConstants.MERGE_QUALIFIER_PREFIX_STR + "%04d", counter++);
1510      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1511        .setFamily(HConstants.CATALOG_FAMILY).setQualifier(Bytes.toBytes(qualifier))
1512        .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
1513        .setValue(RegionInfo.toByteArray(ri)).build());
1514    }
1515    return put;
1516  }
1517
1518  /**
1519   * Merge regions into one in an atomic operation. Deletes the merging regions in hbase:meta and
1520   * adds the merged region.
1521   * @param connection   connection we're using
1522   * @param mergedRegion the merged region
1523   * @param parentSeqNum Parent regions to merge and their next open sequence id used by serial
1524   *                     replication. Set to -1 if not needed by this table.
1525   * @param sn           the location of the region
1526   */
1527  public static void mergeRegions(Connection connection, RegionInfo mergedRegion,
1528    Map<RegionInfo, Long> parentSeqNum, ServerName sn, int regionReplication) throws IOException {
1529    try (Table meta = getMetaHTable(connection)) {
1530      long time = HConstants.LATEST_TIMESTAMP;
1531      List<Mutation> mutations = new ArrayList<>();
1532      List<RegionInfo> replicationParents = new ArrayList<>();
1533      for (Map.Entry<RegionInfo, Long> e : parentSeqNum.entrySet()) {
1534        RegionInfo ri = e.getKey();
1535        long seqNum = e.getValue();
1536        // Deletes for merging regions
1537        mutations.add(makeDeleteFromRegionInfo(ri, time));
1538        if (seqNum > 0) {
1539          mutations.add(makePutForReplicationBarrier(ri, seqNum, time));
1540          replicationParents.add(ri);
1541        }
1542      }
1543      // Put for parent
1544      Put putOfMerged = makePutFromRegionInfo(mergedRegion, time);
1545      putOfMerged = addMergeRegions(putOfMerged, parentSeqNum.keySet());
1546      // Set initial state to CLOSED.
1547      // NOTE: If initial state is not set to CLOSED then merged region gets added with the
1548      // default OFFLINE state. If Master gets restarted after this step, start up sequence of
1549      // master tries to assign this offline region. This is followed by re-assignments of the
1550      // merged region from resumed {@link MergeTableRegionsProcedure}
1551      addRegionStateToPut(putOfMerged, RegionState.State.CLOSED);
1552      mutations.add(putOfMerged);
1553      // The merged is a new region, openSeqNum = 1 is fine. ServerName may be null
1554      // if crash after merge happened but before we got to here.. means in-memory
1555      // locations of offlined merged, now-closed, regions is lost. Should be ok. We
1556      // assign the merged region later.
1557      if (sn != null) {
1558        addLocation(putOfMerged, sn, 1, mergedRegion.getReplicaId());
1559      }
1560
1561      // Add empty locations for region replicas of the merged region so that number of replicas
1562      // can be cached whenever the primary region is looked up from meta
1563      for (int i = 1; i < regionReplication; i++) {
1564        addEmptyLocation(putOfMerged, i);
1565      }
1566      // add parent reference for serial replication
1567      if (!replicationParents.isEmpty()) {
1568        addReplicationParent(putOfMerged, replicationParents);
1569      }
1570      byte[] tableRow = Bytes.toBytes(mergedRegion.getRegionNameAsString() + HConstants.DELIMITER);
1571      multiMutate(meta, tableRow, mutations);
1572    }
1573  }
1574
1575  /**
1576   * Splits the region into two in an atomic operation. Offlines the parent region with the
1577   * information that it is split into two, and also adds the daughter regions. Does not add the
1578   * location information to the daughter regions since they are not open yet.
1579   * @param connection       connection we're using
1580   * @param parent           the parent region which is split
1581   * @param parentOpenSeqNum the next open sequence id for parent region, used by serial
1582   *                         replication. -1 if not necessary.
1583   * @param splitA           Split daughter region A
1584   * @param splitB           Split daughter region B
1585   * @param sn               the location of the region
1586   */
1587  public static void splitRegion(Connection connection, RegionInfo parent, long parentOpenSeqNum,
1588    RegionInfo splitA, RegionInfo splitB, ServerName sn, int regionReplication) throws IOException {
1589    try (Table meta = getMetaHTable(connection)) {
1590      long time = EnvironmentEdgeManager.currentTime();
1591      // Put for parent
1592      Put putParent = makePutFromRegionInfo(
1593        RegionInfoBuilder.newBuilder(parent).setOffline(true).setSplit(true).build(), time);
1594      addDaughtersToPut(putParent, splitA, splitB);
1595
1596      // Puts for daughters
1597      Put putA = makePutFromRegionInfo(splitA, time);
1598      Put putB = makePutFromRegionInfo(splitB, time);
1599      if (parentOpenSeqNum > 0) {
1600        addReplicationBarrier(putParent, parentOpenSeqNum);
1601        addReplicationParent(putA, Collections.singletonList(parent));
1602        addReplicationParent(putB, Collections.singletonList(parent));
1603      }
1604      // Set initial state to CLOSED
1605      // NOTE: If initial state is not set to CLOSED then daughter regions get added with the
1606      // default OFFLINE state. If Master gets restarted after this step, start up sequence of
1607      // master tries to assign these offline regions. This is followed by re-assignments of the
1608      // daughter regions from resumed {@link SplitTableRegionProcedure}
1609      addRegionStateToPut(putA, RegionState.State.CLOSED);
1610      addRegionStateToPut(putB, RegionState.State.CLOSED);
1611
1612      addSequenceNum(putA, 1, splitA.getReplicaId()); // new regions, openSeqNum = 1 is fine.
1613      addSequenceNum(putB, 1, splitB.getReplicaId());
1614
1615      // Add empty locations for region replicas of daughters so that number of replicas can be
1616      // cached whenever the primary region is looked up from meta
1617      for (int i = 1; i < regionReplication; i++) {
1618        addEmptyLocation(putA, i);
1619        addEmptyLocation(putB, i);
1620      }
1621
1622      byte[] tableRow = Bytes.toBytes(parent.getRegionNameAsString() + HConstants.DELIMITER);
1623      multiMutate(meta, tableRow, putParent, putA, putB);
1624    }
1625  }
1626
1627  /**
1628   * Update state of the table in meta.
1629   * @param connection what we use for update
1630   * @param state      new state
1631   */
1632  private static void updateTableState(Connection connection, TableState state) throws IOException {
1633    Put put = makePutFromTableState(state, EnvironmentEdgeManager.currentTime());
1634    putToMetaTable(connection, put);
1635    LOG.info("Updated {} in hbase:meta", state);
1636  }
1637
1638  /**
1639   * Construct PUT for given state
1640   * @param state new state
1641   */
1642  public static Put makePutFromTableState(TableState state, long ts) {
1643    Put put = new Put(state.getTableName().getName(), ts);
1644    put.addColumn(getTableFamily(), getTableStateColumn(), state.convert().toByteArray());
1645    return put;
1646  }
1647
1648  /**
1649   * Remove state for table from meta
1650   * @param connection to use for deletion
1651   * @param table      to delete state for
1652   */
1653  public static void deleteTableState(Connection connection, TableName table) throws IOException {
1654    long time = EnvironmentEdgeManager.currentTime();
1655    Delete delete = new Delete(table.getName());
1656    delete.addColumns(getTableFamily(), getTableStateColumn(), time);
1657    deleteFromMetaTable(connection, delete);
1658    LOG.info("Deleted table " + table + " state from META");
1659  }
1660
1661  private static void multiMutate(Table table, byte[] row, Mutation... mutations)
1662    throws IOException {
1663    multiMutate(table, row, Arrays.asList(mutations));
1664  }
1665
1666  /**
1667   * Performs an atomic multi-mutate operation against the given table. Used by the likes of merge
1668   * and split as these want to make atomic mutations across multiple rows.
1669   * @throws IOException even if we encounter a RuntimeException, we'll still wrap it in an IOE.
1670   */
1671  static void multiMutate(final Table table, byte[] row, final List<Mutation> mutations)
1672    throws IOException {
1673    debugLogMutations(mutations);
1674    Batch.Call<MultiRowMutationService, MutateRowsResponse> callable = instance -> {
1675      MutateRowsRequest.Builder builder = MutateRowsRequest.newBuilder();
1676      for (Mutation mutation : mutations) {
1677        if (mutation instanceof Put) {
1678          builder.addMutationRequest(
1679            ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.PUT, mutation));
1680        } else if (mutation instanceof Delete) {
1681          builder.addMutationRequest(
1682            ProtobufUtil.toMutation(ClientProtos.MutationProto.MutationType.DELETE, mutation));
1683        } else {
1684          throw new DoNotRetryIOException(
1685            "multi in MetaEditor doesn't support " + mutation.getClass().getName());
1686        }
1687      }
1688      ServerRpcController controller = new ServerRpcController();
1689      CoprocessorRpcUtils.BlockingRpcCallback<MutateRowsResponse> rpcCallback =
1690        new CoprocessorRpcUtils.BlockingRpcCallback<>();
1691      instance.mutateRows(controller, builder.build(), rpcCallback);
1692      MutateRowsResponse resp = rpcCallback.get();
1693      if (controller.failedOnException()) {
1694        throw controller.getFailedOn();
1695      }
1696      return resp;
1697    };
1698    try {
1699      table.coprocessorService(MultiRowMutationService.class, row, row, callable);
1700    } catch (Throwable e) {
1701      // Throw if an IOE else wrap in an IOE EVEN IF IT IS a RuntimeException (e.g.
1702      // a RejectedExecutionException because the hosting exception is shutting down.
1703      // This is old behavior worth reexamining. Procedures doing merge or split
1704      // currently don't handle RuntimeExceptions coming up out of meta table edits.
1705      // Would have to work on this at least. See HBASE-23904.
1706      Throwables.throwIfInstanceOf(e, IOException.class);
1707      throw new IOException(e);
1708    }
1709  }
1710
1711  /**
1712   * Updates the location of the specified region in hbase:meta to be the specified server hostname
1713   * and startcode.
1714   * <p>
1715   * Uses passed catalog tracker to get a connection to the server hosting hbase:meta and makes
1716   * edits to that region.
1717   * @param connection       connection we're using
1718   * @param regionInfo       region to update location of
1719   * @param openSeqNum       the latest sequence number obtained when the region was open
1720   * @param sn               Server name
1721   * @param masterSystemTime wall clock time from master if passed in the open region RPC
1722   */
1723  public static void updateRegionLocation(Connection connection, RegionInfo regionInfo,
1724    ServerName sn, long openSeqNum, long masterSystemTime) throws IOException {
1725    updateLocation(connection, regionInfo, sn, openSeqNum, masterSystemTime);
1726  }
1727
1728  /**
1729   * Updates the location of the specified region to be the specified server.
1730   * <p>
1731   * Connects to the specified server which should be hosting the specified catalog region name to
1732   * perform the edit.
1733   * @param connection       connection we're using
1734   * @param regionInfo       region to update location of
1735   * @param sn               Server name
1736   * @param openSeqNum       the latest sequence number obtained when the region was open
1737   * @param masterSystemTime wall clock time from master if passed in the open region RPC
1738   * @throws IOException In particular could throw {@link java.net.ConnectException} if the server
1739   *                     is down on other end.
1740   */
1741  private static void updateLocation(Connection connection, RegionInfo regionInfo, ServerName sn,
1742    long openSeqNum, long masterSystemTime) throws IOException {
1743    // region replicas are kept in the primary region's row
1744    Put put = new Put(getMetaKeyForRegion(regionInfo), masterSystemTime);
1745    addRegionInfo(put, regionInfo);
1746    addLocation(put, sn, openSeqNum, regionInfo.getReplicaId());
1747    putToMetaTable(connection, put);
1748    LOG.info("Updated row {} with server=", regionInfo.getRegionNameAsString(), sn);
1749  }
1750
1751  /**
1752   * Deletes the specified region from META.
1753   * @param connection connection we're using
1754   * @param regionInfo region to be deleted from META
1755   */
1756  public static void deleteRegionInfo(Connection connection, RegionInfo regionInfo)
1757    throws IOException {
1758    Delete delete = new Delete(regionInfo.getRegionName());
1759    delete.addFamily(getCatalogFamily(), HConstants.LATEST_TIMESTAMP);
1760    deleteFromMetaTable(connection, delete);
1761    LOG.info("Deleted " + regionInfo.getRegionNameAsString());
1762  }
1763
1764  /**
1765   * Deletes the specified regions from META.
1766   * @param connection  connection we're using
1767   * @param regionsInfo list of regions to be deleted from META
1768   */
1769  public static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo)
1770    throws IOException {
1771    deleteRegionInfos(connection, regionsInfo, EnvironmentEdgeManager.currentTime());
1772  }
1773
1774  /**
1775   * Deletes the specified regions from META.
1776   * @param connection  connection we're using
1777   * @param regionsInfo list of regions to be deleted from META
1778   */
1779  private static void deleteRegionInfos(Connection connection, List<RegionInfo> regionsInfo,
1780    long ts) throws IOException {
1781    List<Delete> deletes = new ArrayList<>(regionsInfo.size());
1782    for (RegionInfo hri : regionsInfo) {
1783      Delete e = new Delete(hri.getRegionName());
1784      e.addFamily(getCatalogFamily(), ts);
1785      deletes.add(e);
1786    }
1787    deleteFromMetaTable(connection, deletes);
1788    LOG.info("Deleted {} regions from META", regionsInfo.size());
1789    LOG.debug("Deleted regions: {}", regionsInfo);
1790  }
1791
1792  /**
1793   * Overwrites the specified regions from hbase:meta. Deletes old rows for the given regions and
1794   * adds new ones. Regions added back have state CLOSED.
1795   * @param connection  connection we're using
1796   * @param regionInfos list of regions to be added to META
1797   */
1798  public static void overwriteRegions(Connection connection, List<RegionInfo> regionInfos,
1799    int regionReplication) throws IOException {
1800    // use master time for delete marker and the Put
1801    long now = EnvironmentEdgeManager.currentTime();
1802    deleteRegionInfos(connection, regionInfos, now);
1803    // Why sleep? This is the easiest way to ensure that the previous deletes does not
1804    // eclipse the following puts, that might happen in the same ts from the server.
1805    // See HBASE-9906, and HBASE-9879. Once either HBASE-9879, HBASE-8770 is fixed,
1806    // or HBASE-9905 is fixed and meta uses seqIds, we do not need the sleep.
1807    //
1808    // HBASE-13875 uses master timestamp for the mutations. The 20ms sleep is not needed
1809    addRegionsToMeta(connection, regionInfos, regionReplication, now + 1);
1810    LOG.info("Overwritten " + regionInfos.size() + " regions to Meta");
1811    LOG.debug("Overwritten regions: {} ", regionInfos);
1812  }
1813
1814  /**
1815   * Deletes merge qualifiers for the specified merge region.
1816   * @param connection  connection we're using
1817   * @param mergeRegion the merged region
1818   */
1819  public static void deleteMergeQualifiers(Connection connection, final RegionInfo mergeRegion)
1820    throws IOException {
1821    Delete delete = new Delete(mergeRegion.getRegionName());
1822    // NOTE: We are doing a new hbase:meta read here.
1823    Cell[] cells = getRegionResult(connection, mergeRegion.getRegionName()).rawCells();
1824    if (cells == null || cells.length == 0) {
1825      return;
1826    }
1827    List<byte[]> qualifiers = new ArrayList<>();
1828    for (Cell cell : cells) {
1829      if (!isMergeQualifierPrefix(cell)) {
1830        continue;
1831      }
1832      byte[] qualifier = CellUtil.cloneQualifier(cell);
1833      qualifiers.add(qualifier);
1834      delete.addColumns(getCatalogFamily(), qualifier, HConstants.LATEST_TIMESTAMP);
1835    }
1836
1837    // There will be race condition that a GCMultipleMergedRegionsProcedure is scheduled while
1838    // the previous GCMultipleMergedRegionsProcedure is still going on, in this case, the second
1839    // GCMultipleMergedRegionsProcedure could delete the merged region by accident!
1840    if (qualifiers.isEmpty()) {
1841      LOG.info("No merged qualifiers for region " + mergeRegion.getRegionNameAsString()
1842        + " in meta table, they are cleaned up already, Skip.");
1843      return;
1844    }
1845
1846    deleteFromMetaTable(connection, delete);
1847    LOG.info(
1848      "Deleted merge references in " + mergeRegion.getRegionNameAsString() + ", deleted qualifiers "
1849        + qualifiers.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", ")));
1850  }
1851
1852  public static Put addRegionInfo(final Put p, final RegionInfo hri) throws IOException {
1853    p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow())
1854      .setFamily(getCatalogFamily()).setQualifier(HConstants.REGIONINFO_QUALIFIER)
1855      .setTimestamp(p.getTimestamp()).setType(Cell.Type.Put)
1856      // Serialize the Default Replica HRI otherwise scan of hbase:meta
1857      // shows an info:regioninfo value with encoded name and region
1858      // name that differs from that of the hbase;meta row.
1859      .setValue(RegionInfo.toByteArray(RegionReplicaUtil.getRegionInfoForDefaultReplica(hri)))
1860      .build());
1861    return p;
1862  }
1863
1864  public static Put addLocation(Put p, ServerName sn, long openSeqNum, int replicaId)
1865    throws IOException {
1866    CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
1867    return p
1868      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1869        .setQualifier(getServerColumn(replicaId)).setTimestamp(p.getTimestamp())
1870        .setType(Cell.Type.Put).setValue(Bytes.toBytes(sn.getAddress().toString())).build())
1871      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1872        .setQualifier(getStartCodeColumn(replicaId)).setTimestamp(p.getTimestamp())
1873        .setType(Cell.Type.Put).setValue(Bytes.toBytes(sn.getStartcode())).build())
1874      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1875        .setQualifier(getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp())
1876        .setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum)).build());
1877  }
1878
1879  private static void writeRegionName(ByteArrayOutputStream out, byte[] regionName) {
1880    for (byte b : regionName) {
1881      if (b == ESCAPE_BYTE) {
1882        out.write(ESCAPE_BYTE);
1883      }
1884      out.write(b);
1885    }
1886  }
1887
1888  public static byte[] getParentsBytes(List<RegionInfo> parents) {
1889    ByteArrayOutputStream bos = new ByteArrayOutputStream();
1890    Iterator<RegionInfo> iter = parents.iterator();
1891    writeRegionName(bos, iter.next().getRegionName());
1892    while (iter.hasNext()) {
1893      bos.write(ESCAPE_BYTE);
1894      bos.write(SEPARATED_BYTE);
1895      writeRegionName(bos, iter.next().getRegionName());
1896    }
1897    return bos.toByteArray();
1898  }
1899
1900  private static List<byte[]> parseParentsBytes(byte[] bytes) {
1901    List<byte[]> parents = new ArrayList<>();
1902    ByteArrayOutputStream bos = new ByteArrayOutputStream();
1903    for (int i = 0; i < bytes.length; i++) {
1904      if (bytes[i] == ESCAPE_BYTE) {
1905        i++;
1906        if (bytes[i] == SEPARATED_BYTE) {
1907          parents.add(bos.toByteArray());
1908          bos.reset();
1909          continue;
1910        }
1911        // fall through to append the byte
1912      }
1913      bos.write(bytes[i]);
1914    }
1915    if (bos.size() > 0) {
1916      parents.add(bos.toByteArray());
1917    }
1918    return parents;
1919  }
1920
1921  private static void addReplicationParent(Put put, List<RegionInfo> parents) throws IOException {
1922    byte[] value = getParentsBytes(parents);
1923    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1924      .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(REPLICATION_PARENT_QUALIFIER)
1925      .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(value).build());
1926  }
1927
1928  public static Put makePutForReplicationBarrier(RegionInfo regionInfo, long openSeqNum, long ts)
1929    throws IOException {
1930    Put put = new Put(regionInfo.getRegionName(), ts);
1931    addReplicationBarrier(put, openSeqNum);
1932    return put;
1933  }
1934
1935  /**
1936   * See class comment on SerialReplicationChecker
1937   */
1938  public static void addReplicationBarrier(Put put, long openSeqNum) throws IOException {
1939    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
1940      .setFamily(HConstants.REPLICATION_BARRIER_FAMILY).setQualifier(HConstants.SEQNUM_QUALIFIER)
1941      .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum))
1942      .build());
1943  }
1944
1945  public static Put addEmptyLocation(Put p, int replicaId) throws IOException {
1946    CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY);
1947    return p
1948      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1949        .setQualifier(getServerColumn(replicaId)).setTimestamp(p.getTimestamp())
1950        .setType(Cell.Type.Put).build())
1951      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1952        .setQualifier(getStartCodeColumn(replicaId)).setTimestamp(p.getTimestamp())
1953        .setType(Cell.Type.Put).build())
1954      .add(builder.clear().setRow(p.getRow()).setFamily(getCatalogFamily())
1955        .setQualifier(getSeqNumColumn(replicaId)).setTimestamp(p.getTimestamp())
1956        .setType(Cell.Type.Put).build());
1957  }
1958
1959  public static final class ReplicationBarrierResult {
1960    private final long[] barriers;
1961    private final RegionState.State state;
1962    private final List<byte[]> parentRegionNames;
1963
1964    ReplicationBarrierResult(long[] barriers, State state, List<byte[]> parentRegionNames) {
1965      this.barriers = barriers;
1966      this.state = state;
1967      this.parentRegionNames = parentRegionNames;
1968    }
1969
1970    public long[] getBarriers() {
1971      return barriers;
1972    }
1973
1974    public RegionState.State getState() {
1975      return state;
1976    }
1977
1978    public List<byte[]> getParentRegionNames() {
1979      return parentRegionNames;
1980    }
1981
1982    @Override
1983    public String toString() {
1984      return "ReplicationBarrierResult [barriers=" + Arrays.toString(barriers) + ", state=" + state
1985        + ", parentRegionNames="
1986        + parentRegionNames.stream().map(Bytes::toStringBinary).collect(Collectors.joining(", "))
1987        + "]";
1988    }
1989  }
1990
1991  private static long getReplicationBarrier(Cell c) {
1992    return Bytes.toLong(c.getValueArray(), c.getValueOffset(), c.getValueLength());
1993  }
1994
1995  public static long[] getReplicationBarriers(Result result) {
1996    return result.getColumnCells(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER)
1997      .stream().mapToLong(MetaTableAccessor::getReplicationBarrier).sorted().distinct().toArray();
1998  }
1999
2000  private static ReplicationBarrierResult getReplicationBarrierResult(Result result) {
2001    long[] barriers = getReplicationBarriers(result);
2002    byte[] stateBytes = result.getValue(getCatalogFamily(), getRegionStateColumn());
2003    RegionState.State state =
2004      stateBytes != null ? RegionState.State.valueOf(Bytes.toString(stateBytes)) : null;
2005    byte[] parentRegionsBytes =
2006      result.getValue(HConstants.REPLICATION_BARRIER_FAMILY, REPLICATION_PARENT_QUALIFIER);
2007    List<byte[]> parentRegionNames =
2008      parentRegionsBytes != null ? parseParentsBytes(parentRegionsBytes) : Collections.emptyList();
2009    return new ReplicationBarrierResult(barriers, state, parentRegionNames);
2010  }
2011
2012  public static ReplicationBarrierResult getReplicationBarrierResult(Connection conn,
2013    TableName tableName, byte[] row, byte[] encodedRegionName) throws IOException {
2014    byte[] metaStartKey = RegionInfo.createRegionName(tableName, row, HConstants.NINES, false);
2015    byte[] metaStopKey =
2016      RegionInfo.createRegionName(tableName, HConstants.EMPTY_START_ROW, "", false);
2017    Scan scan = new Scan().withStartRow(metaStartKey).withStopRow(metaStopKey)
2018      .addColumn(getCatalogFamily(), getRegionStateColumn())
2019      .addFamily(HConstants.REPLICATION_BARRIER_FAMILY).readAllVersions().setReversed(true)
2020      .setCaching(10);
2021    try (Table table = getMetaHTable(conn); ResultScanner scanner = table.getScanner(scan)) {
2022      for (Result result;;) {
2023        result = scanner.next();
2024        if (result == null) {
2025          return new ReplicationBarrierResult(new long[0], null, Collections.emptyList());
2026        }
2027        byte[] regionName = result.getRow();
2028        // TODO: we may look up a region which has already been split or merged so we need to check
2029        // whether the encoded name matches. Need to find a way to quit earlier when there is no
2030        // record for the given region, for now it will scan to the end of the table.
2031        if (
2032          !Bytes.equals(encodedRegionName, Bytes.toBytes(RegionInfo.encodeRegionName(regionName)))
2033        ) {
2034          continue;
2035        }
2036        return getReplicationBarrierResult(result);
2037      }
2038    }
2039  }
2040
2041  public static long[] getReplicationBarrier(Connection conn, byte[] regionName)
2042    throws IOException {
2043    try (Table table = getMetaHTable(conn)) {
2044      Result result = table.get(new Get(regionName)
2045        .addColumn(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER)
2046        .readAllVersions());
2047      return getReplicationBarriers(result);
2048    }
2049  }
2050
2051  public static List<Pair<String, Long>> getTableEncodedRegionNameAndLastBarrier(Connection conn,
2052    TableName tableName) throws IOException {
2053    List<Pair<String, Long>> list = new ArrayList<>();
2054    scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION),
2055      getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION, r -> {
2056        byte[] value =
2057          r.getValue(HConstants.REPLICATION_BARRIER_FAMILY, HConstants.SEQNUM_QUALIFIER);
2058        if (value == null) {
2059          return true;
2060        }
2061        long lastBarrier = Bytes.toLong(value);
2062        String encodedRegionName = RegionInfo.encodeRegionName(r.getRow());
2063        list.add(Pair.newPair(encodedRegionName, lastBarrier));
2064        return true;
2065      });
2066    return list;
2067  }
2068
2069  public static List<String> getTableEncodedRegionNamesForSerialReplication(Connection conn,
2070    TableName tableName) throws IOException {
2071    List<String> list = new ArrayList<>();
2072    scanMeta(conn, getTableStartRowForMeta(tableName, QueryType.REPLICATION),
2073      getTableStopRowForMeta(tableName, QueryType.REPLICATION), QueryType.REPLICATION,
2074      new FirstKeyOnlyFilter(), Integer.MAX_VALUE, r -> {
2075        list.add(RegionInfo.encodeRegionName(r.getRow()));
2076        return true;
2077      }, CatalogReplicaMode.NONE);
2078    return list;
2079  }
2080
2081  private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException {
2082    if (!METALOG.isDebugEnabled()) {
2083      return;
2084    }
2085    // Logging each mutation in separate line makes it easier to see diff between them visually
2086    // because of common starting indentation.
2087    for (Mutation mutation : mutations) {
2088      debugLogMutation(mutation);
2089    }
2090  }
2091
2092  private static void debugLogMutation(Mutation p) throws IOException {
2093    METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON());
2094  }
2095
2096  private static Put addSequenceNum(Put p, long openSeqNum, int replicaId) throws IOException {
2097    return p.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(p.getRow())
2098      .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getSeqNumColumn(replicaId))
2099      .setTimestamp(p.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(openSeqNum))
2100      .build());
2101  }
2102}