001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import edu.umd.cs.findbugs.annotations.Nullable;
021import java.io.IOException;
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.LinkedHashMap;
025import java.util.List;
026import java.util.Map;
027import java.util.NavigableMap;
028import java.util.SortedMap;
029import java.util.regex.Matcher;
030import java.util.regex.Pattern;
031import org.apache.hadoop.hbase.client.RegionInfo;
032import org.apache.hadoop.hbase.client.RegionInfoBuilder;
033import org.apache.hadoop.hbase.client.RegionReplicaUtil;
034import org.apache.hadoop.hbase.client.Result;
035import org.apache.hadoop.hbase.client.TableState;
036import org.apache.hadoop.hbase.exceptions.DeserializationException;
037import org.apache.hadoop.hbase.util.Bytes;
038import org.apache.yetus.audience.InterfaceAudience;
039import org.slf4j.Logger;
040import org.slf4j.LoggerFactory;
041
042import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
043
044/**
045 * Helper class for generating/parsing
046 * {@value org.apache.hadoop.hbase.HConstants#CATALOG_FAMILY_STR} family cells in meta table.
047 * <p/>
048 * The cells in catalog family are:
049 *
050 * <pre>
051 * For each table range ('Region'), there is a single row, formatted as:
052 * &lt;tableName&gt;,&lt;startKey&gt;,&lt;regionId&gt;,&lt;encodedRegionName&gt;.
053 * This row is the serialized regionName of the default region replica.
054 * Columns are:
055 * info:regioninfo         => contains serialized HRI for the default region replica
056 * info:server             => contains hostname:port (in string form) for the server hosting
057 *                            the default regionInfo replica
 * info:server_&lt;replicaId&gt; => contains hostname:port (in string form) for the server hosting
059 *                                 the regionInfo replica with replicaId
060 * info:serverstartcode    => contains server start code (in binary long form) for the server
061 *                            hosting the default regionInfo replica
 * info:serverstartcode_&lt;replicaId&gt; => contains server start code (in binary long form) for
063 *                                          the server hosting the regionInfo replica with
064 *                                          replicaId
065 * info:seqnumDuringOpen   => contains seqNum (in binary long form) for the region at the time
066 *                            the server opened the region with default replicaId
 * info:seqnumDuringOpen_&lt;replicaId&gt; => contains seqNum (in binary long form) for the region
068 *                                           at the time the server opened the region with
069 *                                           replicaId
070 * info:splitA             => contains a serialized HRI for the first daughter region if the
071 *                            region is split
072 * info:splitB             => contains a serialized HRI for the second daughter region if the
073 *                            region is split
074 * info:merge*             => contains a serialized HRI for a merge parent region. There will be two
075 *                            or more of these columns in a row. A row that has these columns is
076 *                            undergoing a merge and is the result of the merge. Columns listed
 *                            in merge* columns are the parents of this merged region. Example
 *                            columns: info:merge0001, info:merge0002. You may also see 'mergeA',
079 *                            and 'mergeB'. This is old form replaced by the new format that allows
080 *                            for more than two parents to be merged at a time.
081 * </pre>
082 */
083@InterfaceAudience.Private
084public class CatalogFamilyFormat {
085
  /** Logger for this class. */
  private static final Logger LOG = LoggerFactory.getLogger(CatalogFamilyFormat.class);

  /**
   * A regex for parsing server columns from meta: the literal <code>server</code>, optionally
   * followed by an underscore and exactly four hexadecimal digits (captured, underscore included,
   * as group 1). See the class javadoc for the meta layout.
   */
  private static final Pattern SERVER_COLUMN_PATTERN =
    Pattern.compile("^server(_[0-9a-fA-F]{4})?$");
091
092  /**
093   * Returns an HRI parsed from this regionName. Not all the fields of the HRI is stored in the
094   * name, so the returned object should only be used for the fields in the regionName.
095   * <p/>
096   * Since the returned object does not contain all the fields, we do not expose this method in
097   * public API, such as {@link RegionInfo} or {@link RegionInfoBuilder}.
098   */
099  public static RegionInfo parseRegionInfoFromRegionName(byte[] regionName) throws IOException {
100    byte[][] fields = RegionInfo.parseRegionName(regionName);
101    long regionId = Long.parseLong(Bytes.toString(fields[2]));
102    int replicaId = fields.length > 3 ? Integer.parseInt(Bytes.toString(fields[3]), 16) : 0;
103    return RegionInfoBuilder.newBuilder(TableName.valueOf(fields[0])).setStartKey(fields[1])
104      .setRegionId(regionId).setReplicaId(replicaId).build();
105  }
106
107  /**
108   * Returns the RegionInfo object from the column {@link HConstants#CATALOG_FAMILY} and
109   * <code>qualifier</code> of the catalog table result.
110   * @param r a Result object from the catalog table scan
111   * @param qualifier Column family qualifier
112   * @return An RegionInfo instance or null.
113   */
114  @Nullable
115  public static RegionInfo getRegionInfo(final Result r, byte[] qualifier) {
116    Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, qualifier);
117    if (cell == null) {
118      return null;
119    }
120    return RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(),
121      cell.getValueLength());
122  }
123
124  /**
125   * Returns RegionInfo object from the column
126   * HConstants.CATALOG_FAMILY:HConstants.REGIONINFO_QUALIFIER of the catalog table Result.
127   * @param data a Result object from the catalog table scan
128   * @return RegionInfo or null
129   */
130  public static RegionInfo getRegionInfo(Result data) {
131    return getRegionInfo(data, HConstants.REGIONINFO_QUALIFIER);
132  }
133
134  /**
135   * Returns the HRegionLocation parsed from the given meta row Result for the given regionInfo and
136   * replicaId. The regionInfo can be the default region info for the replica.
137   * @param r the meta row result
138   * @param regionInfo RegionInfo for default replica
139   * @param replicaId the replicaId for the HRegionLocation
140   * @return HRegionLocation parsed from the given meta row Result for the given replicaId
141   */
142  public static HRegionLocation getRegionLocation(final Result r, final RegionInfo regionInfo,
143    final int replicaId) {
144    ServerName serverName = getServerName(r, replicaId);
145    long seqNum = getSeqNumDuringOpen(r, replicaId);
146    RegionInfo replicaInfo = RegionReplicaUtil.getRegionInfoForReplica(regionInfo, replicaId);
147    return new HRegionLocation(replicaInfo, serverName, seqNum);
148  }
149
150  /**
151   * Returns an HRegionLocationList extracted from the result.
152   * @return an HRegionLocationList containing all locations for the region range or null if we
153   *         can't deserialize the result.
154   */
155  @Nullable
156  public static RegionLocations getRegionLocations(final Result r) {
157    if (r == null) {
158      return null;
159    }
160    RegionInfo regionInfo = getRegionInfo(r, HConstants.REGIONINFO_QUALIFIER);
161    if (regionInfo == null) {
162      return null;
163    }
164
165    List<HRegionLocation> locations = new ArrayList<>(1);
166    NavigableMap<byte[], NavigableMap<byte[], byte[]>> familyMap = r.getNoVersionMap();
167
168    locations.add(getRegionLocation(r, regionInfo, 0));
169
170    NavigableMap<byte[], byte[]> infoMap = familyMap.get(HConstants.CATALOG_FAMILY);
171    if (infoMap == null) {
172      return new RegionLocations(locations);
173    }
174
175    // iterate until all serverName columns are seen
176    int replicaId = 0;
177    byte[] serverColumn = getServerColumn(replicaId);
178    SortedMap<byte[], byte[]> serverMap;
179    serverMap = infoMap.tailMap(serverColumn, false);
180
181    if (serverMap.isEmpty()) {
182      return new RegionLocations(locations);
183    }
184
185    for (Map.Entry<byte[], byte[]> entry : serverMap.entrySet()) {
186      replicaId = parseReplicaIdFromServerColumn(entry.getKey());
187      if (replicaId < 0) {
188        break;
189      }
190      HRegionLocation location = getRegionLocation(r, regionInfo, replicaId);
191      // In case the region replica is newly created, it's location might be null. We usually do not
192      // have HRL's in RegionLocations object with null ServerName. They are handled as null HRLs.
193      if (location.getServerName() == null) {
194        locations.add(null);
195      } else {
196        locations.add(location);
197      }
198    }
199
200    return new RegionLocations(locations);
201  }
202
203  /**
204   * Returns a {@link ServerName} from catalog table {@link Result}.
205   * @param r Result to pull from
206   * @return A ServerName instance or null if necessary fields not found or empty.
207   */
208  @Nullable
209  public static ServerName getServerName(Result r, int replicaId) {
210    byte[] serverColumn = getServerColumn(replicaId);
211    Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, serverColumn);
212    if (cell == null || cell.getValueLength() == 0) {
213      return null;
214    }
215    String hostAndPort =
216      Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
217    byte[] startcodeColumn = getStartCodeColumn(replicaId);
218    cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, startcodeColumn);
219    if (cell == null || cell.getValueLength() == 0) {
220      return null;
221    }
222    try {
223      return ServerName.valueOf(hostAndPort,
224        Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
225    } catch (IllegalArgumentException e) {
226      LOG.error("Ignoring invalid region for server " + hostAndPort + "; cell=" + cell, e);
227      return null;
228    }
229  }
230
231  /**
232   * Returns the column qualifier for server column for replicaId
233   * @param replicaId the replicaId of the region
234   * @return a byte[] for server column qualifier
235   */
236  public static byte[] getServerColumn(int replicaId) {
237    return replicaId == 0 ? HConstants.SERVER_QUALIFIER :
238      Bytes.toBytes(HConstants.SERVER_QUALIFIER_STR + META_REPLICA_ID_DELIMITER +
239        String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
240  }
241
242  /**
243   * Returns the column qualifier for server start code column for replicaId
244   * @param replicaId the replicaId of the region
245   * @return a byte[] for server start code column qualifier
246   */
247  public static byte[] getStartCodeColumn(int replicaId) {
248    return replicaId == 0 ? HConstants.STARTCODE_QUALIFIER :
249      Bytes.toBytes(HConstants.STARTCODE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER +
250        String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
251  }
252
253  /**
254   * The latest seqnum that the server writing to meta observed when opening the region. E.g. the
255   * seqNum when the result of {@link getServerName} was written.
256   * @param r Result to pull the seqNum from
257   * @return SeqNum, or HConstants.NO_SEQNUM if there's no value written.
258   */
259  private static long getSeqNumDuringOpen(final Result r, final int replicaId) {
260    Cell cell = r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getSeqNumColumn(replicaId));
261    if (cell == null || cell.getValueLength() == 0) {
262      return HConstants.NO_SEQNUM;
263    }
264    return Bytes.toLong(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
265  }
266
267  /**
268   * Returns the column qualifier for seqNum column for replicaId
269   * @param replicaId the replicaId of the region
270   * @return a byte[] for seqNum column qualifier
271   */
272  public static byte[] getSeqNumColumn(int replicaId) {
273    return replicaId == 0 ? HConstants.SEQNUM_QUALIFIER :
274      Bytes.toBytes(HConstants.SEQNUM_QUALIFIER_STR + META_REPLICA_ID_DELIMITER +
275        String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
276  }
277
  /**
   * The delimiter for meta columns for replicaIds &gt; 0. It is placed between a column's base
   * qualifier and the formatted replica id when the per-replica qualifiers are built.
   */
  @VisibleForTesting
  static final char META_REPLICA_ID_DELIMITER = '_';
281
282  /**
283   * Parses the replicaId from the server column qualifier. See top of the class javadoc for the
284   * actual meta layout
285   * @param serverColumn the column qualifier
286   * @return an int for the replicaId
287   */
288  @VisibleForTesting
289  static int parseReplicaIdFromServerColumn(byte[] serverColumn) {
290    String serverStr = Bytes.toString(serverColumn);
291
292    Matcher matcher = SERVER_COLUMN_PATTERN.matcher(serverStr);
293    if (matcher.matches() && matcher.groupCount() > 0) {
294      String group = matcher.group(1);
295      if (group != null && group.length() > 0) {
296        return Integer.parseInt(group.substring(1), 16);
297      } else {
298        return 0;
299      }
300    }
301    return -1;
302  }
303
304  /** Returns the row key to use for this regionInfo */
305  public static byte[] getMetaKeyForRegion(RegionInfo regionInfo) {
306    return RegionReplicaUtil.getRegionInfoForDefaultReplica(regionInfo).getRegionName();
307  }
308
309  /**
310   * Returns the column qualifier for serialized region state
311   * @param replicaId the replicaId of the region
312   * @return a byte[] for state qualifier
313   */
314  public static byte[] getRegionStateColumn(int replicaId) {
315    return replicaId == 0 ? HConstants.STATE_QUALIFIER :
316      Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER +
317        String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
318  }
319
320  /**
321   * Returns the column qualifier for serialized region state
322   * @param replicaId the replicaId of the region
323   * @return a byte[] for sn column qualifier
324   */
325  public static byte[] getServerNameColumn(int replicaId) {
326    return replicaId == 0 ? HConstants.SERVERNAME_QUALIFIER :
327      Bytes.toBytes(HConstants.SERVERNAME_QUALIFIER_STR + META_REPLICA_ID_DELIMITER +
328        String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
329  }
330
331  /**
332   * Decode table state from META Result. Should contain cell from HConstants.TABLE_FAMILY
333   * @return null if not found
334   */
335  @Nullable
336  public static TableState getTableState(Result r) throws IOException {
337    Cell cell = r.getColumnLatestCell(HConstants.TABLE_FAMILY, HConstants.TABLE_STATE_QUALIFIER);
338    if (cell == null) {
339      return null;
340    }
341    try {
342      return TableState.parseFrom(TableName.valueOf(r.getRow()),
343        Arrays.copyOfRange(cell.getValueArray(), cell.getValueOffset(),
344          cell.getValueOffset() + cell.getValueLength()));
345    } catch (DeserializationException e) {
346      throw new IOException(e);
347    }
348  }
349
350  /**
351   * @return Deserialized values of &lt;qualifier,regioninfo&gt; pairs taken from column values that
352   *         match the regex 'info:merge.*' in array of <code>cells</code>.
353   */
354  @Nullable
355  public static Map<String, RegionInfo> getMergeRegionsWithName(Cell[] cells) {
356    if (cells == null) {
357      return null;
358    }
359    Map<String, RegionInfo> regionsToMerge = null;
360    for (Cell cell : cells) {
361      if (!isMergeQualifierPrefix(cell)) {
362        continue;
363      }
364      // Ok. This cell is that of a info:merge* column.
365      RegionInfo ri = RegionInfo.parseFromOrNull(cell.getValueArray(), cell.getValueOffset(),
366        cell.getValueLength());
367      if (ri != null) {
368        if (regionsToMerge == null) {
369          regionsToMerge = new LinkedHashMap<>();
370        }
371        regionsToMerge.put(Bytes.toString(CellUtil.cloneQualifier(cell)), ri);
372      }
373    }
374    return regionsToMerge;
375  }
376
377  /**
378   * @return Deserialized regioninfo values taken from column values that match the regex
379   *         'info:merge.*' in array of <code>cells</code>.
380   */
381  @Nullable
382  public static List<RegionInfo> getMergeRegions(Cell[] cells) {
383    Map<String, RegionInfo> mergeRegionsWithName = getMergeRegionsWithName(cells);
384    return (mergeRegionsWithName == null) ? null : new ArrayList<>(mergeRegionsWithName.values());
385  }
386
387  /**
388   * @return True if any merge regions present in <code>cells</code>; i.e. the column in
389   *         <code>cell</code> matches the regex 'info:merge.*'.
390   */
391  public static boolean hasMergeRegions(Cell[] cells) {
392    for (Cell cell : cells) {
393      if (isMergeQualifierPrefix(cell)) {
394        return true;
395      }
396    }
397    return false;
398  }
399
400  /**
401   * @return True if the column in <code>cell</code> matches the regex 'info:merge.*'.
402   */
403  public static boolean isMergeQualifierPrefix(Cell cell) {
404    // Check to see if has family and that qualifier starts with the merge qualifier 'merge'
405    return CellUtil.matchingFamily(cell, HConstants.CATALOG_FAMILY) &&
406      PrivateCellUtil.qualifierStartsWith(cell, HConstants.MERGE_QUALIFIER_PREFIX);
407  }
408}