001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.assignment;
019
020import java.io.IOException;
021import java.util.ArrayList;
022import java.util.Collections;
023import java.util.List;
024import java.util.SortedMap;
025import java.util.TreeMap;
026import org.apache.hadoop.hbase.Cell;
027import org.apache.hadoop.hbase.CellBuilderFactory;
028import org.apache.hadoop.hbase.CellBuilderType;
029import org.apache.hadoop.hbase.HConstants;
030import org.apache.hadoop.hbase.HRegionLocation;
031import org.apache.hadoop.hbase.MetaTableAccessor;
032import org.apache.hadoop.hbase.RegionLocations;
033import org.apache.hadoop.hbase.ServerName;
034import org.apache.hadoop.hbase.TableName;
035import org.apache.hadoop.hbase.client.Delete;
036import org.apache.hadoop.hbase.client.Mutation;
037import org.apache.hadoop.hbase.client.Put;
038import org.apache.hadoop.hbase.client.RegionInfo;
039import org.apache.hadoop.hbase.client.Result;
040import org.apache.hadoop.hbase.client.ResultScanner;
041import org.apache.hadoop.hbase.client.Scan;
042import org.apache.hadoop.hbase.client.Table;
043import org.apache.hadoop.hbase.client.TableDescriptor;
044import org.apache.hadoop.hbase.master.MasterFileSystem;
045import org.apache.hadoop.hbase.master.MasterServices;
046import org.apache.hadoop.hbase.master.RegionState.State;
047import org.apache.hadoop.hbase.procedure2.Procedure;
048import org.apache.hadoop.hbase.procedure2.util.StringUtils;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
051import org.apache.hadoop.hbase.wal.WALSplitUtil;
052import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
053import org.apache.hadoop.hbase.zookeeper.ZKUtil;
054import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
055import org.apache.yetus.audience.InterfaceAudience;
056import org.apache.zookeeper.KeeperException;
057import org.slf4j.Logger;
058import org.slf4j.LoggerFactory;
059
060import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
061
062/**
063 * Store Region State to hbase:meta table.
064 */
065@InterfaceAudience.Private
066public class RegionStateStore {
067  private static final Logger LOG = LoggerFactory.getLogger(RegionStateStore.class);
068  private static final Logger METALOG = LoggerFactory.getLogger("org.apache.hadoop.hbase.META");
069
070  /** The delimiter for meta columns for replicaIds > 0 */
071  protected static final char META_REPLICA_ID_DELIMITER = '_';
072
073  private final MasterServices master;
074
  /**
   * Creates a store bound to the given master services. The reference is retained and used by
   * the other methods of this class to obtain the cluster connection, the ZooKeeper watcher, the
   * master file system, the configuration and the table descriptors.
   * @param master the master services this store delegates to
   */
  public RegionStateStore(final MasterServices master) {
    this.master = master;
  }
078
  /**
   * Callback invoked once per region replica found in an hbase:meta row while the row is being
   * visited by this store.
   */
  public interface RegionStateVisitor {
    /**
     * Receives the state of a single region replica as read from hbase:meta.
     * @param result the hbase:meta row the replica was read from
     * @param regionInfo the region (replica) described by the row
     * @param state the state parsed from the row's state column, or null if it is missing or
     *          unparseable
     * @param regionLocation the target server name read from the row for this replica
     * @param lastHost the server name carried by the replica's region location in the row
     * @param openSeqNum the sequence number carried by the replica's region location in the row
     */
    void visitRegionState(Result result, RegionInfo regionInfo, State state,
      ServerName regionLocation, ServerName lastHost, long openSeqNum);
  }
083
084  public void visitMeta(final RegionStateVisitor visitor) throws IOException {
085    MetaTableAccessor.fullScanRegions(master.getConnection(), new MetaTableAccessor.Visitor() {
086      final boolean isDebugEnabled = LOG.isDebugEnabled();
087
088      @Override
089      public boolean visit(final Result r) throws IOException {
090        if (r !=  null && !r.isEmpty()) {
091          long st = 0;
092          if (LOG.isTraceEnabled()) {
093            st = System.currentTimeMillis();
094          }
095          visitMetaEntry(visitor, r);
096          if (LOG.isTraceEnabled()) {
097            long et = System.currentTimeMillis();
098            LOG.trace("[T] LOAD META PERF " + StringUtils.humanTimeDiff(et - st));
099          }
100        } else if (isDebugEnabled) {
101          LOG.debug("NULL result from meta - ignoring but this is strange.");
102        }
103        return true;
104      }
105    });
106  }
107
108  /**
109   * Queries META table for the passed region encoded name, delegating action upon results to the
110   * <code>RegionStateVisitor</code> passed as second parameter.
111   * @param regionEncodedName encoded name for the Region we want to query META for.
112   * @param visitor The <code>RegionStateVisitor</code> instance to react over the query results.
113   * @throws IOException If some error occurs while querying META or parsing results.
114   */
115  public void visitMetaForRegion(final String regionEncodedName, final RegionStateVisitor visitor)
116    throws IOException {
117    Result result =
118      MetaTableAccessor.scanByRegionEncodedName(master.getConnection(), regionEncodedName);
119    if (result != null) {
120      visitMetaEntry(visitor, result);
121    }
122  }
123
124  private void visitMetaEntry(final RegionStateVisitor visitor, final Result result)
125      throws IOException {
126    final RegionLocations rl = MetaTableAccessor.getRegionLocations(result);
127    if (rl == null) return;
128
129    final HRegionLocation[] locations = rl.getRegionLocations();
130    if (locations == null) return;
131
132    for (int i = 0; i < locations.length; ++i) {
133      final HRegionLocation hrl = locations[i];
134      if (hrl == null) continue;
135
136      final RegionInfo regionInfo = hrl.getRegion();
137      if (regionInfo == null) continue;
138
139      final int replicaId = regionInfo.getReplicaId();
140      final State state = getRegionState(result, regionInfo);
141
142      final ServerName lastHost = hrl.getServerName();
143      ServerName regionLocation = MetaTableAccessor.getTargetServerName(result, replicaId);
144      final long openSeqNum = hrl.getSeqNum();
145
146      // TODO: move under trace, now is visible for debugging
147      LOG.info(
148        "Load hbase:meta entry region={}, regionState={}, lastHost={}, " +
149          "regionLocation={}, openSeqNum={}",
150        regionInfo.getEncodedName(), state, lastHost, regionLocation, openSeqNum);
151      visitor.visitRegionState(result, regionInfo, state, regionLocation, lastHost, openSeqNum);
152    }
153  }
154
155  void updateRegionLocation(RegionStateNode regionStateNode) throws IOException {
156    if (regionStateNode.getRegionInfo().isMetaRegion()) {
157      updateMetaLocation(regionStateNode.getRegionInfo(), regionStateNode.getRegionLocation(),
158        regionStateNode.getState());
159    } else {
160      long openSeqNum = regionStateNode.getState() == State.OPEN ? regionStateNode.getOpenSeqNum() :
161        HConstants.NO_SEQNUM;
162      updateUserRegionLocation(regionStateNode.getRegionInfo(), regionStateNode.getState(),
163        regionStateNode.getRegionLocation(), openSeqNum,
164        // The regionStateNode may have no procedure in a test scenario; allow for this.
165        regionStateNode.getProcedure() != null ? regionStateNode.getProcedure().getProcId() :
166          Procedure.NO_PROC_ID);
167    }
168  }
169
170  private void updateMetaLocation(RegionInfo regionInfo, ServerName serverName, State state)
171    throws IOException {
172    try {
173      MetaTableLocator.setMetaLocation(master.getZooKeeper(), serverName, regionInfo.getReplicaId(),
174        state);
175    } catch (KeeperException e) {
176      throw new IOException(e);
177    }
178  }
179
180  private void updateUserRegionLocation(RegionInfo regionInfo, State state,
181    ServerName regionLocation, long openSeqNum, long pid) throws IOException {
182    long time = EnvironmentEdgeManager.currentTime();
183    final int replicaId = regionInfo.getReplicaId();
184    final Put put = new Put(MetaTableAccessor.getMetaKeyForRegion(regionInfo), time);
185    MetaTableAccessor.addRegionInfo(put, regionInfo);
186    final StringBuilder info =
187      new StringBuilder("pid=").append(pid).append(" updating hbase:meta row=")
188        .append(regionInfo.getEncodedName()).append(", regionState=").append(state);
189    if (openSeqNum >= 0) {
190      Preconditions.checkArgument(state == State.OPEN && regionLocation != null,
191        "Open region should be on a server");
192      MetaTableAccessor.addLocation(put, regionLocation, openSeqNum, replicaId);
193      // only update replication barrier for default replica
194      if (regionInfo.getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID &&
195        hasGlobalReplicationScope(regionInfo.getTable())) {
196        MetaTableAccessor.addReplicationBarrier(put, openSeqNum);
197        info.append(", repBarrier=").append(openSeqNum);
198      }
199      info.append(", openSeqNum=").append(openSeqNum);
200      info.append(", regionLocation=").append(regionLocation);
201    } else if (regionLocation != null) {
202      // Ideally, if no regionLocation, write null to the hbase:meta but this will confuse clients
203      // currently; they want a server to hit. TODO: Make clients wait if no location.
204      put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
205        .setFamily(HConstants.CATALOG_FAMILY)
206        .setQualifier(MetaTableAccessor.getServerNameColumn(replicaId))
207        .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put)
208        .setValue(Bytes.toBytes(regionLocation.getServerName())).build());
209      info.append(", regionLocation=").append(regionLocation);
210    }
211    put.add(CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY).setRow(put.getRow())
212      .setFamily(HConstants.CATALOG_FAMILY).setQualifier(getStateColumn(replicaId))
213      .setTimestamp(put.getTimestamp()).setType(Cell.Type.Put).setValue(Bytes.toBytes(state.name()))
214      .build());
215    LOG.info(info.toString());
216    updateRegionLocation(regionInfo, state, put);
217  }
218
219  private void updateRegionLocation(RegionInfo regionInfo, State state, Put put)
220      throws IOException {
221    try (Table table = getMetaTable()) {
222      debugLogMutation(put);
223      table.put(put);
224    } catch (IOException e) {
225      // TODO: Revist!!!! Means that if a server is loaded, then we will abort our host!
226      // In tests we abort the Master!
227      String msg = String.format("FAILED persisting region=%s state=%s",
228        regionInfo.getShortNameToLog(), state);
229      LOG.error(msg, e);
230      master.abort(msg, e);
231      throw e;
232    }
233  }
234
235  private long getOpenSeqNumForParentRegion(RegionInfo region) throws IOException {
236    MasterFileSystem fs = master.getMasterFileSystem();
237    long maxSeqId = WALSplitUtil.getMaxRegionSequenceId(master.getConfiguration(), region,
238      fs::getFileSystem, fs::getWALFileSystem);
239    return maxSeqId > 0 ? maxSeqId + 1 : HConstants.NO_SEQNUM;
240  }
241
242  private Table getMetaTable() throws IOException {
243    return master.getConnection().getTable(TableName.META_TABLE_NAME);
244  }
245
246  // ============================================================================================
247  // Update Region Splitting State helpers
248  // ============================================================================================
249  public void splitRegion(RegionInfo parent, RegionInfo hriA, RegionInfo hriB,
250      ServerName serverName) throws IOException {
251    TableDescriptor htd = getTableDescriptor(parent.getTable());
252    long parentOpenSeqNum = HConstants.NO_SEQNUM;
253    if (htd.hasGlobalReplicationScope()) {
254      parentOpenSeqNum = getOpenSeqNumForParentRegion(parent);
255    }
256    MetaTableAccessor.splitRegion(master.getConnection(), parent, parentOpenSeqNum, hriA, hriB,
257      serverName, getRegionReplication(htd));
258  }
259
260  // ============================================================================================
261  // Update Region Merging State helpers
262  // ============================================================================================
263  public void mergeRegions(RegionInfo child, RegionInfo [] parents, ServerName serverName)
264      throws IOException {
265    TableDescriptor htd = getTableDescriptor(child.getTable());
266    boolean globalScope = htd.hasGlobalReplicationScope();
267    SortedMap<RegionInfo, Long> parentSeqNums = new TreeMap<>();
268    for (RegionInfo ri: parents) {
269      parentSeqNums.put(ri, globalScope? getOpenSeqNumForParentRegion(ri): -1);
270    }
271    MetaTableAccessor.mergeRegions(master.getConnection(), child, parentSeqNums,
272        serverName, getRegionReplication(htd));
273  }
274
275  // ============================================================================================
276  // Delete Region State helpers
277  // ============================================================================================
278  public void deleteRegion(final RegionInfo regionInfo) throws IOException {
279    deleteRegions(Collections.singletonList(regionInfo));
280  }
281
282  public void deleteRegions(final List<RegionInfo> regions) throws IOException {
283    MetaTableAccessor.deleteRegionInfos(master.getConnection(), regions);
284  }
285
286  private Scan getScanForUpdateRegionReplicas(TableName tableName) {
287    return MetaTableAccessor.getScanForTableName(master.getConfiguration(), tableName)
288      .addColumn(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
289  }
290
291  public void removeRegionReplicas(TableName tableName, int oldReplicaCount, int newReplicaCount)
292    throws IOException {
293    if (TableName.isMetaTableName(tableName)) {
294      ZKWatcher zk = master.getZooKeeper();
295      try {
296        for (int i = newReplicaCount; i < oldReplicaCount; i++) {
297          ZKUtil.deleteNode(zk, zk.getZNodePaths().getZNodeForReplica(i));
298        }
299      } catch (KeeperException e) {
300        throw new IOException(e);
301      }
302    } else {
303      Scan scan = getScanForUpdateRegionReplicas(tableName);
304      List<Delete> deletes = new ArrayList<>();
305      long now = EnvironmentEdgeManager.currentTime();
306      try (Table metaTable = getMetaTable(); ResultScanner scanner = metaTable.getScanner(scan)) {
307        for (;;) {
308          Result result = scanner.next();
309          if (result == null) {
310            break;
311          }
312          RegionInfo primaryRegionInfo = MetaTableAccessor.getRegionInfo(result);
313          if (primaryRegionInfo == null || primaryRegionInfo.isSplitParent()) {
314            continue;
315          }
316          Delete delete = new Delete(result.getRow());
317          for (int i = newReplicaCount; i < oldReplicaCount; i++) {
318            delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerColumn(i),
319              now);
320            delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getSeqNumColumn(i),
321              now);
322            delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getStartCodeColumn(i),
323              now);
324            delete.addColumns(HConstants.CATALOG_FAMILY, MetaTableAccessor.getServerNameColumn(i),
325              now);
326            delete.addColumns(HConstants.CATALOG_FAMILY,
327              MetaTableAccessor.getRegionStateColumn(i), now);
328          }
329          deletes.add(delete);
330        }
331        debugLogMutations(deletes);
332        metaTable.delete(deletes);
333      }
334    }
335  }
336
337  // ==========================================================================
338  // Table Descriptors helpers
339  // ==========================================================================
340  private boolean hasGlobalReplicationScope(TableName tableName) throws IOException {
341    return hasGlobalReplicationScope(getTableDescriptor(tableName));
342  }
343
344  private boolean hasGlobalReplicationScope(TableDescriptor htd) {
345    return htd != null ? htd.hasGlobalReplicationScope() : false;
346  }
347
348  private int getRegionReplication(TableDescriptor htd) {
349    return htd != null ? htd.getRegionReplication() : 1;
350  }
351
352  private TableDescriptor getTableDescriptor(TableName tableName) throws IOException {
353    return master.getTableDescriptors().get(tableName);
354  }
355
356  // ==========================================================================
357  // Region State
358  // ==========================================================================
359
360  /**
361   * Pull the region state from a catalog table {@link Result}.
362   * @return the region state, or null if unknown.
363   */
364  public static State getRegionState(final Result r, RegionInfo regionInfo) {
365    Cell cell =
366      r.getColumnLatestCell(HConstants.CATALOG_FAMILY, getStateColumn(regionInfo.getReplicaId()));
367    if (cell == null || cell.getValueLength() == 0) {
368      return null;
369    }
370
371    String state =
372      Bytes.toString(cell.getValueArray(), cell.getValueOffset(), cell.getValueLength());
373    try {
374      return State.valueOf(state);
375    } catch (IllegalArgumentException e) {
376      LOG.warn(
377        "BAD value {} in hbase:meta info:state column for region {} , " +
378          "Consider using HBCK2 setRegionState ENCODED_REGION_NAME STATE",
379        state, regionInfo.getEncodedName());
380      return null;
381    }
382  }
383
384  private static byte[] getStateColumn(int replicaId) {
385    return replicaId == 0 ? HConstants.STATE_QUALIFIER :
386      Bytes.toBytes(HConstants.STATE_QUALIFIER_STR + META_REPLICA_ID_DELIMITER +
387        String.format(RegionInfo.REPLICA_ID_FORMAT, replicaId));
388  }
389
390  private static void debugLogMutations(List<? extends Mutation> mutations) throws IOException {
391    if (!METALOG.isDebugEnabled()) {
392      return;
393    }
394    // Logging each mutation in separate line makes it easier to see diff between them visually
395    // because of common starting indentation.
396    for (Mutation mutation : mutations) {
397      debugLogMutation(mutation);
398    }
399  }
400
401  private static void debugLogMutation(Mutation p) throws IOException {
402    METALOG.debug("{} {}", p.getClass().getSimpleName(), p.toJSON());
403  }
404}