001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.client;
019
020import static java.util.stream.Collectors.toList;
021import static org.apache.hadoop.hbase.HConstants.EMPTY_END_ROW;
022import static org.apache.hadoop.hbase.HConstants.EMPTY_START_ROW;
023import static org.apache.hadoop.hbase.util.FutureUtils.addListener;
024
025import java.io.IOException;
026import java.lang.reflect.UndeclaredThrowableException;
027import java.net.InetAddress;
028import java.net.UnknownHostException;
029import java.util.Arrays;
030import java.util.List;
031import java.util.concurrent.CompletableFuture;
032import java.util.concurrent.ExecutorService;
033import java.util.concurrent.ThreadLocalRandom;
034import java.util.concurrent.TimeUnit;
035import java.util.function.Function;
036import org.apache.hadoop.conf.Configuration;
037import org.apache.hadoop.hbase.Cell;
038import org.apache.hadoop.hbase.CellComparator;
039import org.apache.hadoop.hbase.HConstants;
040import org.apache.hadoop.hbase.PrivateCellUtil;
041import org.apache.hadoop.hbase.RegionLocations;
042import org.apache.hadoop.hbase.ServerName;
043import org.apache.hadoop.hbase.TableName;
044import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
045import org.apache.hadoop.hbase.ipc.HBaseRpcController;
046import org.apache.hadoop.hbase.security.User;
047import org.apache.hadoop.hbase.security.UserProvider;
048import org.apache.hadoop.hbase.util.Bytes;
049import org.apache.hadoop.hbase.util.ReflectionUtils;
050import org.apache.hadoop.ipc.RemoteException;
051import org.apache.hadoop.net.DNS;
052import org.apache.yetus.audience.InterfaceAudience;
053import org.slf4j.Logger;
054import org.slf4j.LoggerFactory;
055
056import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
057import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
058import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
059import org.apache.hbase.thirdparty.io.netty.util.Timer;
060
061import org.apache.hadoop.hbase.shaded.protobuf.ResponseConverter;
062import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService;
063import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ClientService;
064import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ScanResponse;
065import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterService;
066
067/**
068 * Utility used by client connections.
069 */
070@InterfaceAudience.Private
071public final class ConnectionUtils {
072
073  private static final Logger LOG = LoggerFactory.getLogger(ConnectionUtils.class);
074
075  private ConnectionUtils() {
076  }
077
078  /**
079   * Calculate pause time. Built on {@link HConstants#RETRY_BACKOFF}.
080   * @param pause time to pause
081   * @param tries amount of tries
082   * @return How long to wait after <code>tries</code> retries
083   */
084  public static long getPauseTime(final long pause, final int tries) {
085    int ntries = tries;
086    if (ntries >= HConstants.RETRY_BACKOFF.length) {
087      ntries = HConstants.RETRY_BACKOFF.length - 1;
088    }
089    if (ntries < 0) {
090      ntries = 0;
091    }
092
093    long normalPause = pause * HConstants.RETRY_BACKOFF[ntries];
094    // 1% possible jitter
095    long jitter = (long) (normalPause * ThreadLocalRandom.current().nextFloat() * 0.01f);
096    return normalPause + jitter;
097  }
098
099  /**
100   * @param conn The connection for which to replace the generator.
101   * @param cnm Replaces the nonce generator used, for testing.
102   * @return old nonce generator.
103   */
104  public static NonceGenerator injectNonceGeneratorForTesting(ClusterConnection conn,
105      NonceGenerator cnm) {
106    return ConnectionImplementation.injectNonceGeneratorForTesting(conn, cnm);
107  }
108
109  /**
110   * Changes the configuration to set the number of retries needed when using Connection internally,
111   * e.g. for updating catalog tables, etc. Call this method before we create any Connections.
112   * @param c The Configuration instance to set the retries into.
113   * @param log Used to log what we set in here.
114   */
115  public static void setServerSideHConnectionRetriesConfig(final Configuration c, final String sn,
116      final Logger log) {
117    // TODO: Fix this. Not all connections from server side should have 10 times the retries.
118    int hcRetries = c.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
119      HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
120    // Go big. Multiply by 10. If we can't get to meta after this many retries
121    // then something seriously wrong.
122    int serversideMultiplier = c.getInt(HConstants.HBASE_CLIENT_SERVERSIDE_RETRIES_MULTIPLIER,
123      HConstants.DEFAULT_HBASE_CLIENT_SERVERSIDE_RETRIES_MULTIPLIER);
124    int retries = hcRetries * serversideMultiplier;
125    c.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, retries);
126    log.info(sn + " server-side Connection retries=" + retries);
127  }
128
129  /**
130   * A ClusterConnection that will short-circuit RPC making direct invocations against the localhost
131   * if the invocation target is 'this' server; save on network and protobuf invocations.
132   */
133  // TODO This has to still do PB marshalling/unmarshalling stuff. Check how/whether we can avoid.
134  @VisibleForTesting // Class is visible so can assert we are short-circuiting when expected.
135  public static class ShortCircuitingClusterConnection extends ConnectionImplementation {
136    private final ServerName serverName;
137    private final AdminService.BlockingInterface localHostAdmin;
138    private final ClientService.BlockingInterface localHostClient;
139
140    private ShortCircuitingClusterConnection(Configuration conf, ExecutorService pool, User user,
141        ServerName serverName, AdminService.BlockingInterface admin,
142        ClientService.BlockingInterface client) throws IOException {
143      super(conf, pool, user);
144      this.serverName = serverName;
145      this.localHostAdmin = admin;
146      this.localHostClient = client;
147    }
148
149    @Override
150    public AdminService.BlockingInterface getAdmin(ServerName sn) throws IOException {
151      return serverName.equals(sn) ? this.localHostAdmin : super.getAdmin(sn);
152    }
153
154    @Override
155    public ClientService.BlockingInterface getClient(ServerName sn) throws IOException {
156      return serverName.equals(sn) ? this.localHostClient : super.getClient(sn);
157    }
158
159    @Override
160    public MasterKeepAliveConnection getMaster() throws IOException {
161      if (this.localHostClient instanceof MasterService.BlockingInterface) {
162        return new ShortCircuitMasterConnection(
163          (MasterService.BlockingInterface) this.localHostClient);
164      }
165      return super.getMaster();
166    }
167  }
168
169  /**
170   * Creates a short-circuit connection that can bypass the RPC layer (serialization,
171   * deserialization, networking, etc..) when talking to a local server.
172   * @param conf the current configuration
173   * @param pool the thread pool to use for batch operations
174   * @param user the user the connection is for
175   * @param serverName the local server name
176   * @param admin the admin interface of the local server
177   * @param client the client interface of the local server
178   * @return an short-circuit connection.
179   * @throws IOException if IO failure occurred
180   */
181  public static ClusterConnection createShortCircuitConnection(final Configuration conf,
182      ExecutorService pool, User user, final ServerName serverName,
183      final AdminService.BlockingInterface admin, final ClientService.BlockingInterface client)
184      throws IOException {
185    if (user == null) {
186      user = UserProvider.instantiate(conf).getCurrent();
187    }
188    return new ShortCircuitingClusterConnection(conf, pool, user, serverName, admin, client);
189  }
190
191  /**
192   * Setup the connection class, so that it will not depend on master being online. Used for testing
193   * @param conf configuration to set
194   */
195  @VisibleForTesting
196  public static void setupMasterlessConnection(Configuration conf) {
197    conf.set(ClusterConnection.HBASE_CLIENT_CONNECTION_IMPL, MasterlessConnection.class.getName());
198  }
199
200  /**
201   * Some tests shut down the master. But table availability is a master RPC which is performed on
202   * region re-lookups.
203   */
204  static class MasterlessConnection extends ConnectionImplementation {
205    MasterlessConnection(Configuration conf, ExecutorService pool, User user) throws IOException {
206      super(conf, pool, user);
207    }
208
209    @Override
210    public boolean isTableDisabled(TableName tableName) throws IOException {
211      // treat all tables as enabled
212      return false;
213    }
214  }
215
216  /**
217   * Return retires + 1. The returned value will be in range [1, Integer.MAX_VALUE].
218   */
219  static int retries2Attempts(int retries) {
220    return Math.max(1, retries == Integer.MAX_VALUE ? Integer.MAX_VALUE : retries + 1);
221  }
222
223  /**
224   * Get a unique key for the rpc stub to the given server.
225   */
226  static String getStubKey(String serviceName, ServerName serverName, boolean hostnameCanChange) {
227    // Sometimes, servers go down and they come back up with the same hostname but a different
228    // IP address. Force a resolution of the rsHostname by trying to instantiate an
229    // InetSocketAddress, and this way we will rightfully get a new stubKey.
230    // Also, include the hostname in the key so as to take care of those cases where the
231    // DNS name is different but IP address remains the same.
232    String hostname = serverName.getHostname();
233    int port = serverName.getPort();
234    if (hostnameCanChange) {
235      try {
236        InetAddress ip = InetAddress.getByName(hostname);
237        return serviceName + "@" + hostname + "-" + ip.getHostAddress() + ":" + port;
238      } catch (UnknownHostException e) {
239        LOG.warn("Can not resolve " + hostname + ", please check your network", e);
240      }
241    }
242    return serviceName + "@" + hostname + ":" + port;
243  }
244
245  static void checkHasFamilies(Mutation mutation) {
246    Preconditions.checkArgument(mutation.numFamilies() > 0,
247      "Invalid arguments to %s, zero columns specified", mutation.toString());
248  }
249
250  /** Dummy nonce generator for disabled nonces. */
251  static final NonceGenerator NO_NONCE_GENERATOR = new NonceGenerator() {
252
253    @Override
254    public long newNonce() {
255      return HConstants.NO_NONCE;
256    }
257
258    @Override
259    public long getNonceGroup() {
260      return HConstants.NO_NONCE;
261    }
262  };
263
264  // A byte array in which all elements are the max byte, and it is used to
265  // construct closest front row
266  static final byte[] MAX_BYTE_ARRAY = Bytes.createMaxByteArray(9);
267
268  /**
269   * Create the closest row after the specified row
270   */
271  static byte[] createClosestRowAfter(byte[] row) {
272    return Arrays.copyOf(row, row.length + 1);
273  }
274
275  /**
276   * Create a row before the specified row and very close to the specified row.
277   */
278  static byte[] createCloseRowBefore(byte[] row) {
279    if (row.length == 0) {
280      return MAX_BYTE_ARRAY;
281    }
282    if (row[row.length - 1] == 0) {
283      return Arrays.copyOf(row, row.length - 1);
284    } else {
285      byte[] nextRow = new byte[row.length + MAX_BYTE_ARRAY.length];
286      System.arraycopy(row, 0, nextRow, 0, row.length - 1);
287      nextRow[row.length - 1] = (byte) ((row[row.length - 1] & 0xFF) - 1);
288      System.arraycopy(MAX_BYTE_ARRAY, 0, nextRow, row.length, MAX_BYTE_ARRAY.length);
289      return nextRow;
290    }
291  }
292
293  static boolean isEmptyStartRow(byte[] row) {
294    return Bytes.equals(row, EMPTY_START_ROW);
295  }
296
297  static boolean isEmptyStopRow(byte[] row) {
298    return Bytes.equals(row, EMPTY_END_ROW);
299  }
300
301  static void resetController(HBaseRpcController controller, long timeoutNs) {
302    controller.reset();
303    if (timeoutNs >= 0) {
304      controller.setCallTimeout(
305        (int) Math.min(Integer.MAX_VALUE, TimeUnit.NANOSECONDS.toMillis(timeoutNs)));
306    }
307  }
308
309  static Throwable translateException(Throwable t) {
310    if (t instanceof UndeclaredThrowableException && t.getCause() != null) {
311      t = t.getCause();
312    }
313    if (t instanceof RemoteException) {
314      t = ((RemoteException) t).unwrapRemoteException();
315    }
316    if (t instanceof ServiceException && t.getCause() != null) {
317      t = translateException(t.getCause());
318    }
319    return t;
320  }
321
322  static long calcEstimatedSize(Result rs) {
323    long estimatedHeapSizeOfResult = 0;
324    // We don't make Iterator here
325    for (Cell cell : rs.rawCells()) {
326      estimatedHeapSizeOfResult += PrivateCellUtil.estimatedSizeOfCell(cell);
327    }
328    return estimatedHeapSizeOfResult;
329  }
330
331  static Result filterCells(Result result, Cell keepCellsAfter) {
332    if (keepCellsAfter == null) {
333      // do not need to filter
334      return result;
335    }
336    // not the same row
337    if (!PrivateCellUtil.matchingRows(keepCellsAfter, result.getRow(), 0, result.getRow().length)) {
338      return result;
339    }
340    Cell[] rawCells = result.rawCells();
341    int index = Arrays.binarySearch(rawCells, keepCellsAfter,
342      CellComparator.getInstance()::compareWithoutRow);
343    if (index < 0) {
344      index = -index - 1;
345    } else {
346      index++;
347    }
348    if (index == 0) {
349      return result;
350    }
351    if (index == rawCells.length) {
352      return null;
353    }
354    return Result.create(Arrays.copyOfRange(rawCells, index, rawCells.length), null,
355      result.isStale(), result.mayHaveMoreCellsInRow());
356  }
357
358  // Add a delta to avoid timeout immediately after a retry sleeping.
359  static final long SLEEP_DELTA_NS = TimeUnit.MILLISECONDS.toNanos(1);
360
361  static Get toCheckExistenceOnly(Get get) {
362    if (get.isCheckExistenceOnly()) {
363      return get;
364    }
365    return ReflectionUtils.newInstance(get.getClass(), get).setCheckExistenceOnly(true);
366  }
367
368  static List<Get> toCheckExistenceOnly(List<Get> gets) {
369    return gets.stream().map(ConnectionUtils::toCheckExistenceOnly).collect(toList());
370  }
371
372  static RegionLocateType getLocateType(Scan scan) {
373    if (scan.isReversed()) {
374      if (isEmptyStartRow(scan.getStartRow())) {
375        return RegionLocateType.BEFORE;
376      } else {
377        return scan.includeStartRow() ? RegionLocateType.CURRENT : RegionLocateType.BEFORE;
378      }
379    } else {
380      return scan.includeStartRow() ? RegionLocateType.CURRENT : RegionLocateType.AFTER;
381    }
382  }
383
384  static boolean noMoreResultsForScan(Scan scan, RegionInfo info) {
385    if (isEmptyStopRow(info.getEndKey())) {
386      return true;
387    }
388    if (isEmptyStopRow(scan.getStopRow())) {
389      return false;
390    }
391    int c = Bytes.compareTo(info.getEndKey(), scan.getStopRow());
392    // 1. if our stop row is less than the endKey of the region
393    // 2. if our stop row is equal to the endKey of the region and we do not include the stop row
394    // for scan.
395    return c > 0 || (c == 0 && !scan.includeStopRow());
396  }
397
398  static boolean noMoreResultsForReverseScan(Scan scan, RegionInfo info) {
399    if (isEmptyStartRow(info.getStartKey())) {
400      return true;
401    }
402    if (isEmptyStopRow(scan.getStopRow())) {
403      return false;
404    }
405    // no need to test the inclusive of the stop row as the start key of a region is included in
406    // the region.
407    return Bytes.compareTo(info.getStartKey(), scan.getStopRow()) <= 0;
408  }
409
410  static <T> CompletableFuture<List<T>> allOf(List<CompletableFuture<T>> futures) {
411    return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
412      .thenApply(v -> futures.stream().map(f -> f.getNow(null)).collect(toList()));
413  }
414
415  public static ScanResultCache createScanResultCache(Scan scan) {
416    if (scan.getAllowPartialResults()) {
417      return new AllowPartialScanResultCache();
418    } else if (scan.getBatch() > 0) {
419      return new BatchScanResultCache(scan.getBatch());
420    } else {
421      return new CompleteScanResultCache();
422    }
423  }
424
425  private static final String MY_ADDRESS = getMyAddress();
426
427  private static String getMyAddress() {
428    try {
429      return DNS.getDefaultHost("default", "default");
430    } catch (UnknownHostException uhe) {
431      LOG.error("cannot determine my address", uhe);
432      return null;
433    }
434  }
435
436  static boolean isRemote(String host) {
437    return !host.equalsIgnoreCase(MY_ADDRESS);
438  }
439
440  static void incRPCCallsMetrics(ScanMetrics scanMetrics, boolean isRegionServerRemote) {
441    if (scanMetrics == null) {
442      return;
443    }
444    scanMetrics.countOfRPCcalls.incrementAndGet();
445    if (isRegionServerRemote) {
446      scanMetrics.countOfRemoteRPCcalls.incrementAndGet();
447    }
448  }
449
450  static void incRPCRetriesMetrics(ScanMetrics scanMetrics, boolean isRegionServerRemote) {
451    if (scanMetrics == null) {
452      return;
453    }
454    scanMetrics.countOfRPCRetries.incrementAndGet();
455    if (isRegionServerRemote) {
456      scanMetrics.countOfRemoteRPCRetries.incrementAndGet();
457    }
458  }
459
460  static void updateResultsMetrics(ScanMetrics scanMetrics, Result[] rrs,
461      boolean isRegionServerRemote) {
462    if (scanMetrics == null || rrs == null || rrs.length == 0) {
463      return;
464    }
465    long resultSize = 0;
466    for (Result rr : rrs) {
467      for (Cell cell : rr.rawCells()) {
468        resultSize += PrivateCellUtil.estimatedSerializedSizeOf(cell);
469      }
470    }
471    scanMetrics.countOfBytesInResults.addAndGet(resultSize);
472    if (isRegionServerRemote) {
473      scanMetrics.countOfBytesInRemoteResults.addAndGet(resultSize);
474    }
475  }
476
477  /**
478   * Use the scan metrics returned by the server to add to the identically named counters in the
479   * client side metrics. If a counter does not exist with the same name as the server side metric,
480   * the attempt to increase the counter will fail.
481   */
482  static void updateServerSideMetrics(ScanMetrics scanMetrics, ScanResponse response) {
483    if (scanMetrics == null || response == null || !response.hasScanMetrics()) {
484      return;
485    }
486    ResponseConverter.getScanMetrics(response).forEach(scanMetrics::addToCounter);
487  }
488
489  static void incRegionCountMetrics(ScanMetrics scanMetrics) {
490    if (scanMetrics == null) {
491      return;
492    }
493    scanMetrics.countOfRegions.incrementAndGet();
494  }
495
496  /**
497   * Connect the two futures, if the src future is done, then mark the dst future as done. And if
498   * the dst future is done, then cancel the src future. This is used for timeline consistent read.
499   */
500  private static <T> void connect(CompletableFuture<T> srcFuture, CompletableFuture<T> dstFuture) {
501    addListener(srcFuture, (r, e) -> {
502      if (e != null) {
503        dstFuture.completeExceptionally(e);
504      } else {
505        dstFuture.complete(r);
506      }
507    });
508    // The cancellation may be a dummy one as the dstFuture may be completed by this srcFuture.
509    // Notice that this is a bit tricky, as the execution chain maybe 'complete src -> complete dst
510    // -> cancel src', for now it seems to be fine, as the will use CAS to set the result first in
511    // CompletableFuture. If later this causes problems, we could use whenCompleteAsync to break the
512    // tie.
513    addListener(dstFuture, (r, e) -> srcFuture.cancel(false));
514  }
515
516  private static <T> void sendRequestsToSecondaryReplicas(
517      Function<Integer, CompletableFuture<T>> requestReplica, RegionLocations locs,
518      CompletableFuture<T> future) {
519    if (future.isDone()) {
520      // do not send requests to secondary replicas if the future is done, i.e, the primary request
521      // has already been finished.
522      return;
523    }
524    for (int replicaId = 1, n = locs.size(); replicaId < n; replicaId++) {
525      CompletableFuture<T> secondaryFuture = requestReplica.apply(replicaId);
526      connect(secondaryFuture, future);
527    }
528  }
529
530  static <T> CompletableFuture<T> timelineConsistentRead(AsyncRegionLocator locator,
531      TableName tableName, Query query, byte[] row, RegionLocateType locateType,
532      Function<Integer, CompletableFuture<T>> requestReplica, long rpcTimeoutNs,
533      long primaryCallTimeoutNs, Timer retryTimer) {
534    if (query.getConsistency() == Consistency.STRONG) {
535      return requestReplica.apply(RegionReplicaUtil.DEFAULT_REPLICA_ID);
536    }
537    // user specifies a replica id explicitly, just send request to the specific replica
538    if (query.getReplicaId() >= 0) {
539      return requestReplica.apply(query.getReplicaId());
540    }
541    // Timeline consistent read, where we may send requests to other region replicas
542    CompletableFuture<T> primaryFuture = requestReplica.apply(RegionReplicaUtil.DEFAULT_REPLICA_ID);
543    CompletableFuture<T> future = new CompletableFuture<>();
544    connect(primaryFuture, future);
545    long startNs = System.nanoTime();
546    // after the getRegionLocations, all the locations for the replicas of this region should have
547    // been cached, so it is not big deal to locate them again when actually sending requests to
548    // these replicas.
549    addListener(locator.getRegionLocations(tableName, row, locateType, false, rpcTimeoutNs),
550      (locs, error) -> {
551        if (error != null) {
552          LOG.warn(
553            "Failed to locate all the replicas for table={}, row='{}', locateType={}" +
554              " give up timeline consistent read",
555            tableName, Bytes.toStringBinary(row), locateType, error);
556          return;
557        }
558        if (locs.size() <= 1) {
559          LOG.warn(
560            "There are no secondary replicas for region {}, give up timeline consistent read",
561            locs.getDefaultRegionLocation().getRegion());
562          return;
563        }
564        long delayNs = primaryCallTimeoutNs - (System.nanoTime() - startNs);
565        if (delayNs <= 0) {
566          sendRequestsToSecondaryReplicas(requestReplica, locs, future);
567        } else {
568          retryTimer.newTimeout(
569            timeout -> sendRequestsToSecondaryReplicas(requestReplica, locs, future), delayNs,
570            TimeUnit.NANOSECONDS);
571        }
572      });
573    return future;
574  }
575}