/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import static java.util.stream.Collectors.toList;
import static org.apache.hadoop.hbase.HConstants.EMPTY_END_ROW;
import static org.apache.hadoop.hbase.HConstants.EMPTY_START_ROW;
import static org.apache.hadoop.hbase.util.FutureUtils.addListener;

import java.io.IOException;
import java.lang.reflect.UndeclaredThrowableException;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.RegionLocations;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.metrics.ScanMetrics;
import org.apache.hadoop.hbase.ipc.HBaseRpcController;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.DNS;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
import org.apache.hbase.thirdparty.io.netty.util.Timer;

import org.apache.hadoop.hbase.shaded.protobuf.ResponseConverter;
import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ClientService;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ScanResponse;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterService;

/**
 * Utility used by client connections.
 */
@InterfaceAudience.Private
public final class ConnectionUtils {

  private static final Logger LOG = LoggerFactory.getLogger(ConnectionUtils.class);

  private ConnectionUtils() {
  }

  /**
   * Calculate pause time. Built on {@link HConstants#RETRY_BACKOFF}.
   * @param pause time to pause
   * @param tries amount of tries
   * @return How long to wait after <code>tries</code> retries
   */
  public static long getPauseTime(final long pause, final int tries) {
    int ntries = tries;
    if (ntries >= HConstants.RETRY_BACKOFF.length) {
      ntries = HConstants.RETRY_BACKOFF.length - 1;
    }
    if (ntries < 0) {
      ntries = 0;
    }

    long normalPause = pause * HConstants.RETRY_BACKOFF[ntries];
    // 1% possible jitter
    long jitter = (long) (normalPause * ThreadLocalRandom.current().nextFloat() * 0.01f);
    return normalPause + jitter;
  }
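  // An illustrative sketch (not part of the class) of how the pause grows across retries,
  // assuming the RETRY_BACKOFF table begins with {1, 2, 3, 5, 10, ...} and a base pause of
  // 100 ms; check the exact multipliers against HConstants:
  //
  //   getPauseTime(100, 0)  -> ~100 ms (100 * 1, plus up to 1% random jitter)
  //   getPauseTime(100, 3)  -> ~500 ms (100 * 5)
  //   getPauseTime(100, 99) -> capped at the last entry of RETRY_BACKOFF
  //
  // Callers sleep roughly the returned number of milliseconds between successive retries.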
  /**
   * @param conn The connection for which to replace the generator.
   * @param cnm Replaces the nonce generator used, for testing.
   * @return old nonce generator.
   */
  public static NonceGenerator injectNonceGeneratorForTesting(ClusterConnection conn,
      NonceGenerator cnm) {
    return ConnectionImplementation.injectNonceGeneratorForTesting(conn, cnm);
  }

  /**
   * Changes the configuration to set the number of retries needed when using Connection
   * internally, e.g. for updating catalog tables, etc. Call this method before we create any
   * Connections.
   * @param c The Configuration instance to set the retries into.
   * @param sn The server name, used only in the log message.
   * @param log Used to log what we set in here.
   */
  public static void setServerSideHConnectionRetriesConfig(final Configuration c, final String sn,
      final Logger log) {
    // TODO: Fix this. Not all connections from server side should have 10 times the retries.
    int hcRetries = c.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
      HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER);
    // Go big. Multiply by 10. If we can't get to meta after this many retries
    // then something is seriously wrong.
    int serversideMultiplier = c.getInt(HConstants.HBASE_CLIENT_SERVERSIDE_RETRIES_MULTIPLIER,
      HConstants.DEFAULT_HBASE_CLIENT_SERVERSIDE_RETRIES_MULTIPLIER);
    int retries = hcRetries * serversideMultiplier;
    c.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, retries);
    log.info(sn + " server-side Connection retries=" + retries);
  }
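  // A hedged example of the effect of the method above; the numbers are purely illustrative and
  // are not the shipped defaults. If HBASE_CLIENT_RETRIES_NUMBER resolves to 10 and
  // HBASE_CLIENT_SERVERSIDE_RETRIES_MULTIPLIER resolves to 3, the configuration passed in ends
  // up with HBASE_CLIENT_RETRIES_NUMBER = 10 * 3 = 30, so server-side Connections retry far
  // longer than a regular client before giving up.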
  /**
   * A ClusterConnection that will short-circuit RPCs, making direct invocations against the
   * localhost if the invocation target is 'this' server; saves on network and protobuf
   * invocations.
   */
  // TODO This has to still do PB marshalling/unmarshalling stuff. Check how/whether we can avoid.
  @VisibleForTesting // Class is visible so can assert we are short-circuiting when expected.
  public static class ShortCircuitingClusterConnection extends ConnectionImplementation {
    private final ServerName serverName;
    private final AdminService.BlockingInterface localHostAdmin;
    private final ClientService.BlockingInterface localHostClient;

    private ShortCircuitingClusterConnection(Configuration conf, ExecutorService pool, User user,
        ServerName serverName, AdminService.BlockingInterface admin,
        ClientService.BlockingInterface client) throws IOException {
      super(conf, pool, user);
      this.serverName = serverName;
      this.localHostAdmin = admin;
      this.localHostClient = client;
    }

    @Override
    public AdminService.BlockingInterface getAdmin(ServerName sn) throws IOException {
      return serverName.equals(sn) ? this.localHostAdmin : super.getAdmin(sn);
    }

    @Override
    public ClientService.BlockingInterface getClient(ServerName sn) throws IOException {
      return serverName.equals(sn) ? this.localHostClient : super.getClient(sn);
    }

    @Override
    public MasterKeepAliveConnection getMaster() throws IOException {
      if (this.localHostClient instanceof MasterService.BlockingInterface) {
        return new ShortCircuitMasterConnection(
          (MasterService.BlockingInterface) this.localHostClient);
      }
      return super.getMaster();
    }
  }

  /**
   * Creates a short-circuit connection that can bypass the RPC layer (serialization,
   * deserialization, networking, etc.) when talking to a local server.
   * @param conf the current configuration
   * @param pool the thread pool to use for batch operations
   * @param user the user the connection is for
   * @param serverName the local server name
   * @param admin the admin interface of the local server
   * @param client the client interface of the local server
   * @return a short-circuit connection.
   * @throws IOException if IO failure occurred
   */
  public static ClusterConnection createShortCircuitConnection(final Configuration conf,
      ExecutorService pool, User user, final ServerName serverName,
      final AdminService.BlockingInterface admin, final ClientService.BlockingInterface client)
      throws IOException {
    if (user == null) {
      user = UserProvider.instantiate(conf).getCurrent();
    }
    return new ShortCircuitingClusterConnection(conf, pool, user, serverName, admin, client);
  }

  /**
   * Set up the connection class so that it will not depend on the master being online. Used for
   * testing.
   * @param conf configuration to set
   */
  @VisibleForTesting
  public static void setupMasterlessConnection(Configuration conf) {
    conf.set(ClusterConnection.HBASE_CLIENT_CONNECTION_IMPL, MasterlessConnection.class.getName());
  }

  /**
   * Some tests shut down the master. But table availability is a master RPC which is performed on
   * region re-lookups.
   */
  static class MasterlessConnection extends ConnectionImplementation {
    MasterlessConnection(Configuration conf, ExecutorService pool, User user) throws IOException {
      super(conf, pool, user);
    }

    @Override
    public boolean isTableDisabled(TableName tableName) throws IOException {
      // treat all tables as enabled
      return false;
    }
  }

  /**
   * Return retries + 1. The returned value will be in the range [1, Integer.MAX_VALUE].
   */
  static int retries2Attempts(int retries) {
    return Math.max(1, retries == Integer.MAX_VALUE ? Integer.MAX_VALUE : retries + 1);
  }

  /**
   * Get a unique key for the rpc stub to the given server.
   */
  static String getStubKey(String serviceName, ServerName serverName, boolean hostnameCanChange) {
    // Sometimes, servers go down and they come back up with the same hostname but a different
    // IP address. Force a resolution of the hostname and bake the resolved address into the
    // stub key, and this way we will rightfully get a new stubKey. Also, include the hostname
    // in the key so as to take care of those cases where the DNS name is different but the IP
    // address remains the same.
    String hostname = serverName.getHostname();
    int port = serverName.getPort();
    if (hostnameCanChange) {
      try {
        InetAddress ip = InetAddress.getByName(hostname);
        return serviceName + "@" + hostname + "-" + ip.getHostAddress() + ":" + port;
      } catch (UnknownHostException e) {
        LOG.warn("Can not resolve " + hostname + ", please check your network", e);
      }
    }
    return serviceName + "@" + hostname + ":" + port;
  }
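  // Illustrative stub keys produced by getStubKey above; the hostname, address and port are made
  // up for the example:
  //
  //   getStubKey("ClientService", serverName, false) -> "ClientService@rs1.example.com:16020"
  //   getStubKey("ClientService", serverName, true)  -> "ClientService@rs1.example.com-10.0.0.5:16020"
  //
  // With hostnameCanChange=true the resolved IP address becomes part of the key, so a server
  // that comes back with the same hostname but a different address gets a brand new stub.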
  static void checkHasFamilies(Mutation mutation) {
    Preconditions.checkArgument(mutation.numFamilies() > 0,
      "Invalid arguments to %s, zero columns specified", mutation.toString());
  }

  /** Dummy nonce generator for disabled nonces. */
  static final NonceGenerator NO_NONCE_GENERATOR = new NonceGenerator() {

    @Override
    public long newNonce() {
      return HConstants.NO_NONCE;
    }

    @Override
    public long getNonceGroup() {
      return HConstants.NO_NONCE;
    }
  };

  // A byte array in which all elements are the max byte, used to construct the closest front row.
  static final byte[] MAX_BYTE_ARRAY = Bytes.createMaxByteArray(9);

  /**
   * Create the closest row after the specified row.
   */
  static byte[] createClosestRowAfter(byte[] row) {
    return Arrays.copyOf(row, row.length + 1);
  }

  /**
   * Create a row before the specified row and very close to the specified row.
   */
  static byte[] createCloseRowBefore(byte[] row) {
    if (row.length == 0) {
      return MAX_BYTE_ARRAY;
    }
    if (row[row.length - 1] == 0) {
      return Arrays.copyOf(row, row.length - 1);
    } else {
      byte[] nextRow = new byte[row.length + MAX_BYTE_ARRAY.length];
      System.arraycopy(row, 0, nextRow, 0, row.length - 1);
      nextRow[row.length - 1] = (byte) ((row[row.length - 1] & 0xFF) - 1);
      System.arraycopy(MAX_BYTE_ARRAY, 0, nextRow, row.length, MAX_BYTE_ARRAY.length);
      return nextRow;
    }
  }
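  // A short sketch of the two row-boundary helpers above, with byte values shown in hex:
  //
  //   createClosestRowAfter({0x01, 0x02}) -> {0x01, 0x02, 0x00}
  //       the smallest row strictly greater than the input, built by appending a zero byte.
  //   createCloseRowBefore({0x01, 0x02})  -> {0x01, 0x01, 0xFF x 9}
  //       a row smaller than, but very close to, the input; used when scanning backwards.
  //   createCloseRowBefore({0x01, 0x00})  -> {0x01}
  //       a trailing zero byte is simply dropped.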
  static boolean isEmptyStartRow(byte[] row) {
    return Bytes.equals(row, EMPTY_START_ROW);
  }

  static boolean isEmptyStopRow(byte[] row) {
    return Bytes.equals(row, EMPTY_END_ROW);
  }

  static void resetController(HBaseRpcController controller, long timeoutNs) {
    controller.reset();
    if (timeoutNs >= 0) {
      controller.setCallTimeout(
        (int) Math.min(Integer.MAX_VALUE, TimeUnit.NANOSECONDS.toMillis(timeoutNs)));
    }
  }

  static Throwable translateException(Throwable t) {
    if (t instanceof UndeclaredThrowableException && t.getCause() != null) {
      t = t.getCause();
    }
    if (t instanceof RemoteException) {
      t = ((RemoteException) t).unwrapRemoteException();
    }
    if (t instanceof ServiceException && t.getCause() != null) {
      t = translateException(t.getCause());
    }
    return t;
  }

  static long calcEstimatedSize(Result rs) {
    long estimatedHeapSizeOfResult = 0;
    // We don't create an Iterator here
    for (Cell cell : rs.rawCells()) {
      estimatedHeapSizeOfResult += PrivateCellUtil.estimatedSizeOfCell(cell);
    }
    return estimatedHeapSizeOfResult;
  }

  static Result filterCells(Result result, Cell keepCellsAfter) {
    if (keepCellsAfter == null) {
      // do not need to filter
      return result;
    }
    // not the same row
    if (!PrivateCellUtil.matchingRows(keepCellsAfter, result.getRow(), 0, result.getRow().length)) {
      return result;
    }
    Cell[] rawCells = result.rawCells();
    int index = Arrays.binarySearch(rawCells, keepCellsAfter,
      CellComparator.getInstance()::compareWithoutRow);
    if (index < 0) {
      index = -index - 1;
    } else {
      index++;
    }
    if (index == 0) {
      return result;
    }
    if (index == rawCells.length) {
      return null;
    }
    return Result.create(Arrays.copyOfRange(rawCells, index, rawCells.length), null,
      result.isStale(), result.mayHaveMoreCellsInRow());
  }

  // Add a delta to avoid timing out immediately after a retry sleep.
  static final long SLEEP_DELTA_NS = TimeUnit.MILLISECONDS.toNanos(1);

  static Get toCheckExistenceOnly(Get get) {
    if (get.isCheckExistenceOnly()) {
      return get;
    }
    return ReflectionUtils.newInstance(get.getClass(), get).setCheckExistenceOnly(true);
  }

  static List<Get> toCheckExistenceOnly(List<Get> gets) {
    return gets.stream().map(ConnectionUtils::toCheckExistenceOnly).collect(toList());
  }

  static RegionLocateType getLocateType(Scan scan) {
    if (scan.isReversed()) {
      if (isEmptyStartRow(scan.getStartRow())) {
        return RegionLocateType.BEFORE;
      } else {
        return scan.includeStartRow() ? RegionLocateType.CURRENT : RegionLocateType.BEFORE;
      }
    } else {
      return scan.includeStartRow() ? RegionLocateType.CURRENT : RegionLocateType.AFTER;
    }
  }

  static boolean noMoreResultsForScan(Scan scan, RegionInfo info) {
    if (isEmptyStopRow(info.getEndKey())) {
      return true;
    }
    if (isEmptyStopRow(scan.getStopRow())) {
      return false;
    }
    int c = Bytes.compareTo(info.getEndKey(), scan.getStopRow());
    // 1. if our stop row is less than the endKey of the region
    // 2. if our stop row is equal to the endKey of the region and we do not include the stop row
    // for the scan.
    return c > 0 || (c == 0 && !scan.includeStopRow());
  }
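  // Worked example for noMoreResultsForScan, with illustrative row keys. For a region whose
  // endKey is "row5" and a scan whose stopRow is "row3", Bytes.compareTo("row5", "row3") > 0,
  // so the scan cannot continue past this region and the method returns true. If the stopRow is
  // also "row5", the answer depends on scan.includeStopRow(): true when the stop row is
  // excluded, false when it is included, because "row5" itself lives in the next region.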
  static boolean noMoreResultsForReverseScan(Scan scan, RegionInfo info) {
    if (isEmptyStartRow(info.getStartKey())) {
      return true;
    }
    if (isEmptyStopRow(scan.getStopRow())) {
      return false;
    }
    // no need to test the inclusiveness of the stop row as the start key of a region is included
    // in the region.
    return Bytes.compareTo(info.getStartKey(), scan.getStopRow()) <= 0;
  }

  static <T> CompletableFuture<List<T>> allOf(List<CompletableFuture<T>> futures) {
    return CompletableFuture.allOf(futures.toArray(new CompletableFuture[0]))
      .thenApply(v -> futures.stream().map(f -> f.getNow(null)).collect(toList()));
  }

  public static ScanResultCache createScanResultCache(Scan scan) {
    if (scan.getAllowPartialResults()) {
      return new AllowPartialScanResultCache();
    } else if (scan.getBatch() > 0) {
      return new BatchScanResultCache(scan.getBatch());
    } else {
      return new CompleteScanResultCache();
    }
  }
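  // A hedged summary of the selection above (the behavioural notes describe the three cache
  // implementations as understood here, not guarantees made by this class): a scan created with
  // setAllowPartialResults(true) gets an AllowPartialScanResultCache, which hands partial
  // Results straight to the caller; a scan with setBatch(n) for n > 0 gets a
  // BatchScanResultCache, which regroups cells into Results of at most n cells; everything else
  // gets a CompleteScanResultCache, which assembles partial Results into whole-row Results.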
  private static final String MY_ADDRESS = getMyAddress();

  private static String getMyAddress() {
    try {
      return DNS.getDefaultHost("default", "default");
    } catch (UnknownHostException uhe) {
      LOG.error("cannot determine my address", uhe);
      return null;
    }
  }

  static boolean isRemote(String host) {
    return !host.equalsIgnoreCase(MY_ADDRESS);
  }

  static void incRPCCallsMetrics(ScanMetrics scanMetrics, boolean isRegionServerRemote) {
    if (scanMetrics == null) {
      return;
    }
    scanMetrics.countOfRPCcalls.incrementAndGet();
    if (isRegionServerRemote) {
      scanMetrics.countOfRemoteRPCcalls.incrementAndGet();
    }
  }

  static void incRPCRetriesMetrics(ScanMetrics scanMetrics, boolean isRegionServerRemote) {
    if (scanMetrics == null) {
      return;
    }
    scanMetrics.countOfRPCRetries.incrementAndGet();
    if (isRegionServerRemote) {
      scanMetrics.countOfRemoteRPCRetries.incrementAndGet();
    }
  }

  static void updateResultsMetrics(ScanMetrics scanMetrics, Result[] rrs,
      boolean isRegionServerRemote) {
    if (scanMetrics == null || rrs == null || rrs.length == 0) {
      return;
    }
    long resultSize = 0;
    for (Result rr : rrs) {
      for (Cell cell : rr.rawCells()) {
        resultSize += PrivateCellUtil.estimatedSerializedSizeOf(cell);
      }
    }
    scanMetrics.countOfBytesInResults.addAndGet(resultSize);
    if (isRegionServerRemote) {
      scanMetrics.countOfBytesInRemoteResults.addAndGet(resultSize);
    }
  }

  /**
   * Use the scan metrics returned by the server to add to the identically named counters in the
   * client side metrics. If a counter does not exist with the same name as the server side
   * metric, the attempt to increase the counter will fail.
   */
  static void updateServerSideMetrics(ScanMetrics scanMetrics, ScanResponse response) {
    if (scanMetrics == null || response == null || !response.hasScanMetrics()) {
      return;
    }
    ResponseConverter.getScanMetrics(response).forEach(scanMetrics::addToCounter);
  }

  static void incRegionCountMetrics(ScanMetrics scanMetrics) {
    if (scanMetrics == null) {
      return;
    }
    scanMetrics.countOfRegions.incrementAndGet();
  }

  /**
   * Connect the two futures: if the src future is done, mark the dst future as done, and if the
   * dst future is done, cancel the src future. This is used for timeline consistent read.
   */
  private static <T> void connect(CompletableFuture<T> srcFuture, CompletableFuture<T> dstFuture) {
    addListener(srcFuture, (r, e) -> {
      if (e != null) {
        dstFuture.completeExceptionally(e);
      } else {
        dstFuture.complete(r);
      }
    });
    // The cancellation may be a dummy one, as the dstFuture may already have been completed by
    // this srcFuture. Notice that this is a bit tricky, as the execution chain may be
    // 'complete src -> complete dst -> cancel src'. For now it seems to be fine, as
    // CompletableFuture uses CAS to set the result first. If later this causes problems, we
    // could use whenCompleteAsync to break the tie.
    addListener(dstFuture, (r, e) -> srcFuture.cancel(false));
  }

  private static <T> void sendRequestsToSecondaryReplicas(
      Function<Integer, CompletableFuture<T>> requestReplica, RegionLocations locs,
      CompletableFuture<T> future) {
    if (future.isDone()) {
      // do not send requests to secondary replicas if the future is done, i.e., the primary
      // request has already finished.
      return;
    }
    for (int replicaId = 1, n = locs.size(); replicaId < n; replicaId++) {
      CompletableFuture<T> secondaryFuture = requestReplica.apply(replicaId);
      connect(secondaryFuture, future);
    }
  }

  static <T> CompletableFuture<T> timelineConsistentRead(AsyncRegionLocator locator,
      TableName tableName, Query query, byte[] row, RegionLocateType locateType,
      Function<Integer, CompletableFuture<T>> requestReplica, long rpcTimeoutNs,
      long primaryCallTimeoutNs, Timer retryTimer) {
    if (query.getConsistency() == Consistency.STRONG) {
      return requestReplica.apply(RegionReplicaUtil.DEFAULT_REPLICA_ID);
    }
    // The user specified a replica id explicitly, so just send the request to that replica.
    if (query.getReplicaId() >= 0) {
      return requestReplica.apply(query.getReplicaId());
    }
    // Timeline consistent read, where we may send requests to other region replicas.
    CompletableFuture<T> primaryFuture = requestReplica.apply(RegionReplicaUtil.DEFAULT_REPLICA_ID);
    CompletableFuture<T> future = new CompletableFuture<>();
    connect(primaryFuture, future);
    long startNs = System.nanoTime();
    // After the getRegionLocations call, all the locations for the replicas of this region should
    // have been cached, so it is not a big deal to locate them again when actually sending
    // requests to these replicas.
    addListener(locator.getRegionLocations(tableName, row, locateType, false, rpcTimeoutNs),
      (locs, error) -> {
        if (error != null) {
          LOG.warn(
            "Failed to locate all the replicas for table={}, row='{}', locateType={}" +
              " give up timeline consistent read",
            tableName, Bytes.toStringBinary(row), locateType, error);
          return;
        }
        if (locs.size() <= 1) {
          LOG.warn(
            "There are no secondary replicas for region {}, give up timeline consistent read",
            locs.getDefaultRegionLocation().getRegion());
          return;
        }
        long delayNs = primaryCallTimeoutNs - (System.nanoTime() - startNs);
        if (delayNs <= 0) {
          sendRequestsToSecondaryReplicas(requestReplica, locs, future);
        } else {
          retryTimer.newTimeout(
            timeout -> sendRequestsToSecondaryReplicas(requestReplica, locs, future), delayNs,
            TimeUnit.NANOSECONDS);
        }
      });
    return future;
  }
}