001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collection; 023import java.util.Collections; 024import java.util.List; 025import java.util.Map; 026import java.util.OptionalDouble; 027import java.util.concurrent.ConcurrentHashMap; 028import java.util.concurrent.ScheduledExecutorService; 029import java.util.concurrent.TimeUnit; 030import java.util.stream.Collectors; 031import org.apache.commons.lang3.StringUtils; 032import org.apache.hadoop.hbase.CompatibilitySingletonFactory; 033import org.apache.hadoop.hbase.HConstants; 034import org.apache.hadoop.hbase.HDFSBlocksDistribution; 035import org.apache.hadoop.hbase.ServerName; 036import org.apache.hadoop.hbase.client.RegionInfo; 037import org.apache.hadoop.hbase.io.ByteBuffAllocator; 038import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 039import org.apache.hadoop.hbase.io.asyncfs.monitor.ExcludeDatanodeManager; 040import org.apache.hadoop.hbase.io.hfile.BlockCache; 041import org.apache.hadoop.hbase.io.hfile.CacheStats; 042import org.apache.hadoop.hbase.io.hfile.CombinedBlockCache; 043import org.apache.hadoop.hbase.mob.MobFileCache; 044import org.apache.hadoop.hbase.regionserver.wal.MetricsWALSource; 045import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 046import org.apache.hadoop.hbase.util.FSUtils; 047import org.apache.hadoop.hbase.wal.WALProvider; 048import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 049import org.apache.hadoop.hdfs.DFSHedgedReadMetrics; 050import org.apache.hadoop.metrics2.MetricsExecutor; 051import org.apache.yetus.audience.InterfaceAudience; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055/** 056 * Impl for exposing HRegionServer Information through Hadoop's metrics 2 system. 057 */ 058@InterfaceAudience.Private 059class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper { 060 061 private static final Logger LOG = LoggerFactory.getLogger(MetricsRegionServerWrapperImpl.class); 062 063 private final HRegionServer regionServer; 064 private final MetricsWALSource metricsWALSource; 065 private final ByteBuffAllocator allocator; 066 067 private BlockCache blockCache; 068 private BlockCache l1Cache = null; 069 private BlockCache l2Cache = null; 070 private MobFileCache mobFileCache; 071 private CacheStats cacheStats; 072 private CacheStats l1Stats = null; 073 private CacheStats l2Stats = null; 074 private volatile long numWALFiles = 0; 075 private volatile long walFileSize = 0; 076 private volatile long mobFileCacheAccessCount = 0; 077 private volatile long mobFileCacheMissCount = 0; 078 private volatile double mobFileCacheHitRatio = 0; 079 private volatile long mobFileCacheEvictedCount = 0; 080 private volatile long mobFileCacheCount = 0; 081 082 private volatile RegionMetricAggregate aggregate = new RegionMetricAggregate(null); 083 084 protected final Map<String, ArrayList<Long>> requestsCountCache = 085 new ConcurrentHashMap<String, ArrayList<Long>>(); 086 087 private ScheduledExecutorService executor; 088 private Runnable runnable; 089 private long period; 090 091 /** 092 * Can be null if not on hdfs. 093 */ 094 private DFSHedgedReadMetrics dfsHedgedReadMetrics; 095 096 private final ExcludeDatanodeManager excludeDatanodeManager; 097 098 public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) { 099 this.regionServer = regionServer; 100 initBlockCache(); 101 initMobFileCache(); 102 this.excludeDatanodeManager = this.regionServer.getWalFactory().getExcludeDatanodeManager(); 103 104 this.period = regionServer.getConfiguration().getLong(HConstants.REGIONSERVER_METRICS_PERIOD, 105 HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD); 106 107 this.executor = CompatibilitySingletonFactory.getInstance(MetricsExecutor.class).getExecutor(); 108 this.runnable = new RegionServerMetricsWrapperRunnable(); 109 this.executor.scheduleWithFixedDelay(this.runnable, this.period, this.period, 110 TimeUnit.MILLISECONDS); 111 this.metricsWALSource = CompatibilitySingletonFactory.getInstance(MetricsWALSource.class); 112 this.allocator = regionServer.getRpcServer().getByteBuffAllocator(); 113 114 try { 115 this.dfsHedgedReadMetrics = FSUtils.getDFSHedgedReadMetrics(regionServer.getConfiguration()); 116 } catch (IOException e) { 117 LOG.warn("Failed to get hedged metrics", e); 118 } 119 if (LOG.isInfoEnabled()) { 120 LOG.info("Computing regionserver metrics every " + this.period + " milliseconds"); 121 } 122 } 123 124 private void initBlockCache() { 125 this.blockCache = this.regionServer.getBlockCache().orElse(null); 126 this.cacheStats = this.blockCache != null ? this.blockCache.getStats() : null; 127 if (this.cacheStats != null) { 128 if (this.cacheStats instanceof CombinedBlockCache.CombinedCacheStats) { 129 l1Stats = ((CombinedBlockCache.CombinedCacheStats) this.cacheStats).getLruCacheStats(); 130 l2Stats = ((CombinedBlockCache.CombinedCacheStats) this.cacheStats).getBucketCacheStats(); 131 } else { 132 l1Stats = this.cacheStats; 133 } 134 } 135 if (this.blockCache != null) { 136 if (this.blockCache instanceof CombinedBlockCache) { 137 l1Cache = ((CombinedBlockCache) this.blockCache).getFirstLevelCache(); 138 l2Cache = ((CombinedBlockCache) this.blockCache).getSecondLevelCache(); 139 } else { 140 l1Cache = this.blockCache; 141 } 142 } 143 } 144 145 /** 146 * Initializes the mob file cache. 147 */ 148 private void initMobFileCache() { 149 this.mobFileCache = this.regionServer.getMobFileCache().orElse(null); 150 } 151 152 @Override 153 public String getClusterId() { 154 return regionServer.getClusterId(); 155 } 156 157 @Override 158 public long getStartCode() { 159 return regionServer.getStartcode(); 160 } 161 162 @Override 163 public String getZookeeperQuorum() { 164 ZKWatcher zk = regionServer.getZooKeeper(); 165 if (zk == null) { 166 return ""; 167 } 168 return zk.getQuorum(); 169 } 170 171 @Override 172 public String getCoprocessors() { 173 String[] coprocessors = regionServer.getRegionServerCoprocessors(); 174 if (coprocessors == null || coprocessors.length == 0) { 175 return ""; 176 } 177 return StringUtils.join(coprocessors, ", "); 178 } 179 180 @Override 181 public String getServerName() { 182 ServerName serverName = regionServer.getServerName(); 183 if (serverName == null) { 184 return ""; 185 } 186 return serverName.getServerName(); 187 } 188 189 @Override 190 public long getNumOnlineRegions() { 191 Collection<HRegion> onlineRegionsLocalContext = regionServer.getOnlineRegionsLocalContext(); 192 if (onlineRegionsLocalContext == null) { 193 return 0; 194 } 195 return onlineRegionsLocalContext.size(); 196 } 197 198 @Override 199 public long getTotalRequestCount() { 200 return regionServer.getRpcServices().requestCount.sum(); 201 } 202 203 @Override 204 public long getTotalRowActionRequestCount() { 205 return aggregate.readRequestsCount + aggregate.writeRequestsCount; 206 } 207 208 @Override 209 public int getSplitQueueSize() { 210 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 211 return compactSplit == null ? 0 : compactSplit.getSplitQueueSize(); 212 } 213 214 @Override 215 public int getCompactionQueueSize() { 216 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 217 return compactSplit == null ? 0 : compactSplit.getCompactionQueueSize(); 218 } 219 220 @Override 221 public int getSmallCompactionQueueSize() { 222 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 223 return compactSplit == null ? 0 : compactSplit.getSmallCompactionQueueSize(); 224 } 225 226 @Override 227 public int getLargeCompactionQueueSize() { 228 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 229 return compactSplit == null ? 0 : compactSplit.getLargeCompactionQueueSize(); 230 } 231 232 @Override 233 public int getFlushQueueSize() { 234 // If there is no flusher there should be no queue. 235 if (this.regionServer.getMemStoreFlusher() == null) { 236 return 0; 237 } 238 return this.regionServer.getMemStoreFlusher().getFlushQueueSize(); 239 } 240 241 @Override 242 public long getBlockCacheCount() { 243 return this.blockCache != null ? this.blockCache.getBlockCount() : 0L; 244 } 245 246 @Override 247 public long getBlockCacheDataBlockCount() { 248 return this.blockCache != null ? this.blockCache.getDataBlockCount() : 0L; 249 } 250 251 @Override 252 public long getMemStoreLimit() { 253 return this.regionServer.getRegionServerAccounting().getGlobalMemStoreLimit(); 254 } 255 256 @Override 257 public long getOnHeapMemStoreLimit() { 258 return this.regionServer.getRegionServerAccounting().getGlobalOnHeapMemStoreLimit(); 259 } 260 261 @Override 262 public long getOffHeapMemStoreLimit() { 263 return this.regionServer.getRegionServerAccounting().getGlobalOffHeapMemStoreLimit(); 264 } 265 266 @Override 267 public long getBlockCacheSize() { 268 return this.blockCache != null ? this.blockCache.getCurrentSize() : 0L; 269 } 270 271 @Override 272 public long getBlockCacheFreeSize() { 273 return this.blockCache != null ? this.blockCache.getFreeSize() : 0L; 274 } 275 276 @Override 277 public long getBlockCacheHitCount() { 278 return this.cacheStats != null ? this.cacheStats.getHitCount() : 0L; 279 } 280 281 @Override 282 public long getBlockCachePrimaryHitCount() { 283 return this.cacheStats != null ? this.cacheStats.getPrimaryHitCount() : 0L; 284 } 285 286 @Override 287 public long getBlockCacheHitCachingCount() { 288 return this.cacheStats != null ? this.cacheStats.getHitCachingCount() : 0L; 289 } 290 291 @Override 292 public long getBlockCacheMissCount() { 293 return this.cacheStats != null ? this.cacheStats.getMissCount() : 0L; 294 } 295 296 @Override 297 public long getBlockCachePrimaryMissCount() { 298 return this.cacheStats != null ? this.cacheStats.getPrimaryMissCount() : 0L; 299 } 300 301 @Override 302 public long getBlockCacheMissCachingCount() { 303 return this.cacheStats != null ? this.cacheStats.getMissCachingCount() : 0L; 304 } 305 306 @Override 307 public long getBlockCacheEvictedCount() { 308 return this.cacheStats != null ? this.cacheStats.getEvictedCount() : 0L; 309 } 310 311 @Override 312 public long getBlockCachePrimaryEvictedCount() { 313 return this.cacheStats != null ? this.cacheStats.getPrimaryEvictedCount() : 0L; 314 } 315 316 @Override 317 public double getBlockCacheHitPercent() { 318 double ratio = this.cacheStats != null ? this.cacheStats.getHitRatio() : 0.0; 319 if (Double.isNaN(ratio)) { 320 ratio = 0; 321 } 322 return (ratio * 100); 323 } 324 325 @Override 326 public double getBlockCacheHitCachingPercent() { 327 double ratio = this.cacheStats != null ? this.cacheStats.getHitCachingRatio() : 0.0; 328 if (Double.isNaN(ratio)) { 329 ratio = 0; 330 } 331 return (ratio * 100); 332 } 333 334 @Override 335 public long getBlockCacheFailedInsertions() { 336 return this.cacheStats != null ? this.cacheStats.getFailedInserts() : 0L; 337 } 338 339 public long getL1CacheSize() { 340 return this.l1Cache != null ? this.l1Cache.getCurrentSize() : 0L; 341 } 342 343 public long getL1CacheFreeSize() { 344 return this.l1Cache != null ? this.l1Cache.getFreeSize() : 0L; 345 } 346 347 public long getL1CacheCount() { 348 return this.l1Cache != null ? this.l1Cache.getBlockCount() : 0L; 349 } 350 351 public long getL1CacheEvictedCount() { 352 return this.l1Stats != null ? this.l1Stats.getEvictedCount() : 0L; 353 } 354 355 public long getL2CacheSize() { 356 return this.l2Cache != null ? this.l2Cache.getCurrentSize() : 0L; 357 } 358 359 public long getL2CacheFreeSize() { 360 return this.l2Cache != null ? this.l2Cache.getFreeSize() : 0L; 361 } 362 363 public long getL2CacheCount() { 364 return this.l2Cache != null ? this.l2Cache.getBlockCount() : 0L; 365 } 366 367 public long getL2CacheEvictedCount() { 368 return this.l2Stats != null ? this.l2Stats.getEvictedCount() : 0L; 369 } 370 371 @Override 372 public long getL1CacheHitCount() { 373 return this.l1Stats != null ? this.l1Stats.getHitCount() : 0L; 374 } 375 376 @Override 377 public long getL1CacheHitCachingCount() { 378 return this.l1Stats != null ? this.l1Stats.getHitCachingCount() : 0L; 379 } 380 381 @Override 382 public long getL1CacheMissCount() { 383 return this.l1Stats != null ? this.l1Stats.getMissCount() : 0L; 384 } 385 386 @Override 387 public long getL1CacheMissCachingCount() { 388 return this.l1Stats != null ? this.l1Stats.getMissCachingCount() : 0L; 389 } 390 391 @Override 392 public double getL1CacheHitRatio() { 393 return this.l1Stats != null ? this.l1Stats.getHitRatio() : 0.0; 394 } 395 396 @Override 397 public double getL1CacheHitCachingRatio() { 398 return this.l1Stats != null ? this.l1Stats.getHitCachingRatio() : 0.0; 399 } 400 401 @Override 402 public double getL1CacheMissRatio() { 403 return this.l1Stats != null ? this.l1Stats.getMissRatio() : 0.0; 404 } 405 406 @Override 407 public double getL1CacheMissCachingRatio() { 408 return this.l1Stats != null ? this.l1Stats.getMissCachingRatio() : 0.0; 409 } 410 411 @Override 412 public long getL2CacheHitCount() { 413 return this.l2Stats != null ? this.l2Stats.getHitCount() : 0L; 414 } 415 416 @Override 417 public long getL2CacheHitCachingCount() { 418 return this.l2Stats != null ? this.l2Stats.getHitCachingCount() : 0L; 419 } 420 421 @Override 422 public long getL2CacheMissCount() { 423 return this.l2Stats != null ? this.l2Stats.getMissCount() : 0L; 424 } 425 426 @Override 427 public long getL2CacheMissCachingCount() { 428 return this.l2Stats != null ? this.l2Stats.getMissCachingCount() : 0L; 429 } 430 431 @Override 432 public double getL2CacheHitRatio() { 433 return this.l2Stats != null ? this.l2Stats.getHitRatio() : 0.0; 434 } 435 436 @Override 437 public double getL2CacheHitCachingRatio() { 438 return this.l2Stats != null ? this.l2Stats.getHitCachingRatio() : 0.0; 439 } 440 441 @Override 442 public double getL2CacheMissRatio() { 443 return this.l2Stats != null ? this.l2Stats.getMissRatio() : 0.0; 444 } 445 446 @Override 447 public double getL2CacheMissCachingRatio() { 448 return this.l2Stats != null ? this.l2Stats.getMissCachingRatio() : 0.0; 449 } 450 451 @Override 452 public void forceRecompute() { 453 this.runnable.run(); 454 } 455 456 @Override 457 public long getNumStores() { 458 return aggregate.numStores; 459 } 460 461 @Override 462 public long getNumWALFiles() { 463 return numWALFiles; 464 } 465 466 @Override 467 public long getWALFileSize() { 468 return walFileSize; 469 } 470 471 @Override 472 public List<String> getWALExcludeDNs() { 473 if (excludeDatanodeManager == null) { 474 return Collections.emptyList(); 475 } 476 return excludeDatanodeManager.getExcludeDNs().entrySet().stream().map(e -> e.getKey().toString() 477 + " - " + e.getValue().getSecond() + " - " + e.getValue().getFirst()) 478 .collect(Collectors.toList()); 479 } 480 481 @Override 482 public long getNumWALSlowAppend() { 483 return metricsWALSource.getSlowAppendCount(); 484 } 485 486 @Override 487 public long getNumStoreFiles() { 488 return aggregate.numStoreFiles; 489 } 490 491 @Override 492 public long getMaxStoreFiles() { 493 return aggregate.maxStoreFileCount; 494 } 495 496 @Override 497 public long getMaxStoreFileAge() { 498 return aggregate.maxStoreFileAge; 499 } 500 501 @Override 502 public long getMinStoreFileAge() { 503 return aggregate.minStoreFileAge; 504 } 505 506 @Override 507 public long getAvgStoreFileAge() { 508 return aggregate.avgStoreFileAge; 509 } 510 511 @Override 512 public long getNumReferenceFiles() { 513 return aggregate.numReferenceFiles; 514 } 515 516 @Override 517 public long getMemStoreSize() { 518 return aggregate.memstoreSize; 519 } 520 521 @Override 522 public long getOnHeapMemStoreSize() { 523 return aggregate.onHeapMemstoreSize; 524 } 525 526 @Override 527 public long getOffHeapMemStoreSize() { 528 return aggregate.offHeapMemstoreSize; 529 } 530 531 @Override 532 public long getStoreFileSize() { 533 return aggregate.storeFileSize; 534 } 535 536 @Override 537 public double getStoreFileSizeGrowthRate() { 538 return aggregate.storeFileSizeGrowthRate; 539 } 540 541 @Override 542 public double getRequestsPerSecond() { 543 return aggregate.requestsPerSecond; 544 } 545 546 @Override 547 public long getReadRequestsCount() { 548 return aggregate.readRequestsCount; 549 } 550 551 @Override 552 public long getCpRequestsCount() { 553 return aggregate.cpRequestsCount; 554 } 555 556 @Override 557 public double getReadRequestsRatePerSecond() { 558 return aggregate.readRequestsRatePerSecond; 559 } 560 561 @Override 562 public long getFilteredReadRequestsCount() { 563 return aggregate.filteredReadRequestsCount; 564 } 565 566 @Override 567 public long getWriteRequestsCount() { 568 return aggregate.writeRequestsCount; 569 } 570 571 @Override 572 public double getWriteRequestsRatePerSecond() { 573 return aggregate.writeRequestsRatePerSecond; 574 } 575 576 @Override 577 public long getRpcGetRequestsCount() { 578 return regionServer.getRpcServices().rpcGetRequestCount.sum(); 579 } 580 581 @Override 582 public long getRpcScanRequestsCount() { 583 return regionServer.getRpcServices().rpcScanRequestCount.sum(); 584 } 585 586 @Override 587 public long getRpcFullScanRequestsCount() { 588 return regionServer.getRpcServices().rpcFullScanRequestCount.sum(); 589 } 590 591 @Override 592 public long getRpcMultiRequestsCount() { 593 return regionServer.getRpcServices().rpcMultiRequestCount.sum(); 594 } 595 596 @Override 597 public long getRpcMutateRequestsCount() { 598 return regionServer.getRpcServices().rpcMutateRequestCount.sum(); 599 } 600 601 @Override 602 public long getCheckAndMutateChecksFailed() { 603 return aggregate.checkAndMutateChecksFailed; 604 } 605 606 @Override 607 public long getCheckAndMutateChecksPassed() { 608 return aggregate.checkAndMutateChecksPassed; 609 } 610 611 @Override 612 public long getStoreFileIndexSize() { 613 return aggregate.storefileIndexSize; 614 } 615 616 @Override 617 public long getTotalStaticIndexSize() { 618 return aggregate.totalStaticIndexSize; 619 } 620 621 @Override 622 public long getTotalStaticBloomSize() { 623 return aggregate.totalStaticBloomSize; 624 } 625 626 @Override 627 public long getBloomFilterRequestsCount() { 628 return aggregate.bloomFilterRequestsCount; 629 } 630 631 @Override 632 public long getBloomFilterNegativeResultsCount() { 633 return aggregate.bloomFilterNegativeResultsCount; 634 } 635 636 @Override 637 public long getBloomFilterEligibleRequestsCount() { 638 return aggregate.bloomFilterEligibleRequestsCount; 639 } 640 641 @Override 642 public long getNumMutationsWithoutWAL() { 643 return aggregate.numMutationsWithoutWAL; 644 } 645 646 @Override 647 public long getDataInMemoryWithoutWAL() { 648 return aggregate.dataInMemoryWithoutWAL; 649 } 650 651 @Override 652 public double getPercentFileLocal() { 653 return aggregate.percentFileLocal; 654 } 655 656 @Override 657 public double getPercentFileLocalPrimaryRegions() { 658 return aggregate.percentFileLocalPrimaryRegions; 659 } 660 661 @Override 662 public double getPercentFileLocalSecondaryRegions() { 663 return aggregate.percentFileLocalSecondaryRegions; 664 } 665 666 @Override 667 public long getUpdatesBlockedTime() { 668 if (this.regionServer.getMemStoreFlusher() == null) { 669 return 0; 670 } 671 return this.regionServer.getMemStoreFlusher().getUpdatesBlockedMsHighWater().sum(); 672 } 673 674 @Override 675 public long getFlushedCellsCount() { 676 return aggregate.flushedCellsCount; 677 } 678 679 @Override 680 public long getCompactedCellsCount() { 681 return aggregate.compactedCellsCount; 682 } 683 684 @Override 685 public long getMajorCompactedCellsCount() { 686 return aggregate.majorCompactedCellsCount; 687 } 688 689 @Override 690 public long getFlushedCellsSize() { 691 return aggregate.flushedCellsSize; 692 } 693 694 @Override 695 public long getCompactedCellsSize() { 696 return aggregate.compactedCellsSize; 697 } 698 699 @Override 700 public long getMajorCompactedCellsSize() { 701 return aggregate.majorCompactedCellsSize; 702 } 703 704 @Override 705 public long getCellsCountCompactedFromMob() { 706 return aggregate.cellsCountCompactedFromMob; 707 } 708 709 @Override 710 public long getCellsCountCompactedToMob() { 711 return aggregate.cellsCountCompactedToMob; 712 } 713 714 @Override 715 public long getCellsSizeCompactedFromMob() { 716 return aggregate.cellsSizeCompactedFromMob; 717 } 718 719 @Override 720 public long getCellsSizeCompactedToMob() { 721 return aggregate.cellsSizeCompactedToMob; 722 } 723 724 @Override 725 public long getMobFlushCount() { 726 return aggregate.mobFlushCount; 727 } 728 729 @Override 730 public long getMobFlushedCellsCount() { 731 return aggregate.mobFlushedCellsCount; 732 } 733 734 @Override 735 public long getMobFlushedCellsSize() { 736 return aggregate.mobFlushedCellsSize; 737 } 738 739 @Override 740 public long getMobScanCellsCount() { 741 return aggregate.mobScanCellsCount; 742 } 743 744 @Override 745 public long getMobScanCellsSize() { 746 return aggregate.mobScanCellsSize; 747 } 748 749 @Override 750 public long getMobFileCacheAccessCount() { 751 return mobFileCacheAccessCount; 752 } 753 754 @Override 755 public long getMobFileCacheMissCount() { 756 return mobFileCacheMissCount; 757 } 758 759 @Override 760 public long getMobFileCacheCount() { 761 return mobFileCacheCount; 762 } 763 764 @Override 765 public long getMobFileCacheEvictedCount() { 766 return mobFileCacheEvictedCount; 767 } 768 769 @Override 770 public double getMobFileCacheHitPercent() { 771 return mobFileCacheHitRatio * 100; 772 } 773 774 @Override 775 public int getActiveScanners() { 776 return regionServer.getRpcServices().getScannersCount(); 777 } 778 779 private static final class RegionMetricAggregate { 780 private long numStores = 0; 781 private long numStoreFiles = 0; 782 private long memstoreSize = 0; 783 private long onHeapMemstoreSize = 0; 784 private long offHeapMemstoreSize = 0; 785 private long storeFileSize = 0; 786 private double storeFileSizeGrowthRate = 0; 787 private long maxStoreFileCount = 0; 788 private long maxStoreFileAge = 0; 789 private long minStoreFileAge = Long.MAX_VALUE; 790 private long avgStoreFileAge = 0; 791 private long numReferenceFiles = 0; 792 793 private long cpRequestsCount = 0; 794 private double requestsPerSecond = 0.0; 795 private long readRequestsCount = 0; 796 private double readRequestsRatePerSecond = 0; 797 private long filteredReadRequestsCount = 0; 798 private long writeRequestsCount = 0; 799 private double writeRequestsRatePerSecond = 0; 800 private long checkAndMutateChecksFailed = 0; 801 private long checkAndMutateChecksPassed = 0; 802 private long storefileIndexSize = 0; 803 private long totalStaticIndexSize = 0; 804 private long totalStaticBloomSize = 0; 805 private long bloomFilterRequestsCount = 0; 806 private long bloomFilterNegativeResultsCount = 0; 807 private long bloomFilterEligibleRequestsCount = 0; 808 private long numMutationsWithoutWAL = 0; 809 private long dataInMemoryWithoutWAL = 0; 810 private double percentFileLocal = 0; 811 private double percentFileLocalPrimaryRegions = 0; 812 private double percentFileLocalSecondaryRegions = 0; 813 private long flushedCellsCount = 0; 814 private long compactedCellsCount = 0; 815 private long majorCompactedCellsCount = 0; 816 private long flushedCellsSize = 0; 817 private long compactedCellsSize = 0; 818 private long majorCompactedCellsSize = 0; 819 private long cellsCountCompactedToMob = 0; 820 private long cellsCountCompactedFromMob = 0; 821 private long cellsSizeCompactedToMob = 0; 822 private long cellsSizeCompactedFromMob = 0; 823 private long mobFlushCount = 0; 824 private long mobFlushedCellsCount = 0; 825 private long mobFlushedCellsSize = 0; 826 private long mobScanCellsCount = 0; 827 private long mobScanCellsSize = 0; 828 private long blockedRequestsCount = 0L; 829 private long averageRegionSize = 0L; 830 private long totalReadRequestsDelta = 0; 831 private long totalWriteRequestsDelta = 0; 832 833 private RegionMetricAggregate(RegionMetricAggregate other) { 834 if (other != null) { 835 requestsPerSecond = other.requestsPerSecond; 836 readRequestsRatePerSecond = other.readRequestsRatePerSecond; 837 writeRequestsRatePerSecond = other.writeRequestsRatePerSecond; 838 } 839 } 840 841 private void aggregate(HRegionServer regionServer, 842 Map<String, ArrayList<Long>> requestsCountCache) { 843 HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution(); 844 HDFSBlocksDistribution hdfsBlocksDistributionPrimaryRegions = new HDFSBlocksDistribution(); 845 HDFSBlocksDistribution hdfsBlocksDistributionSecondaryRegions = new HDFSBlocksDistribution(); 846 847 long avgAgeNumerator = 0; 848 long numHFiles = 0; 849 int regionCount = 0; 850 851 for (HRegion r : regionServer.getOnlineRegionsLocalContext()) { 852 Deltas deltas = calculateReadWriteDeltas(r, requestsCountCache); 853 totalReadRequestsDelta += deltas.readRequestsCountDelta; 854 totalWriteRequestsDelta += deltas.writeRequestsCountDelta; 855 856 numMutationsWithoutWAL += r.getNumMutationsWithoutWAL(); 857 dataInMemoryWithoutWAL += r.getDataInMemoryWithoutWAL(); 858 cpRequestsCount += r.getCpRequestsCount(); 859 readRequestsCount += r.getReadRequestsCount(); 860 filteredReadRequestsCount += r.getFilteredReadRequestsCount(); 861 writeRequestsCount += r.getWriteRequestsCount(); 862 checkAndMutateChecksFailed += r.getCheckAndMutateChecksFailed(); 863 checkAndMutateChecksPassed += r.getCheckAndMutateChecksPassed(); 864 blockedRequestsCount += r.getBlockedRequestsCount(); 865 866 StoreFileStats storeFileStats = aggregateStores(r.getStores()); 867 numHFiles += storeFileStats.numHFiles; 868 avgAgeNumerator += storeFileStats.avgAgeNumerator; 869 870 HDFSBlocksDistribution distro = r.getHDFSBlocksDistribution(); 871 hdfsBlocksDistribution.add(distro); 872 if (r.getRegionInfo().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 873 hdfsBlocksDistributionPrimaryRegions.add(distro); 874 } 875 if (r.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 876 hdfsBlocksDistributionSecondaryRegions.add(distro); 877 } 878 879 regionCount++; 880 } 881 882 float localityIndex = 883 hdfsBlocksDistribution.getBlockLocalityIndex(regionServer.getServerName().getHostname()); 884 percentFileLocal = Double.isNaN(localityIndex) ? 0 : (localityIndex * 100); 885 886 float localityIndexPrimaryRegions = hdfsBlocksDistributionPrimaryRegions 887 .getBlockLocalityIndex(regionServer.getServerName().getHostname()); 888 percentFileLocalPrimaryRegions = 889 Double.isNaN(localityIndexPrimaryRegions) ? 0 : (localityIndexPrimaryRegions * 100); 890 891 float localityIndexSecondaryRegions = hdfsBlocksDistributionSecondaryRegions 892 .getBlockLocalityIndex(regionServer.getServerName().getHostname()); 893 percentFileLocalSecondaryRegions = 894 Double.isNaN(localityIndexSecondaryRegions) ? 0 : (localityIndexSecondaryRegions * 100); 895 896 if (regionCount > 0) { 897 averageRegionSize = (memstoreSize + storeFileSize) / regionCount; 898 } 899 900 // if there were no store files, we'll never have updated this with Math.min 901 // so set it to 0, which is a better value to display in case of no storefiles 902 if (minStoreFileAge == Long.MAX_VALUE) { 903 this.minStoreFileAge = 0; 904 } 905 906 if (numHFiles != 0) { 907 avgStoreFileAge = avgAgeNumerator / numHFiles; 908 } 909 } 910 911 private static final class Deltas { 912 private final long readRequestsCountDelta; 913 private final long writeRequestsCountDelta; 914 915 private Deltas(long readRequestsCountDelta, long writeRequestsCountDelta) { 916 this.readRequestsCountDelta = readRequestsCountDelta; 917 this.writeRequestsCountDelta = writeRequestsCountDelta; 918 } 919 } 920 921 private Deltas calculateReadWriteDeltas(HRegion r, 922 Map<String, ArrayList<Long>> requestsCountCache) { 923 String encodedRegionName = r.getRegionInfo().getEncodedName(); 924 long currentReadRequestsCount = r.getReadRequestsCount(); 925 long currentWriteRequestsCount = r.getWriteRequestsCount(); 926 if (requestsCountCache.containsKey(encodedRegionName)) { 927 long lastReadRequestsCount = requestsCountCache.get(encodedRegionName).get(0); 928 long lastWriteRequestsCount = requestsCountCache.get(encodedRegionName).get(1); 929 930 // Update cache for our next comparison 931 requestsCountCache.get(encodedRegionName).set(0, currentReadRequestsCount); 932 requestsCountCache.get(encodedRegionName).set(1, currentWriteRequestsCount); 933 934 long readRequestsDelta = currentReadRequestsCount - lastReadRequestsCount; 935 long writeRequestsDelta = currentWriteRequestsCount - lastWriteRequestsCount; 936 return new Deltas(readRequestsDelta, writeRequestsDelta); 937 } else { 938 // List[0] -> readRequestCount 939 // List[1] -> writeRequestCount 940 ArrayList<Long> requests = new ArrayList<Long>(2); 941 requests.add(currentReadRequestsCount); 942 requests.add(currentWriteRequestsCount); 943 requestsCountCache.put(encodedRegionName, requests); 944 return new Deltas(currentReadRequestsCount, currentWriteRequestsCount); 945 } 946 } 947 948 public void updateRates(long timeSinceLastRun, long expectedPeriod, long lastStoreFileSize) { 949 requestsPerSecond = 950 (totalReadRequestsDelta + totalWriteRequestsDelta) / (timeSinceLastRun / 1000.0); 951 952 double readRequestsRatePerMilliSecond = (double) totalReadRequestsDelta / expectedPeriod; 953 double writeRequestsRatePerMilliSecond = (double) totalWriteRequestsDelta / expectedPeriod; 954 955 readRequestsRatePerSecond = readRequestsRatePerMilliSecond * 1000.0; 956 writeRequestsRatePerSecond = writeRequestsRatePerMilliSecond * 1000.0; 957 958 long intervalStoreFileSize = storeFileSize - lastStoreFileSize; 959 storeFileSizeGrowthRate = (double) intervalStoreFileSize * 1000.0 / expectedPeriod; 960 } 961 962 private static final class StoreFileStats { 963 private final long numHFiles; 964 private final long avgAgeNumerator; 965 966 private StoreFileStats(long numHFiles, long avgAgeNumerator) { 967 this.numHFiles = numHFiles; 968 this.avgAgeNumerator = avgAgeNumerator; 969 } 970 } 971 972 private StoreFileStats aggregateStores(List<HStore> stores) { 973 numStores += stores.size(); 974 long numHFiles = 0; 975 long avgAgeNumerator = 0; 976 for (Store store : stores) { 977 numStoreFiles += store.getStorefilesCount(); 978 memstoreSize += store.getMemStoreSize().getDataSize(); 979 onHeapMemstoreSize += store.getMemStoreSize().getHeapSize(); 980 offHeapMemstoreSize += store.getMemStoreSize().getOffHeapSize(); 981 storeFileSize += store.getStorefilesSize(); 982 maxStoreFileCount = Math.max(maxStoreFileCount, store.getStorefilesCount()); 983 984 maxStoreFileAge = 985 Math.max(store.getMaxStoreFileAge().orElse(maxStoreFileAge), maxStoreFileAge); 986 minStoreFileAge = 987 Math.min(store.getMinStoreFileAge().orElse(minStoreFileAge), minStoreFileAge); 988 989 long storeHFiles = store.getNumHFiles(); 990 numHFiles += storeHFiles; 991 numReferenceFiles += store.getNumReferenceFiles(); 992 993 OptionalDouble storeAvgStoreFileAge = store.getAvgStoreFileAge(); 994 if (storeAvgStoreFileAge.isPresent()) { 995 avgAgeNumerator = 996 (long) (avgAgeNumerator + storeAvgStoreFileAge.getAsDouble() * storeHFiles); 997 } 998 999 storefileIndexSize += store.getStorefilesRootLevelIndexSize(); 1000 totalStaticBloomSize += store.getTotalStaticBloomSize(); 1001 totalStaticIndexSize += store.getTotalStaticIndexSize(); 1002 bloomFilterRequestsCount += store.getBloomFilterRequestsCount(); 1003 bloomFilterNegativeResultsCount += store.getBloomFilterNegativeResultsCount(); 1004 bloomFilterEligibleRequestsCount += store.getBloomFilterEligibleRequestsCount(); 1005 flushedCellsCount += store.getFlushedCellsCount(); 1006 compactedCellsCount += store.getCompactedCellsCount(); 1007 majorCompactedCellsCount += store.getMajorCompactedCellsCount(); 1008 flushedCellsSize += store.getFlushedCellsSize(); 1009 compactedCellsSize += store.getCompactedCellsSize(); 1010 majorCompactedCellsSize += store.getMajorCompactedCellsSize(); 1011 if (store instanceof HMobStore) { 1012 HMobStore mobStore = (HMobStore) store; 1013 cellsCountCompactedToMob += mobStore.getCellsCountCompactedToMob(); 1014 cellsCountCompactedFromMob += mobStore.getCellsCountCompactedFromMob(); 1015 cellsSizeCompactedToMob += mobStore.getCellsSizeCompactedToMob(); 1016 cellsSizeCompactedFromMob += mobStore.getCellsSizeCompactedFromMob(); 1017 mobFlushCount += mobStore.getMobFlushCount(); 1018 mobFlushedCellsCount += mobStore.getMobFlushedCellsCount(); 1019 mobFlushedCellsSize += mobStore.getMobFlushedCellsSize(); 1020 mobScanCellsCount += mobStore.getMobScanCellsCount(); 1021 mobScanCellsSize += mobStore.getMobScanCellsSize(); 1022 } 1023 } 1024 1025 return new StoreFileStats(numHFiles, avgAgeNumerator); 1026 } 1027 1028 } 1029 1030 /** 1031 * This is the runnable that will be executed on the executor every PERIOD number of seconds It 1032 * will take metrics/numbers from all of the regions and use them to compute point in time 1033 * metrics. 1034 */ 1035 public class RegionServerMetricsWrapperRunnable implements Runnable { 1036 1037 private long lastRan = 0; 1038 private long lastStoreFileSize = 0; 1039 1040 @Override 1041 synchronized public void run() { 1042 try { 1043 RegionMetricAggregate newVal = new RegionMetricAggregate(aggregate); 1044 newVal.aggregate(regionServer, requestsCountCache); 1045 1046 // Compute the number of requests per second 1047 long currentTime = EnvironmentEdgeManager.currentTime(); 1048 1049 // assume that it took PERIOD seconds to start the executor. 1050 // this is a guess but it's a pretty good one. 1051 if (lastRan == 0) { 1052 lastRan = currentTime - period; 1053 } 1054 1055 long timeSinceLastRun = currentTime - lastRan; 1056 // If we've time traveled keep the last requests per second. 1057 if (timeSinceLastRun > 0) { 1058 newVal.updateRates(timeSinceLastRun, period, lastStoreFileSize); 1059 } 1060 1061 aggregate = newVal; 1062 1063 List<WALProvider> providers = regionServer.getWalFactory().getAllWALProviders(); 1064 long numWALFilesTmp = 0; 1065 long walFileSizeTmp = 0; 1066 for (WALProvider provider : providers) { 1067 numWALFilesTmp += provider.getNumLogFiles(); 1068 walFileSizeTmp += provider.getLogFileSize(); 1069 } 1070 numWALFiles = numWALFilesTmp; 1071 walFileSize = walFileSizeTmp; 1072 1073 mobFileCacheAccessCount = mobFileCache != null ? mobFileCache.getAccessCount() : 0L; 1074 mobFileCacheMissCount = mobFileCache != null ? mobFileCache.getMissCount() : 0L; 1075 mobFileCacheHitRatio = mobFileCache != null ? mobFileCache.getHitRatio() : 0.0; 1076 if (Double.isNaN(mobFileCacheHitRatio)) { 1077 mobFileCacheHitRatio = 0.0; 1078 } 1079 mobFileCacheEvictedCount = mobFileCache != null ? mobFileCache.getEvictedFileCount() : 0L; 1080 mobFileCacheCount = mobFileCache != null ? mobFileCache.getCacheSize() : 0; 1081 1082 lastStoreFileSize = aggregate.storeFileSize; 1083 lastRan = currentTime; 1084 } catch (Throwable e) { 1085 LOG.warn("Caught exception! Will suppress and retry.", e); 1086 } 1087 } 1088 } 1089 1090 @Override 1091 public long getHedgedReadOps() { 1092 return this.dfsHedgedReadMetrics == null ? 0 : this.dfsHedgedReadMetrics.getHedgedReadOps(); 1093 } 1094 1095 @Override 1096 public long getHedgedReadWins() { 1097 return this.dfsHedgedReadMetrics == null ? 0 : this.dfsHedgedReadMetrics.getHedgedReadWins(); 1098 } 1099 1100 @Override 1101 public long getHedgedReadOpsInCurThread() { 1102 return this.dfsHedgedReadMetrics == null 1103 ? 0 1104 : this.dfsHedgedReadMetrics.getHedgedReadOpsInCurThread(); 1105 } 1106 1107 @Override 1108 public long getTotalBytesRead() { 1109 return FSDataInputStreamWrapper.getTotalBytesRead(); 1110 } 1111 1112 @Override 1113 public long getLocalBytesRead() { 1114 return FSDataInputStreamWrapper.getLocalBytesRead(); 1115 } 1116 1117 @Override 1118 public long getShortCircuitBytesRead() { 1119 return FSDataInputStreamWrapper.getShortCircuitBytesRead(); 1120 } 1121 1122 @Override 1123 public long getZeroCopyBytesRead() { 1124 return FSDataInputStreamWrapper.getZeroCopyBytesRead(); 1125 } 1126 1127 @Override 1128 public long getBlockedRequestsCount() { 1129 return aggregate.blockedRequestsCount; 1130 } 1131 1132 @Override 1133 public long getAverageRegionSize() { 1134 return aggregate.averageRegionSize; 1135 } 1136 1137 @Override 1138 public long getDataMissCount() { 1139 return this.cacheStats != null ? this.cacheStats.getDataMissCount() : 0L; 1140 } 1141 1142 @Override 1143 public long getLeafIndexMissCount() { 1144 return this.cacheStats != null ? this.cacheStats.getLeafIndexMissCount() : 0L; 1145 } 1146 1147 @Override 1148 public long getBloomChunkMissCount() { 1149 return this.cacheStats != null ? this.cacheStats.getBloomChunkMissCount() : 0L; 1150 } 1151 1152 @Override 1153 public long getMetaMissCount() { 1154 return this.cacheStats != null ? this.cacheStats.getMetaMissCount() : 0L; 1155 } 1156 1157 @Override 1158 public long getRootIndexMissCount() { 1159 return this.cacheStats != null ? this.cacheStats.getRootIndexMissCount() : 0L; 1160 } 1161 1162 @Override 1163 public long getIntermediateIndexMissCount() { 1164 return this.cacheStats != null ? this.cacheStats.getIntermediateIndexMissCount() : 0L; 1165 } 1166 1167 @Override 1168 public long getFileInfoMissCount() { 1169 return this.cacheStats != null ? this.cacheStats.getFileInfoMissCount() : 0L; 1170 } 1171 1172 @Override 1173 public long getGeneralBloomMetaMissCount() { 1174 return this.cacheStats != null ? this.cacheStats.getGeneralBloomMetaMissCount() : 0L; 1175 } 1176 1177 @Override 1178 public long getDeleteFamilyBloomMissCount() { 1179 return this.cacheStats != null ? this.cacheStats.getDeleteFamilyBloomMissCount() : 0L; 1180 } 1181 1182 @Override 1183 public long getTrailerMissCount() { 1184 return this.cacheStats != null ? this.cacheStats.getTrailerMissCount() : 0L; 1185 } 1186 1187 @Override 1188 public long getDataHitCount() { 1189 return this.cacheStats != null ? this.cacheStats.getDataHitCount() : 0L; 1190 } 1191 1192 @Override 1193 public long getLeafIndexHitCount() { 1194 return this.cacheStats != null ? this.cacheStats.getLeafIndexHitCount() : 0L; 1195 } 1196 1197 @Override 1198 public long getBloomChunkHitCount() { 1199 return this.cacheStats != null ? this.cacheStats.getBloomChunkHitCount() : 0L; 1200 } 1201 1202 @Override 1203 public long getMetaHitCount() { 1204 return this.cacheStats != null ? this.cacheStats.getMetaHitCount() : 0L; 1205 } 1206 1207 @Override 1208 public long getRootIndexHitCount() { 1209 return this.cacheStats != null ? this.cacheStats.getRootIndexHitCount() : 0L; 1210 } 1211 1212 @Override 1213 public long getIntermediateIndexHitCount() { 1214 return this.cacheStats != null ? this.cacheStats.getIntermediateIndexHitCount() : 0L; 1215 } 1216 1217 @Override 1218 public long getFileInfoHitCount() { 1219 return this.cacheStats != null ? this.cacheStats.getFileInfoHitCount() : 0L; 1220 } 1221 1222 @Override 1223 public long getGeneralBloomMetaHitCount() { 1224 return this.cacheStats != null ? this.cacheStats.getGeneralBloomMetaHitCount() : 0L; 1225 } 1226 1227 @Override 1228 public long getDeleteFamilyBloomHitCount() { 1229 return this.cacheStats != null ? this.cacheStats.getDeleteFamilyBloomHitCount() : 0L; 1230 } 1231 1232 @Override 1233 public long getTrailerHitCount() { 1234 return this.cacheStats != null ? this.cacheStats.getTrailerHitCount() : 0L; 1235 } 1236 1237 @Override 1238 public long getByteBuffAllocatorHeapAllocationBytes() { 1239 return ByteBuffAllocator.getHeapAllocationBytes(allocator, ByteBuffAllocator.HEAP); 1240 } 1241 1242 @Override 1243 public long getByteBuffAllocatorPoolAllocationBytes() { 1244 return this.allocator.getPoolAllocationBytes(); 1245 } 1246 1247 @Override 1248 public double getByteBuffAllocatorHeapAllocRatio() { 1249 return ByteBuffAllocator.getHeapAllocationRatio(allocator, ByteBuffAllocator.HEAP); 1250 } 1251 1252 @Override 1253 public long getByteBuffAllocatorTotalBufferCount() { 1254 return this.allocator.getTotalBufferCount(); 1255 } 1256 1257 @Override 1258 public long getByteBuffAllocatorUsedBufferCount() { 1259 return this.allocator.getUsedBufferCount(); 1260 } 1261 1262 // Visible for testing 1263 long getPeriod() { 1264 return period; 1265 } 1266}