001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collection; 023import java.util.Collections; 024import java.util.List; 025import java.util.Map; 026import java.util.OptionalDouble; 027import java.util.concurrent.ConcurrentHashMap; 028import java.util.concurrent.ScheduledExecutorService; 029import java.util.concurrent.TimeUnit; 030import java.util.stream.Collectors; 031import org.apache.commons.lang3.StringUtils; 032import org.apache.hadoop.hbase.CompatibilitySingletonFactory; 033import org.apache.hadoop.hbase.HConstants; 034import org.apache.hadoop.hbase.HDFSBlocksDistribution; 035import org.apache.hadoop.hbase.ServerName; 036import org.apache.hadoop.hbase.client.RegionInfo; 037import org.apache.hadoop.hbase.io.ByteBuffAllocator; 038import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 039import org.apache.hadoop.hbase.io.asyncfs.monitor.ExcludeDatanodeManager; 040import org.apache.hadoop.hbase.io.hfile.BlockCache; 041import org.apache.hadoop.hbase.io.hfile.CacheStats; 042import org.apache.hadoop.hbase.io.hfile.CombinedBlockCache; 043import org.apache.hadoop.hbase.mob.MobFileCache; 044import org.apache.hadoop.hbase.regionserver.wal.MetricsWALSource; 045import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 046import org.apache.hadoop.hbase.util.FSUtils; 047import org.apache.hadoop.hbase.wal.WALProvider; 048import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 049import org.apache.hadoop.hdfs.DFSHedgedReadMetrics; 050import org.apache.hadoop.metrics2.MetricsExecutor; 051import org.apache.yetus.audience.InterfaceAudience; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055/** 056 * Impl for exposing HRegionServer Information through Hadoop's metrics 2 system. 057 */ 058@InterfaceAudience.Private 059class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper { 060 061 private static final Logger LOG = LoggerFactory.getLogger(MetricsRegionServerWrapperImpl.class); 062 063 private final HRegionServer regionServer; 064 private final MetricsWALSource metricsWALSource; 065 private final ByteBuffAllocator allocator; 066 067 private BlockCache blockCache; 068 private BlockCache l1Cache = null; 069 private BlockCache l2Cache = null; 070 private MobFileCache mobFileCache; 071 private CacheStats cacheStats; 072 private CacheStats l1Stats = null; 073 private CacheStats l2Stats = null; 074 private volatile long numWALFiles = 0; 075 private volatile long walFileSize = 0; 076 private volatile long mobFileCacheAccessCount = 0; 077 private volatile long mobFileCacheMissCount = 0; 078 private volatile double mobFileCacheHitRatio = 0; 079 private volatile long mobFileCacheEvictedCount = 0; 080 private volatile long mobFileCacheCount = 0; 081 082 private volatile RegionMetricAggregate aggregate = new RegionMetricAggregate(null); 083 084 protected final Map<String, ArrayList<Long>> requestsCountCache = 085 new ConcurrentHashMap<String, ArrayList<Long>>(); 086 087 private ScheduledExecutorService executor; 088 private Runnable runnable; 089 private long period; 090 091 /** 092 * Can be null if not on hdfs. 093 */ 094 private DFSHedgedReadMetrics dfsHedgedReadMetrics; 095 096 private final ExcludeDatanodeManager excludeDatanodeManager; 097 098 public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) { 099 this.regionServer = regionServer; 100 initBlockCache(); 101 initMobFileCache(); 102 this.excludeDatanodeManager = this.regionServer.getWalFactory().getExcludeDatanodeManager(); 103 104 this.period = regionServer.getConfiguration().getLong(HConstants.REGIONSERVER_METRICS_PERIOD, 105 HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD); 106 107 this.executor = CompatibilitySingletonFactory.getInstance(MetricsExecutor.class).getExecutor(); 108 this.runnable = new RegionServerMetricsWrapperRunnable(); 109 this.executor.scheduleWithFixedDelay(this.runnable, this.period, this.period, 110 TimeUnit.MILLISECONDS); 111 this.metricsWALSource = CompatibilitySingletonFactory.getInstance(MetricsWALSource.class); 112 this.allocator = regionServer.getRpcServer().getByteBuffAllocator(); 113 114 try { 115 this.dfsHedgedReadMetrics = FSUtils.getDFSHedgedReadMetrics(regionServer.getConfiguration()); 116 } catch (IOException e) { 117 LOG.warn("Failed to get hedged metrics", e); 118 } 119 if (LOG.isInfoEnabled()) { 120 LOG.info("Computing regionserver metrics every " + this.period + " milliseconds"); 121 } 122 } 123 124 private void initBlockCache() { 125 this.blockCache = this.regionServer.getBlockCache().orElse(null); 126 this.cacheStats = this.blockCache != null ? this.blockCache.getStats() : null; 127 if (this.cacheStats != null) { 128 if (this.cacheStats instanceof CombinedBlockCache.CombinedCacheStats) { 129 l1Stats = ((CombinedBlockCache.CombinedCacheStats) this.cacheStats).getLruCacheStats(); 130 l2Stats = ((CombinedBlockCache.CombinedCacheStats) this.cacheStats).getBucketCacheStats(); 131 } else { 132 l1Stats = this.cacheStats; 133 } 134 } 135 if (this.blockCache != null) { 136 if (this.blockCache instanceof CombinedBlockCache) { 137 l1Cache = ((CombinedBlockCache) this.blockCache).getFirstLevelCache(); 138 l2Cache = ((CombinedBlockCache) this.blockCache).getSecondLevelCache(); 139 } else { 140 l1Cache = this.blockCache; 141 } 142 } 143 } 144 145 /** 146 * Initializes the mob file cache. 147 */ 148 private void initMobFileCache() { 149 this.mobFileCache = this.regionServer.getMobFileCache().orElse(null); 150 } 151 152 @Override 153 public String getClusterId() { 154 return regionServer.getClusterId(); 155 } 156 157 @Override 158 public long getStartCode() { 159 return regionServer.getStartcode(); 160 } 161 162 @Override 163 public String getZookeeperQuorum() { 164 ZKWatcher zk = regionServer.getZooKeeper(); 165 if (zk == null) { 166 return ""; 167 } 168 return zk.getQuorum(); 169 } 170 171 @Override 172 public String getCoprocessors() { 173 String[] coprocessors = regionServer.getRegionServerCoprocessors(); 174 if (coprocessors == null || coprocessors.length == 0) { 175 return ""; 176 } 177 return StringUtils.join(coprocessors, ", "); 178 } 179 180 @Override 181 public String getServerName() { 182 ServerName serverName = regionServer.getServerName(); 183 if (serverName == null) { 184 return ""; 185 } 186 return serverName.getServerName(); 187 } 188 189 @Override 190 public long getNumOnlineRegions() { 191 Collection<HRegion> onlineRegionsLocalContext = regionServer.getOnlineRegionsLocalContext(); 192 if (onlineRegionsLocalContext == null) { 193 return 0; 194 } 195 return onlineRegionsLocalContext.size(); 196 } 197 198 @Override 199 public long getTotalRequestCount() { 200 return regionServer.getRpcServices().requestCount.sum(); 201 } 202 203 @Override 204 public long getTotalRowActionRequestCount() { 205 return aggregate.readRequestsCount + aggregate.writeRequestsCount; 206 } 207 208 @Override 209 public int getSplitQueueSize() { 210 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 211 return compactSplit == null ? 0 : compactSplit.getSplitQueueSize(); 212 } 213 214 @Override 215 public int getCompactionQueueSize() { 216 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 217 return compactSplit == null ? 0 : compactSplit.getCompactionQueueSize(); 218 } 219 220 @Override 221 public int getSmallCompactionQueueSize() { 222 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 223 return compactSplit == null ? 0 : compactSplit.getSmallCompactionQueueSize(); 224 } 225 226 @Override 227 public int getLargeCompactionQueueSize() { 228 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 229 return compactSplit == null ? 0 : compactSplit.getLargeCompactionQueueSize(); 230 } 231 232 @Override 233 public int getFlushQueueSize() { 234 // If there is no flusher there should be no queue. 235 if (this.regionServer.getMemStoreFlusher() == null) { 236 return 0; 237 } 238 return this.regionServer.getMemStoreFlusher().getFlushQueueSize(); 239 } 240 241 @Override 242 public long getBlockCacheCount() { 243 return this.blockCache != null ? this.blockCache.getBlockCount() : 0L; 244 } 245 246 @Override 247 public long getBlockCacheDataBlockCount() { 248 return this.blockCache != null ? this.blockCache.getDataBlockCount() : 0L; 249 } 250 251 @Override 252 public long getMemStoreLimit() { 253 return this.regionServer.getRegionServerAccounting().getGlobalMemStoreLimit(); 254 } 255 256 @Override 257 public long getOnHeapMemStoreLimit() { 258 return this.regionServer.getRegionServerAccounting().getGlobalOnHeapMemStoreLimit(); 259 } 260 261 @Override 262 public long getOffHeapMemStoreLimit() { 263 return this.regionServer.getRegionServerAccounting().getGlobalOffHeapMemStoreLimit(); 264 } 265 266 @Override 267 public long getBlockCacheSize() { 268 return this.blockCache != null ? this.blockCache.getCurrentSize() : 0L; 269 } 270 271 @Override 272 public long getBlockCacheFreeSize() { 273 return this.blockCache != null ? this.blockCache.getFreeSize() : 0L; 274 } 275 276 @Override 277 public long getBlockCacheHitCount() { 278 return this.cacheStats != null ? this.cacheStats.getHitCount() : 0L; 279 } 280 281 @Override 282 public long getBlockCachePrimaryHitCount() { 283 return this.cacheStats != null ? this.cacheStats.getPrimaryHitCount() : 0L; 284 } 285 286 @Override 287 public long getBlockCacheHitCachingCount() { 288 return this.cacheStats != null ? this.cacheStats.getHitCachingCount() : 0L; 289 } 290 291 @Override 292 public long getBlockCacheMissCount() { 293 return this.cacheStats != null ? this.cacheStats.getMissCount() : 0L; 294 } 295 296 @Override 297 public long getBlockCachePrimaryMissCount() { 298 return this.cacheStats != null ? this.cacheStats.getPrimaryMissCount() : 0L; 299 } 300 301 @Override 302 public long getBlockCacheMissCachingCount() { 303 return this.cacheStats != null ? this.cacheStats.getMissCachingCount() : 0L; 304 } 305 306 @Override 307 public long getBlockCacheEvictedCount() { 308 return this.cacheStats != null ? this.cacheStats.getEvictedCount() : 0L; 309 } 310 311 @Override 312 public long getBlockCachePrimaryEvictedCount() { 313 return this.cacheStats != null ? this.cacheStats.getPrimaryEvictedCount() : 0L; 314 } 315 316 @Override 317 public double getBlockCacheHitPercent() { 318 double ratio = this.cacheStats != null ? this.cacheStats.getHitRatio() : 0.0; 319 if (Double.isNaN(ratio)) { 320 ratio = 0; 321 } 322 return (ratio * 100); 323 } 324 325 @Override 326 public double getBlockCacheHitCachingPercent() { 327 double ratio = this.cacheStats != null ? this.cacheStats.getHitCachingRatio() : 0.0; 328 if (Double.isNaN(ratio)) { 329 ratio = 0; 330 } 331 return (ratio * 100); 332 } 333 334 @Override 335 public long getBlockCacheFailedInsertions() { 336 return this.cacheStats != null ? this.cacheStats.getFailedInserts() : 0L; 337 } 338 339 public long getL1CacheSize() { 340 return this.l1Cache != null ? this.l1Cache.getCurrentSize() : 0L; 341 } 342 343 public long getL1CacheFreeSize() { 344 return this.l1Cache != null ? this.l1Cache.getFreeSize() : 0L; 345 } 346 347 public long getL1CacheCount() { 348 return this.l1Cache != null ? this.l1Cache.getBlockCount() : 0L; 349 } 350 351 public long getL1CacheEvictedCount() { 352 return this.l1Stats != null ? this.l1Stats.getEvictedCount() : 0L; 353 } 354 355 public long getL2CacheSize() { 356 return this.l2Cache != null ? this.l2Cache.getCurrentSize() : 0L; 357 } 358 359 public long getL2CacheFreeSize() { 360 return this.l2Cache != null ? this.l2Cache.getFreeSize() : 0L; 361 } 362 363 public long getL2CacheCount() { 364 return this.l2Cache != null ? this.l2Cache.getBlockCount() : 0L; 365 } 366 367 public long getL2CacheEvictedCount() { 368 return this.l2Stats != null ? this.l2Stats.getEvictedCount() : 0L; 369 } 370 371 @Override 372 public long getL1CacheHitCount() { 373 return this.l1Stats != null ? this.l1Stats.getHitCount() : 0L; 374 } 375 376 @Override 377 public long getL1CacheMissCount() { 378 return this.l1Stats != null ? this.l1Stats.getMissCount() : 0L; 379 } 380 381 @Override 382 public double getL1CacheHitRatio() { 383 return this.l1Stats != null ? this.l1Stats.getHitRatio() : 0.0; 384 } 385 386 @Override 387 public double getL1CacheMissRatio() { 388 return this.l1Stats != null ? this.l1Stats.getMissRatio() : 0.0; 389 } 390 391 @Override 392 public long getL2CacheHitCount() { 393 return this.l2Stats != null ? this.l2Stats.getHitCount() : 0L; 394 } 395 396 @Override 397 public long getL2CacheMissCount() { 398 return this.l2Stats != null ? this.l2Stats.getMissCount() : 0L; 399 } 400 401 @Override 402 public double getL2CacheHitRatio() { 403 return this.l2Stats != null ? this.l2Stats.getHitRatio() : 0.0; 404 } 405 406 @Override 407 public double getL2CacheMissRatio() { 408 return this.l2Stats != null ? this.l2Stats.getMissRatio() : 0.0; 409 } 410 411 @Override 412 public void forceRecompute() { 413 this.runnable.run(); 414 } 415 416 @Override 417 public long getNumStores() { 418 return aggregate.numStores; 419 } 420 421 @Override 422 public long getNumWALFiles() { 423 return numWALFiles; 424 } 425 426 @Override 427 public long getWALFileSize() { 428 return walFileSize; 429 } 430 431 @Override 432 public List<String> getWALExcludeDNs() { 433 if (excludeDatanodeManager == null) { 434 return Collections.emptyList(); 435 } 436 return excludeDatanodeManager.getExcludeDNs().entrySet().stream() 437 .map(e -> e.getKey().toString() + ", " + e.getValue()).collect(Collectors.toList()); 438 } 439 440 @Override 441 public long getNumWALSlowAppend() { 442 return metricsWALSource.getSlowAppendCount(); 443 } 444 445 @Override 446 public long getNumStoreFiles() { 447 return aggregate.numStoreFiles; 448 } 449 450 @Override 451 public long getMaxStoreFiles() { 452 return aggregate.maxStoreFileCount; 453 } 454 455 @Override 456 public long getMaxStoreFileAge() { 457 return aggregate.maxStoreFileAge; 458 } 459 460 @Override 461 public long getMinStoreFileAge() { 462 return aggregate.minStoreFileAge; 463 } 464 465 @Override 466 public long getAvgStoreFileAge() { 467 return aggregate.avgStoreFileAge; 468 } 469 470 @Override 471 public long getNumReferenceFiles() { 472 return aggregate.numReferenceFiles; 473 } 474 475 @Override 476 public long getMemStoreSize() { 477 return aggregate.memstoreSize; 478 } 479 480 @Override 481 public long getOnHeapMemStoreSize() { 482 return aggregate.onHeapMemstoreSize; 483 } 484 485 @Override 486 public long getOffHeapMemStoreSize() { 487 return aggregate.offHeapMemstoreSize; 488 } 489 490 @Override 491 public long getStoreFileSize() { 492 return aggregate.storeFileSize; 493 } 494 495 @Override 496 public double getStoreFileSizeGrowthRate() { 497 return aggregate.storeFileSizeGrowthRate; 498 } 499 500 @Override 501 public double getRequestsPerSecond() { 502 return aggregate.requestsPerSecond; 503 } 504 505 @Override 506 public long getReadRequestsCount() { 507 return aggregate.readRequestsCount; 508 } 509 510 @Override 511 public long getCpRequestsCount() { 512 return aggregate.cpRequestsCount; 513 } 514 515 @Override 516 public double getReadRequestsRatePerSecond() { 517 return aggregate.readRequestsRatePerSecond; 518 } 519 520 @Override 521 public long getFilteredReadRequestsCount() { 522 return aggregate.filteredReadRequestsCount; 523 } 524 525 @Override 526 public long getWriteRequestsCount() { 527 return aggregate.writeRequestsCount; 528 } 529 530 @Override 531 public double getWriteRequestsRatePerSecond() { 532 return aggregate.writeRequestsRatePerSecond; 533 } 534 535 @Override 536 public long getRpcGetRequestsCount() { 537 return regionServer.getRpcServices().rpcGetRequestCount.sum(); 538 } 539 540 @Override 541 public long getRpcScanRequestsCount() { 542 return regionServer.getRpcServices().rpcScanRequestCount.sum(); 543 } 544 545 @Override 546 public long getRpcFullScanRequestsCount() { 547 return regionServer.getRpcServices().rpcFullScanRequestCount.sum(); 548 } 549 550 @Override 551 public long getRpcMultiRequestsCount() { 552 return regionServer.getRpcServices().rpcMultiRequestCount.sum(); 553 } 554 555 @Override 556 public long getRpcMutateRequestsCount() { 557 return regionServer.getRpcServices().rpcMutateRequestCount.sum(); 558 } 559 560 @Override 561 public long getCheckAndMutateChecksFailed() { 562 return aggregate.checkAndMutateChecksFailed; 563 } 564 565 @Override 566 public long getCheckAndMutateChecksPassed() { 567 return aggregate.checkAndMutateChecksPassed; 568 } 569 570 @Override 571 public long getStoreFileIndexSize() { 572 return aggregate.storefileIndexSize; 573 } 574 575 @Override 576 public long getTotalStaticIndexSize() { 577 return aggregate.totalStaticIndexSize; 578 } 579 580 @Override 581 public long getTotalStaticBloomSize() { 582 return aggregate.totalStaticBloomSize; 583 } 584 585 @Override 586 public long getBloomFilterRequestsCount() { 587 return aggregate.bloomFilterRequestsCount; 588 } 589 590 @Override 591 public long getBloomFilterNegativeResultsCount() { 592 return aggregate.bloomFilterNegativeResultsCount; 593 } 594 595 @Override 596 public long getBloomFilterEligibleRequestsCount() { 597 return aggregate.bloomFilterEligibleRequestsCount; 598 } 599 600 @Override 601 public long getNumMutationsWithoutWAL() { 602 return aggregate.numMutationsWithoutWAL; 603 } 604 605 @Override 606 public long getDataInMemoryWithoutWAL() { 607 return aggregate.dataInMemoryWithoutWAL; 608 } 609 610 @Override 611 public double getPercentFileLocal() { 612 return aggregate.percentFileLocal; 613 } 614 615 @Override 616 public double getPercentFileLocalSecondaryRegions() { 617 return aggregate.percentFileLocalSecondaryRegions; 618 } 619 620 @Override 621 public long getUpdatesBlockedTime() { 622 if (this.regionServer.getMemStoreFlusher() == null) { 623 return 0; 624 } 625 return this.regionServer.getMemStoreFlusher().getUpdatesBlockedMsHighWater().sum(); 626 } 627 628 @Override 629 public long getFlushedCellsCount() { 630 return aggregate.flushedCellsCount; 631 } 632 633 @Override 634 public long getCompactedCellsCount() { 635 return aggregate.compactedCellsCount; 636 } 637 638 @Override 639 public long getMajorCompactedCellsCount() { 640 return aggregate.majorCompactedCellsCount; 641 } 642 643 @Override 644 public long getFlushedCellsSize() { 645 return aggregate.flushedCellsSize; 646 } 647 648 @Override 649 public long getCompactedCellsSize() { 650 return aggregate.compactedCellsSize; 651 } 652 653 @Override 654 public long getMajorCompactedCellsSize() { 655 return aggregate.majorCompactedCellsSize; 656 } 657 658 @Override 659 public long getCellsCountCompactedFromMob() { 660 return aggregate.cellsCountCompactedFromMob; 661 } 662 663 @Override 664 public long getCellsCountCompactedToMob() { 665 return aggregate.cellsCountCompactedToMob; 666 } 667 668 @Override 669 public long getCellsSizeCompactedFromMob() { 670 return aggregate.cellsSizeCompactedFromMob; 671 } 672 673 @Override 674 public long getCellsSizeCompactedToMob() { 675 return aggregate.cellsSizeCompactedToMob; 676 } 677 678 @Override 679 public long getMobFlushCount() { 680 return aggregate.mobFlushCount; 681 } 682 683 @Override 684 public long getMobFlushedCellsCount() { 685 return aggregate.mobFlushedCellsCount; 686 } 687 688 @Override 689 public long getMobFlushedCellsSize() { 690 return aggregate.mobFlushedCellsSize; 691 } 692 693 @Override 694 public long getMobScanCellsCount() { 695 return aggregate.mobScanCellsCount; 696 } 697 698 @Override 699 public long getMobScanCellsSize() { 700 return aggregate.mobScanCellsSize; 701 } 702 703 @Override 704 public long getMobFileCacheAccessCount() { 705 return mobFileCacheAccessCount; 706 } 707 708 @Override 709 public long getMobFileCacheMissCount() { 710 return mobFileCacheMissCount; 711 } 712 713 @Override 714 public long getMobFileCacheCount() { 715 return mobFileCacheCount; 716 } 717 718 @Override 719 public long getMobFileCacheEvictedCount() { 720 return mobFileCacheEvictedCount; 721 } 722 723 @Override 724 public double getMobFileCacheHitPercent() { 725 return mobFileCacheHitRatio * 100; 726 } 727 728 @Override 729 public int getActiveScanners() { 730 return regionServer.getRpcServices().getScannersCount(); 731 } 732 733 private static final class RegionMetricAggregate { 734 private long numStores = 0; 735 private long numStoreFiles = 0; 736 private long memstoreSize = 0; 737 private long onHeapMemstoreSize = 0; 738 private long offHeapMemstoreSize = 0; 739 private long storeFileSize = 0; 740 private double storeFileSizeGrowthRate = 0; 741 private long maxStoreFileCount = 0; 742 private long maxStoreFileAge = 0; 743 private long minStoreFileAge = Long.MAX_VALUE; 744 private long avgStoreFileAge = 0; 745 private long numReferenceFiles = 0; 746 747 private long cpRequestsCount = 0; 748 private double requestsPerSecond = 0.0; 749 private long readRequestsCount = 0; 750 private double readRequestsRatePerSecond = 0; 751 private long filteredReadRequestsCount = 0; 752 private long writeRequestsCount = 0; 753 private double writeRequestsRatePerSecond = 0; 754 private long checkAndMutateChecksFailed = 0; 755 private long checkAndMutateChecksPassed = 0; 756 private long storefileIndexSize = 0; 757 private long totalStaticIndexSize = 0; 758 private long totalStaticBloomSize = 0; 759 private long bloomFilterRequestsCount = 0; 760 private long bloomFilterNegativeResultsCount = 0; 761 private long bloomFilterEligibleRequestsCount = 0; 762 private long numMutationsWithoutWAL = 0; 763 private long dataInMemoryWithoutWAL = 0; 764 private double percentFileLocal = 0; 765 private double percentFileLocalSecondaryRegions = 0; 766 private long flushedCellsCount = 0; 767 private long compactedCellsCount = 0; 768 private long majorCompactedCellsCount = 0; 769 private long flushedCellsSize = 0; 770 private long compactedCellsSize = 0; 771 private long majorCompactedCellsSize = 0; 772 private long cellsCountCompactedToMob = 0; 773 private long cellsCountCompactedFromMob = 0; 774 private long cellsSizeCompactedToMob = 0; 775 private long cellsSizeCompactedFromMob = 0; 776 private long mobFlushCount = 0; 777 private long mobFlushedCellsCount = 0; 778 private long mobFlushedCellsSize = 0; 779 private long mobScanCellsCount = 0; 780 private long mobScanCellsSize = 0; 781 private long blockedRequestsCount = 0L; 782 private long averageRegionSize = 0L; 783 private long totalReadRequestsDelta = 0; 784 private long totalWriteRequestsDelta = 0; 785 786 private RegionMetricAggregate(RegionMetricAggregate other) { 787 if (other != null) { 788 requestsPerSecond = other.requestsPerSecond; 789 readRequestsRatePerSecond = other.readRequestsRatePerSecond; 790 writeRequestsRatePerSecond = other.writeRequestsRatePerSecond; 791 } 792 } 793 794 private void aggregate(HRegionServer regionServer, 795 Map<String, ArrayList<Long>> requestsCountCache) { 796 HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution(); 797 HDFSBlocksDistribution hdfsBlocksDistributionSecondaryRegions = new HDFSBlocksDistribution(); 798 799 long avgAgeNumerator = 0; 800 long numHFiles = 0; 801 int regionCount = 0; 802 803 for (HRegion r : regionServer.getOnlineRegionsLocalContext()) { 804 Deltas deltas = calculateReadWriteDeltas(r, requestsCountCache); 805 totalReadRequestsDelta += deltas.readRequestsCountDelta; 806 totalWriteRequestsDelta += deltas.writeRequestsCountDelta; 807 808 numMutationsWithoutWAL += r.getNumMutationsWithoutWAL(); 809 dataInMemoryWithoutWAL += r.getDataInMemoryWithoutWAL(); 810 cpRequestsCount += r.getCpRequestsCount(); 811 readRequestsCount += r.getReadRequestsCount(); 812 filteredReadRequestsCount += r.getFilteredReadRequestsCount(); 813 writeRequestsCount += r.getWriteRequestsCount(); 814 checkAndMutateChecksFailed += r.getCheckAndMutateChecksFailed(); 815 checkAndMutateChecksPassed += r.getCheckAndMutateChecksPassed(); 816 blockedRequestsCount += r.getBlockedRequestsCount(); 817 818 StoreFileStats storeFileStats = aggregateStores(r.getStores()); 819 numHFiles += storeFileStats.numHFiles; 820 avgAgeNumerator += storeFileStats.avgAgeNumerator; 821 822 HDFSBlocksDistribution distro = r.getHDFSBlocksDistribution(); 823 hdfsBlocksDistribution.add(distro); 824 if (r.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 825 hdfsBlocksDistributionSecondaryRegions.add(distro); 826 } 827 828 regionCount++; 829 } 830 831 float localityIndex = 832 hdfsBlocksDistribution.getBlockLocalityIndex(regionServer.getServerName().getHostname()); 833 percentFileLocal = Double.isNaN(localityIndex) ? 0 : (localityIndex * 100); 834 835 float localityIndexSecondaryRegions = hdfsBlocksDistributionSecondaryRegions 836 .getBlockLocalityIndex(regionServer.getServerName().getHostname()); 837 percentFileLocalSecondaryRegions = 838 Double.isNaN(localityIndexSecondaryRegions) ? 0 : (localityIndexSecondaryRegions * 100); 839 840 if (regionCount > 0) { 841 averageRegionSize = (memstoreSize + storeFileSize) / regionCount; 842 } 843 844 // if there were no store files, we'll never have updated this with Math.min 845 // so set it to 0, which is a better value to display in case of no storefiles 846 if (minStoreFileAge == Long.MAX_VALUE) { 847 this.minStoreFileAge = 0; 848 } 849 850 if (numHFiles != 0) { 851 avgStoreFileAge = avgAgeNumerator / numHFiles; 852 } 853 } 854 855 private static final class Deltas { 856 private final long readRequestsCountDelta; 857 private final long writeRequestsCountDelta; 858 859 private Deltas(long readRequestsCountDelta, long writeRequestsCountDelta) { 860 this.readRequestsCountDelta = readRequestsCountDelta; 861 this.writeRequestsCountDelta = writeRequestsCountDelta; 862 } 863 } 864 865 private Deltas calculateReadWriteDeltas(HRegion r, 866 Map<String, ArrayList<Long>> requestsCountCache) { 867 String encodedRegionName = r.getRegionInfo().getEncodedName(); 868 long currentReadRequestsCount = r.getReadRequestsCount(); 869 long currentWriteRequestsCount = r.getWriteRequestsCount(); 870 if (requestsCountCache.containsKey(encodedRegionName)) { 871 long lastReadRequestsCount = requestsCountCache.get(encodedRegionName).get(0); 872 long lastWriteRequestsCount = requestsCountCache.get(encodedRegionName).get(1); 873 874 // Update cache for our next comparison 875 requestsCountCache.get(encodedRegionName).set(0, currentReadRequestsCount); 876 requestsCountCache.get(encodedRegionName).set(1, currentWriteRequestsCount); 877 878 long readRequestsDelta = currentReadRequestsCount - lastReadRequestsCount; 879 long writeRequestsDelta = currentWriteRequestsCount - lastWriteRequestsCount; 880 return new Deltas(readRequestsDelta, writeRequestsDelta); 881 } else { 882 // List[0] -> readRequestCount 883 // List[1] -> writeRequestCount 884 ArrayList<Long> requests = new ArrayList<Long>(2); 885 requests.add(currentReadRequestsCount); 886 requests.add(currentWriteRequestsCount); 887 requestsCountCache.put(encodedRegionName, requests); 888 return new Deltas(currentReadRequestsCount, currentWriteRequestsCount); 889 } 890 } 891 892 public void updateRates(long timeSinceLastRun, long expectedPeriod, long lastStoreFileSize) { 893 requestsPerSecond = 894 (totalReadRequestsDelta + totalWriteRequestsDelta) / (timeSinceLastRun / 1000.0); 895 896 double readRequestsRatePerMilliSecond = (double) totalReadRequestsDelta / expectedPeriod; 897 double writeRequestsRatePerMilliSecond = (double) totalWriteRequestsDelta / expectedPeriod; 898 899 readRequestsRatePerSecond = readRequestsRatePerMilliSecond * 1000.0; 900 writeRequestsRatePerSecond = writeRequestsRatePerMilliSecond * 1000.0; 901 902 long intervalStoreFileSize = storeFileSize - lastStoreFileSize; 903 storeFileSizeGrowthRate = (double) intervalStoreFileSize * 1000.0 / expectedPeriod; 904 } 905 906 private static final class StoreFileStats { 907 private final long numHFiles; 908 private final long avgAgeNumerator; 909 910 private StoreFileStats(long numHFiles, long avgAgeNumerator) { 911 this.numHFiles = numHFiles; 912 this.avgAgeNumerator = avgAgeNumerator; 913 } 914 } 915 916 private StoreFileStats aggregateStores(List<HStore> stores) { 917 numStores += stores.size(); 918 long numHFiles = 0; 919 long avgAgeNumerator = 0; 920 for (Store store : stores) { 921 numStoreFiles += store.getStorefilesCount(); 922 memstoreSize += store.getMemStoreSize().getDataSize(); 923 onHeapMemstoreSize += store.getMemStoreSize().getHeapSize(); 924 offHeapMemstoreSize += store.getMemStoreSize().getOffHeapSize(); 925 storeFileSize += store.getStorefilesSize(); 926 maxStoreFileCount = Math.max(maxStoreFileCount, store.getStorefilesCount()); 927 928 maxStoreFileAge = 929 Math.max(store.getMaxStoreFileAge().orElse(maxStoreFileAge), maxStoreFileAge); 930 minStoreFileAge = 931 Math.min(store.getMinStoreFileAge().orElse(minStoreFileAge), minStoreFileAge); 932 933 long storeHFiles = store.getNumHFiles(); 934 numHFiles += storeHFiles; 935 numReferenceFiles += store.getNumReferenceFiles(); 936 937 OptionalDouble storeAvgStoreFileAge = store.getAvgStoreFileAge(); 938 if (storeAvgStoreFileAge.isPresent()) { 939 avgAgeNumerator = 940 (long) (avgAgeNumerator + storeAvgStoreFileAge.getAsDouble() * storeHFiles); 941 } 942 943 storefileIndexSize += store.getStorefilesRootLevelIndexSize(); 944 totalStaticBloomSize += store.getTotalStaticBloomSize(); 945 totalStaticIndexSize += store.getTotalStaticIndexSize(); 946 bloomFilterRequestsCount += store.getBloomFilterRequestsCount(); 947 bloomFilterNegativeResultsCount += store.getBloomFilterNegativeResultsCount(); 948 bloomFilterEligibleRequestsCount += store.getBloomFilterEligibleRequestsCount(); 949 flushedCellsCount += store.getFlushedCellsCount(); 950 compactedCellsCount += store.getCompactedCellsCount(); 951 majorCompactedCellsCount += store.getMajorCompactedCellsCount(); 952 flushedCellsSize += store.getFlushedCellsSize(); 953 compactedCellsSize += store.getCompactedCellsSize(); 954 majorCompactedCellsSize += store.getMajorCompactedCellsSize(); 955 if (store instanceof HMobStore) { 956 HMobStore mobStore = (HMobStore) store; 957 cellsCountCompactedToMob += mobStore.getCellsCountCompactedToMob(); 958 cellsCountCompactedFromMob += mobStore.getCellsCountCompactedFromMob(); 959 cellsSizeCompactedToMob += mobStore.getCellsSizeCompactedToMob(); 960 cellsSizeCompactedFromMob += mobStore.getCellsSizeCompactedFromMob(); 961 mobFlushCount += mobStore.getMobFlushCount(); 962 mobFlushedCellsCount += mobStore.getMobFlushedCellsCount(); 963 mobFlushedCellsSize += mobStore.getMobFlushedCellsSize(); 964 mobScanCellsCount += mobStore.getMobScanCellsCount(); 965 mobScanCellsSize += mobStore.getMobScanCellsSize(); 966 } 967 } 968 969 return new StoreFileStats(numHFiles, avgAgeNumerator); 970 } 971 972 } 973 974 /** 975 * This is the runnable that will be executed on the executor every PERIOD number of seconds It 976 * will take metrics/numbers from all of the regions and use them to compute point in time 977 * metrics. 978 */ 979 public class RegionServerMetricsWrapperRunnable implements Runnable { 980 981 private long lastRan = 0; 982 private long lastStoreFileSize = 0; 983 984 @Override 985 synchronized public void run() { 986 try { 987 RegionMetricAggregate newVal = new RegionMetricAggregate(aggregate); 988 newVal.aggregate(regionServer, requestsCountCache); 989 990 // Compute the number of requests per second 991 long currentTime = EnvironmentEdgeManager.currentTime(); 992 993 // assume that it took PERIOD seconds to start the executor. 994 // this is a guess but it's a pretty good one. 995 if (lastRan == 0) { 996 lastRan = currentTime - period; 997 } 998 999 long timeSinceLastRun = currentTime - lastRan; 1000 // If we've time traveled keep the last requests per second. 1001 if (timeSinceLastRun > 0) { 1002 newVal.updateRates(timeSinceLastRun, period, lastStoreFileSize); 1003 } 1004 1005 aggregate = newVal; 1006 1007 List<WALProvider> providers = regionServer.getWalFactory().getAllWALProviders(); 1008 for (WALProvider provider : providers) { 1009 numWALFiles += provider.getNumLogFiles(); 1010 walFileSize += provider.getLogFileSize(); 1011 } 1012 1013 mobFileCacheAccessCount = mobFileCache != null ? mobFileCache.getAccessCount() : 0L; 1014 mobFileCacheMissCount = mobFileCache != null ? mobFileCache.getMissCount() : 0L; 1015 mobFileCacheHitRatio = mobFileCache != null ? mobFileCache.getHitRatio() : 0.0; 1016 if (Double.isNaN(mobFileCacheHitRatio)) { 1017 mobFileCacheHitRatio = 0.0; 1018 } 1019 mobFileCacheEvictedCount = mobFileCache != null ? mobFileCache.getEvictedFileCount() : 0L; 1020 mobFileCacheCount = mobFileCache != null ? mobFileCache.getCacheSize() : 0; 1021 1022 lastStoreFileSize = aggregate.storeFileSize; 1023 lastRan = currentTime; 1024 } catch (Throwable e) { 1025 LOG.warn("Caught exception! Will suppress and retry.", e); 1026 } 1027 } 1028 } 1029 1030 @Override 1031 public long getHedgedReadOps() { 1032 return this.dfsHedgedReadMetrics == null ? 0 : this.dfsHedgedReadMetrics.getHedgedReadOps(); 1033 } 1034 1035 @Override 1036 public long getHedgedReadWins() { 1037 return this.dfsHedgedReadMetrics == null ? 0 : this.dfsHedgedReadMetrics.getHedgedReadWins(); 1038 } 1039 1040 @Override 1041 public long getHedgedReadOpsInCurThread() { 1042 return this.dfsHedgedReadMetrics == null 1043 ? 0 1044 : this.dfsHedgedReadMetrics.getHedgedReadOpsInCurThread(); 1045 } 1046 1047 @Override 1048 public long getTotalBytesRead() { 1049 return FSDataInputStreamWrapper.getTotalBytesRead(); 1050 } 1051 1052 @Override 1053 public long getLocalBytesRead() { 1054 return FSDataInputStreamWrapper.getLocalBytesRead(); 1055 } 1056 1057 @Override 1058 public long getShortCircuitBytesRead() { 1059 return FSDataInputStreamWrapper.getShortCircuitBytesRead(); 1060 } 1061 1062 @Override 1063 public long getZeroCopyBytesRead() { 1064 return FSDataInputStreamWrapper.getZeroCopyBytesRead(); 1065 } 1066 1067 @Override 1068 public long getBlockedRequestsCount() { 1069 return aggregate.blockedRequestsCount; 1070 } 1071 1072 @Override 1073 public long getAverageRegionSize() { 1074 return aggregate.averageRegionSize; 1075 } 1076 1077 @Override 1078 public long getDataMissCount() { 1079 return this.cacheStats != null ? this.cacheStats.getDataMissCount() : 0L; 1080 } 1081 1082 @Override 1083 public long getLeafIndexMissCount() { 1084 return this.cacheStats != null ? this.cacheStats.getLeafIndexMissCount() : 0L; 1085 } 1086 1087 @Override 1088 public long getBloomChunkMissCount() { 1089 return this.cacheStats != null ? this.cacheStats.getBloomChunkMissCount() : 0L; 1090 } 1091 1092 @Override 1093 public long getMetaMissCount() { 1094 return this.cacheStats != null ? this.cacheStats.getMetaMissCount() : 0L; 1095 } 1096 1097 @Override 1098 public long getRootIndexMissCount() { 1099 return this.cacheStats != null ? this.cacheStats.getRootIndexMissCount() : 0L; 1100 } 1101 1102 @Override 1103 public long getIntermediateIndexMissCount() { 1104 return this.cacheStats != null ? this.cacheStats.getIntermediateIndexMissCount() : 0L; 1105 } 1106 1107 @Override 1108 public long getFileInfoMissCount() { 1109 return this.cacheStats != null ? this.cacheStats.getFileInfoMissCount() : 0L; 1110 } 1111 1112 @Override 1113 public long getGeneralBloomMetaMissCount() { 1114 return this.cacheStats != null ? this.cacheStats.getGeneralBloomMetaMissCount() : 0L; 1115 } 1116 1117 @Override 1118 public long getDeleteFamilyBloomMissCount() { 1119 return this.cacheStats != null ? this.cacheStats.getDeleteFamilyBloomMissCount() : 0L; 1120 } 1121 1122 @Override 1123 public long getTrailerMissCount() { 1124 return this.cacheStats != null ? this.cacheStats.getTrailerMissCount() : 0L; 1125 } 1126 1127 @Override 1128 public long getDataHitCount() { 1129 return this.cacheStats != null ? this.cacheStats.getDataHitCount() : 0L; 1130 } 1131 1132 @Override 1133 public long getLeafIndexHitCount() { 1134 return this.cacheStats != null ? this.cacheStats.getLeafIndexHitCount() : 0L; 1135 } 1136 1137 @Override 1138 public long getBloomChunkHitCount() { 1139 return this.cacheStats != null ? this.cacheStats.getBloomChunkHitCount() : 0L; 1140 } 1141 1142 @Override 1143 public long getMetaHitCount() { 1144 return this.cacheStats != null ? this.cacheStats.getMetaHitCount() : 0L; 1145 } 1146 1147 @Override 1148 public long getRootIndexHitCount() { 1149 return this.cacheStats != null ? this.cacheStats.getRootIndexHitCount() : 0L; 1150 } 1151 1152 @Override 1153 public long getIntermediateIndexHitCount() { 1154 return this.cacheStats != null ? this.cacheStats.getIntermediateIndexHitCount() : 0L; 1155 } 1156 1157 @Override 1158 public long getFileInfoHitCount() { 1159 return this.cacheStats != null ? this.cacheStats.getFileInfoHitCount() : 0L; 1160 } 1161 1162 @Override 1163 public long getGeneralBloomMetaHitCount() { 1164 return this.cacheStats != null ? this.cacheStats.getGeneralBloomMetaHitCount() : 0L; 1165 } 1166 1167 @Override 1168 public long getDeleteFamilyBloomHitCount() { 1169 return this.cacheStats != null ? this.cacheStats.getDeleteFamilyBloomHitCount() : 0L; 1170 } 1171 1172 @Override 1173 public long getTrailerHitCount() { 1174 return this.cacheStats != null ? this.cacheStats.getTrailerHitCount() : 0L; 1175 } 1176 1177 @Override 1178 public long getByteBuffAllocatorHeapAllocationBytes() { 1179 return ByteBuffAllocator.getHeapAllocationBytes(allocator, ByteBuffAllocator.HEAP); 1180 } 1181 1182 @Override 1183 public long getByteBuffAllocatorPoolAllocationBytes() { 1184 return this.allocator.getPoolAllocationBytes(); 1185 } 1186 1187 @Override 1188 public double getByteBuffAllocatorHeapAllocRatio() { 1189 return ByteBuffAllocator.getHeapAllocationRatio(allocator, ByteBuffAllocator.HEAP); 1190 } 1191 1192 @Override 1193 public long getByteBuffAllocatorTotalBufferCount() { 1194 return this.allocator.getTotalBufferCount(); 1195 } 1196 1197 @Override 1198 public long getByteBuffAllocatorUsedBufferCount() { 1199 return this.allocator.getUsedBufferCount(); 1200 } 1201 1202 // Visible for testing 1203 long getPeriod() { 1204 return period; 1205 } 1206}