001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collection; 023import java.util.Collections; 024import java.util.List; 025import java.util.Map; 026import java.util.OptionalDouble; 027import java.util.concurrent.ConcurrentHashMap; 028import java.util.concurrent.ScheduledExecutorService; 029import java.util.concurrent.TimeUnit; 030import java.util.stream.Collectors; 031import org.apache.commons.lang3.StringUtils; 032import org.apache.hadoop.hbase.CompatibilitySingletonFactory; 033import org.apache.hadoop.hbase.HConstants; 034import org.apache.hadoop.hbase.HDFSBlocksDistribution; 035import org.apache.hadoop.hbase.ServerName; 036import org.apache.hadoop.hbase.client.RegionInfo; 037import org.apache.hadoop.hbase.io.ByteBuffAllocator; 038import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 039import org.apache.hadoop.hbase.io.asyncfs.monitor.ExcludeDatanodeManager; 040import org.apache.hadoop.hbase.io.hfile.BlockCache; 041import org.apache.hadoop.hbase.io.hfile.CacheStats; 042import org.apache.hadoop.hbase.io.hfile.CombinedBlockCache; 043import org.apache.hadoop.hbase.mob.MobFileCache; 044import org.apache.hadoop.hbase.regionserver.wal.MetricsWALSource; 045import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 046import org.apache.hadoop.hbase.util.FSUtils; 047import org.apache.hadoop.hbase.wal.WALProvider; 048import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 049import org.apache.hadoop.hdfs.DFSHedgedReadMetrics; 050import org.apache.hadoop.metrics2.MetricsExecutor; 051import org.apache.yetus.audience.InterfaceAudience; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055/** 056 * Impl for exposing HRegionServer Information through Hadoop's metrics 2 system. 057 */ 058@InterfaceAudience.Private 059class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper { 060 061 private static final Logger LOG = LoggerFactory.getLogger(MetricsRegionServerWrapperImpl.class); 062 063 private final HRegionServer regionServer; 064 private final MetricsWALSource metricsWALSource; 065 private final ByteBuffAllocator allocator; 066 067 private BlockCache blockCache; 068 private BlockCache l1Cache = null; 069 private BlockCache l2Cache = null; 070 private MobFileCache mobFileCache; 071 private CacheStats cacheStats; 072 private CacheStats l1Stats = null; 073 private CacheStats l2Stats = null; 074 private volatile long numWALFiles = 0; 075 private volatile long walFileSize = 0; 076 private volatile long mobFileCacheAccessCount = 0; 077 private volatile long mobFileCacheMissCount = 0; 078 private volatile double mobFileCacheHitRatio = 0; 079 private volatile long mobFileCacheEvictedCount = 0; 080 private volatile long mobFileCacheCount = 0; 081 082 private volatile RegionMetricAggregate aggregate = new RegionMetricAggregate(null); 083 084 protected final Map<String, ArrayList<Long>> requestsCountCache = 085 new ConcurrentHashMap<String, ArrayList<Long>>(); 086 087 private ScheduledExecutorService executor; 088 private Runnable runnable; 089 private long period; 090 091 /** 092 * Can be null if not on hdfs. 093 */ 094 private DFSHedgedReadMetrics dfsHedgedReadMetrics; 095 096 private final ExcludeDatanodeManager excludeDatanodeManager; 097 098 public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) { 099 this.regionServer = regionServer; 100 initBlockCache(); 101 initMobFileCache(); 102 this.excludeDatanodeManager = this.regionServer.getWalFactory().getExcludeDatanodeManager(); 103 104 this.period = regionServer.getConfiguration().getLong(HConstants.REGIONSERVER_METRICS_PERIOD, 105 HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD); 106 107 this.executor = CompatibilitySingletonFactory.getInstance(MetricsExecutor.class).getExecutor(); 108 this.runnable = new RegionServerMetricsWrapperRunnable(); 109 this.executor.scheduleWithFixedDelay(this.runnable, this.period, this.period, 110 TimeUnit.MILLISECONDS); 111 this.metricsWALSource = CompatibilitySingletonFactory.getInstance(MetricsWALSource.class); 112 this.allocator = regionServer.getRpcServer().getByteBuffAllocator(); 113 114 try { 115 this.dfsHedgedReadMetrics = FSUtils.getDFSHedgedReadMetrics(regionServer.getConfiguration()); 116 } catch (IOException e) { 117 LOG.warn("Failed to get hedged metrics", e); 118 } 119 if (LOG.isInfoEnabled()) { 120 LOG.info("Computing regionserver metrics every " + this.period + " milliseconds"); 121 } 122 } 123 124 private void initBlockCache() { 125 this.blockCache = this.regionServer.getBlockCache().orElse(null); 126 this.cacheStats = this.blockCache != null ? this.blockCache.getStats() : null; 127 if (this.cacheStats != null) { 128 if (this.cacheStats instanceof CombinedBlockCache.CombinedCacheStats) { 129 l1Stats = ((CombinedBlockCache.CombinedCacheStats) this.cacheStats).getLruCacheStats(); 130 l2Stats = ((CombinedBlockCache.CombinedCacheStats) this.cacheStats).getBucketCacheStats(); 131 } else { 132 l1Stats = this.cacheStats; 133 } 134 } 135 if (this.blockCache != null) { 136 if (this.blockCache instanceof CombinedBlockCache) { 137 l1Cache = ((CombinedBlockCache) this.blockCache).getFirstLevelCache(); 138 l2Cache = ((CombinedBlockCache) this.blockCache).getSecondLevelCache(); 139 } else { 140 l1Cache = this.blockCache; 141 } 142 } 143 } 144 145 /** 146 * Initializes the mob file cache. 147 */ 148 private void initMobFileCache() { 149 this.mobFileCache = this.regionServer.getMobFileCache().orElse(null); 150 } 151 152 @Override 153 public String getClusterId() { 154 return regionServer.getClusterId(); 155 } 156 157 @Override 158 public long getStartCode() { 159 return regionServer.getStartcode(); 160 } 161 162 @Override 163 public String getZookeeperQuorum() { 164 ZKWatcher zk = regionServer.getZooKeeper(); 165 if (zk == null) { 166 return ""; 167 } 168 return zk.getQuorum(); 169 } 170 171 @Override 172 public String getCoprocessors() { 173 String[] coprocessors = regionServer.getRegionServerCoprocessors(); 174 if (coprocessors == null || coprocessors.length == 0) { 175 return ""; 176 } 177 return StringUtils.join(coprocessors, ", "); 178 } 179 180 @Override 181 public String getServerName() { 182 ServerName serverName = regionServer.getServerName(); 183 if (serverName == null) { 184 return ""; 185 } 186 return serverName.getServerName(); 187 } 188 189 @Override 190 public long getNumOnlineRegions() { 191 Collection<HRegion> onlineRegionsLocalContext = regionServer.getOnlineRegionsLocalContext(); 192 if (onlineRegionsLocalContext == null) { 193 return 0; 194 } 195 return onlineRegionsLocalContext.size(); 196 } 197 198 @Override 199 public long getTotalRequestCount() { 200 return regionServer.getRpcServices().requestCount.sum(); 201 } 202 203 @Override 204 public long getTotalRowActionRequestCount() { 205 return aggregate.readRequestsCount + aggregate.writeRequestsCount; 206 } 207 208 @Override 209 public int getSplitQueueSize() { 210 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 211 return compactSplit == null ? 0 : compactSplit.getSplitQueueSize(); 212 } 213 214 @Override 215 public int getCompactionQueueSize() { 216 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 217 return compactSplit == null ? 0 : compactSplit.getCompactionQueueSize(); 218 } 219 220 @Override 221 public int getSmallCompactionQueueSize() { 222 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 223 return compactSplit == null ? 0 : compactSplit.getSmallCompactionQueueSize(); 224 } 225 226 @Override 227 public int getLargeCompactionQueueSize() { 228 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 229 return compactSplit == null ? 0 : compactSplit.getLargeCompactionQueueSize(); 230 } 231 232 @Override 233 public int getFlushQueueSize() { 234 // If there is no flusher there should be no queue. 235 if (this.regionServer.getMemStoreFlusher() == null) { 236 return 0; 237 } 238 return this.regionServer.getMemStoreFlusher().getFlushQueueSize(); 239 } 240 241 @Override 242 public long getBlockCacheCount() { 243 return this.blockCache != null ? this.blockCache.getBlockCount() : 0L; 244 } 245 246 @Override 247 public long getBlockCacheDataBlockCount() { 248 return this.blockCache != null ? this.blockCache.getDataBlockCount() : 0L; 249 } 250 251 @Override 252 public long getMemStoreLimit() { 253 return this.regionServer.getRegionServerAccounting().getGlobalMemStoreLimit(); 254 } 255 256 @Override 257 public long getOnHeapMemStoreLimit() { 258 return this.regionServer.getRegionServerAccounting().getGlobalOnHeapMemStoreLimit(); 259 } 260 261 @Override 262 public long getOffHeapMemStoreLimit() { 263 return this.regionServer.getRegionServerAccounting().getGlobalOffHeapMemStoreLimit(); 264 } 265 266 @Override 267 public long getBlockCacheSize() { 268 return this.blockCache != null ? this.blockCache.getCurrentSize() : 0L; 269 } 270 271 @Override 272 public long getBlockCacheFreeSize() { 273 return this.blockCache != null ? this.blockCache.getFreeSize() : 0L; 274 } 275 276 @Override 277 public long getBlockCacheHitCount() { 278 return this.cacheStats != null ? this.cacheStats.getHitCount() : 0L; 279 } 280 281 @Override 282 public long getBlockCachePrimaryHitCount() { 283 return this.cacheStats != null ? this.cacheStats.getPrimaryHitCount() : 0L; 284 } 285 286 @Override 287 public long getBlockCacheHitCachingCount() { 288 return this.cacheStats != null ? this.cacheStats.getHitCachingCount() : 0L; 289 } 290 291 @Override 292 public long getBlockCacheMissCount() { 293 return this.cacheStats != null ? this.cacheStats.getMissCount() : 0L; 294 } 295 296 @Override 297 public long getBlockCachePrimaryMissCount() { 298 return this.cacheStats != null ? this.cacheStats.getPrimaryMissCount() : 0L; 299 } 300 301 @Override 302 public long getBlockCacheMissCachingCount() { 303 return this.cacheStats != null ? this.cacheStats.getMissCachingCount() : 0L; 304 } 305 306 @Override 307 public long getBlockCacheEvictedCount() { 308 return this.cacheStats != null ? this.cacheStats.getEvictedCount() : 0L; 309 } 310 311 @Override 312 public long getBlockCachePrimaryEvictedCount() { 313 return this.cacheStats != null ? this.cacheStats.getPrimaryEvictedCount() : 0L; 314 } 315 316 @Override 317 public double getBlockCacheHitPercent() { 318 double ratio = this.cacheStats != null ? this.cacheStats.getHitRatio() : 0.0; 319 if (Double.isNaN(ratio)) { 320 ratio = 0; 321 } 322 return (ratio * 100); 323 } 324 325 @Override 326 public double getBlockCacheHitCachingPercent() { 327 double ratio = this.cacheStats != null ? this.cacheStats.getHitCachingRatio() : 0.0; 328 if (Double.isNaN(ratio)) { 329 ratio = 0; 330 } 331 return (ratio * 100); 332 } 333 334 @Override 335 public long getBlockCacheFailedInsertions() { 336 return this.cacheStats != null ? this.cacheStats.getFailedInserts() : 0L; 337 } 338 339 public long getL1CacheSize() { 340 return this.l1Cache != null ? this.l1Cache.getCurrentSize() : 0L; 341 } 342 343 public long getL1CacheFreeSize() { 344 return this.l1Cache != null ? this.l1Cache.getFreeSize() : 0L; 345 } 346 347 public long getL1CacheCount() { 348 return this.l1Cache != null ? this.l1Cache.getBlockCount() : 0L; 349 } 350 351 public long getL1CacheEvictedCount() { 352 return this.l1Stats != null ? this.l1Stats.getEvictedCount() : 0L; 353 } 354 355 public long getL2CacheSize() { 356 return this.l2Cache != null ? this.l2Cache.getCurrentSize() : 0L; 357 } 358 359 public long getL2CacheFreeSize() { 360 return this.l2Cache != null ? this.l2Cache.getFreeSize() : 0L; 361 } 362 363 public long getL2CacheCount() { 364 return this.l2Cache != null ? this.l2Cache.getBlockCount() : 0L; 365 } 366 367 public long getL2CacheEvictedCount() { 368 return this.l2Stats != null ? this.l2Stats.getEvictedCount() : 0L; 369 } 370 371 @Override 372 public long getL1CacheHitCount() { 373 return this.l1Stats != null ? this.l1Stats.getHitCount() : 0L; 374 } 375 376 @Override 377 public long getL1CacheMissCount() { 378 return this.l1Stats != null ? this.l1Stats.getMissCount() : 0L; 379 } 380 381 @Override 382 public double getL1CacheHitRatio() { 383 return this.l1Stats != null ? this.l1Stats.getHitRatio() : 0.0; 384 } 385 386 @Override 387 public double getL1CacheMissRatio() { 388 return this.l1Stats != null ? this.l1Stats.getMissRatio() : 0.0; 389 } 390 391 @Override 392 public long getL2CacheHitCount() { 393 return this.l2Stats != null ? this.l2Stats.getHitCount() : 0L; 394 } 395 396 @Override 397 public long getL2CacheMissCount() { 398 return this.l2Stats != null ? this.l2Stats.getMissCount() : 0L; 399 } 400 401 @Override 402 public double getL2CacheHitRatio() { 403 return this.l2Stats != null ? this.l2Stats.getHitRatio() : 0.0; 404 } 405 406 @Override 407 public double getL2CacheMissRatio() { 408 return this.l2Stats != null ? this.l2Stats.getMissRatio() : 0.0; 409 } 410 411 @Override 412 public void forceRecompute() { 413 this.runnable.run(); 414 } 415 416 @Override 417 public long getNumStores() { 418 return aggregate.numStores; 419 } 420 421 @Override 422 public long getNumWALFiles() { 423 return numWALFiles; 424 } 425 426 @Override 427 public long getWALFileSize() { 428 return walFileSize; 429 } 430 431 @Override 432 public List<String> getWALExcludeDNs() { 433 if (excludeDatanodeManager == null) { 434 return Collections.emptyList(); 435 } 436 return excludeDatanodeManager.getExcludeDNs().entrySet().stream() 437 .map(e -> e.getKey().toString() + ", " + e.getValue()).collect(Collectors.toList()); 438 } 439 440 @Override 441 public long getNumWALSlowAppend() { 442 return metricsWALSource.getSlowAppendCount(); 443 } 444 445 @Override 446 public long getNumStoreFiles() { 447 return aggregate.numStoreFiles; 448 } 449 450 @Override 451 public long getMaxStoreFiles() { 452 return aggregate.maxStoreFileCount; 453 } 454 455 @Override 456 public long getMaxStoreFileAge() { 457 return aggregate.maxStoreFileAge; 458 } 459 460 @Override 461 public long getMinStoreFileAge() { 462 return aggregate.minStoreFileAge; 463 } 464 465 @Override 466 public long getAvgStoreFileAge() { 467 return aggregate.avgStoreFileAge; 468 } 469 470 @Override 471 public long getNumReferenceFiles() { 472 return aggregate.numReferenceFiles; 473 } 474 475 @Override 476 public long getMemStoreSize() { 477 return aggregate.memstoreSize; 478 } 479 480 @Override 481 public long getOnHeapMemStoreSize() { 482 return aggregate.onHeapMemstoreSize; 483 } 484 485 @Override 486 public long getOffHeapMemStoreSize() { 487 return aggregate.offHeapMemstoreSize; 488 } 489 490 @Override 491 public long getStoreFileSize() { 492 return aggregate.storeFileSize; 493 } 494 495 @Override 496 public double getStoreFileSizeGrowthRate() { 497 return aggregate.storeFileSizeGrowthRate; 498 } 499 500 @Override 501 public double getRequestsPerSecond() { 502 return aggregate.requestsPerSecond; 503 } 504 505 @Override 506 public long getReadRequestsCount() { 507 return aggregate.readRequestsCount; 508 } 509 510 @Override 511 public long getCpRequestsCount() { 512 return aggregate.cpRequestsCount; 513 } 514 515 @Override 516 public double getReadRequestsRatePerSecond() { 517 return aggregate.readRequestsRatePerSecond; 518 } 519 520 @Override 521 public long getFilteredReadRequestsCount() { 522 return aggregate.filteredReadRequestsCount; 523 } 524 525 @Override 526 public long getWriteRequestsCount() { 527 return aggregate.writeRequestsCount; 528 } 529 530 @Override 531 public double getWriteRequestsRatePerSecond() { 532 return aggregate.writeRequestsRatePerSecond; 533 } 534 535 @Override 536 public long getRpcGetRequestsCount() { 537 return regionServer.getRpcServices().rpcGetRequestCount.sum(); 538 } 539 540 @Override 541 public long getRpcScanRequestsCount() { 542 return regionServer.getRpcServices().rpcScanRequestCount.sum(); 543 } 544 545 @Override 546 public long getRpcFullScanRequestsCount() { 547 return regionServer.getRpcServices().rpcFullScanRequestCount.sum(); 548 } 549 550 @Override 551 public long getRpcMultiRequestsCount() { 552 return regionServer.getRpcServices().rpcMultiRequestCount.sum(); 553 } 554 555 @Override 556 public long getRpcMutateRequestsCount() { 557 return regionServer.getRpcServices().rpcMutateRequestCount.sum(); 558 } 559 560 @Override 561 public long getCheckAndMutateChecksFailed() { 562 return aggregate.checkAndMutateChecksFailed; 563 } 564 565 @Override 566 public long getCheckAndMutateChecksPassed() { 567 return aggregate.checkAndMutateChecksPassed; 568 } 569 570 @Override 571 public long getStoreFileIndexSize() { 572 return aggregate.storefileIndexSize; 573 } 574 575 @Override 576 public long getTotalStaticIndexSize() { 577 return aggregate.totalStaticIndexSize; 578 } 579 580 @Override 581 public long getTotalStaticBloomSize() { 582 return aggregate.totalStaticBloomSize; 583 } 584 585 @Override 586 public long getBloomFilterRequestsCount() { 587 return aggregate.bloomFilterRequestsCount; 588 } 589 590 @Override 591 public long getBloomFilterNegativeResultsCount() { 592 return aggregate.bloomFilterNegativeResultsCount; 593 } 594 595 @Override 596 public long getBloomFilterEligibleRequestsCount() { 597 return aggregate.bloomFilterEligibleRequestsCount; 598 } 599 600 @Override 601 public long getNumMutationsWithoutWAL() { 602 return aggregate.numMutationsWithoutWAL; 603 } 604 605 @Override 606 public long getDataInMemoryWithoutWAL() { 607 return aggregate.dataInMemoryWithoutWAL; 608 } 609 610 @Override 611 public double getPercentFileLocal() { 612 return aggregate.percentFileLocal; 613 } 614 615 @Override 616 public double getPercentFileLocalPrimaryRegions() { 617 return aggregate.percentFileLocalPrimaryRegions; 618 } 619 620 @Override 621 public double getPercentFileLocalSecondaryRegions() { 622 return aggregate.percentFileLocalSecondaryRegions; 623 } 624 625 @Override 626 public long getUpdatesBlockedTime() { 627 if (this.regionServer.getMemStoreFlusher() == null) { 628 return 0; 629 } 630 return this.regionServer.getMemStoreFlusher().getUpdatesBlockedMsHighWater().sum(); 631 } 632 633 @Override 634 public long getFlushedCellsCount() { 635 return aggregate.flushedCellsCount; 636 } 637 638 @Override 639 public long getCompactedCellsCount() { 640 return aggregate.compactedCellsCount; 641 } 642 643 @Override 644 public long getMajorCompactedCellsCount() { 645 return aggregate.majorCompactedCellsCount; 646 } 647 648 @Override 649 public long getFlushedCellsSize() { 650 return aggregate.flushedCellsSize; 651 } 652 653 @Override 654 public long getCompactedCellsSize() { 655 return aggregate.compactedCellsSize; 656 } 657 658 @Override 659 public long getMajorCompactedCellsSize() { 660 return aggregate.majorCompactedCellsSize; 661 } 662 663 @Override 664 public long getCellsCountCompactedFromMob() { 665 return aggregate.cellsCountCompactedFromMob; 666 } 667 668 @Override 669 public long getCellsCountCompactedToMob() { 670 return aggregate.cellsCountCompactedToMob; 671 } 672 673 @Override 674 public long getCellsSizeCompactedFromMob() { 675 return aggregate.cellsSizeCompactedFromMob; 676 } 677 678 @Override 679 public long getCellsSizeCompactedToMob() { 680 return aggregate.cellsSizeCompactedToMob; 681 } 682 683 @Override 684 public long getMobFlushCount() { 685 return aggregate.mobFlushCount; 686 } 687 688 @Override 689 public long getMobFlushedCellsCount() { 690 return aggregate.mobFlushedCellsCount; 691 } 692 693 @Override 694 public long getMobFlushedCellsSize() { 695 return aggregate.mobFlushedCellsSize; 696 } 697 698 @Override 699 public long getMobScanCellsCount() { 700 return aggregate.mobScanCellsCount; 701 } 702 703 @Override 704 public long getMobScanCellsSize() { 705 return aggregate.mobScanCellsSize; 706 } 707 708 @Override 709 public long getMobFileCacheAccessCount() { 710 return mobFileCacheAccessCount; 711 } 712 713 @Override 714 public long getMobFileCacheMissCount() { 715 return mobFileCacheMissCount; 716 } 717 718 @Override 719 public long getMobFileCacheCount() { 720 return mobFileCacheCount; 721 } 722 723 @Override 724 public long getMobFileCacheEvictedCount() { 725 return mobFileCacheEvictedCount; 726 } 727 728 @Override 729 public double getMobFileCacheHitPercent() { 730 return mobFileCacheHitRatio * 100; 731 } 732 733 @Override 734 public int getActiveScanners() { 735 return regionServer.getRpcServices().getScannersCount(); 736 } 737 738 private static final class RegionMetricAggregate { 739 private long numStores = 0; 740 private long numStoreFiles = 0; 741 private long memstoreSize = 0; 742 private long onHeapMemstoreSize = 0; 743 private long offHeapMemstoreSize = 0; 744 private long storeFileSize = 0; 745 private double storeFileSizeGrowthRate = 0; 746 private long maxStoreFileCount = 0; 747 private long maxStoreFileAge = 0; 748 private long minStoreFileAge = Long.MAX_VALUE; 749 private long avgStoreFileAge = 0; 750 private long numReferenceFiles = 0; 751 752 private long cpRequestsCount = 0; 753 private double requestsPerSecond = 0.0; 754 private long readRequestsCount = 0; 755 private double readRequestsRatePerSecond = 0; 756 private long filteredReadRequestsCount = 0; 757 private long writeRequestsCount = 0; 758 private double writeRequestsRatePerSecond = 0; 759 private long checkAndMutateChecksFailed = 0; 760 private long checkAndMutateChecksPassed = 0; 761 private long storefileIndexSize = 0; 762 private long totalStaticIndexSize = 0; 763 private long totalStaticBloomSize = 0; 764 private long bloomFilterRequestsCount = 0; 765 private long bloomFilterNegativeResultsCount = 0; 766 private long bloomFilterEligibleRequestsCount = 0; 767 private long numMutationsWithoutWAL = 0; 768 private long dataInMemoryWithoutWAL = 0; 769 private double percentFileLocal = 0; 770 private double percentFileLocalPrimaryRegions = 0; 771 private double percentFileLocalSecondaryRegions = 0; 772 private long flushedCellsCount = 0; 773 private long compactedCellsCount = 0; 774 private long majorCompactedCellsCount = 0; 775 private long flushedCellsSize = 0; 776 private long compactedCellsSize = 0; 777 private long majorCompactedCellsSize = 0; 778 private long cellsCountCompactedToMob = 0; 779 private long cellsCountCompactedFromMob = 0; 780 private long cellsSizeCompactedToMob = 0; 781 private long cellsSizeCompactedFromMob = 0; 782 private long mobFlushCount = 0; 783 private long mobFlushedCellsCount = 0; 784 private long mobFlushedCellsSize = 0; 785 private long mobScanCellsCount = 0; 786 private long mobScanCellsSize = 0; 787 private long blockedRequestsCount = 0L; 788 private long averageRegionSize = 0L; 789 private long totalReadRequestsDelta = 0; 790 private long totalWriteRequestsDelta = 0; 791 792 private RegionMetricAggregate(RegionMetricAggregate other) { 793 if (other != null) { 794 requestsPerSecond = other.requestsPerSecond; 795 readRequestsRatePerSecond = other.readRequestsRatePerSecond; 796 writeRequestsRatePerSecond = other.writeRequestsRatePerSecond; 797 } 798 } 799 800 private void aggregate(HRegionServer regionServer, 801 Map<String, ArrayList<Long>> requestsCountCache) { 802 HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution(); 803 HDFSBlocksDistribution hdfsBlocksDistributionPrimaryRegions = new HDFSBlocksDistribution(); 804 HDFSBlocksDistribution hdfsBlocksDistributionSecondaryRegions = new HDFSBlocksDistribution(); 805 806 long avgAgeNumerator = 0; 807 long numHFiles = 0; 808 int regionCount = 0; 809 810 for (HRegion r : regionServer.getOnlineRegionsLocalContext()) { 811 Deltas deltas = calculateReadWriteDeltas(r, requestsCountCache); 812 totalReadRequestsDelta += deltas.readRequestsCountDelta; 813 totalWriteRequestsDelta += deltas.writeRequestsCountDelta; 814 815 numMutationsWithoutWAL += r.getNumMutationsWithoutWAL(); 816 dataInMemoryWithoutWAL += r.getDataInMemoryWithoutWAL(); 817 cpRequestsCount += r.getCpRequestsCount(); 818 readRequestsCount += r.getReadRequestsCount(); 819 filteredReadRequestsCount += r.getFilteredReadRequestsCount(); 820 writeRequestsCount += r.getWriteRequestsCount(); 821 checkAndMutateChecksFailed += r.getCheckAndMutateChecksFailed(); 822 checkAndMutateChecksPassed += r.getCheckAndMutateChecksPassed(); 823 blockedRequestsCount += r.getBlockedRequestsCount(); 824 825 StoreFileStats storeFileStats = aggregateStores(r.getStores()); 826 numHFiles += storeFileStats.numHFiles; 827 avgAgeNumerator += storeFileStats.avgAgeNumerator; 828 829 HDFSBlocksDistribution distro = r.getHDFSBlocksDistribution(); 830 hdfsBlocksDistribution.add(distro); 831 if (r.getRegionInfo().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 832 hdfsBlocksDistributionPrimaryRegions.add(distro); 833 } 834 if (r.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 835 hdfsBlocksDistributionSecondaryRegions.add(distro); 836 } 837 838 regionCount++; 839 } 840 841 float localityIndex = 842 hdfsBlocksDistribution.getBlockLocalityIndex(regionServer.getServerName().getHostname()); 843 percentFileLocal = Double.isNaN(localityIndex) ? 0 : (localityIndex * 100); 844 845 float localityIndexPrimaryRegions = hdfsBlocksDistributionPrimaryRegions 846 .getBlockLocalityIndex(regionServer.getServerName().getHostname()); 847 percentFileLocalPrimaryRegions = 848 Double.isNaN(localityIndexPrimaryRegions) ? 0 : (localityIndexPrimaryRegions * 100); 849 850 float localityIndexSecondaryRegions = hdfsBlocksDistributionSecondaryRegions 851 .getBlockLocalityIndex(regionServer.getServerName().getHostname()); 852 percentFileLocalSecondaryRegions = 853 Double.isNaN(localityIndexSecondaryRegions) ? 0 : (localityIndexSecondaryRegions * 100); 854 855 if (regionCount > 0) { 856 averageRegionSize = (memstoreSize + storeFileSize) / regionCount; 857 } 858 859 // if there were no store files, we'll never have updated this with Math.min 860 // so set it to 0, which is a better value to display in case of no storefiles 861 if (minStoreFileAge == Long.MAX_VALUE) { 862 this.minStoreFileAge = 0; 863 } 864 865 if (numHFiles != 0) { 866 avgStoreFileAge = avgAgeNumerator / numHFiles; 867 } 868 } 869 870 private static final class Deltas { 871 private final long readRequestsCountDelta; 872 private final long writeRequestsCountDelta; 873 874 private Deltas(long readRequestsCountDelta, long writeRequestsCountDelta) { 875 this.readRequestsCountDelta = readRequestsCountDelta; 876 this.writeRequestsCountDelta = writeRequestsCountDelta; 877 } 878 } 879 880 private Deltas calculateReadWriteDeltas(HRegion r, 881 Map<String, ArrayList<Long>> requestsCountCache) { 882 String encodedRegionName = r.getRegionInfo().getEncodedName(); 883 long currentReadRequestsCount = r.getReadRequestsCount(); 884 long currentWriteRequestsCount = r.getWriteRequestsCount(); 885 if (requestsCountCache.containsKey(encodedRegionName)) { 886 long lastReadRequestsCount = requestsCountCache.get(encodedRegionName).get(0); 887 long lastWriteRequestsCount = requestsCountCache.get(encodedRegionName).get(1); 888 889 // Update cache for our next comparison 890 requestsCountCache.get(encodedRegionName).set(0, currentReadRequestsCount); 891 requestsCountCache.get(encodedRegionName).set(1, currentWriteRequestsCount); 892 893 long readRequestsDelta = currentReadRequestsCount - lastReadRequestsCount; 894 long writeRequestsDelta = currentWriteRequestsCount - lastWriteRequestsCount; 895 return new Deltas(readRequestsDelta, writeRequestsDelta); 896 } else { 897 // List[0] -> readRequestCount 898 // List[1] -> writeRequestCount 899 ArrayList<Long> requests = new ArrayList<Long>(2); 900 requests.add(currentReadRequestsCount); 901 requests.add(currentWriteRequestsCount); 902 requestsCountCache.put(encodedRegionName, requests); 903 return new Deltas(currentReadRequestsCount, currentWriteRequestsCount); 904 } 905 } 906 907 public void updateRates(long timeSinceLastRun, long expectedPeriod, long lastStoreFileSize) { 908 requestsPerSecond = 909 (totalReadRequestsDelta + totalWriteRequestsDelta) / (timeSinceLastRun / 1000.0); 910 911 double readRequestsRatePerMilliSecond = (double) totalReadRequestsDelta / expectedPeriod; 912 double writeRequestsRatePerMilliSecond = (double) totalWriteRequestsDelta / expectedPeriod; 913 914 readRequestsRatePerSecond = readRequestsRatePerMilliSecond * 1000.0; 915 writeRequestsRatePerSecond = writeRequestsRatePerMilliSecond * 1000.0; 916 917 long intervalStoreFileSize = storeFileSize - lastStoreFileSize; 918 storeFileSizeGrowthRate = (double) intervalStoreFileSize * 1000.0 / expectedPeriod; 919 } 920 921 private static final class StoreFileStats { 922 private final long numHFiles; 923 private final long avgAgeNumerator; 924 925 private StoreFileStats(long numHFiles, long avgAgeNumerator) { 926 this.numHFiles = numHFiles; 927 this.avgAgeNumerator = avgAgeNumerator; 928 } 929 } 930 931 private StoreFileStats aggregateStores(List<HStore> stores) { 932 numStores += stores.size(); 933 long numHFiles = 0; 934 long avgAgeNumerator = 0; 935 for (Store store : stores) { 936 numStoreFiles += store.getStorefilesCount(); 937 memstoreSize += store.getMemStoreSize().getDataSize(); 938 onHeapMemstoreSize += store.getMemStoreSize().getHeapSize(); 939 offHeapMemstoreSize += store.getMemStoreSize().getOffHeapSize(); 940 storeFileSize += store.getStorefilesSize(); 941 maxStoreFileCount = Math.max(maxStoreFileCount, store.getStorefilesCount()); 942 943 maxStoreFileAge = 944 Math.max(store.getMaxStoreFileAge().orElse(maxStoreFileAge), maxStoreFileAge); 945 minStoreFileAge = 946 Math.min(store.getMinStoreFileAge().orElse(minStoreFileAge), minStoreFileAge); 947 948 long storeHFiles = store.getNumHFiles(); 949 numHFiles += storeHFiles; 950 numReferenceFiles += store.getNumReferenceFiles(); 951 952 OptionalDouble storeAvgStoreFileAge = store.getAvgStoreFileAge(); 953 if (storeAvgStoreFileAge.isPresent()) { 954 avgAgeNumerator = 955 (long) (avgAgeNumerator + storeAvgStoreFileAge.getAsDouble() * storeHFiles); 956 } 957 958 storefileIndexSize += store.getStorefilesRootLevelIndexSize(); 959 totalStaticBloomSize += store.getTotalStaticBloomSize(); 960 totalStaticIndexSize += store.getTotalStaticIndexSize(); 961 bloomFilterRequestsCount += store.getBloomFilterRequestsCount(); 962 bloomFilterNegativeResultsCount += store.getBloomFilterNegativeResultsCount(); 963 bloomFilterEligibleRequestsCount += store.getBloomFilterEligibleRequestsCount(); 964 flushedCellsCount += store.getFlushedCellsCount(); 965 compactedCellsCount += store.getCompactedCellsCount(); 966 majorCompactedCellsCount += store.getMajorCompactedCellsCount(); 967 flushedCellsSize += store.getFlushedCellsSize(); 968 compactedCellsSize += store.getCompactedCellsSize(); 969 majorCompactedCellsSize += store.getMajorCompactedCellsSize(); 970 if (store instanceof HMobStore) { 971 HMobStore mobStore = (HMobStore) store; 972 cellsCountCompactedToMob += mobStore.getCellsCountCompactedToMob(); 973 cellsCountCompactedFromMob += mobStore.getCellsCountCompactedFromMob(); 974 cellsSizeCompactedToMob += mobStore.getCellsSizeCompactedToMob(); 975 cellsSizeCompactedFromMob += mobStore.getCellsSizeCompactedFromMob(); 976 mobFlushCount += mobStore.getMobFlushCount(); 977 mobFlushedCellsCount += mobStore.getMobFlushedCellsCount(); 978 mobFlushedCellsSize += mobStore.getMobFlushedCellsSize(); 979 mobScanCellsCount += mobStore.getMobScanCellsCount(); 980 mobScanCellsSize += mobStore.getMobScanCellsSize(); 981 } 982 } 983 984 return new StoreFileStats(numHFiles, avgAgeNumerator); 985 } 986 987 } 988 989 /** 990 * This is the runnable that will be executed on the executor every PERIOD number of seconds It 991 * will take metrics/numbers from all of the regions and use them to compute point in time 992 * metrics. 993 */ 994 public class RegionServerMetricsWrapperRunnable implements Runnable { 995 996 private long lastRan = 0; 997 private long lastStoreFileSize = 0; 998 999 @Override 1000 synchronized public void run() { 1001 try { 1002 RegionMetricAggregate newVal = new RegionMetricAggregate(aggregate); 1003 newVal.aggregate(regionServer, requestsCountCache); 1004 1005 // Compute the number of requests per second 1006 long currentTime = EnvironmentEdgeManager.currentTime(); 1007 1008 // assume that it took PERIOD seconds to start the executor. 1009 // this is a guess but it's a pretty good one. 1010 if (lastRan == 0) { 1011 lastRan = currentTime - period; 1012 } 1013 1014 long timeSinceLastRun = currentTime - lastRan; 1015 // If we've time traveled keep the last requests per second. 1016 if (timeSinceLastRun > 0) { 1017 newVal.updateRates(timeSinceLastRun, period, lastStoreFileSize); 1018 } 1019 1020 aggregate = newVal; 1021 1022 List<WALProvider> providers = regionServer.getWalFactory().getAllWALProviders(); 1023 long numWALFilesTmp = 0; 1024 long walFileSizeTmp = 0; 1025 for (WALProvider provider : providers) { 1026 numWALFilesTmp += provider.getNumLogFiles(); 1027 walFileSizeTmp += provider.getLogFileSize(); 1028 } 1029 numWALFiles = numWALFilesTmp; 1030 walFileSize = walFileSizeTmp; 1031 1032 mobFileCacheAccessCount = mobFileCache != null ? mobFileCache.getAccessCount() : 0L; 1033 mobFileCacheMissCount = mobFileCache != null ? mobFileCache.getMissCount() : 0L; 1034 mobFileCacheHitRatio = mobFileCache != null ? mobFileCache.getHitRatio() : 0.0; 1035 if (Double.isNaN(mobFileCacheHitRatio)) { 1036 mobFileCacheHitRatio = 0.0; 1037 } 1038 mobFileCacheEvictedCount = mobFileCache != null ? mobFileCache.getEvictedFileCount() : 0L; 1039 mobFileCacheCount = mobFileCache != null ? mobFileCache.getCacheSize() : 0; 1040 1041 lastStoreFileSize = aggregate.storeFileSize; 1042 lastRan = currentTime; 1043 } catch (Throwable e) { 1044 LOG.warn("Caught exception! Will suppress and retry.", e); 1045 } 1046 } 1047 } 1048 1049 @Override 1050 public long getHedgedReadOps() { 1051 return this.dfsHedgedReadMetrics == null ? 0 : this.dfsHedgedReadMetrics.getHedgedReadOps(); 1052 } 1053 1054 @Override 1055 public long getHedgedReadWins() { 1056 return this.dfsHedgedReadMetrics == null ? 0 : this.dfsHedgedReadMetrics.getHedgedReadWins(); 1057 } 1058 1059 @Override 1060 public long getHedgedReadOpsInCurThread() { 1061 return this.dfsHedgedReadMetrics == null 1062 ? 0 1063 : this.dfsHedgedReadMetrics.getHedgedReadOpsInCurThread(); 1064 } 1065 1066 @Override 1067 public long getTotalBytesRead() { 1068 return FSDataInputStreamWrapper.getTotalBytesRead(); 1069 } 1070 1071 @Override 1072 public long getLocalBytesRead() { 1073 return FSDataInputStreamWrapper.getLocalBytesRead(); 1074 } 1075 1076 @Override 1077 public long getShortCircuitBytesRead() { 1078 return FSDataInputStreamWrapper.getShortCircuitBytesRead(); 1079 } 1080 1081 @Override 1082 public long getZeroCopyBytesRead() { 1083 return FSDataInputStreamWrapper.getZeroCopyBytesRead(); 1084 } 1085 1086 @Override 1087 public long getBlockedRequestsCount() { 1088 return aggregate.blockedRequestsCount; 1089 } 1090 1091 @Override 1092 public long getAverageRegionSize() { 1093 return aggregate.averageRegionSize; 1094 } 1095 1096 @Override 1097 public long getDataMissCount() { 1098 return this.cacheStats != null ? this.cacheStats.getDataMissCount() : 0L; 1099 } 1100 1101 @Override 1102 public long getLeafIndexMissCount() { 1103 return this.cacheStats != null ? this.cacheStats.getLeafIndexMissCount() : 0L; 1104 } 1105 1106 @Override 1107 public long getBloomChunkMissCount() { 1108 return this.cacheStats != null ? this.cacheStats.getBloomChunkMissCount() : 0L; 1109 } 1110 1111 @Override 1112 public long getMetaMissCount() { 1113 return this.cacheStats != null ? this.cacheStats.getMetaMissCount() : 0L; 1114 } 1115 1116 @Override 1117 public long getRootIndexMissCount() { 1118 return this.cacheStats != null ? this.cacheStats.getRootIndexMissCount() : 0L; 1119 } 1120 1121 @Override 1122 public long getIntermediateIndexMissCount() { 1123 return this.cacheStats != null ? this.cacheStats.getIntermediateIndexMissCount() : 0L; 1124 } 1125 1126 @Override 1127 public long getFileInfoMissCount() { 1128 return this.cacheStats != null ? this.cacheStats.getFileInfoMissCount() : 0L; 1129 } 1130 1131 @Override 1132 public long getGeneralBloomMetaMissCount() { 1133 return this.cacheStats != null ? this.cacheStats.getGeneralBloomMetaMissCount() : 0L; 1134 } 1135 1136 @Override 1137 public long getDeleteFamilyBloomMissCount() { 1138 return this.cacheStats != null ? this.cacheStats.getDeleteFamilyBloomMissCount() : 0L; 1139 } 1140 1141 @Override 1142 public long getTrailerMissCount() { 1143 return this.cacheStats != null ? this.cacheStats.getTrailerMissCount() : 0L; 1144 } 1145 1146 @Override 1147 public long getDataHitCount() { 1148 return this.cacheStats != null ? this.cacheStats.getDataHitCount() : 0L; 1149 } 1150 1151 @Override 1152 public long getLeafIndexHitCount() { 1153 return this.cacheStats != null ? this.cacheStats.getLeafIndexHitCount() : 0L; 1154 } 1155 1156 @Override 1157 public long getBloomChunkHitCount() { 1158 return this.cacheStats != null ? this.cacheStats.getBloomChunkHitCount() : 0L; 1159 } 1160 1161 @Override 1162 public long getMetaHitCount() { 1163 return this.cacheStats != null ? this.cacheStats.getMetaHitCount() : 0L; 1164 } 1165 1166 @Override 1167 public long getRootIndexHitCount() { 1168 return this.cacheStats != null ? this.cacheStats.getRootIndexHitCount() : 0L; 1169 } 1170 1171 @Override 1172 public long getIntermediateIndexHitCount() { 1173 return this.cacheStats != null ? this.cacheStats.getIntermediateIndexHitCount() : 0L; 1174 } 1175 1176 @Override 1177 public long getFileInfoHitCount() { 1178 return this.cacheStats != null ? this.cacheStats.getFileInfoHitCount() : 0L; 1179 } 1180 1181 @Override 1182 public long getGeneralBloomMetaHitCount() { 1183 return this.cacheStats != null ? this.cacheStats.getGeneralBloomMetaHitCount() : 0L; 1184 } 1185 1186 @Override 1187 public long getDeleteFamilyBloomHitCount() { 1188 return this.cacheStats != null ? this.cacheStats.getDeleteFamilyBloomHitCount() : 0L; 1189 } 1190 1191 @Override 1192 public long getTrailerHitCount() { 1193 return this.cacheStats != null ? this.cacheStats.getTrailerHitCount() : 0L; 1194 } 1195 1196 @Override 1197 public long getByteBuffAllocatorHeapAllocationBytes() { 1198 return ByteBuffAllocator.getHeapAllocationBytes(allocator, ByteBuffAllocator.HEAP); 1199 } 1200 1201 @Override 1202 public long getByteBuffAllocatorPoolAllocationBytes() { 1203 return this.allocator.getPoolAllocationBytes(); 1204 } 1205 1206 @Override 1207 public double getByteBuffAllocatorHeapAllocRatio() { 1208 return ByteBuffAllocator.getHeapAllocationRatio(allocator, ByteBuffAllocator.HEAP); 1209 } 1210 1211 @Override 1212 public long getByteBuffAllocatorTotalBufferCount() { 1213 return this.allocator.getTotalBufferCount(); 1214 } 1215 1216 @Override 1217 public long getByteBuffAllocatorUsedBufferCount() { 1218 return this.allocator.getUsedBufferCount(); 1219 } 1220 1221 // Visible for testing 1222 long getPeriod() { 1223 return period; 1224 } 1225}