001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collection; 023import java.util.Collections; 024import java.util.List; 025import java.util.Map; 026import java.util.OptionalDouble; 027import java.util.concurrent.ConcurrentHashMap; 028import java.util.concurrent.ScheduledExecutorService; 029import java.util.concurrent.TimeUnit; 030import java.util.stream.Collectors; 031import org.apache.commons.lang3.StringUtils; 032import org.apache.hadoop.hbase.CompatibilitySingletonFactory; 033import org.apache.hadoop.hbase.HConstants; 034import org.apache.hadoop.hbase.HDFSBlocksDistribution; 035import org.apache.hadoop.hbase.ServerName; 036import org.apache.hadoop.hbase.client.RegionInfo; 037import org.apache.hadoop.hbase.io.ByteBuffAllocator; 038import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 039import org.apache.hadoop.hbase.io.asyncfs.monitor.ExcludeDatanodeManager; 040import org.apache.hadoop.hbase.io.hfile.BlockCache; 041import org.apache.hadoop.hbase.io.hfile.CacheStats; 042import org.apache.hadoop.hbase.io.hfile.CombinedBlockCache; 043import org.apache.hadoop.hbase.mob.MobFileCache; 044import org.apache.hadoop.hbase.regionserver.wal.MetricsWALSource; 045import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 046import org.apache.hadoop.hbase.util.FSUtils; 047import org.apache.hadoop.hbase.wal.WALProvider; 048import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 049import org.apache.hadoop.hdfs.DFSHedgedReadMetrics; 050import org.apache.hadoop.metrics2.MetricsExecutor; 051import org.apache.yetus.audience.InterfaceAudience; 052import org.slf4j.Logger; 053import org.slf4j.LoggerFactory; 054 055/** 056 * Impl for exposing HRegionServer Information through Hadoop's metrics 2 system. 057 */ 058@InterfaceAudience.Private 059class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper { 060 061 private static final Logger LOG = LoggerFactory.getLogger(MetricsRegionServerWrapperImpl.class); 062 063 private final HRegionServer regionServer; 064 private final MetricsWALSource metricsWALSource; 065 private final ByteBuffAllocator allocator; 066 067 private BlockCache blockCache; 068 private BlockCache l1Cache = null; 069 private BlockCache l2Cache = null; 070 private MobFileCache mobFileCache; 071 private CacheStats cacheStats; 072 private CacheStats l1Stats = null; 073 private CacheStats l2Stats = null; 074 private volatile long numWALFiles = 0; 075 private volatile long walFileSize = 0; 076 private volatile long mobFileCacheAccessCount = 0; 077 private volatile long mobFileCacheMissCount = 0; 078 private volatile double mobFileCacheHitRatio = 0; 079 private volatile long mobFileCacheEvictedCount = 0; 080 private volatile long mobFileCacheCount = 0; 081 082 private volatile RegionMetricAggregate aggregate = new RegionMetricAggregate(null); 083 084 protected final Map<String, ArrayList<Long>> requestsCountCache = 085 new ConcurrentHashMap<String, ArrayList<Long>>(); 086 087 private ScheduledExecutorService executor; 088 private Runnable runnable; 089 private long period; 090 091 /** 092 * Can be null if not on hdfs. 093 */ 094 private DFSHedgedReadMetrics dfsHedgedReadMetrics; 095 096 private final ExcludeDatanodeManager excludeDatanodeManager; 097 098 public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) { 099 this.regionServer = regionServer; 100 initBlockCache(); 101 initMobFileCache(); 102 this.excludeDatanodeManager = this.regionServer.getWalFactory().getExcludeDatanodeManager(); 103 104 this.period = regionServer.getConfiguration().getLong(HConstants.REGIONSERVER_METRICS_PERIOD, 105 HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD); 106 107 this.executor = CompatibilitySingletonFactory.getInstance(MetricsExecutor.class).getExecutor(); 108 this.runnable = new RegionServerMetricsWrapperRunnable(); 109 this.executor.scheduleWithFixedDelay(this.runnable, this.period, this.period, 110 TimeUnit.MILLISECONDS); 111 this.metricsWALSource = CompatibilitySingletonFactory.getInstance(MetricsWALSource.class); 112 this.allocator = regionServer.getRpcServer().getByteBuffAllocator(); 113 114 try { 115 this.dfsHedgedReadMetrics = FSUtils.getDFSHedgedReadMetrics(regionServer.getConfiguration()); 116 } catch (IOException e) { 117 LOG.warn("Failed to get hedged metrics", e); 118 } 119 if (LOG.isInfoEnabled()) { 120 LOG.info("Computing regionserver metrics every " + this.period + " milliseconds"); 121 } 122 } 123 124 private void initBlockCache() { 125 this.blockCache = this.regionServer.getBlockCache().orElse(null); 126 this.cacheStats = this.blockCache != null ? this.blockCache.getStats() : null; 127 if (this.cacheStats != null) { 128 if (this.cacheStats instanceof CombinedBlockCache.CombinedCacheStats) { 129 l1Stats = ((CombinedBlockCache.CombinedCacheStats) this.cacheStats).getLruCacheStats(); 130 l2Stats = ((CombinedBlockCache.CombinedCacheStats) this.cacheStats).getBucketCacheStats(); 131 } else { 132 l1Stats = this.cacheStats; 133 } 134 } 135 if (this.blockCache != null) { 136 if (this.blockCache instanceof CombinedBlockCache) { 137 l1Cache = ((CombinedBlockCache) this.blockCache).getFirstLevelCache(); 138 l2Cache = ((CombinedBlockCache) this.blockCache).getSecondLevelCache(); 139 } else { 140 l1Cache = this.blockCache; 141 } 142 } 143 } 144 145 /** 146 * Initializes the mob file cache. 147 */ 148 private void initMobFileCache() { 149 this.mobFileCache = this.regionServer.getMobFileCache().orElse(null); 150 } 151 152 @Override 153 public String getClusterId() { 154 return regionServer.getClusterId(); 155 } 156 157 @Override 158 public long getStartCode() { 159 return regionServer.getStartcode(); 160 } 161 162 @Override 163 public String getZookeeperQuorum() { 164 ZKWatcher zk = regionServer.getZooKeeper(); 165 if (zk == null) { 166 return ""; 167 } 168 return zk.getQuorum(); 169 } 170 171 @Override 172 public String getCoprocessors() { 173 String[] coprocessors = regionServer.getRegionServerCoprocessors(); 174 if (coprocessors == null || coprocessors.length == 0) { 175 return ""; 176 } 177 return StringUtils.join(coprocessors, ", "); 178 } 179 180 @Override 181 public String getServerName() { 182 ServerName serverName = regionServer.getServerName(); 183 if (serverName == null) { 184 return ""; 185 } 186 return serverName.getServerName(); 187 } 188 189 @Override 190 public long getNumOnlineRegions() { 191 Collection<HRegion> onlineRegionsLocalContext = regionServer.getOnlineRegionsLocalContext(); 192 if (onlineRegionsLocalContext == null) { 193 return 0; 194 } 195 return onlineRegionsLocalContext.size(); 196 } 197 198 @Override 199 public long getTotalRequestCount() { 200 return regionServer.getRpcServices().requestCount.sum(); 201 } 202 203 @Override 204 public long getTotalRowActionRequestCount() { 205 return aggregate.readRequestsCount + aggregate.writeRequestsCount; 206 } 207 208 @Override 209 public int getSplitQueueSize() { 210 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 211 return compactSplit == null ? 0 : compactSplit.getSplitQueueSize(); 212 } 213 214 @Override 215 public int getCompactionQueueSize() { 216 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 217 return compactSplit == null ? 0 : compactSplit.getCompactionQueueSize(); 218 } 219 220 @Override 221 public int getSmallCompactionQueueSize() { 222 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 223 return compactSplit == null ? 0 : compactSplit.getSmallCompactionQueueSize(); 224 } 225 226 @Override 227 public int getLargeCompactionQueueSize() { 228 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 229 return compactSplit == null ? 0 : compactSplit.getLargeCompactionQueueSize(); 230 } 231 232 @Override 233 public int getFlushQueueSize() { 234 // If there is no flusher there should be no queue. 235 if (this.regionServer.getMemStoreFlusher() == null) { 236 return 0; 237 } 238 return this.regionServer.getMemStoreFlusher().getFlushQueueSize(); 239 } 240 241 @Override 242 public long getBlockCacheCount() { 243 return this.blockCache != null ? this.blockCache.getBlockCount() : 0L; 244 } 245 246 @Override 247 public long getBlockCacheDataBlockCount() { 248 return this.blockCache != null ? this.blockCache.getDataBlockCount() : 0L; 249 } 250 251 @Override 252 public long getMemStoreLimit() { 253 return this.regionServer.getRegionServerAccounting().getGlobalMemStoreLimit(); 254 } 255 256 @Override 257 public long getOnHeapMemStoreLimit() { 258 return this.regionServer.getRegionServerAccounting().getGlobalOnHeapMemStoreLimit(); 259 } 260 261 @Override 262 public long getOffHeapMemStoreLimit() { 263 return this.regionServer.getRegionServerAccounting().getGlobalOffHeapMemStoreLimit(); 264 } 265 266 @Override 267 public long getBlockCacheSize() { 268 return this.blockCache != null ? this.blockCache.getCurrentSize() : 0L; 269 } 270 271 @Override 272 public long getBlockCacheFreeSize() { 273 return this.blockCache != null ? this.blockCache.getFreeSize() : 0L; 274 } 275 276 @Override 277 public long getBlockCacheHitCount() { 278 return this.cacheStats != null ? this.cacheStats.getHitCount() : 0L; 279 } 280 281 @Override 282 public long getBlockCachePrimaryHitCount() { 283 return this.cacheStats != null ? this.cacheStats.getPrimaryHitCount() : 0L; 284 } 285 286 @Override 287 public long getBlockCacheHitCachingCount() { 288 return this.cacheStats != null ? this.cacheStats.getHitCachingCount() : 0L; 289 } 290 291 @Override 292 public long getBlockCacheMissCount() { 293 return this.cacheStats != null ? this.cacheStats.getMissCount() : 0L; 294 } 295 296 @Override 297 public long getBlockCachePrimaryMissCount() { 298 return this.cacheStats != null ? this.cacheStats.getPrimaryMissCount() : 0L; 299 } 300 301 @Override 302 public long getBlockCacheMissCachingCount() { 303 return this.cacheStats != null ? this.cacheStats.getMissCachingCount() : 0L; 304 } 305 306 @Override 307 public long getBlockCacheEvictedCount() { 308 return this.cacheStats != null ? this.cacheStats.getEvictedCount() : 0L; 309 } 310 311 @Override 312 public long getBlockCachePrimaryEvictedCount() { 313 return this.cacheStats != null ? this.cacheStats.getPrimaryEvictedCount() : 0L; 314 } 315 316 @Override 317 public double getBlockCacheHitPercent() { 318 double ratio = this.cacheStats != null ? this.cacheStats.getHitRatio() : 0.0; 319 if (Double.isNaN(ratio)) { 320 ratio = 0; 321 } 322 return (ratio * 100); 323 } 324 325 @Override 326 public double getBlockCacheHitCachingPercent() { 327 double ratio = this.cacheStats != null ? this.cacheStats.getHitCachingRatio() : 0.0; 328 if (Double.isNaN(ratio)) { 329 ratio = 0; 330 } 331 return (ratio * 100); 332 } 333 334 @Override 335 public long getBlockCacheFailedInsertions() { 336 return this.cacheStats != null ? this.cacheStats.getFailedInserts() : 0L; 337 } 338 339 public long getL1CacheSize() { 340 return this.l1Cache != null ? this.l1Cache.getCurrentSize() : 0L; 341 } 342 343 public long getL1CacheFreeSize() { 344 return this.l1Cache != null ? this.l1Cache.getFreeSize() : 0L; 345 } 346 347 public long getL1CacheCount() { 348 return this.l1Cache != null ? this.l1Cache.getBlockCount() : 0L; 349 } 350 351 public long getL1CacheEvictedCount() { 352 return this.l1Stats != null ? this.l1Stats.getEvictedCount() : 0L; 353 } 354 355 public long getL2CacheSize() { 356 return this.l2Cache != null ? this.l2Cache.getCurrentSize() : 0L; 357 } 358 359 public long getL2CacheFreeSize() { 360 return this.l2Cache != null ? this.l2Cache.getFreeSize() : 0L; 361 } 362 363 public long getL2CacheCount() { 364 return this.l2Cache != null ? this.l2Cache.getBlockCount() : 0L; 365 } 366 367 public long getL2CacheEvictedCount() { 368 return this.l2Stats != null ? this.l2Stats.getEvictedCount() : 0L; 369 } 370 371 @Override 372 public long getL1CacheHitCount() { 373 return this.l1Stats != null ? this.l1Stats.getHitCount() : 0L; 374 } 375 376 @Override 377 public long getL1CacheMissCount() { 378 return this.l1Stats != null ? this.l1Stats.getMissCount() : 0L; 379 } 380 381 @Override 382 public double getL1CacheHitRatio() { 383 return this.l1Stats != null ? this.l1Stats.getHitRatio() : 0.0; 384 } 385 386 @Override 387 public double getL1CacheMissRatio() { 388 return this.l1Stats != null ? this.l1Stats.getMissRatio() : 0.0; 389 } 390 391 @Override 392 public long getL2CacheHitCount() { 393 return this.l2Stats != null ? this.l2Stats.getHitCount() : 0L; 394 } 395 396 @Override 397 public long getL2CacheMissCount() { 398 return this.l2Stats != null ? this.l2Stats.getMissCount() : 0L; 399 } 400 401 @Override 402 public double getL2CacheHitRatio() { 403 return this.l2Stats != null ? this.l2Stats.getHitRatio() : 0.0; 404 } 405 406 @Override 407 public double getL2CacheMissRatio() { 408 return this.l2Stats != null ? this.l2Stats.getMissRatio() : 0.0; 409 } 410 411 @Override 412 public void forceRecompute() { 413 this.runnable.run(); 414 } 415 416 @Override 417 public long getNumStores() { 418 return aggregate.numStores; 419 } 420 421 @Override 422 public long getNumWALFiles() { 423 return numWALFiles; 424 } 425 426 @Override 427 public long getWALFileSize() { 428 return walFileSize; 429 } 430 431 @Override 432 public List<String> getWALExcludeDNs() { 433 if (excludeDatanodeManager == null) { 434 return Collections.emptyList(); 435 } 436 return excludeDatanodeManager.getExcludeDNs().entrySet().stream().map(e -> e.getKey().toString() 437 + " - " + e.getValue().getSecond() + " - " + e.getValue().getFirst()) 438 .collect(Collectors.toList()); 439 } 440 441 @Override 442 public long getNumWALSlowAppend() { 443 return metricsWALSource.getSlowAppendCount(); 444 } 445 446 @Override 447 public long getNumStoreFiles() { 448 return aggregate.numStoreFiles; 449 } 450 451 @Override 452 public long getMaxStoreFiles() { 453 return aggregate.maxStoreFileCount; 454 } 455 456 @Override 457 public long getMaxStoreFileAge() { 458 return aggregate.maxStoreFileAge; 459 } 460 461 @Override 462 public long getMinStoreFileAge() { 463 return aggregate.minStoreFileAge; 464 } 465 466 @Override 467 public long getAvgStoreFileAge() { 468 return aggregate.avgStoreFileAge; 469 } 470 471 @Override 472 public long getNumReferenceFiles() { 473 return aggregate.numReferenceFiles; 474 } 475 476 @Override 477 public long getMemStoreSize() { 478 return aggregate.memstoreSize; 479 } 480 481 @Override 482 public long getOnHeapMemStoreSize() { 483 return aggregate.onHeapMemstoreSize; 484 } 485 486 @Override 487 public long getOffHeapMemStoreSize() { 488 return aggregate.offHeapMemstoreSize; 489 } 490 491 @Override 492 public long getStoreFileSize() { 493 return aggregate.storeFileSize; 494 } 495 496 @Override 497 public double getStoreFileSizeGrowthRate() { 498 return aggregate.storeFileSizeGrowthRate; 499 } 500 501 @Override 502 public double getRequestsPerSecond() { 503 return aggregate.requestsPerSecond; 504 } 505 506 @Override 507 public long getReadRequestsCount() { 508 return aggregate.readRequestsCount; 509 } 510 511 @Override 512 public long getCpRequestsCount() { 513 return aggregate.cpRequestsCount; 514 } 515 516 @Override 517 public double getReadRequestsRatePerSecond() { 518 return aggregate.readRequestsRatePerSecond; 519 } 520 521 @Override 522 public long getFilteredReadRequestsCount() { 523 return aggregate.filteredReadRequestsCount; 524 } 525 526 @Override 527 public long getWriteRequestsCount() { 528 return aggregate.writeRequestsCount; 529 } 530 531 @Override 532 public double getWriteRequestsRatePerSecond() { 533 return aggregate.writeRequestsRatePerSecond; 534 } 535 536 @Override 537 public long getRpcGetRequestsCount() { 538 return regionServer.getRpcServices().rpcGetRequestCount.sum(); 539 } 540 541 @Override 542 public long getRpcScanRequestsCount() { 543 return regionServer.getRpcServices().rpcScanRequestCount.sum(); 544 } 545 546 @Override 547 public long getRpcFullScanRequestsCount() { 548 return regionServer.getRpcServices().rpcFullScanRequestCount.sum(); 549 } 550 551 @Override 552 public long getRpcMultiRequestsCount() { 553 return regionServer.getRpcServices().rpcMultiRequestCount.sum(); 554 } 555 556 @Override 557 public long getRpcMutateRequestsCount() { 558 return regionServer.getRpcServices().rpcMutateRequestCount.sum(); 559 } 560 561 @Override 562 public long getCheckAndMutateChecksFailed() { 563 return aggregate.checkAndMutateChecksFailed; 564 } 565 566 @Override 567 public long getCheckAndMutateChecksPassed() { 568 return aggregate.checkAndMutateChecksPassed; 569 } 570 571 @Override 572 public long getStoreFileIndexSize() { 573 return aggregate.storefileIndexSize; 574 } 575 576 @Override 577 public long getTotalStaticIndexSize() { 578 return aggregate.totalStaticIndexSize; 579 } 580 581 @Override 582 public long getTotalStaticBloomSize() { 583 return aggregate.totalStaticBloomSize; 584 } 585 586 @Override 587 public long getBloomFilterRequestsCount() { 588 return aggregate.bloomFilterRequestsCount; 589 } 590 591 @Override 592 public long getBloomFilterNegativeResultsCount() { 593 return aggregate.bloomFilterNegativeResultsCount; 594 } 595 596 @Override 597 public long getBloomFilterEligibleRequestsCount() { 598 return aggregate.bloomFilterEligibleRequestsCount; 599 } 600 601 @Override 602 public long getNumMutationsWithoutWAL() { 603 return aggregate.numMutationsWithoutWAL; 604 } 605 606 @Override 607 public long getDataInMemoryWithoutWAL() { 608 return aggregate.dataInMemoryWithoutWAL; 609 } 610 611 @Override 612 public double getPercentFileLocal() { 613 return aggregate.percentFileLocal; 614 } 615 616 @Override 617 public double getPercentFileLocalPrimaryRegions() { 618 return aggregate.percentFileLocalPrimaryRegions; 619 } 620 621 @Override 622 public double getPercentFileLocalSecondaryRegions() { 623 return aggregate.percentFileLocalSecondaryRegions; 624 } 625 626 @Override 627 public long getUpdatesBlockedTime() { 628 if (this.regionServer.getMemStoreFlusher() == null) { 629 return 0; 630 } 631 return this.regionServer.getMemStoreFlusher().getUpdatesBlockedMsHighWater().sum(); 632 } 633 634 @Override 635 public long getFlushedCellsCount() { 636 return aggregate.flushedCellsCount; 637 } 638 639 @Override 640 public long getCompactedCellsCount() { 641 return aggregate.compactedCellsCount; 642 } 643 644 @Override 645 public long getMajorCompactedCellsCount() { 646 return aggregate.majorCompactedCellsCount; 647 } 648 649 @Override 650 public long getFlushedCellsSize() { 651 return aggregate.flushedCellsSize; 652 } 653 654 @Override 655 public long getCompactedCellsSize() { 656 return aggregate.compactedCellsSize; 657 } 658 659 @Override 660 public long getMajorCompactedCellsSize() { 661 return aggregate.majorCompactedCellsSize; 662 } 663 664 @Override 665 public long getCellsCountCompactedFromMob() { 666 return aggregate.cellsCountCompactedFromMob; 667 } 668 669 @Override 670 public long getCellsCountCompactedToMob() { 671 return aggregate.cellsCountCompactedToMob; 672 } 673 674 @Override 675 public long getCellsSizeCompactedFromMob() { 676 return aggregate.cellsSizeCompactedFromMob; 677 } 678 679 @Override 680 public long getCellsSizeCompactedToMob() { 681 return aggregate.cellsSizeCompactedToMob; 682 } 683 684 @Override 685 public long getMobFlushCount() { 686 return aggregate.mobFlushCount; 687 } 688 689 @Override 690 public long getMobFlushedCellsCount() { 691 return aggregate.mobFlushedCellsCount; 692 } 693 694 @Override 695 public long getMobFlushedCellsSize() { 696 return aggregate.mobFlushedCellsSize; 697 } 698 699 @Override 700 public long getMobScanCellsCount() { 701 return aggregate.mobScanCellsCount; 702 } 703 704 @Override 705 public long getMobScanCellsSize() { 706 return aggregate.mobScanCellsSize; 707 } 708 709 @Override 710 public long getMobFileCacheAccessCount() { 711 return mobFileCacheAccessCount; 712 } 713 714 @Override 715 public long getMobFileCacheMissCount() { 716 return mobFileCacheMissCount; 717 } 718 719 @Override 720 public long getMobFileCacheCount() { 721 return mobFileCacheCount; 722 } 723 724 @Override 725 public long getMobFileCacheEvictedCount() { 726 return mobFileCacheEvictedCount; 727 } 728 729 @Override 730 public double getMobFileCacheHitPercent() { 731 return mobFileCacheHitRatio * 100; 732 } 733 734 @Override 735 public int getActiveScanners() { 736 return regionServer.getRpcServices().getScannersCount(); 737 } 738 739 private static final class RegionMetricAggregate { 740 private long numStores = 0; 741 private long numStoreFiles = 0; 742 private long memstoreSize = 0; 743 private long onHeapMemstoreSize = 0; 744 private long offHeapMemstoreSize = 0; 745 private long storeFileSize = 0; 746 private double storeFileSizeGrowthRate = 0; 747 private long maxStoreFileCount = 0; 748 private long maxStoreFileAge = 0; 749 private long minStoreFileAge = Long.MAX_VALUE; 750 private long avgStoreFileAge = 0; 751 private long numReferenceFiles = 0; 752 753 private long cpRequestsCount = 0; 754 private double requestsPerSecond = 0.0; 755 private long readRequestsCount = 0; 756 private double readRequestsRatePerSecond = 0; 757 private long filteredReadRequestsCount = 0; 758 private long writeRequestsCount = 0; 759 private double writeRequestsRatePerSecond = 0; 760 private long checkAndMutateChecksFailed = 0; 761 private long checkAndMutateChecksPassed = 0; 762 private long storefileIndexSize = 0; 763 private long totalStaticIndexSize = 0; 764 private long totalStaticBloomSize = 0; 765 private long bloomFilterRequestsCount = 0; 766 private long bloomFilterNegativeResultsCount = 0; 767 private long bloomFilterEligibleRequestsCount = 0; 768 private long numMutationsWithoutWAL = 0; 769 private long dataInMemoryWithoutWAL = 0; 770 private double percentFileLocal = 0; 771 private double percentFileLocalPrimaryRegions = 0; 772 private double percentFileLocalSecondaryRegions = 0; 773 private long flushedCellsCount = 0; 774 private long compactedCellsCount = 0; 775 private long majorCompactedCellsCount = 0; 776 private long flushedCellsSize = 0; 777 private long compactedCellsSize = 0; 778 private long majorCompactedCellsSize = 0; 779 private long cellsCountCompactedToMob = 0; 780 private long cellsCountCompactedFromMob = 0; 781 private long cellsSizeCompactedToMob = 0; 782 private long cellsSizeCompactedFromMob = 0; 783 private long mobFlushCount = 0; 784 private long mobFlushedCellsCount = 0; 785 private long mobFlushedCellsSize = 0; 786 private long mobScanCellsCount = 0; 787 private long mobScanCellsSize = 0; 788 private long blockedRequestsCount = 0L; 789 private long averageRegionSize = 0L; 790 private long totalReadRequestsDelta = 0; 791 private long totalWriteRequestsDelta = 0; 792 793 private RegionMetricAggregate(RegionMetricAggregate other) { 794 if (other != null) { 795 requestsPerSecond = other.requestsPerSecond; 796 readRequestsRatePerSecond = other.readRequestsRatePerSecond; 797 writeRequestsRatePerSecond = other.writeRequestsRatePerSecond; 798 } 799 } 800 801 private void aggregate(HRegionServer regionServer, 802 Map<String, ArrayList<Long>> requestsCountCache) { 803 HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution(); 804 HDFSBlocksDistribution hdfsBlocksDistributionPrimaryRegions = new HDFSBlocksDistribution(); 805 HDFSBlocksDistribution hdfsBlocksDistributionSecondaryRegions = new HDFSBlocksDistribution(); 806 807 long avgAgeNumerator = 0; 808 long numHFiles = 0; 809 int regionCount = 0; 810 811 for (HRegion r : regionServer.getOnlineRegionsLocalContext()) { 812 Deltas deltas = calculateReadWriteDeltas(r, requestsCountCache); 813 totalReadRequestsDelta += deltas.readRequestsCountDelta; 814 totalWriteRequestsDelta += deltas.writeRequestsCountDelta; 815 816 numMutationsWithoutWAL += r.getNumMutationsWithoutWAL(); 817 dataInMemoryWithoutWAL += r.getDataInMemoryWithoutWAL(); 818 cpRequestsCount += r.getCpRequestsCount(); 819 readRequestsCount += r.getReadRequestsCount(); 820 filteredReadRequestsCount += r.getFilteredReadRequestsCount(); 821 writeRequestsCount += r.getWriteRequestsCount(); 822 checkAndMutateChecksFailed += r.getCheckAndMutateChecksFailed(); 823 checkAndMutateChecksPassed += r.getCheckAndMutateChecksPassed(); 824 blockedRequestsCount += r.getBlockedRequestsCount(); 825 826 StoreFileStats storeFileStats = aggregateStores(r.getStores()); 827 numHFiles += storeFileStats.numHFiles; 828 avgAgeNumerator += storeFileStats.avgAgeNumerator; 829 830 HDFSBlocksDistribution distro = r.getHDFSBlocksDistribution(); 831 hdfsBlocksDistribution.add(distro); 832 if (r.getRegionInfo().getReplicaId() == RegionInfo.DEFAULT_REPLICA_ID) { 833 hdfsBlocksDistributionPrimaryRegions.add(distro); 834 } 835 if (r.getRegionInfo().getReplicaId() != RegionInfo.DEFAULT_REPLICA_ID) { 836 hdfsBlocksDistributionSecondaryRegions.add(distro); 837 } 838 839 regionCount++; 840 } 841 842 float localityIndex = 843 hdfsBlocksDistribution.getBlockLocalityIndex(regionServer.getServerName().getHostname()); 844 percentFileLocal = Double.isNaN(localityIndex) ? 0 : (localityIndex * 100); 845 846 float localityIndexPrimaryRegions = hdfsBlocksDistributionPrimaryRegions 847 .getBlockLocalityIndex(regionServer.getServerName().getHostname()); 848 percentFileLocalPrimaryRegions = 849 Double.isNaN(localityIndexPrimaryRegions) ? 0 : (localityIndexPrimaryRegions * 100); 850 851 float localityIndexSecondaryRegions = hdfsBlocksDistributionSecondaryRegions 852 .getBlockLocalityIndex(regionServer.getServerName().getHostname()); 853 percentFileLocalSecondaryRegions = 854 Double.isNaN(localityIndexSecondaryRegions) ? 0 : (localityIndexSecondaryRegions * 100); 855 856 if (regionCount > 0) { 857 averageRegionSize = (memstoreSize + storeFileSize) / regionCount; 858 } 859 860 // if there were no store files, we'll never have updated this with Math.min 861 // so set it to 0, which is a better value to display in case of no storefiles 862 if (minStoreFileAge == Long.MAX_VALUE) { 863 this.minStoreFileAge = 0; 864 } 865 866 if (numHFiles != 0) { 867 avgStoreFileAge = avgAgeNumerator / numHFiles; 868 } 869 } 870 871 private static final class Deltas { 872 private final long readRequestsCountDelta; 873 private final long writeRequestsCountDelta; 874 875 private Deltas(long readRequestsCountDelta, long writeRequestsCountDelta) { 876 this.readRequestsCountDelta = readRequestsCountDelta; 877 this.writeRequestsCountDelta = writeRequestsCountDelta; 878 } 879 } 880 881 private Deltas calculateReadWriteDeltas(HRegion r, 882 Map<String, ArrayList<Long>> requestsCountCache) { 883 String encodedRegionName = r.getRegionInfo().getEncodedName(); 884 long currentReadRequestsCount = r.getReadRequestsCount(); 885 long currentWriteRequestsCount = r.getWriteRequestsCount(); 886 if (requestsCountCache.containsKey(encodedRegionName)) { 887 long lastReadRequestsCount = requestsCountCache.get(encodedRegionName).get(0); 888 long lastWriteRequestsCount = requestsCountCache.get(encodedRegionName).get(1); 889 890 // Update cache for our next comparison 891 requestsCountCache.get(encodedRegionName).set(0, currentReadRequestsCount); 892 requestsCountCache.get(encodedRegionName).set(1, currentWriteRequestsCount); 893 894 long readRequestsDelta = currentReadRequestsCount - lastReadRequestsCount; 895 long writeRequestsDelta = currentWriteRequestsCount - lastWriteRequestsCount; 896 return new Deltas(readRequestsDelta, writeRequestsDelta); 897 } else { 898 // List[0] -> readRequestCount 899 // List[1] -> writeRequestCount 900 ArrayList<Long> requests = new ArrayList<Long>(2); 901 requests.add(currentReadRequestsCount); 902 requests.add(currentWriteRequestsCount); 903 requestsCountCache.put(encodedRegionName, requests); 904 return new Deltas(currentReadRequestsCount, currentWriteRequestsCount); 905 } 906 } 907 908 public void updateRates(long timeSinceLastRun, long expectedPeriod, long lastStoreFileSize) { 909 requestsPerSecond = 910 (totalReadRequestsDelta + totalWriteRequestsDelta) / (timeSinceLastRun / 1000.0); 911 912 double readRequestsRatePerMilliSecond = (double) totalReadRequestsDelta / expectedPeriod; 913 double writeRequestsRatePerMilliSecond = (double) totalWriteRequestsDelta / expectedPeriod; 914 915 readRequestsRatePerSecond = readRequestsRatePerMilliSecond * 1000.0; 916 writeRequestsRatePerSecond = writeRequestsRatePerMilliSecond * 1000.0; 917 918 long intervalStoreFileSize = storeFileSize - lastStoreFileSize; 919 storeFileSizeGrowthRate = (double) intervalStoreFileSize * 1000.0 / expectedPeriod; 920 } 921 922 private static final class StoreFileStats { 923 private final long numHFiles; 924 private final long avgAgeNumerator; 925 926 private StoreFileStats(long numHFiles, long avgAgeNumerator) { 927 this.numHFiles = numHFiles; 928 this.avgAgeNumerator = avgAgeNumerator; 929 } 930 } 931 932 private StoreFileStats aggregateStores(List<HStore> stores) { 933 numStores += stores.size(); 934 long numHFiles = 0; 935 long avgAgeNumerator = 0; 936 for (Store store : stores) { 937 numStoreFiles += store.getStorefilesCount(); 938 memstoreSize += store.getMemStoreSize().getDataSize(); 939 onHeapMemstoreSize += store.getMemStoreSize().getHeapSize(); 940 offHeapMemstoreSize += store.getMemStoreSize().getOffHeapSize(); 941 storeFileSize += store.getStorefilesSize(); 942 maxStoreFileCount = Math.max(maxStoreFileCount, store.getStorefilesCount()); 943 944 maxStoreFileAge = 945 Math.max(store.getMaxStoreFileAge().orElse(maxStoreFileAge), maxStoreFileAge); 946 minStoreFileAge = 947 Math.min(store.getMinStoreFileAge().orElse(minStoreFileAge), minStoreFileAge); 948 949 long storeHFiles = store.getNumHFiles(); 950 numHFiles += storeHFiles; 951 numReferenceFiles += store.getNumReferenceFiles(); 952 953 OptionalDouble storeAvgStoreFileAge = store.getAvgStoreFileAge(); 954 if (storeAvgStoreFileAge.isPresent()) { 955 avgAgeNumerator = 956 (long) (avgAgeNumerator + storeAvgStoreFileAge.getAsDouble() * storeHFiles); 957 } 958 959 storefileIndexSize += store.getStorefilesRootLevelIndexSize(); 960 totalStaticBloomSize += store.getTotalStaticBloomSize(); 961 totalStaticIndexSize += store.getTotalStaticIndexSize(); 962 bloomFilterRequestsCount += store.getBloomFilterRequestsCount(); 963 bloomFilterNegativeResultsCount += store.getBloomFilterNegativeResultsCount(); 964 bloomFilterEligibleRequestsCount += store.getBloomFilterEligibleRequestsCount(); 965 flushedCellsCount += store.getFlushedCellsCount(); 966 compactedCellsCount += store.getCompactedCellsCount(); 967 majorCompactedCellsCount += store.getMajorCompactedCellsCount(); 968 flushedCellsSize += store.getFlushedCellsSize(); 969 compactedCellsSize += store.getCompactedCellsSize(); 970 majorCompactedCellsSize += store.getMajorCompactedCellsSize(); 971 if (store instanceof HMobStore) { 972 HMobStore mobStore = (HMobStore) store; 973 cellsCountCompactedToMob += mobStore.getCellsCountCompactedToMob(); 974 cellsCountCompactedFromMob += mobStore.getCellsCountCompactedFromMob(); 975 cellsSizeCompactedToMob += mobStore.getCellsSizeCompactedToMob(); 976 cellsSizeCompactedFromMob += mobStore.getCellsSizeCompactedFromMob(); 977 mobFlushCount += mobStore.getMobFlushCount(); 978 mobFlushedCellsCount += mobStore.getMobFlushedCellsCount(); 979 mobFlushedCellsSize += mobStore.getMobFlushedCellsSize(); 980 mobScanCellsCount += mobStore.getMobScanCellsCount(); 981 mobScanCellsSize += mobStore.getMobScanCellsSize(); 982 } 983 } 984 985 return new StoreFileStats(numHFiles, avgAgeNumerator); 986 } 987 988 } 989 990 /** 991 * This is the runnable that will be executed on the executor every PERIOD number of seconds It 992 * will take metrics/numbers from all of the regions and use them to compute point in time 993 * metrics. 994 */ 995 public class RegionServerMetricsWrapperRunnable implements Runnable { 996 997 private long lastRan = 0; 998 private long lastStoreFileSize = 0; 999 1000 @Override 1001 synchronized public void run() { 1002 try { 1003 RegionMetricAggregate newVal = new RegionMetricAggregate(aggregate); 1004 newVal.aggregate(regionServer, requestsCountCache); 1005 1006 // Compute the number of requests per second 1007 long currentTime = EnvironmentEdgeManager.currentTime(); 1008 1009 // assume that it took PERIOD seconds to start the executor. 1010 // this is a guess but it's a pretty good one. 1011 if (lastRan == 0) { 1012 lastRan = currentTime - period; 1013 } 1014 1015 long timeSinceLastRun = currentTime - lastRan; 1016 // If we've time traveled keep the last requests per second. 1017 if (timeSinceLastRun > 0) { 1018 newVal.updateRates(timeSinceLastRun, period, lastStoreFileSize); 1019 } 1020 1021 aggregate = newVal; 1022 1023 List<WALProvider> providers = regionServer.getWalFactory().getAllWALProviders(); 1024 long numWALFilesTmp = 0; 1025 long walFileSizeTmp = 0; 1026 for (WALProvider provider : providers) { 1027 numWALFilesTmp += provider.getNumLogFiles(); 1028 walFileSizeTmp += provider.getLogFileSize(); 1029 } 1030 numWALFiles = numWALFilesTmp; 1031 walFileSize = walFileSizeTmp; 1032 1033 mobFileCacheAccessCount = mobFileCache != null ? mobFileCache.getAccessCount() : 0L; 1034 mobFileCacheMissCount = mobFileCache != null ? mobFileCache.getMissCount() : 0L; 1035 mobFileCacheHitRatio = mobFileCache != null ? mobFileCache.getHitRatio() : 0.0; 1036 if (Double.isNaN(mobFileCacheHitRatio)) { 1037 mobFileCacheHitRatio = 0.0; 1038 } 1039 mobFileCacheEvictedCount = mobFileCache != null ? mobFileCache.getEvictedFileCount() : 0L; 1040 mobFileCacheCount = mobFileCache != null ? mobFileCache.getCacheSize() : 0; 1041 1042 lastStoreFileSize = aggregate.storeFileSize; 1043 lastRan = currentTime; 1044 } catch (Throwable e) { 1045 LOG.warn("Caught exception! Will suppress and retry.", e); 1046 } 1047 } 1048 } 1049 1050 @Override 1051 public long getHedgedReadOps() { 1052 return this.dfsHedgedReadMetrics == null ? 0 : this.dfsHedgedReadMetrics.getHedgedReadOps(); 1053 } 1054 1055 @Override 1056 public long getHedgedReadWins() { 1057 return this.dfsHedgedReadMetrics == null ? 0 : this.dfsHedgedReadMetrics.getHedgedReadWins(); 1058 } 1059 1060 @Override 1061 public long getHedgedReadOpsInCurThread() { 1062 return this.dfsHedgedReadMetrics == null 1063 ? 0 1064 : this.dfsHedgedReadMetrics.getHedgedReadOpsInCurThread(); 1065 } 1066 1067 @Override 1068 public long getTotalBytesRead() { 1069 return FSDataInputStreamWrapper.getTotalBytesRead(); 1070 } 1071 1072 @Override 1073 public long getLocalBytesRead() { 1074 return FSDataInputStreamWrapper.getLocalBytesRead(); 1075 } 1076 1077 @Override 1078 public long getShortCircuitBytesRead() { 1079 return FSDataInputStreamWrapper.getShortCircuitBytesRead(); 1080 } 1081 1082 @Override 1083 public long getZeroCopyBytesRead() { 1084 return FSDataInputStreamWrapper.getZeroCopyBytesRead(); 1085 } 1086 1087 @Override 1088 public long getBlockedRequestsCount() { 1089 return aggregate.blockedRequestsCount; 1090 } 1091 1092 @Override 1093 public long getAverageRegionSize() { 1094 return aggregate.averageRegionSize; 1095 } 1096 1097 @Override 1098 public long getDataMissCount() { 1099 return this.cacheStats != null ? this.cacheStats.getDataMissCount() : 0L; 1100 } 1101 1102 @Override 1103 public long getLeafIndexMissCount() { 1104 return this.cacheStats != null ? this.cacheStats.getLeafIndexMissCount() : 0L; 1105 } 1106 1107 @Override 1108 public long getBloomChunkMissCount() { 1109 return this.cacheStats != null ? this.cacheStats.getBloomChunkMissCount() : 0L; 1110 } 1111 1112 @Override 1113 public long getMetaMissCount() { 1114 return this.cacheStats != null ? this.cacheStats.getMetaMissCount() : 0L; 1115 } 1116 1117 @Override 1118 public long getRootIndexMissCount() { 1119 return this.cacheStats != null ? this.cacheStats.getRootIndexMissCount() : 0L; 1120 } 1121 1122 @Override 1123 public long getIntermediateIndexMissCount() { 1124 return this.cacheStats != null ? this.cacheStats.getIntermediateIndexMissCount() : 0L; 1125 } 1126 1127 @Override 1128 public long getFileInfoMissCount() { 1129 return this.cacheStats != null ? this.cacheStats.getFileInfoMissCount() : 0L; 1130 } 1131 1132 @Override 1133 public long getGeneralBloomMetaMissCount() { 1134 return this.cacheStats != null ? this.cacheStats.getGeneralBloomMetaMissCount() : 0L; 1135 } 1136 1137 @Override 1138 public long getDeleteFamilyBloomMissCount() { 1139 return this.cacheStats != null ? this.cacheStats.getDeleteFamilyBloomMissCount() : 0L; 1140 } 1141 1142 @Override 1143 public long getTrailerMissCount() { 1144 return this.cacheStats != null ? this.cacheStats.getTrailerMissCount() : 0L; 1145 } 1146 1147 @Override 1148 public long getDataHitCount() { 1149 return this.cacheStats != null ? this.cacheStats.getDataHitCount() : 0L; 1150 } 1151 1152 @Override 1153 public long getLeafIndexHitCount() { 1154 return this.cacheStats != null ? this.cacheStats.getLeafIndexHitCount() : 0L; 1155 } 1156 1157 @Override 1158 public long getBloomChunkHitCount() { 1159 return this.cacheStats != null ? this.cacheStats.getBloomChunkHitCount() : 0L; 1160 } 1161 1162 @Override 1163 public long getMetaHitCount() { 1164 return this.cacheStats != null ? this.cacheStats.getMetaHitCount() : 0L; 1165 } 1166 1167 @Override 1168 public long getRootIndexHitCount() { 1169 return this.cacheStats != null ? this.cacheStats.getRootIndexHitCount() : 0L; 1170 } 1171 1172 @Override 1173 public long getIntermediateIndexHitCount() { 1174 return this.cacheStats != null ? this.cacheStats.getIntermediateIndexHitCount() : 0L; 1175 } 1176 1177 @Override 1178 public long getFileInfoHitCount() { 1179 return this.cacheStats != null ? this.cacheStats.getFileInfoHitCount() : 0L; 1180 } 1181 1182 @Override 1183 public long getGeneralBloomMetaHitCount() { 1184 return this.cacheStats != null ? this.cacheStats.getGeneralBloomMetaHitCount() : 0L; 1185 } 1186 1187 @Override 1188 public long getDeleteFamilyBloomHitCount() { 1189 return this.cacheStats != null ? this.cacheStats.getDeleteFamilyBloomHitCount() : 0L; 1190 } 1191 1192 @Override 1193 public long getTrailerHitCount() { 1194 return this.cacheStats != null ? this.cacheStats.getTrailerHitCount() : 0L; 1195 } 1196 1197 @Override 1198 public long getByteBuffAllocatorHeapAllocationBytes() { 1199 return ByteBuffAllocator.getHeapAllocationBytes(allocator, ByteBuffAllocator.HEAP); 1200 } 1201 1202 @Override 1203 public long getByteBuffAllocatorPoolAllocationBytes() { 1204 return this.allocator.getPoolAllocationBytes(); 1205 } 1206 1207 @Override 1208 public double getByteBuffAllocatorHeapAllocRatio() { 1209 return ByteBuffAllocator.getHeapAllocationRatio(allocator, ByteBuffAllocator.HEAP); 1210 } 1211 1212 @Override 1213 public long getByteBuffAllocatorTotalBufferCount() { 1214 return this.allocator.getTotalBufferCount(); 1215 } 1216 1217 @Override 1218 public long getByteBuffAllocatorUsedBufferCount() { 1219 return this.allocator.getUsedBufferCount(); 1220 } 1221 1222 // Visible for testing 1223 long getPeriod() { 1224 return period; 1225 } 1226}