001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.regionserver; 019 020import java.io.IOException; 021import java.util.ArrayList; 022import java.util.Collection; 023import java.util.Collections; 024import java.util.List; 025import java.util.Map; 026import java.util.OptionalDouble; 027import java.util.OptionalLong; 028import java.util.concurrent.ConcurrentHashMap; 029import java.util.concurrent.ScheduledExecutorService; 030import java.util.concurrent.TimeUnit; 031import java.util.stream.Collectors; 032import org.apache.commons.lang3.StringUtils; 033import org.apache.hadoop.hbase.CompatibilitySingletonFactory; 034import org.apache.hadoop.hbase.HConstants; 035import org.apache.hadoop.hbase.HDFSBlocksDistribution; 036import org.apache.hadoop.hbase.HRegionInfo; 037import org.apache.hadoop.hbase.ServerName; 038import org.apache.hadoop.hbase.io.ByteBuffAllocator; 039import org.apache.hadoop.hbase.io.FSDataInputStreamWrapper; 040import org.apache.hadoop.hbase.io.asyncfs.monitor.ExcludeDatanodeManager; 041import org.apache.hadoop.hbase.io.hfile.BlockCache; 042import org.apache.hadoop.hbase.io.hfile.CacheStats; 043import 
org.apache.hadoop.hbase.io.hfile.CombinedBlockCache; 044import org.apache.hadoop.hbase.mob.MobFileCache; 045import org.apache.hadoop.hbase.regionserver.wal.MetricsWALSource; 046import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; 047import org.apache.hadoop.hbase.util.FSUtils; 048import org.apache.hadoop.hbase.wal.WALProvider; 049import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 050import org.apache.hadoop.hdfs.DFSHedgedReadMetrics; 051import org.apache.hadoop.metrics2.MetricsExecutor; 052import org.apache.yetus.audience.InterfaceAudience; 053import org.slf4j.Logger; 054import org.slf4j.LoggerFactory; 055 056/** 057 * Impl for exposing HRegionServer Information through Hadoop's metrics 2 system. 058 */ 059@InterfaceAudience.Private 060class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper { 061 062 private static final Logger LOG = LoggerFactory.getLogger(MetricsRegionServerWrapperImpl.class); 063 064 private final HRegionServer regionServer; 065 private final MetricsWALSource metricsWALSource; 066 private final ByteBuffAllocator allocator; 067 068 private BlockCache blockCache; 069 private BlockCache l1Cache = null; 070 private BlockCache l2Cache = null; 071 private MobFileCache mobFileCache; 072 private CacheStats cacheStats; 073 private CacheStats l1Stats = null; 074 private CacheStats l2Stats = null; 075 076 private volatile long numStores = 0; 077 private volatile long numWALFiles = 0; 078 private volatile long walFileSize = 0; 079 private volatile long numStoreFiles = 0; 080 private volatile long memstoreSize = 0; 081 private volatile long onHeapMemstoreSize = 0; 082 private volatile long offHeapMemstoreSize = 0; 083 private volatile long storeFileSize = 0; 084 private volatile long maxStoreFileCount = 0; 085 private volatile long maxStoreFileAge = 0; 086 private volatile long minStoreFileAge = 0; 087 private volatile long avgStoreFileAge = 0; 088 private volatile long numReferenceFiles = 0; 089 private volatile double 
requestsPerSecond = 0.0; 090 private volatile long readRequestsCount = 0; 091 private volatile double readRequestsRatePerSecond = 0; 092 private volatile long filteredReadRequestsCount = 0; 093 private volatile long writeRequestsCount = 0; 094 private volatile double writeRequestsRatePerSecond = 0; 095 private volatile long checkAndMutateChecksFailed = 0; 096 private volatile long checkAndMutateChecksPassed = 0; 097 private volatile long storefileIndexSize = 0; 098 private volatile long totalStaticIndexSize = 0; 099 private volatile long totalStaticBloomSize = 0; 100 private volatile long bloomFilterRequestsCount = 0; 101 private volatile long bloomFilterNegativeResultsCount = 0; 102 private volatile long bloomFilterEligibleRequestsCount = 0; 103 private volatile long numMutationsWithoutWAL = 0; 104 private volatile long dataInMemoryWithoutWAL = 0; 105 private volatile double percentFileLocal = 0; 106 private volatile double percentFileLocalSecondaryRegions = 0; 107 private volatile long flushedCellsCount = 0; 108 private volatile long compactedCellsCount = 0; 109 private volatile long majorCompactedCellsCount = 0; 110 private volatile long flushedCellsSize = 0; 111 private volatile long compactedCellsSize = 0; 112 private volatile long majorCompactedCellsSize = 0; 113 private volatile long cellsCountCompactedToMob = 0; 114 private volatile long cellsCountCompactedFromMob = 0; 115 private volatile long cellsSizeCompactedToMob = 0; 116 private volatile long cellsSizeCompactedFromMob = 0; 117 private volatile long mobFlushCount = 0; 118 private volatile long mobFlushedCellsCount = 0; 119 private volatile long mobFlushedCellsSize = 0; 120 private volatile long mobScanCellsCount = 0; 121 private volatile long mobScanCellsSize = 0; 122 private volatile long mobFileCacheAccessCount = 0; 123 private volatile long mobFileCacheMissCount = 0; 124 private volatile double mobFileCacheHitRatio = 0; 125 private volatile long mobFileCacheEvictedCount = 0; 126 private volatile 
long mobFileCacheCount = 0; 127 private volatile long blockedRequestsCount = 0L; 128 private volatile long averageRegionSize = 0L; 129 protected final Map<String, ArrayList<Long>> requestsCountCache = 130 new ConcurrentHashMap<String, ArrayList<Long>>(); 131 132 private ScheduledExecutorService executor; 133 private Runnable runnable; 134 private long period; 135 136 /** 137 * Can be null if not on hdfs. 138 */ 139 private DFSHedgedReadMetrics dfsHedgedReadMetrics; 140 141 private final ExcludeDatanodeManager excludeDatanodeManager; 142 143 public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) { 144 this.regionServer = regionServer; 145 initBlockCache(); 146 initMobFileCache(); 147 this.excludeDatanodeManager = this.regionServer.getWalFactory().getExcludeDatanodeManager(); 148 149 this.period = regionServer.getConfiguration().getLong(HConstants.REGIONSERVER_METRICS_PERIOD, 150 HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD); 151 152 this.executor = CompatibilitySingletonFactory.getInstance(MetricsExecutor.class).getExecutor(); 153 this.runnable = new RegionServerMetricsWrapperRunnable(); 154 this.executor.scheduleWithFixedDelay(this.runnable, this.period, this.period, 155 TimeUnit.MILLISECONDS); 156 this.metricsWALSource = CompatibilitySingletonFactory.getInstance(MetricsWALSource.class); 157 this.allocator = regionServer.getRpcServer().getByteBuffAllocator(); 158 159 try { 160 this.dfsHedgedReadMetrics = FSUtils.getDFSHedgedReadMetrics(regionServer.getConfiguration()); 161 } catch (IOException e) { 162 LOG.warn("Failed to get hedged metrics", e); 163 } 164 if (LOG.isInfoEnabled()) { 165 LOG.info("Computing regionserver metrics every " + this.period + " milliseconds"); 166 } 167 } 168 169 private void initBlockCache() { 170 this.blockCache = this.regionServer.getBlockCache().orElse(null); 171 this.cacheStats = this.blockCache != null ? 
this.blockCache.getStats() : null; 172 if (this.cacheStats != null) { 173 if (this.cacheStats instanceof CombinedBlockCache.CombinedCacheStats) { 174 l1Stats = ((CombinedBlockCache.CombinedCacheStats) this.cacheStats).getLruCacheStats(); 175 l2Stats = ((CombinedBlockCache.CombinedCacheStats) this.cacheStats).getBucketCacheStats(); 176 } else { 177 l1Stats = this.cacheStats; 178 } 179 } 180 if (this.blockCache != null) { 181 if (this.blockCache instanceof CombinedBlockCache) { 182 l1Cache = ((CombinedBlockCache) this.blockCache).getFirstLevelCache(); 183 l2Cache = ((CombinedBlockCache) this.blockCache).getSecondLevelCache(); 184 } else { 185 l1Cache = this.blockCache; 186 } 187 } 188 } 189 190 /** 191 * Initializes the mob file cache. 192 */ 193 private void initMobFileCache() { 194 this.mobFileCache = this.regionServer.getMobFileCache().orElse(null); 195 } 196 197 @Override 198 public String getClusterId() { 199 return regionServer.getClusterId(); 200 } 201 202 @Override 203 public long getStartCode() { 204 return regionServer.getStartcode(); 205 } 206 207 @Override 208 public String getZookeeperQuorum() { 209 ZKWatcher zk = regionServer.getZooKeeper(); 210 if (zk == null) { 211 return ""; 212 } 213 return zk.getQuorum(); 214 } 215 216 @Override 217 public String getCoprocessors() { 218 String[] coprocessors = regionServer.getRegionServerCoprocessors(); 219 if (coprocessors == null || coprocessors.length == 0) { 220 return ""; 221 } 222 return StringUtils.join(coprocessors, ", "); 223 } 224 225 @Override 226 public String getServerName() { 227 ServerName serverName = regionServer.getServerName(); 228 if (serverName == null) { 229 return ""; 230 } 231 return serverName.getServerName(); 232 } 233 234 @Override 235 public long getNumOnlineRegions() { 236 Collection<HRegion> onlineRegionsLocalContext = regionServer.getOnlineRegionsLocalContext(); 237 if (onlineRegionsLocalContext == null) { 238 return 0; 239 } 240 return onlineRegionsLocalContext.size(); 241 } 242 243 
@Override 244 public long getTotalRequestCount() { 245 return regionServer.rpcServices.requestCount.sum(); 246 } 247 248 @Override 249 public long getTotalRowActionRequestCount() { 250 return readRequestsCount + writeRequestsCount; 251 } 252 253 @Override 254 public int getSplitQueueSize() { 255 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 256 return compactSplit == null ? 0 : compactSplit.getSplitQueueSize(); 257 } 258 259 @Override 260 public int getCompactionQueueSize() { 261 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 262 return compactSplit == null ? 0 : compactSplit.getCompactionQueueSize(); 263 } 264 265 @Override 266 public int getSmallCompactionQueueSize() { 267 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 268 return compactSplit == null ? 0 : compactSplit.getSmallCompactionQueueSize(); 269 } 270 271 @Override 272 public int getLargeCompactionQueueSize() { 273 final CompactSplit compactSplit = regionServer.getCompactSplitThread(); 274 return compactSplit == null ? 0 : compactSplit.getLargeCompactionQueueSize(); 275 } 276 277 @Override 278 public int getFlushQueueSize() { 279 // If there is no flusher there should be no queue. 280 if (this.regionServer.getMemStoreFlusher() == null) { 281 return 0; 282 } 283 return this.regionServer.getMemStoreFlusher().getFlushQueueSize(); 284 } 285 286 @Override 287 public long getBlockCacheCount() { 288 return this.blockCache != null ? this.blockCache.getBlockCount() : 0L; 289 } 290 291 @Override 292 public long getBlockCacheDataBlockCount() { 293 return this.blockCache != null ? 
this.blockCache.getDataBlockCount() : 0L; 294 } 295 296 @Override 297 public long getMemStoreLimit() { 298 return this.regionServer.getRegionServerAccounting().getGlobalMemStoreLimit(); 299 } 300 301 @Override 302 public long getOnHeapMemStoreLimit() { 303 return this.regionServer.getRegionServerAccounting().getGlobalOnHeapMemStoreLimit(); 304 } 305 306 @Override 307 public long getOffHeapMemStoreLimit() { 308 return this.regionServer.getRegionServerAccounting().getGlobalOffHeapMemStoreLimit(); 309 } 310 311 @Override 312 public long getBlockCacheSize() { 313 return this.blockCache != null ? this.blockCache.getCurrentSize() : 0L; 314 } 315 316 @Override 317 public long getBlockCacheFreeSize() { 318 return this.blockCache != null ? this.blockCache.getFreeSize() : 0L; 319 } 320 321 @Override 322 public long getBlockCacheHitCount() { 323 return this.cacheStats != null ? this.cacheStats.getHitCount() : 0L; 324 } 325 326 @Override 327 public long getBlockCachePrimaryHitCount() { 328 return this.cacheStats != null ? this.cacheStats.getPrimaryHitCount() : 0L; 329 } 330 331 @Override 332 public long getBlockCacheMissCount() { 333 return this.cacheStats != null ? this.cacheStats.getMissCount() : 0L; 334 } 335 336 @Override 337 public long getBlockCachePrimaryMissCount() { 338 return this.cacheStats != null ? this.cacheStats.getPrimaryMissCount() : 0L; 339 } 340 341 @Override 342 public long getBlockCacheEvictedCount() { 343 return this.cacheStats != null ? this.cacheStats.getEvictedCount() : 0L; 344 } 345 346 @Override 347 public long getBlockCachePrimaryEvictedCount() { 348 return this.cacheStats != null ? this.cacheStats.getPrimaryEvictedCount() : 0L; 349 } 350 351 @Override 352 public double getBlockCacheHitPercent() { 353 double ratio = this.cacheStats != null ? 
this.cacheStats.getHitRatio() : 0.0; 354 if (Double.isNaN(ratio)) { 355 ratio = 0; 356 } 357 return (ratio * 100); 358 } 359 360 @Override 361 public double getBlockCacheHitCachingPercent() { 362 double ratio = this.cacheStats != null ? this.cacheStats.getHitCachingRatio() : 0.0; 363 if (Double.isNaN(ratio)) { 364 ratio = 0; 365 } 366 return (ratio * 100); 367 } 368 369 @Override 370 public long getBlockCacheFailedInsertions() { 371 return this.cacheStats != null ? this.cacheStats.getFailedInserts() : 0L; 372 } 373 374 public long getL1CacheSize() { 375 return this.l1Cache != null ? this.l1Cache.getCurrentSize() : 0L; 376 } 377 378 public long getL1CacheFreeSize() { 379 return this.l1Cache != null ? this.l1Cache.getFreeSize() : 0L; 380 } 381 382 public long getL1CacheCount() { 383 return this.l1Cache != null ? this.l1Cache.getBlockCount() : 0L; 384 } 385 386 public long getL1CacheEvictedCount() { 387 return this.l1Stats != null ? this.l1Stats.getEvictedCount() : 0L; 388 } 389 390 public long getL2CacheSize() { 391 return this.l2Cache != null ? this.l2Cache.getCurrentSize() : 0L; 392 } 393 394 public long getL2CacheFreeSize() { 395 return this.l2Cache != null ? this.l2Cache.getFreeSize() : 0L; 396 } 397 398 public long getL2CacheCount() { 399 return this.l2Cache != null ? this.l2Cache.getBlockCount() : 0L; 400 } 401 402 public long getL2CacheEvictedCount() { 403 return this.l2Stats != null ? this.l2Stats.getEvictedCount() : 0L; 404 } 405 406 @Override 407 public long getL1CacheHitCount() { 408 return this.l1Stats != null ? this.l1Stats.getHitCount() : 0L; 409 } 410 411 @Override 412 public long getL1CacheMissCount() { 413 return this.l1Stats != null ? this.l1Stats.getMissCount() : 0L; 414 } 415 416 @Override 417 public double getL1CacheHitRatio() { 418 return this.l1Stats != null ? this.l1Stats.getHitRatio() : 0.0; 419 } 420 421 @Override 422 public double getL1CacheMissRatio() { 423 return this.l1Stats != null ? 
this.l1Stats.getMissRatio() : 0.0; 424 } 425 426 @Override 427 public long getL2CacheHitCount() { 428 return this.l2Stats != null ? this.l2Stats.getHitCount() : 0L; 429 } 430 431 @Override 432 public long getL2CacheMissCount() { 433 return this.l2Stats != null ? this.l2Stats.getMissCount() : 0L; 434 } 435 436 @Override 437 public double getL2CacheHitRatio() { 438 return this.l2Stats != null ? this.l2Stats.getHitRatio() : 0.0; 439 } 440 441 @Override 442 public double getL2CacheMissRatio() { 443 return this.l2Stats != null ? this.l2Stats.getMissRatio() : 0.0; 444 } 445 446 @Override 447 public void forceRecompute() { 448 this.runnable.run(); 449 } 450 451 @Override 452 public long getNumStores() { 453 return numStores; 454 } 455 456 @Override 457 public long getNumWALFiles() { 458 return numWALFiles; 459 } 460 461 @Override 462 public long getWALFileSize() { 463 return walFileSize; 464 } 465 466 @Override 467 public List<String> getWALExcludeDNs() { 468 if (excludeDatanodeManager == null) { 469 return Collections.emptyList(); 470 } 471 return excludeDatanodeManager.getExcludeDNs().entrySet().stream() 472 .map(e -> e.getKey().toString() + ", " + e.getValue()).collect(Collectors.toList()); 473 } 474 475 @Override 476 public long getNumWALSlowAppend() { 477 return metricsWALSource.getSlowAppendCount(); 478 } 479 480 @Override 481 public long getNumStoreFiles() { 482 return numStoreFiles; 483 } 484 485 @Override 486 public long getMaxStoreFiles() { 487 return maxStoreFileCount; 488 } 489 490 @Override 491 public long getMaxStoreFileAge() { 492 return maxStoreFileAge; 493 } 494 495 @Override 496 public long getMinStoreFileAge() { 497 return minStoreFileAge; 498 } 499 500 @Override 501 public long getAvgStoreFileAge() { 502 return avgStoreFileAge; 503 } 504 505 @Override 506 public long getNumReferenceFiles() { 507 return numReferenceFiles; 508 } 509 510 @Override 511 public long getMemStoreSize() { 512 return memstoreSize; 513 } 514 515 @Override 516 public long 
getOnHeapMemStoreSize() { 517 return onHeapMemstoreSize; 518 } 519 520 @Override 521 public long getOffHeapMemStoreSize() { 522 return offHeapMemstoreSize; 523 } 524 525 @Override 526 public long getStoreFileSize() { 527 return storeFileSize; 528 } 529 530 @Override 531 public double getRequestsPerSecond() { 532 return requestsPerSecond; 533 } 534 535 @Override 536 public long getReadRequestsCount() { 537 return readRequestsCount; 538 } 539 540 @Override 541 public double getReadRequestsRatePerSecond() { 542 return readRequestsRatePerSecond; 543 } 544 545 @Override 546 public long getFilteredReadRequestsCount() { 547 return filteredReadRequestsCount; 548 } 549 550 @Override 551 public long getWriteRequestsCount() { 552 return writeRequestsCount; 553 } 554 555 @Override 556 public double getWriteRequestsRatePerSecond() { 557 return writeRequestsRatePerSecond; 558 } 559 560 @Override 561 public long getRpcGetRequestsCount() { 562 return regionServer.rpcServices.rpcGetRequestCount.sum(); 563 } 564 565 @Override 566 public long getRpcScanRequestsCount() { 567 return regionServer.rpcServices.rpcScanRequestCount.sum(); 568 } 569 570 @Override 571 public long getRpcFullScanRequestsCount() { 572 return regionServer.rpcServices.rpcFullScanRequestCount.sum(); 573 } 574 575 @Override 576 public long getRpcMultiRequestsCount() { 577 return regionServer.rpcServices.rpcMultiRequestCount.sum(); 578 } 579 580 @Override 581 public long getRpcMutateRequestsCount() { 582 return regionServer.rpcServices.rpcMutateRequestCount.sum(); 583 } 584 585 @Override 586 public long getCheckAndMutateChecksFailed() { 587 return checkAndMutateChecksFailed; 588 } 589 590 @Override 591 public long getCheckAndMutateChecksPassed() { 592 return checkAndMutateChecksPassed; 593 } 594 595 @Override 596 public long getStoreFileIndexSize() { 597 return storefileIndexSize; 598 } 599 600 @Override 601 public long getTotalStaticIndexSize() { 602 return totalStaticIndexSize; 603 } 604 605 @Override 606 public 
long getTotalStaticBloomSize() { 607 return totalStaticBloomSize; 608 } 609 610 @Override 611 public long getBloomFilterRequestsCount() { 612 return bloomFilterRequestsCount; 613 } 614 615 @Override 616 public long getBloomFilterNegativeResultsCount() { 617 return bloomFilterNegativeResultsCount; 618 } 619 620 @Override 621 public long getBloomFilterEligibleRequestsCount() { 622 return bloomFilterEligibleRequestsCount; 623 } 624 625 @Override 626 public long getNumMutationsWithoutWAL() { 627 return numMutationsWithoutWAL; 628 } 629 630 @Override 631 public long getDataInMemoryWithoutWAL() { 632 return dataInMemoryWithoutWAL; 633 } 634 635 @Override 636 public double getPercentFileLocal() { 637 return percentFileLocal; 638 } 639 640 @Override 641 public double getPercentFileLocalSecondaryRegions() { 642 return percentFileLocalSecondaryRegions; 643 } 644 645 @Override 646 public long getUpdatesBlockedTime() { 647 if (this.regionServer.getMemStoreFlusher() == null) { 648 return 0; 649 } 650 return this.regionServer.getMemStoreFlusher().getUpdatesBlockedMsHighWater().sum(); 651 } 652 653 @Override 654 public long getFlushedCellsCount() { 655 return flushedCellsCount; 656 } 657 658 @Override 659 public long getCompactedCellsCount() { 660 return compactedCellsCount; 661 } 662 663 @Override 664 public long getMajorCompactedCellsCount() { 665 return majorCompactedCellsCount; 666 } 667 668 @Override 669 public long getFlushedCellsSize() { 670 return flushedCellsSize; 671 } 672 673 @Override 674 public long getCompactedCellsSize() { 675 return compactedCellsSize; 676 } 677 678 @Override 679 public long getMajorCompactedCellsSize() { 680 return majorCompactedCellsSize; 681 } 682 683 @Override 684 public long getCellsCountCompactedFromMob() { 685 return cellsCountCompactedFromMob; 686 } 687 688 @Override 689 public long getCellsCountCompactedToMob() { 690 return cellsCountCompactedToMob; 691 } 692 693 @Override 694 public long getCellsSizeCompactedFromMob() { 695 return 
cellsSizeCompactedFromMob; 696 } 697 698 @Override 699 public long getCellsSizeCompactedToMob() { 700 return cellsSizeCompactedToMob; 701 } 702 703 @Override 704 public long getMobFlushCount() { 705 return mobFlushCount; 706 } 707 708 @Override 709 public long getMobFlushedCellsCount() { 710 return mobFlushedCellsCount; 711 } 712 713 @Override 714 public long getMobFlushedCellsSize() { 715 return mobFlushedCellsSize; 716 } 717 718 @Override 719 public long getMobScanCellsCount() { 720 return mobScanCellsCount; 721 } 722 723 @Override 724 public long getMobScanCellsSize() { 725 return mobScanCellsSize; 726 } 727 728 @Override 729 public long getMobFileCacheAccessCount() { 730 return mobFileCacheAccessCount; 731 } 732 733 @Override 734 public long getMobFileCacheMissCount() { 735 return mobFileCacheMissCount; 736 } 737 738 @Override 739 public long getMobFileCacheCount() { 740 return mobFileCacheCount; 741 } 742 743 @Override 744 public long getMobFileCacheEvictedCount() { 745 return mobFileCacheEvictedCount; 746 } 747 748 @Override 749 public double getMobFileCacheHitPercent() { 750 return mobFileCacheHitRatio * 100; 751 } 752 753 @Override 754 public int getActiveScanners() { 755 return regionServer.getRSRpcServices().getScannersCount(); 756 } 757 758 /** 759 * This is the runnable that will be executed on the executor every PERIOD number of seconds It 760 * will take metrics/numbers from all of the regions and use them to compute point in time 761 * metrics. 
762 */ 763 public class RegionServerMetricsWrapperRunnable implements Runnable { 764 765 private long lastRan = 0; 766 767 @Override 768 synchronized public void run() { 769 try { 770 HDFSBlocksDistribution hdfsBlocksDistribution = new HDFSBlocksDistribution(); 771 HDFSBlocksDistribution hdfsBlocksDistributionSecondaryRegions = 772 new HDFSBlocksDistribution(); 773 774 long tempNumStores = 0, tempNumStoreFiles = 0, tempStoreFileSize = 0; 775 long tempMemstoreSize = 0, tempOnHeapMemstoreSize = 0, tempOffHeapMemstoreSize = 0; 776 long tempMaxStoreFileAge = 0, tempNumReferenceFiles = 0; 777 long tempMaxStoreFileCount = 0; 778 long avgAgeNumerator = 0, numHFiles = 0; 779 long tempMinStoreFileAge = Long.MAX_VALUE; 780 long tempReadRequestsCount = 0, tempFilteredReadRequestsCount = 0, 781 tempWriteRequestsCount = 0; 782 long tempCheckAndMutateChecksFailed = 0; 783 long tempCheckAndMutateChecksPassed = 0; 784 long tempStorefileIndexSize = 0; 785 long tempTotalStaticIndexSize = 0; 786 long tempTotalStaticBloomSize = 0; 787 long tempBloomFilterRequestsCount = 0; 788 long tempBloomFilterNegativeResultsCount = 0; 789 long tempBloomFilterEligibleRequestsCount = 0; 790 long tempNumMutationsWithoutWAL = 0; 791 long tempDataInMemoryWithoutWAL = 0; 792 double tempPercentFileLocal = 0; 793 double tempPercentFileLocalSecondaryRegions = 0; 794 long tempFlushedCellsCount = 0; 795 long tempCompactedCellsCount = 0; 796 long tempMajorCompactedCellsCount = 0; 797 long tempFlushedCellsSize = 0; 798 long tempCompactedCellsSize = 0; 799 long tempMajorCompactedCellsSize = 0; 800 long tempCellsCountCompactedToMob = 0; 801 long tempCellsCountCompactedFromMob = 0; 802 long tempCellsSizeCompactedToMob = 0; 803 long tempCellsSizeCompactedFromMob = 0; 804 long tempMobFlushCount = 0; 805 long tempMobFlushedCellsCount = 0; 806 long tempMobFlushedCellsSize = 0; 807 long tempMobScanCellsCount = 0; 808 long tempMobScanCellsSize = 0; 809 long tempBlockedRequestsCount = 0; 810 int regionCount = 0; 811 812 
long currentReadRequestsCount = 0; 813 long currentWriteRequestsCount = 0; 814 long lastReadRequestsCount = 0; 815 long lastWriteRequestsCount = 0; 816 long readRequestsDelta = 0; 817 long writeRequestsDelta = 0; 818 long totalReadRequestsDelta = 0; 819 long totalWriteRequestsDelta = 0; 820 String encodedRegionName; 821 for (HRegion r : regionServer.getOnlineRegionsLocalContext()) { 822 encodedRegionName = r.getRegionInfo().getEncodedName(); 823 currentReadRequestsCount = r.getReadRequestsCount(); 824 currentWriteRequestsCount = r.getWriteRequestsCount(); 825 if (requestsCountCache.containsKey(encodedRegionName)) { 826 lastReadRequestsCount = requestsCountCache.get(encodedRegionName).get(0); 827 lastWriteRequestsCount = requestsCountCache.get(encodedRegionName).get(1); 828 readRequestsDelta = currentReadRequestsCount - lastReadRequestsCount; 829 writeRequestsDelta = currentWriteRequestsCount - lastWriteRequestsCount; 830 totalReadRequestsDelta += readRequestsDelta; 831 totalWriteRequestsDelta += writeRequestsDelta; 832 // Update cache for our next comparision 833 requestsCountCache.get(encodedRegionName).set(0, currentReadRequestsCount); 834 requestsCountCache.get(encodedRegionName).set(1, currentWriteRequestsCount); 835 } else { 836 // List[0] -> readRequestCount 837 // List[1] -> writeRequestCount 838 ArrayList<Long> requests = new ArrayList<Long>(2); 839 requests.add(currentReadRequestsCount); 840 requests.add(currentWriteRequestsCount); 841 requestsCountCache.put(encodedRegionName, requests); 842 totalReadRequestsDelta += currentReadRequestsCount; 843 totalWriteRequestsDelta += currentWriteRequestsCount; 844 } 845 tempNumMutationsWithoutWAL += r.getNumMutationsWithoutWAL(); 846 tempDataInMemoryWithoutWAL += r.getDataInMemoryWithoutWAL(); 847 tempReadRequestsCount += r.getReadRequestsCount(); 848 tempFilteredReadRequestsCount += r.getFilteredReadRequestsCount(); 849 tempWriteRequestsCount += r.getWriteRequestsCount(); 850 tempCheckAndMutateChecksFailed += 
r.getCheckAndMutateChecksFailed(); 851 tempCheckAndMutateChecksPassed += r.getCheckAndMutateChecksPassed(); 852 tempBlockedRequestsCount += r.getBlockedRequestsCount(); 853 List<? extends Store> storeList = r.getStores(); 854 tempNumStores += storeList.size(); 855 for (Store store : storeList) { 856 tempNumStoreFiles += store.getStorefilesCount(); 857 tempMemstoreSize += store.getMemStoreSize().getDataSize(); 858 tempOnHeapMemstoreSize += store.getMemStoreSize().getHeapSize(); 859 tempOffHeapMemstoreSize += store.getMemStoreSize().getOffHeapSize(); 860 tempStoreFileSize += store.getStorefilesSize(); 861 862 tempMaxStoreFileCount = Math.max(tempMaxStoreFileCount, store.getStorefilesCount()); 863 864 OptionalLong storeMaxStoreFileAge = store.getMaxStoreFileAge(); 865 if ( 866 storeMaxStoreFileAge.isPresent() 867 && storeMaxStoreFileAge.getAsLong() > tempMaxStoreFileAge 868 ) { 869 tempMaxStoreFileAge = storeMaxStoreFileAge.getAsLong(); 870 } 871 872 OptionalLong storeMinStoreFileAge = store.getMinStoreFileAge(); 873 if ( 874 storeMinStoreFileAge.isPresent() 875 && storeMinStoreFileAge.getAsLong() < tempMinStoreFileAge 876 ) { 877 tempMinStoreFileAge = storeMinStoreFileAge.getAsLong(); 878 } 879 880 long storeHFiles = store.getNumHFiles(); 881 numHFiles += storeHFiles; 882 tempNumReferenceFiles += store.getNumReferenceFiles(); 883 884 OptionalDouble storeAvgStoreFileAge = store.getAvgStoreFileAge(); 885 if (storeAvgStoreFileAge.isPresent()) { 886 avgAgeNumerator = 887 (long) (avgAgeNumerator + storeAvgStoreFileAge.getAsDouble() * storeHFiles); 888 } 889 890 tempStorefileIndexSize += store.getStorefilesRootLevelIndexSize(); 891 tempTotalStaticBloomSize += store.getTotalStaticBloomSize(); 892 tempTotalStaticIndexSize += store.getTotalStaticIndexSize(); 893 tempBloomFilterRequestsCount += store.getBloomFilterRequestsCount(); 894 tempBloomFilterNegativeResultsCount += store.getBloomFilterNegativeResultsCount(); 895 tempBloomFilterEligibleRequestsCount += 
store.getBloomFilterEligibleRequestsCount(); 896 tempFlushedCellsCount += store.getFlushedCellsCount(); 897 tempCompactedCellsCount += store.getCompactedCellsCount(); 898 tempMajorCompactedCellsCount += store.getMajorCompactedCellsCount(); 899 tempFlushedCellsSize += store.getFlushedCellsSize(); 900 tempCompactedCellsSize += store.getCompactedCellsSize(); 901 tempMajorCompactedCellsSize += store.getMajorCompactedCellsSize(); 902 if (store instanceof HMobStore) { 903 HMobStore mobStore = (HMobStore) store; 904 tempCellsCountCompactedToMob += mobStore.getCellsCountCompactedToMob(); 905 tempCellsCountCompactedFromMob += mobStore.getCellsCountCompactedFromMob(); 906 tempCellsSizeCompactedToMob += mobStore.getCellsSizeCompactedToMob(); 907 tempCellsSizeCompactedFromMob += mobStore.getCellsSizeCompactedFromMob(); 908 tempMobFlushCount += mobStore.getMobFlushCount(); 909 tempMobFlushedCellsCount += mobStore.getMobFlushedCellsCount(); 910 tempMobFlushedCellsSize += mobStore.getMobFlushedCellsSize(); 911 tempMobScanCellsCount += mobStore.getMobScanCellsCount(); 912 tempMobScanCellsSize += mobStore.getMobScanCellsSize(); 913 } 914 } 915 916 HDFSBlocksDistribution distro = r.getHDFSBlocksDistribution(); 917 hdfsBlocksDistribution.add(distro); 918 if (r.getRegionInfo().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) { 919 hdfsBlocksDistributionSecondaryRegions.add(distro); 920 } 921 regionCount++; 922 } 923 float localityIndex = 924 hdfsBlocksDistribution.getBlockLocalityIndex(regionServer.getServerName().getHostname()); 925 tempPercentFileLocal = Double.isNaN(tempBlockedRequestsCount) ? 0 : (localityIndex * 100); 926 927 float localityIndexSecondaryRegions = hdfsBlocksDistributionSecondaryRegions 928 .getBlockLocalityIndex(regionServer.getServerName().getHostname()); 929 tempPercentFileLocalSecondaryRegions = 930 Double.isNaN(localityIndexSecondaryRegions) ? 
0 : (localityIndexSecondaryRegions * 100); 931 932 // Compute the number of requests per second 933 long currentTime = EnvironmentEdgeManager.currentTime(); 934 935 // assume that it took PERIOD seconds to start the executor. 936 // this is a guess but it's a pretty good one. 937 if (lastRan == 0) { 938 lastRan = currentTime - period; 939 } 940 // If we've time traveled keep the last requests per second. 941 if ((currentTime - lastRan) > 0) { 942 requestsPerSecond = 943 (totalReadRequestsDelta + totalWriteRequestsDelta) / ((currentTime - lastRan) / 1000.0); 944 945 double readRequestsRatePerMilliSecond = (double) totalReadRequestsDelta / period; 946 double writeRequestsRatePerMilliSecond = (double) totalWriteRequestsDelta / period; 947 948 readRequestsRatePerSecond = readRequestsRatePerMilliSecond * 1000.0; 949 writeRequestsRatePerSecond = writeRequestsRatePerMilliSecond * 1000.0; 950 } 951 lastRan = currentTime; 952 953 final WALProvider provider = regionServer.getWalFactory().getWALProvider(); 954 final WALProvider metaProvider = regionServer.getWalFactory().getMetaWALProvider(); 955 numWALFiles = (provider == null ? 0 : provider.getNumLogFiles()) 956 + (metaProvider == null ? 0 : metaProvider.getNumLogFiles()); 957 walFileSize = (provider == null ? 0 : provider.getLogFileSize()) 958 + (metaProvider == null ? 0 : metaProvider.getLogFileSize()); 959 // Copy over computed values so that no thread sees half computed values. 
960 numStores = tempNumStores; 961 numStoreFiles = tempNumStoreFiles; 962 memstoreSize = tempMemstoreSize; 963 onHeapMemstoreSize = tempOnHeapMemstoreSize; 964 offHeapMemstoreSize = tempOffHeapMemstoreSize; 965 storeFileSize = tempStoreFileSize; 966 maxStoreFileCount = tempMaxStoreFileCount; 967 maxStoreFileAge = tempMaxStoreFileAge; 968 if (regionCount > 0) { 969 averageRegionSize = (memstoreSize + storeFileSize) / regionCount; 970 } 971 if (tempMinStoreFileAge != Long.MAX_VALUE) { 972 minStoreFileAge = tempMinStoreFileAge; 973 } 974 975 if (numHFiles != 0) { 976 avgStoreFileAge = avgAgeNumerator / numHFiles; 977 } 978 979 numReferenceFiles = tempNumReferenceFiles; 980 readRequestsCount = tempReadRequestsCount; 981 filteredReadRequestsCount = tempFilteredReadRequestsCount; 982 writeRequestsCount = tempWriteRequestsCount; 983 checkAndMutateChecksFailed = tempCheckAndMutateChecksFailed; 984 checkAndMutateChecksPassed = tempCheckAndMutateChecksPassed; 985 storefileIndexSize = tempStorefileIndexSize; 986 totalStaticIndexSize = tempTotalStaticIndexSize; 987 totalStaticBloomSize = tempTotalStaticBloomSize; 988 bloomFilterRequestsCount = tempBloomFilterRequestsCount; 989 bloomFilterNegativeResultsCount = tempBloomFilterNegativeResultsCount; 990 bloomFilterEligibleRequestsCount = tempBloomFilterEligibleRequestsCount; 991 numMutationsWithoutWAL = tempNumMutationsWithoutWAL; 992 dataInMemoryWithoutWAL = tempDataInMemoryWithoutWAL; 993 percentFileLocal = tempPercentFileLocal; 994 percentFileLocalSecondaryRegions = tempPercentFileLocalSecondaryRegions; 995 flushedCellsCount = tempFlushedCellsCount; 996 compactedCellsCount = tempCompactedCellsCount; 997 majorCompactedCellsCount = tempMajorCompactedCellsCount; 998 flushedCellsSize = tempFlushedCellsSize; 999 compactedCellsSize = tempCompactedCellsSize; 1000 majorCompactedCellsSize = tempMajorCompactedCellsSize; 1001 cellsCountCompactedToMob = tempCellsCountCompactedToMob; 1002 cellsCountCompactedFromMob = 
tempCellsCountCompactedFromMob; 1003 cellsSizeCompactedToMob = tempCellsSizeCompactedToMob; 1004 cellsSizeCompactedFromMob = tempCellsSizeCompactedFromMob; 1005 mobFlushCount = tempMobFlushCount; 1006 mobFlushedCellsCount = tempMobFlushedCellsCount; 1007 mobFlushedCellsSize = tempMobFlushedCellsSize; 1008 mobScanCellsCount = tempMobScanCellsCount; 1009 mobScanCellsSize = tempMobScanCellsSize; 1010 mobFileCacheAccessCount = mobFileCache != null ? mobFileCache.getAccessCount() : 0L; 1011 mobFileCacheMissCount = mobFileCache != null ? mobFileCache.getMissCount() : 0L; 1012 mobFileCacheHitRatio = mobFileCache != null ? mobFileCache.getHitRatio() : 0.0; 1013 if (Double.isNaN(mobFileCacheHitRatio)) { 1014 mobFileCacheHitRatio = 0.0; 1015 } 1016 mobFileCacheEvictedCount = mobFileCache != null ? mobFileCache.getEvictedFileCount() : 0L; 1017 mobFileCacheCount = mobFileCache != null ? mobFileCache.getCacheSize() : 0; 1018 blockedRequestsCount = tempBlockedRequestsCount; 1019 } catch (Throwable e) { 1020 LOG.warn("Caught exception! Will suppress and retry.", e); 1021 } 1022 } 1023 } 1024 1025 @Override 1026 public long getHedgedReadOps() { 1027 return this.dfsHedgedReadMetrics == null ? 0 : this.dfsHedgedReadMetrics.getHedgedReadOps(); 1028 } 1029 1030 @Override 1031 public long getHedgedReadWins() { 1032 return this.dfsHedgedReadMetrics == null ? 0 : this.dfsHedgedReadMetrics.getHedgedReadWins(); 1033 } 1034 1035 @Override 1036 public long getHedgedReadOpsInCurThread() { 1037 return this.dfsHedgedReadMetrics == null 1038 ? 
0 1039 : this.dfsHedgedReadMetrics.getHedgedReadOpsInCurThread(); 1040 } 1041 1042 @Override 1043 public long getTotalBytesRead() { 1044 return FSDataInputStreamWrapper.getTotalBytesRead(); 1045 } 1046 1047 @Override 1048 public long getLocalBytesRead() { 1049 return FSDataInputStreamWrapper.getLocalBytesRead(); 1050 } 1051 1052 @Override 1053 public long getShortCircuitBytesRead() { 1054 return FSDataInputStreamWrapper.getShortCircuitBytesRead(); 1055 } 1056 1057 @Override 1058 public long getZeroCopyBytesRead() { 1059 return FSDataInputStreamWrapper.getZeroCopyBytesRead(); 1060 } 1061 1062 @Override 1063 public long getBlockedRequestsCount() { 1064 return blockedRequestsCount; 1065 } 1066 1067 @Override 1068 public long getAverageRegionSize() { 1069 return averageRegionSize; 1070 } 1071 1072 @Override 1073 public long getDataMissCount() { 1074 return this.cacheStats != null ? this.cacheStats.getDataMissCount() : 0L; 1075 } 1076 1077 @Override 1078 public long getLeafIndexMissCount() { 1079 return this.cacheStats != null ? this.cacheStats.getLeafIndexMissCount() : 0L; 1080 } 1081 1082 @Override 1083 public long getBloomChunkMissCount() { 1084 return this.cacheStats != null ? this.cacheStats.getBloomChunkMissCount() : 0L; 1085 } 1086 1087 @Override 1088 public long getMetaMissCount() { 1089 return this.cacheStats != null ? this.cacheStats.getMetaMissCount() : 0L; 1090 } 1091 1092 @Override 1093 public long getRootIndexMissCount() { 1094 return this.cacheStats != null ? this.cacheStats.getRootIndexMissCount() : 0L; 1095 } 1096 1097 @Override 1098 public long getIntermediateIndexMissCount() { 1099 return this.cacheStats != null ? this.cacheStats.getIntermediateIndexMissCount() : 0L; 1100 } 1101 1102 @Override 1103 public long getFileInfoMissCount() { 1104 return this.cacheStats != null ? this.cacheStats.getFileInfoMissCount() : 0L; 1105 } 1106 1107 @Override 1108 public long getGeneralBloomMetaMissCount() { 1109 return this.cacheStats != null ? 
this.cacheStats.getGeneralBloomMetaMissCount() : 0L; 1110 } 1111 1112 @Override 1113 public long getDeleteFamilyBloomMissCount() { 1114 return this.cacheStats != null ? this.cacheStats.getDeleteFamilyBloomMissCount() : 0L; 1115 } 1116 1117 @Override 1118 public long getTrailerMissCount() { 1119 return this.cacheStats != null ? this.cacheStats.getTrailerMissCount() : 0L; 1120 } 1121 1122 @Override 1123 public long getDataHitCount() { 1124 return this.cacheStats != null ? this.cacheStats.getDataHitCount() : 0L; 1125 } 1126 1127 @Override 1128 public long getLeafIndexHitCount() { 1129 return this.cacheStats != null ? this.cacheStats.getLeafIndexHitCount() : 0L; 1130 } 1131 1132 @Override 1133 public long getBloomChunkHitCount() { 1134 return this.cacheStats != null ? this.cacheStats.getBloomChunkHitCount() : 0L; 1135 } 1136 1137 @Override 1138 public long getMetaHitCount() { 1139 return this.cacheStats != null ? this.cacheStats.getMetaHitCount() : 0L; 1140 } 1141 1142 @Override 1143 public long getRootIndexHitCount() { 1144 return this.cacheStats != null ? this.cacheStats.getRootIndexHitCount() : 0L; 1145 } 1146 1147 @Override 1148 public long getIntermediateIndexHitCount() { 1149 return this.cacheStats != null ? this.cacheStats.getIntermediateIndexHitCount() : 0L; 1150 } 1151 1152 @Override 1153 public long getFileInfoHitCount() { 1154 return this.cacheStats != null ? this.cacheStats.getFileInfoHitCount() : 0L; 1155 } 1156 1157 @Override 1158 public long getGeneralBloomMetaHitCount() { 1159 return this.cacheStats != null ? this.cacheStats.getGeneralBloomMetaHitCount() : 0L; 1160 } 1161 1162 @Override 1163 public long getDeleteFamilyBloomHitCount() { 1164 return this.cacheStats != null ? this.cacheStats.getDeleteFamilyBloomHitCount() : 0L; 1165 } 1166 1167 @Override 1168 public long getTrailerHitCount() { 1169 return this.cacheStats != null ? 
this.cacheStats.getTrailerHitCount() : 0L;
  }

  /**
   * Delegates to {@code ByteBuffAllocator.getHeapAllocationBytes}, passing this wrapper's
   * allocator together with {@code ByteBuffAllocator.HEAP}.
   */
  @Override
  public long getByteBuffAllocatorHeapAllocationBytes() {
    return ByteBuffAllocator.getHeapAllocationBytes(allocator, ByteBuffAllocator.HEAP);
  }

  /** Delegates to {@code getPoolAllocationBytes()} on this wrapper's allocator. */
  @Override
  public long getByteBuffAllocatorPoolAllocationBytes() {
    return allocator.getPoolAllocationBytes();
  }

  /**
   * Delegates to {@code ByteBuffAllocator.getHeapAllocationRatio}, passing this wrapper's
   * allocator together with {@code ByteBuffAllocator.HEAP}.
   */
  @Override
  public double getByteBuffAllocatorHeapAllocRatio() {
    return ByteBuffAllocator.getHeapAllocationRatio(allocator, ByteBuffAllocator.HEAP);
  }

  /** Delegates to {@code getTotalBufferCount()} on this wrapper's allocator. */
  @Override
  public long getByteBuffAllocatorTotalBufferCount() {
    return allocator.getTotalBufferCount();
  }

  /** Delegates to {@code getUsedBufferCount()} on this wrapper's allocator. */
  @Override
  public long getByteBuffAllocatorUsedBufferCount() {
    return allocator.getUsedBufferCount();
  }
}