View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import java.io.IOException;
21  import java.util.Collection;
22  import java.util.List;
23  import java.util.concurrent.ScheduledExecutorService;
24  import java.util.concurrent.TimeUnit;
25  
26  import org.apache.commons.lang.StringUtils;
27  import org.apache.commons.logging.Log;
28  import org.apache.commons.logging.LogFactory;
29  import org.apache.hadoop.hbase.classification.InterfaceAudience;
30  import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
31  import org.apache.hadoop.hbase.HConstants;
32  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
33  import org.apache.hadoop.hbase.HRegionInfo;
34  import org.apache.hadoop.hbase.ServerName;
35  import org.apache.hadoop.hbase.io.hfile.BlockCache;
36  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
37  import org.apache.hadoop.hbase.io.hfile.CacheStats;
38  import org.apache.hadoop.hbase.wal.BoundedRegionGroupingProvider;
39  import org.apache.hadoop.hbase.wal.DefaultWALProvider;
40  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
41  import org.apache.hadoop.hbase.util.FSUtils;
42  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
43  import org.apache.hadoop.hdfs.DFSHedgedReadMetrics;
44  import org.apache.hadoop.metrics2.MetricsExecutor;
45  
46  /**
47   * Impl for exposing HRegionServer Information through Hadoop's metrics 2 system.
48   */
49  @InterfaceAudience.Private
50  class MetricsRegionServerWrapperImpl
51      implements MetricsRegionServerWrapper {
52  
53    private static final Log LOG = LogFactory.getLog(MetricsRegionServerWrapperImpl.class);
54  
55    private final HRegionServer regionServer;
56  
57    private BlockCache blockCache;
58  
59    private volatile long numStores = 0;
60    private volatile long numWALFiles = 0;
61    private volatile long walFileSize = 0;
62    private volatile long numStoreFiles = 0;
63    private volatile long memstoreSize = 0;
64    private volatile long storeFileSize = 0;
65    private volatile double requestsPerSecond = 0.0;
66    private volatile long readRequestsCount = 0;
67    private volatile long writeRequestsCount = 0;
68    private volatile long checkAndMutateChecksFailed = 0;
69    private volatile long checkAndMutateChecksPassed = 0;
70    private volatile long storefileIndexSize = 0;
71    private volatile long totalStaticIndexSize = 0;
72    private volatile long totalStaticBloomSize = 0;
73    private volatile long numMutationsWithoutWAL = 0;
74    private volatile long dataInMemoryWithoutWAL = 0;
75    private volatile int percentFileLocal = 0;
76    private volatile int percentFileLocalSecondaryRegions = 0;
77    private volatile long flushedCellsCount = 0;
78    private volatile long compactedCellsCount = 0;
79    private volatile long majorCompactedCellsCount = 0;
80    private volatile long flushedCellsSize = 0;
81    private volatile long compactedCellsSize = 0;
82    private volatile long majorCompactedCellsSize = 0;
83    private volatile long blockedRequestsCount = 0L;
84  
85    private CacheStats cacheStats;
86    private ScheduledExecutorService executor;
87    private Runnable runnable;
88    private long period;
89  
90    /**
91     * Can be null if not on hdfs.
92     */
93    private DFSHedgedReadMetrics dfsHedgedReadMetrics;
94  
95    public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) {
96      this.regionServer = regionServer;
97      initBlockCache();
98  
99      this.period =
100         regionServer.conf.getLong(HConstants.REGIONSERVER_METRICS_PERIOD,
101           HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD);
102 
103     this.executor = CompatibilitySingletonFactory.getInstance(MetricsExecutor.class).getExecutor();
104     this.runnable = new RegionServerMetricsWrapperRunnable();
105     this.executor.scheduleWithFixedDelay(this.runnable, this.period, this.period,
106       TimeUnit.MILLISECONDS);
107 
108     try {
109       this.dfsHedgedReadMetrics = FSUtils.getDFSHedgedReadMetrics(regionServer.getConfiguration());
110     } catch (IOException e) {
111       LOG.warn("Failed to get hedged metrics", e);
112     }
113     if (LOG.isInfoEnabled()) {
114       LOG.info("Computing regionserver metrics every " + this.period + " milliseconds");
115     }
116   }
117 
118   /**
119    * It's possible that due to threading the block cache could not be initialized
120    * yet (testing multiple region servers in one jvm).  So we need to try and initialize
121    * the blockCache and cacheStats reference multiple times until we succeed.
122    */
123   private synchronized  void initBlockCache() {
124     CacheConfig cacheConfig = this.regionServer.cacheConfig;
125     if (cacheConfig != null && this.blockCache == null) {
126       this.blockCache = cacheConfig.getBlockCache();
127     }
128 
129     if (this.blockCache != null && this.cacheStats == null) {
130       this.cacheStats = blockCache.getStats();
131     }
132   }
133 
134   @Override
135   public String getClusterId() {
136     return regionServer.getClusterId();
137   }
138 
139   @Override
140   public long getStartCode() {
141     return regionServer.getStartcode();
142   }
143 
144   @Override
145   public String getZookeeperQuorum() {
146     ZooKeeperWatcher zk = regionServer.getZooKeeper();
147     if (zk == null) {
148       return "";
149     }
150     return zk.getQuorum();
151   }
152 
153   @Override
154   public String getCoprocessors() {
155     String[] coprocessors = regionServer.getRegionServerCoprocessors();
156     if (coprocessors == null || coprocessors.length == 0) {
157       return "";
158     }
159     return StringUtils.join(coprocessors, ", ");
160   }
161 
162   @Override
163   public String getServerName() {
164     ServerName serverName = regionServer.getServerName();
165     if (serverName == null) {
166       return "";
167     }
168     return serverName.getServerName();
169   }
170 
171   @Override
172   public long getNumOnlineRegions() {
173     Collection<Region> onlineRegionsLocalContext = regionServer.getOnlineRegionsLocalContext();
174     if (onlineRegionsLocalContext == null) {
175       return 0;
176     }
177     return onlineRegionsLocalContext.size();
178   }
179 
180   @Override
181   public long getTotalRequestCount() {
182     return regionServer.rpcServices.requestCount.get();
183   }
184 
185   @Override
186   public int getSplitQueueSize() {
187     if (this.regionServer.compactSplitThread == null) {
188       return 0;
189     }
190     return this.regionServer.compactSplitThread.getSplitQueueSize();
191   }
192 
193   @Override
194   public int getCompactionQueueSize() {
195     //The thread could be zero.  if so assume there is no queue.
196     if (this.regionServer.compactSplitThread == null) {
197       return 0;
198     }
199     return this.regionServer.compactSplitThread.getCompactionQueueSize();
200   }
201 
202   @Override
203   public int getSmallCompactionQueueSize() {
204     //The thread could be zero.  if so assume there is no queue.
205     if (this.regionServer.compactSplitThread == null) {
206       return 0;
207     }
208     return this.regionServer.compactSplitThread.getSmallCompactionQueueSize();
209   }
210 
211   @Override
212   public int getLargeCompactionQueueSize() {
213     //The thread could be zero.  if so assume there is no queue.
214     if (this.regionServer.compactSplitThread == null) {
215       return 0;
216     }
217     return this.regionServer.compactSplitThread.getLargeCompactionQueueSize();
218   }
219 
220   @Override
221   public int getFlushQueueSize() {
222     //If there is no flusher there should be no queue.
223     if (this.regionServer.cacheFlusher == null) {
224       return 0;
225     }
226     return this.regionServer.cacheFlusher.getFlushQueueSize();
227   }
228 
229   @Override
230   public long getBlockCacheCount() {
231     if (this.blockCache == null) {
232       return 0;
233     }
234     return this.blockCache.getBlockCount();
235   }
236 
237   @Override
238   public long getBlockCacheSize() {
239     if (this.blockCache == null) {
240       return 0;
241     }
242     return this.blockCache.getCurrentSize();
243   }
244 
245   @Override
246   public long getBlockCacheFreeSize() {
247     if (this.blockCache == null) {
248       return 0;
249     }
250     return this.blockCache.getFreeSize();
251   }
252 
253   @Override
254   public long getBlockCacheHitCount() {
255     if (this.cacheStats == null) {
256       return 0;
257     }
258     return this.cacheStats.getHitCount();
259   }
260 
261   @Override
262   public long getBlockCacheMissCount() {
263     if (this.cacheStats == null) {
264       return 0;
265     }
266     return this.cacheStats.getMissCount();
267   }
268 
269   @Override
270   public long getBlockCacheEvictedCount() {
271     if (this.cacheStats == null) {
272       return 0;
273     }
274     return this.cacheStats.getEvictedCount();
275   }
276 
277   @Override
278   public double getBlockCacheHitPercent() {
279     if (this.cacheStats == null) {
280       return 0;
281     }
282     return (int) (this.cacheStats.getHitRatio() * 100);
283   }
284 
285   @Override
286   public int getBlockCacheHitCachingPercent() {
287     if (this.cacheStats == null) {
288       return 0;
289     }
290     return (int) (this.cacheStats.getHitCachingRatio() * 100);
291   }
292 
293   @Override public void forceRecompute() {
294     this.runnable.run();
295   }
296 
297   @Override
298   public long getNumStores() {
299     return numStores;
300   }
301   
302   @Override
303   public long getNumWALFiles() {
304     return numWALFiles;
305   }
306 
307   @Override
308   public long getWALFileSize() {
309     return walFileSize;
310   }
311   
312   @Override
313   public long getNumStoreFiles() {
314     return numStoreFiles;
315   }
316 
317   @Override
318   public long getMemstoreSize() {
319     return memstoreSize;
320   }
321 
322   @Override
323   public long getStoreFileSize() {
324     return storeFileSize;
325   }
326 
327   @Override public double getRequestsPerSecond() {
328     return requestsPerSecond;
329   }
330 
331   @Override
332   public long getReadRequestsCount() {
333     return readRequestsCount;
334   }
335 
336   @Override
337   public long getWriteRequestsCount() {
338     return writeRequestsCount;
339   }
340 
341   @Override
342   public long getCheckAndMutateChecksFailed() {
343     return checkAndMutateChecksFailed;
344   }
345 
346   @Override
347   public long getCheckAndMutateChecksPassed() {
348     return checkAndMutateChecksPassed;
349   }
350 
351   @Override
352   public long getStoreFileIndexSize() {
353     return storefileIndexSize;
354   }
355 
356   @Override
357   public long getTotalStaticIndexSize() {
358     return totalStaticIndexSize;
359   }
360 
361   @Override
362   public long getTotalStaticBloomSize() {
363     return totalStaticBloomSize;
364   }
365 
366   @Override
367   public long getNumMutationsWithoutWAL() {
368     return numMutationsWithoutWAL;
369   }
370 
371   @Override
372   public long getDataInMemoryWithoutWAL() {
373     return dataInMemoryWithoutWAL;
374   }
375 
376   @Override
377   public int getPercentFileLocal() {
378     return percentFileLocal;
379   }
380 
381   @Override
382   public int getPercentFileLocalSecondaryRegions() {
383     return percentFileLocalSecondaryRegions;
384   }
385 
386   @Override
387   public long getUpdatesBlockedTime() {
388     if (this.regionServer.cacheFlusher == null) {
389       return 0;
390     }
391     return this.regionServer.cacheFlusher.getUpdatesBlockedMsHighWater().get();
392   }
393 
394   @Override
395   public long getFlushedCellsCount() {
396     return flushedCellsCount;
397   }
398 
399   @Override
400   public long getCompactedCellsCount() {
401     return compactedCellsCount;
402   }
403 
404   @Override
405   public long getMajorCompactedCellsCount() {
406     return majorCompactedCellsCount;
407   }
408 
409   @Override
410   public long getFlushedCellsSize() {
411     return flushedCellsSize;
412   }
413 
414   @Override
415   public long getCompactedCellsSize() {
416     return compactedCellsSize;
417   }
418 
419   @Override
420   public long getMajorCompactedCellsSize() {
421     return majorCompactedCellsSize;
422   }
423 
424   /**
425    * This is the runnable that will be executed on the executor every PERIOD number of seconds
426    * It will take metrics/numbers from all of the regions and use them to compute point in
427    * time metrics.
428    */
429   public class RegionServerMetricsWrapperRunnable implements Runnable {
430 
431     private long lastRan = 0;
432     private long lastRequestCount = 0;
433 
434     @Override
435     synchronized public void run() {
436       initBlockCache();
437       cacheStats = blockCache.getStats();
438 
439       HDFSBlocksDistribution hdfsBlocksDistribution =
440           new HDFSBlocksDistribution();
441       HDFSBlocksDistribution hdfsBlocksDistributionSecondaryRegions =
442           new HDFSBlocksDistribution();
443 
444       long tempNumStores = 0;
445       long tempNumStoreFiles = 0;
446       long tempMemstoreSize = 0;
447       long tempStoreFileSize = 0;
448       long tempReadRequestsCount = 0;
449       long tempWriteRequestsCount = 0;
450       long tempCheckAndMutateChecksFailed = 0;
451       long tempCheckAndMutateChecksPassed = 0;
452       long tempStorefileIndexSize = 0;
453       long tempTotalStaticIndexSize = 0;
454       long tempTotalStaticBloomSize = 0;
455       long tempNumMutationsWithoutWAL = 0;
456       long tempDataInMemoryWithoutWAL = 0;
457       int tempPercentFileLocal = 0;
458       int tempPercentFileLocalSecondaryRegions = 0;
459       long tempFlushedCellsCount = 0;
460       long tempCompactedCellsCount = 0;
461       long tempMajorCompactedCellsCount = 0;
462       long tempFlushedCellsSize = 0;
463       long tempCompactedCellsSize = 0;
464       long tempMajorCompactedCellsSize = 0;
465       long tempBlockedRequestsCount = 0L;
466 
467       for (Region r : regionServer.getOnlineRegionsLocalContext()) {
468         tempNumMutationsWithoutWAL += r.getNumMutationsWithoutWAL();
469         tempDataInMemoryWithoutWAL += r.getDataInMemoryWithoutWAL();
470         tempReadRequestsCount += r.getReadRequestsCount();
471         tempWriteRequestsCount += r.getWriteRequestsCount();
472         tempCheckAndMutateChecksFailed += r.getCheckAndMutateChecksFailed();
473         tempCheckAndMutateChecksPassed += r.getCheckAndMutateChecksPassed();
474         tempBlockedRequestsCount += r.getBlockedRequestsCount();
475         List<Store> storeList = r.getStores();
476         tempNumStores += storeList.size();
477         for (Store store : storeList) {
478           tempNumStoreFiles += store.getStorefilesCount();
479           tempMemstoreSize += store.getMemStoreSize();
480           tempStoreFileSize += store.getStorefilesSize();
481           tempStorefileIndexSize += store.getStorefilesIndexSize();
482           tempTotalStaticBloomSize += store.getTotalStaticBloomSize();
483           tempTotalStaticIndexSize += store.getTotalStaticIndexSize();
484           tempFlushedCellsCount += store.getFlushedCellsCount();
485           tempCompactedCellsCount += store.getCompactedCellsCount();
486           tempMajorCompactedCellsCount += store.getMajorCompactedCellsCount();
487           tempFlushedCellsSize += store.getFlushedCellsSize();
488           tempCompactedCellsSize += store.getCompactedCellsSize();
489           tempMajorCompactedCellsSize += store.getMajorCompactedCellsSize();
490         }
491 
492         HDFSBlocksDistribution distro = r.getHDFSBlocksDistribution();
493         hdfsBlocksDistribution.add(distro);
494         if (r.getRegionInfo().getReplicaId() != HRegionInfo.DEFAULT_REPLICA_ID) {
495           hdfsBlocksDistributionSecondaryRegions.add(distro);
496         }
497       }
498 
499       float localityIndex = hdfsBlocksDistribution.getBlockLocalityIndex(
500           regionServer.getServerName().getHostname());
501       tempPercentFileLocal = (int) (localityIndex * 100);
502 
503       float localityIndexSecondaryRegions = hdfsBlocksDistributionSecondaryRegions
504           .getBlockLocalityIndex(regionServer.getServerName().getHostname());
505       tempPercentFileLocalSecondaryRegions = (int) (localityIndexSecondaryRegions * 100);
506 
507       //Compute the number of requests per second
508       long currentTime = EnvironmentEdgeManager.currentTime();
509 
510       // assume that it took PERIOD seconds to start the executor.
511       // this is a guess but it's a pretty good one.
512       if (lastRan == 0) {
513         lastRan = currentTime - period;
514       }
515 
516 
517       //If we've time traveled keep the last requests per second.
518       if ((currentTime - lastRan) > 0) {
519         long currentRequestCount = getTotalRequestCount();
520         requestsPerSecond = (currentRequestCount - lastRequestCount) /
521             ((currentTime - lastRan) / 1000.0);
522         lastRequestCount = currentRequestCount;
523       }
524       lastRan = currentTime;
525 
526       numWALFiles = DefaultWALProvider.getNumLogFiles(regionServer.walFactory) +
527           BoundedRegionGroupingProvider.getNumLogFiles(regionServer.walFactory);
528       walFileSize = DefaultWALProvider.getLogFileSize(regionServer.walFactory) +
529           BoundedRegionGroupingProvider.getLogFileSize(regionServer.walFactory);
530       //Copy over computed values so that no thread sees half computed values.
531       numStores = tempNumStores;
532       numStoreFiles = tempNumStoreFiles;
533       memstoreSize = tempMemstoreSize;
534       storeFileSize = tempStoreFileSize;
535       readRequestsCount = tempReadRequestsCount;
536       writeRequestsCount = tempWriteRequestsCount;
537       checkAndMutateChecksFailed = tempCheckAndMutateChecksFailed;
538       checkAndMutateChecksPassed = tempCheckAndMutateChecksPassed;
539       storefileIndexSize = tempStorefileIndexSize;
540       totalStaticIndexSize = tempTotalStaticIndexSize;
541       totalStaticBloomSize = tempTotalStaticBloomSize;
542       numMutationsWithoutWAL = tempNumMutationsWithoutWAL;
543       dataInMemoryWithoutWAL = tempDataInMemoryWithoutWAL;
544       percentFileLocal = tempPercentFileLocal;
545       percentFileLocalSecondaryRegions = tempPercentFileLocalSecondaryRegions;
546       flushedCellsCount = tempFlushedCellsCount;
547       compactedCellsCount = tempCompactedCellsCount;
548       majorCompactedCellsCount = tempMajorCompactedCellsCount;
549       flushedCellsSize = tempFlushedCellsSize;
550       compactedCellsSize = tempCompactedCellsSize;
551       majorCompactedCellsSize = tempMajorCompactedCellsSize;
552       blockedRequestsCount = tempBlockedRequestsCount;
553     }
554   }
555 
556   @Override
557   public long getHedgedReadOps() {
558     return this.dfsHedgedReadMetrics == null? 0: this.dfsHedgedReadMetrics.getHedgedReadOps();
559   }
560 
561   @Override
562   public long getHedgedReadWins() {
563     return this.dfsHedgedReadMetrics == null? 0: this.dfsHedgedReadMetrics.getHedgedReadWins();
564   }
565 
566   @Override
567   public long getBlockedRequestsCount() {
568     return blockedRequestsCount;
569   }
570 }