View Javadoc

1   /**
2    * Licensed to the Apache Software Foundation (ASF) under one
3    * or more contributor license agreements.  See the NOTICE file
4    * distributed with this work for additional information
5    * regarding copyright ownership.  The ASF licenses this file
6    * to you under the Apache License, Version 2.0 (the
7    * "License"); you may not use this file except in compliance
8    * with the License.  You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  package org.apache.hadoop.hbase.regionserver;
19  
20  import java.io.IOException;
21  import java.util.Collection;
22  import java.util.concurrent.ScheduledExecutorService;
23  import java.util.concurrent.TimeUnit;
24  
25  import org.apache.commons.lang.StringUtils;
26  import org.apache.commons.logging.Log;
27  import org.apache.commons.logging.LogFactory;
28  import org.apache.hadoop.hbase.classification.InterfaceAudience;
29  import org.apache.hadoop.hbase.CompatibilitySingletonFactory;
30  import org.apache.hadoop.hbase.HConstants;
31  import org.apache.hadoop.hbase.HDFSBlocksDistribution;
32  import org.apache.hadoop.hbase.ServerName;
33  import org.apache.hadoop.hbase.io.hfile.BlockCache;
34  import org.apache.hadoop.hbase.io.hfile.CacheConfig;
35  import org.apache.hadoop.hbase.io.hfile.CacheStats;
36  import org.apache.hadoop.hbase.wal.DefaultWALProvider;
37  import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
38  import org.apache.hadoop.hbase.util.FSUtils;
39  import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
40  import org.apache.hadoop.hdfs.DFSHedgedReadMetrics;
41  import org.apache.hadoop.metrics2.MetricsExecutor;
42  
43  /**
44   * Impl for exposing HRegionServer Information through Hadoop's metrics 2 system.
45   */
46  @InterfaceAudience.Private
47  class MetricsRegionServerWrapperImpl
48      implements MetricsRegionServerWrapper {
49  
50    public static final Log LOG = LogFactory.getLog(MetricsRegionServerWrapperImpl.class);
51  
52    private final HRegionServer regionServer;
53  
54    private BlockCache blockCache;
55  
56    private volatile long numStores = 0;
57    private volatile long numWALFiles = 0;
58    private volatile long walFileSize = 0;
59    private volatile long numStoreFiles = 0;
60    private volatile long memstoreSize = 0;
61    private volatile long storeFileSize = 0;
62    private volatile double requestsPerSecond = 0.0;
63    private volatile long readRequestsCount = 0;
64    private volatile long writeRequestsCount = 0;
65    private volatile long checkAndMutateChecksFailed = 0;
66    private volatile long checkAndMutateChecksPassed = 0;
67    private volatile long storefileIndexSize = 0;
68    private volatile long totalStaticIndexSize = 0;
69    private volatile long totalStaticBloomSize = 0;
70    private volatile long numMutationsWithoutWAL = 0;
71    private volatile long dataInMemoryWithoutWAL = 0;
72    private volatile int percentFileLocal = 0;
73    private volatile long flushedCellsCount = 0;
74    private volatile long compactedCellsCount = 0;
75    private volatile long majorCompactedCellsCount = 0;
76    private volatile long flushedCellsSize = 0;
77    private volatile long compactedCellsSize = 0;
78    private volatile long majorCompactedCellsSize = 0;
79    private volatile long blockedRequestsCount = 0L;
80  
81    private CacheStats cacheStats;
82    private ScheduledExecutorService executor;
83    private Runnable runnable;
84    private long period;
85  
86    /**
87     * Can be null if not on hdfs.
88     */
89    private DFSHedgedReadMetrics dfsHedgedReadMetrics;
90  
91    public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) {
92      this.regionServer = regionServer;
93      initBlockCache();
94  
95      this.period =
96          regionServer.conf.getLong(HConstants.REGIONSERVER_METRICS_PERIOD,
97            HConstants.DEFAULT_REGIONSERVER_METRICS_PERIOD);
98  
99      this.executor = CompatibilitySingletonFactory.getInstance(MetricsExecutor.class).getExecutor();
100     this.runnable = new RegionServerMetricsWrapperRunnable();
101     this.executor.scheduleWithFixedDelay(this.runnable, this.period, this.period,
102       TimeUnit.MILLISECONDS);
103 
104     try {
105       this.dfsHedgedReadMetrics = FSUtils.getDFSHedgedReadMetrics(regionServer.getConfiguration());
106     } catch (IOException e) {
107       LOG.warn("Failed to get hedged metrics", e);
108     }
109     if (LOG.isInfoEnabled()) {
110       LOG.info("Computing regionserver metrics every " + this.period + " milliseconds");
111     }
112   }
113 
114   /**
115    * It's possible that due to threading the block cache could not be initialized
116    * yet (testing multiple region servers in one jvm).  So we need to try and initialize
117    * the blockCache and cacheStats reference multiple times until we succeed.
118    */
119   private synchronized  void initBlockCache() {
120     CacheConfig cacheConfig = this.regionServer.cacheConfig;
121     if (cacheConfig != null && this.blockCache == null) {
122       this.blockCache = cacheConfig.getBlockCache();
123     }
124 
125     if (this.blockCache != null && this.cacheStats == null) {
126       this.cacheStats = blockCache.getStats();
127     }
128   }
129 
130   @Override
131   public String getClusterId() {
132     return regionServer.getClusterId();
133   }
134 
135   @Override
136   public long getStartCode() {
137     return regionServer.getStartcode();
138   }
139 
140   @Override
141   public String getZookeeperQuorum() {
142     ZooKeeperWatcher zk = regionServer.getZooKeeper();
143     if (zk == null) {
144       return "";
145     }
146     return zk.getQuorum();
147   }
148 
149   @Override
150   public String getCoprocessors() {
151     String[] coprocessors = regionServer.getRegionServerCoprocessors();
152     if (coprocessors == null || coprocessors.length == 0) {
153       return "";
154     }
155     return StringUtils.join(coprocessors, ", ");
156   }
157 
158   @Override
159   public String getServerName() {
160     ServerName serverName = regionServer.getServerName();
161     if (serverName == null) {
162       return "";
163     }
164     return serverName.getServerName();
165   }
166 
167   @Override
168   public long getNumOnlineRegions() {
169     Collection<HRegion> onlineRegionsLocalContext = regionServer.getOnlineRegionsLocalContext();
170     if (onlineRegionsLocalContext == null) {
171       return 0;
172     }
173     return onlineRegionsLocalContext.size();
174   }
175 
176   @Override
177   public long getTotalRequestCount() {
178     return regionServer.rpcServices.requestCount.get();
179   }
180 
181   @Override
182   public int getSplitQueueSize() {
183     if (this.regionServer.compactSplitThread == null) {
184       return 0;
185     }
186     return this.regionServer.compactSplitThread.getSplitQueueSize();
187   }
188 
189   @Override
190   public int getCompactionQueueSize() {
191     //The thread could be zero.  if so assume there is no queue.
192     if (this.regionServer.compactSplitThread == null) {
193       return 0;
194     }
195     return this.regionServer.compactSplitThread.getCompactionQueueSize();
196   }
197 
198   @Override
199   public int getSmallCompactionQueueSize() {
200     //The thread could be zero.  if so assume there is no queue.
201     if (this.regionServer.compactSplitThread == null) {
202       return 0;
203     }
204     return this.regionServer.compactSplitThread.getSmallCompactionQueueSize();
205   }
206 
207   @Override
208   public int getLargeCompactionQueueSize() {
209     //The thread could be zero.  if so assume there is no queue.
210     if (this.regionServer.compactSplitThread == null) {
211       return 0;
212     }
213     return this.regionServer.compactSplitThread.getLargeCompactionQueueSize();
214   }
215 
216   @Override
217   public int getFlushQueueSize() {
218     //If there is no flusher there should be no queue.
219     if (this.regionServer.cacheFlusher == null) {
220       return 0;
221     }
222     return this.regionServer.cacheFlusher.getFlushQueueSize();
223   }
224 
225   @Override
226   public long getBlockCacheCount() {
227     if (this.blockCache == null) {
228       return 0;
229     }
230     return this.blockCache.getBlockCount();
231   }
232 
233   @Override
234   public long getBlockCacheSize() {
235     if (this.blockCache == null) {
236       return 0;
237     }
238     return this.blockCache.getCurrentSize();
239   }
240 
241   @Override
242   public long getBlockCacheFreeSize() {
243     if (this.blockCache == null) {
244       return 0;
245     }
246     return this.blockCache.getFreeSize();
247   }
248 
249   @Override
250   public long getBlockCacheHitCount() {
251     if (this.cacheStats == null) {
252       return 0;
253     }
254     return this.cacheStats.getHitCount();
255   }
256 
257   @Override
258   public long getBlockCacheMissCount() {
259     if (this.cacheStats == null) {
260       return 0;
261     }
262     return this.cacheStats.getMissCount();
263   }
264 
265   @Override
266   public long getBlockCacheEvictedCount() {
267     if (this.cacheStats == null) {
268       return 0;
269     }
270     return this.cacheStats.getEvictedCount();
271   }
272 
273   @Override
274   public double getBlockCacheHitPercent() {
275     if (this.cacheStats == null) {
276       return 0;
277     }
278     return (int) (this.cacheStats.getHitRatio() * 100);
279   }
280 
281   @Override
282   public int getBlockCacheHitCachingPercent() {
283     if (this.cacheStats == null) {
284       return 0;
285     }
286     return (int) (this.cacheStats.getHitCachingRatio() * 100);
287   }
288 
289   @Override public void forceRecompute() {
290     this.runnable.run();
291   }
292 
293   @Override
294   public long getNumStores() {
295     return numStores;
296   }
297   
298   @Override
299   public long getNumWALFiles() {
300     return numWALFiles;
301   }
302 
303   @Override
304   public long getWALFileSize() {
305     return walFileSize;
306   }
307   
308   @Override
309   public long getNumStoreFiles() {
310     return numStoreFiles;
311   }
312 
313   @Override
314   public long getMemstoreSize() {
315     return memstoreSize;
316   }
317 
318   @Override
319   public long getStoreFileSize() {
320     return storeFileSize;
321   }
322 
323   @Override public double getRequestsPerSecond() {
324     return requestsPerSecond;
325   }
326 
327   @Override
328   public long getReadRequestsCount() {
329     return readRequestsCount;
330   }
331 
332   @Override
333   public long getWriteRequestsCount() {
334     return writeRequestsCount;
335   }
336 
337   @Override
338   public long getCheckAndMutateChecksFailed() {
339     return checkAndMutateChecksFailed;
340   }
341 
342   @Override
343   public long getCheckAndMutateChecksPassed() {
344     return checkAndMutateChecksPassed;
345   }
346 
347   @Override
348   public long getStoreFileIndexSize() {
349     return storefileIndexSize;
350   }
351 
352   @Override
353   public long getTotalStaticIndexSize() {
354     return totalStaticIndexSize;
355   }
356 
357   @Override
358   public long getTotalStaticBloomSize() {
359     return totalStaticBloomSize;
360   }
361 
362   @Override
363   public long getNumMutationsWithoutWAL() {
364     return numMutationsWithoutWAL;
365   }
366 
367   @Override
368   public long getDataInMemoryWithoutWAL() {
369     return dataInMemoryWithoutWAL;
370   }
371 
372   @Override
373   public int getPercentFileLocal() {
374     return percentFileLocal;
375   }
376 
377   @Override
378   public long getUpdatesBlockedTime() {
379     if (this.regionServer.cacheFlusher == null) {
380       return 0;
381     }
382     return this.regionServer.cacheFlusher.getUpdatesBlockedMsHighWater().get();
383   }
384 
385   @Override
386   public long getFlushedCellsCount() {
387     return flushedCellsCount;
388   }
389 
390   @Override
391   public long getCompactedCellsCount() {
392     return compactedCellsCount;
393   }
394 
395   @Override
396   public long getMajorCompactedCellsCount() {
397     return majorCompactedCellsCount;
398   }
399 
400   @Override
401   public long getFlushedCellsSize() {
402     return flushedCellsSize;
403   }
404 
405   @Override
406   public long getCompactedCellsSize() {
407     return compactedCellsSize;
408   }
409 
410   @Override
411   public long getMajorCompactedCellsSize() {
412     return majorCompactedCellsSize;
413   }
414 
415   /**
416    * This is the runnable that will be executed on the executor every PERIOD number of seconds
417    * It will take metrics/numbers from all of the regions and use them to compute point in
418    * time metrics.
419    */
420   public class RegionServerMetricsWrapperRunnable implements Runnable {
421 
422     private long lastRan = 0;
423     private long lastRequestCount = 0;
424 
425     @Override
426     synchronized public void run() {
427       initBlockCache();
428       cacheStats = blockCache.getStats();
429 
430       HDFSBlocksDistribution hdfsBlocksDistribution =
431           new HDFSBlocksDistribution();
432 
433       long tempNumStores = 0;
434       long tempNumStoreFiles = 0;
435       long tempMemstoreSize = 0;
436       long tempStoreFileSize = 0;
437       long tempReadRequestsCount = 0;
438       long tempWriteRequestsCount = 0;
439       long tempCheckAndMutateChecksFailed = 0;
440       long tempCheckAndMutateChecksPassed = 0;
441       long tempStorefileIndexSize = 0;
442       long tempTotalStaticIndexSize = 0;
443       long tempTotalStaticBloomSize = 0;
444       long tempNumMutationsWithoutWAL = 0;
445       long tempDataInMemoryWithoutWAL = 0;
446       int tempPercentFileLocal = 0;
447       long tempFlushedCellsCount = 0;
448       long tempCompactedCellsCount = 0;
449       long tempMajorCompactedCellsCount = 0;
450       long tempFlushedCellsSize = 0;
451       long tempCompactedCellsSize = 0;
452       long tempMajorCompactedCellsSize = 0;
453       long tempBlockedRequestsCount = 0L;
454 
455       for (HRegion r : regionServer.getOnlineRegionsLocalContext()) {
456         tempNumMutationsWithoutWAL += r.numMutationsWithoutWAL.get();
457         tempDataInMemoryWithoutWAL += r.dataInMemoryWithoutWAL.get();
458         tempReadRequestsCount += r.readRequestsCount.get();
459         tempWriteRequestsCount += r.writeRequestsCount.get();
460         tempCheckAndMutateChecksFailed += r.checkAndMutateChecksFailed.get();
461         tempCheckAndMutateChecksPassed += r.checkAndMutateChecksPassed.get();
462         tempBlockedRequestsCount += r.getBlockedRequestsCount();
463         tempNumStores += r.stores.size();
464         for (Store store : r.stores.values()) {
465           tempNumStoreFiles += store.getStorefilesCount();
466           tempMemstoreSize += store.getMemStoreSize();
467           tempStoreFileSize += store.getStorefilesSize();
468           tempStorefileIndexSize += store.getStorefilesIndexSize();
469           tempTotalStaticBloomSize += store.getTotalStaticBloomSize();
470           tempTotalStaticIndexSize += store.getTotalStaticIndexSize();
471           tempFlushedCellsCount += store.getFlushedCellsCount();
472           tempCompactedCellsCount += store.getCompactedCellsCount();
473           tempMajorCompactedCellsCount += store.getMajorCompactedCellsCount();
474           tempFlushedCellsSize += store.getFlushedCellsSize();
475           tempCompactedCellsSize += store.getCompactedCellsSize();
476           tempMajorCompactedCellsSize += store.getMajorCompactedCellsSize();
477         }
478 
479         hdfsBlocksDistribution.add(r.getHDFSBlocksDistribution());
480       }
481 
482       float localityIndex = hdfsBlocksDistribution.getBlockLocalityIndex(
483           regionServer.getServerName().getHostname());
484       tempPercentFileLocal = (int) (localityIndex * 100);
485 
486 
487       //Compute the number of requests per second
488       long currentTime = EnvironmentEdgeManager.currentTime();
489 
490       // assume that it took PERIOD seconds to start the executor.
491       // this is a guess but it's a pretty good one.
492       if (lastRan == 0) {
493         lastRan = currentTime - period;
494       }
495 
496 
497       //If we've time traveled keep the last requests per second.
498       if ((currentTime - lastRan) > 0) {
499         long currentRequestCount = getTotalRequestCount();
500         requestsPerSecond = (currentRequestCount - lastRequestCount) / ((currentTime - lastRan) / 1000.0);
501         lastRequestCount = currentRequestCount;
502       }
503       lastRan = currentTime;
504 
505       numWALFiles = DefaultWALProvider.getNumLogFiles(regionServer.walFactory);
506       walFileSize = DefaultWALProvider.getLogFileSize(regionServer.walFactory);
507 
508       //Copy over computed values so that no thread sees half computed values.
509       numStores = tempNumStores;
510       numStoreFiles = tempNumStoreFiles;
511       memstoreSize = tempMemstoreSize;
512       storeFileSize = tempStoreFileSize;
513       readRequestsCount = tempReadRequestsCount;
514       writeRequestsCount = tempWriteRequestsCount;
515       checkAndMutateChecksFailed = tempCheckAndMutateChecksFailed;
516       checkAndMutateChecksPassed = tempCheckAndMutateChecksPassed;
517       storefileIndexSize = tempStorefileIndexSize;
518       totalStaticIndexSize = tempTotalStaticIndexSize;
519       totalStaticBloomSize = tempTotalStaticBloomSize;
520       numMutationsWithoutWAL = tempNumMutationsWithoutWAL;
521       dataInMemoryWithoutWAL = tempDataInMemoryWithoutWAL;
522       percentFileLocal = tempPercentFileLocal;
523       flushedCellsCount = tempFlushedCellsCount;
524       compactedCellsCount = tempCompactedCellsCount;
525       majorCompactedCellsCount = tempMajorCompactedCellsCount;
526       flushedCellsSize = tempFlushedCellsSize;
527       compactedCellsSize = tempCompactedCellsSize;
528       majorCompactedCellsSize = tempMajorCompactedCellsSize;
529       blockedRequestsCount = tempBlockedRequestsCount;
530     }
531   }
532 
533   @Override
534   public long getHedgedReadOps() {
535     return this.dfsHedgedReadMetrics == null? 0: this.dfsHedgedReadMetrics.getHedgedReadOps();
536   }
537 
538   @Override
539   public long getHedgedReadWins() {
540     return this.dfsHedgedReadMetrics == null? 0: this.dfsHedgedReadMetrics.getHedgedReadWins();
541   }
542 
543   @Override
544   public long getBlockedRequestsCount() {
545     return blockedRequestsCount;
546   }
547 }