001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase;
019
020import edu.umd.cs.findbugs.annotations.Nullable;
021import java.util.List;
022import java.util.Map;
023import org.apache.hadoop.hbase.client.RegionInfo;
024import org.apache.hadoop.hbase.client.RegionStatesCount;
025import org.apache.hadoop.hbase.master.RegionState;
026import org.apache.yetus.audience.InterfaceAudience;
027
028/**
029 * Metrics information on the HBase cluster.
030 * <p>
031 * <tt>ClusterMetrics</tt> provides clients with information such as:
032 * <ul>
033 * <li>The count and names of region servers in the cluster.</li>
034 * <li>The count and names of dead region servers in the cluster.</li>
035 * <li>The name of the active master for the cluster.</li>
036 * <li>The name(s) of the backup master(s) for the cluster, if they exist.</li>
037 * <li>The average cluster load.</li>
038 * <li>The number of regions deployed on the cluster.</li>
039 * <li>The number of requests since last report.</li>
040 * <li>Detailed region server loading and resource usage information, per server and per
041 * region.</li>
042 * <li>Regions in transition at master</li>
043 * <li>The unique cluster ID</li>
044 * </ul>
045 * <tt>{@link Option}</tt> provides a way to get desired ClusterStatus information. The following
046 * codes will get all the cluster information.
047 *
048 * <pre>
049 * {
050 *   &#64;code
051 *   // Original version still works
052 *   Admin admin = connection.getAdmin();
053 *   ClusterMetrics metrics = admin.getClusterStatus();
054 *   // or below, a new version which has the same effects
055 *   ClusterMetrics metrics = admin.getClusterStatus(EnumSet.allOf(Option.class));
056 * }
057 * </pre>
058 *
059 * If information about live servers is the only wanted. then codes in the following way:
060 *
061 * <pre>
062 * {
063 *   &#64;code
064 *   Admin admin = connection.getAdmin();
065 *   ClusterMetrics metrics = admin.getClusterStatus(EnumSet.of(Option.LIVE_SERVERS));
066 * }
067 * </pre>
068 */
069@InterfaceAudience.Public
070public interface ClusterMetrics {
071
072  /** Returns the HBase version string as reported by the HMaster */
073  @Nullable
074  String getHBaseVersion();
075
076  /** Returns the names of region servers on the dead list */
077  List<ServerName> getDeadServerNames();
078
079  /** Returns the names of region servers on the unknown list */
080  List<ServerName> getUnknownServerNames();
081
082  /** Returns the names of region servers on the live list */
083  Map<ServerName, ServerMetrics> getLiveServerMetrics();
084
085  /** Returns the number of regions deployed on the cluster */
086  default int getRegionCount() {
087    return getLiveServerMetrics().entrySet().stream()
088      .mapToInt(v -> v.getValue().getRegionMetrics().size()).sum();
089  }
090
091  /** Returns the number of requests since last report */
092  default long getRequestCount() {
093    return getLiveServerMetrics().entrySet().stream()
094      .flatMap(v -> v.getValue().getRegionMetrics().values().stream())
095      .mapToLong(RegionMetrics::getRequestCount).sum();
096  }
097
098  /**
099   * Returns detailed information about the current master {@link ServerName}.
100   * @return current master information if it exists
101   */
102  @Nullable
103  ServerName getMasterName();
104
105  /** Returns the names of backup masters */
106  List<ServerName> getBackupMasterNames();
107
108  @InterfaceAudience.Private
109  List<RegionState> getRegionStatesInTransition();
110
111  @Nullable
112  String getClusterId();
113
114  List<String> getMasterCoprocessorNames();
115
116  default long getLastMajorCompactionTimestamp(TableName table) {
117    return getLiveServerMetrics().values().stream()
118      .flatMap(s -> s.getRegionMetrics().values().stream())
119      .filter(r -> RegionInfo.getTable(r.getRegionName()).equals(table))
120      .mapToLong(RegionMetrics::getLastMajorCompactionTimestamp).min().orElse(0);
121  }
122
123  default long getLastMajorCompactionTimestamp(byte[] regionName) {
124    return getLiveServerMetrics().values().stream()
125      .filter(s -> s.getRegionMetrics().containsKey(regionName)).findAny()
126      .map(s -> s.getRegionMetrics().get(regionName).getLastMajorCompactionTimestamp()).orElse(0L);
127  }
128
129  @Nullable
130  Boolean getBalancerOn();
131
132  int getMasterInfoPort();
133
134  List<ServerName> getServersName();
135
136  /** Returns the average cluster load */
137  default double getAverageLoad() {
138    int serverSize = getLiveServerMetrics().size();
139    if (serverSize == 0) {
140      return 0;
141    }
142    return (double) getRegionCount() / (double) serverSize;
143  }
144
145  /**
146   * Provide region states count for given table. e.g howmany regions of give table are
147   * opened/closed/rit etc
148   * @return map of table to region states count
149   */
150  Map<TableName, RegionStatesCount> getTableRegionStatesCount();
151
152  /**
153   * Provide the list of master tasks
154   */
155  @Nullable
156  List<ServerTask> getMasterTasks();
157
158  /**
159   * Kinds of ClusterMetrics
160   */
161  enum Option {
162    /**
163     * metrics about hbase version
164     */
165    HBASE_VERSION,
166    /**
167     * metrics about cluster id
168     */
169    CLUSTER_ID,
170    /**
171     * metrics about balancer is on or not
172     */
173    BALANCER_ON,
174    /**
175     * metrics about live region servers
176     */
177    LIVE_SERVERS,
178    /**
179     * metrics about dead region servers
180     */
181    DEAD_SERVERS,
182    /**
183     * metrics about unknown region servers
184     */
185    UNKNOWN_SERVERS,
186    /**
187     * metrics about master name
188     */
189    MASTER,
190    /**
191     * metrics about backup masters name
192     */
193    BACKUP_MASTERS,
194    /**
195     * metrics about master coprocessors
196     */
197    MASTER_COPROCESSORS,
198    /**
199     * metrics about regions in transition
200     */
201    REGIONS_IN_TRANSITION,
202    /**
203     * metrics info port
204     */
205    MASTER_INFO_PORT,
206    /**
207     * metrics about live region servers name
208     */
209    SERVERS_NAME,
210    /**
211     * metrics about table to no of regions status count
212     */
213    TABLE_TO_REGIONS_COUNT,
214    /**
215     * metrics about monitored tasks
216     */
217    TASKS,
218  }
219}