001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.jupiter.api.Assertions.assertEquals;
021import static org.junit.jupiter.api.Assertions.assertFalse;
022import static org.junit.jupiter.api.Assertions.assertTrue;
023
024import java.io.IOException;
025import java.io.InterruptedIOException;
026import java.io.UncheckedIOException;
027import java.util.ArrayList;
028import java.util.EnumSet;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032import org.apache.hadoop.conf.Configuration;
033import org.apache.hadoop.fs.FileSystem;
034import org.apache.hadoop.fs.Path;
035import org.apache.hadoop.hbase.ClusterMetrics;
036import org.apache.hadoop.hbase.CompatibilityFactory;
037import org.apache.hadoop.hbase.HBaseTestingUtil;
038import org.apache.hadoop.hbase.NamespaceDescriptor;
039import org.apache.hadoop.hbase.ServerMetricsBuilder;
040import org.apache.hadoop.hbase.ServerName;
041import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
042import org.apache.hadoop.hbase.StartTestingClusterOption;
043import org.apache.hadoop.hbase.TableName;
044import org.apache.hadoop.hbase.YouAreDeadException;
045import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
046import org.apache.hadoop.hbase.client.RegionStatesCount;
047import org.apache.hadoop.hbase.client.TableDescriptor;
048import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
049import org.apache.hadoop.hbase.test.MetricsAssertHelper;
050import org.apache.hadoop.hbase.testclassification.MasterTests;
051import org.apache.hadoop.hbase.testclassification.MediumTests;
052import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
053import org.apache.hadoop.hbase.util.FSTableDescriptors;
054import org.apache.zookeeper.KeeperException;
055import org.junit.jupiter.api.AfterAll;
056import org.junit.jupiter.api.BeforeAll;
057import org.junit.jupiter.api.Tag;
058import org.junit.jupiter.api.Test;
059import org.slf4j.Logger;
060import org.slf4j.LoggerFactory;
061
062import org.apache.hbase.thirdparty.com.google.protobuf.RpcController;
063import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException;
064
065import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
066import org.apache.hadoop.hbase.shaded.protobuf.generated.ClusterStatusProtos;
067import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos;
068import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupRequest;
069import org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProtos.RegionServerStartupResponse;
070
071@Tag(MasterTests.TAG)
072@Tag(MediumTests.TAG)
073public class TestMasterMetrics {
074
075  private static final Logger LOG = LoggerFactory.getLogger(TestMasterMetrics.class);
076  private static final MetricsAssertHelper metricsHelper =
077    CompatibilityFactory.getInstance(MetricsAssertHelper.class);
078  private static final String COLUMN_FAMILY = "cf";
079
080  private static SingleProcessHBaseCluster cluster;
081  private static HMaster master;
082  private static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
083
084  public static class MyMaster extends HMaster {
085
086    public MyMaster(Configuration conf) throws IOException, KeeperException, InterruptedException {
087      super(conf);
088    }
089
090    @Override
091    protected MasterRpcServices createRpcServices() throws IOException {
092      return new MasterRpcServices(this) {
093
094        @Override
095        public RegionServerStartupResponse regionServerStartup(RpcController controller,
096          RegionServerStartupRequest request) throws ServiceException {
097          RegionServerStartupResponse resp = super.regionServerStartup(controller, request);
098          ServerManager serverManager = getServerManager();
099          // to let the region server actual online otherwise we can not assign meta region
100          new HashMap<>(serverManager.getOnlineServers()).forEach((sn, sm) -> {
101            if (sm.getLastReportTimestamp() <= 0) {
102              try {
103                serverManager.regionServerReport(sn,
104                  ServerMetricsBuilder.newBuilder(sn).setVersionNumber(sm.getVersionNumber())
105                    .setVersion(sm.getVersion())
106                    .setLastReportTimestamp(EnvironmentEdgeManager.currentTime()).build());
107              } catch (YouAreDeadException e) {
108                throw new UncheckedIOException(e);
109              }
110            }
111          });
112          return resp;
113        }
114      };
115    }
116  }
117
118  public static class MyRegionServer
119    extends SingleProcessHBaseCluster.MiniHBaseClusterRegionServer {
120
121    public MyRegionServer(Configuration conf) throws IOException, InterruptedException {
122      super(conf);
123    }
124
125    @Override
126    protected void tryRegionServerReport(long reportStartTime, long reportEndTime) {
127      // do nothing
128    }
129  }
130
131  @BeforeAll
132  public static void startCluster() throws Exception {
133    LOG.info("Starting cluster");
134    // Set master class and use default values for other options.
135    StartTestingClusterOption option = StartTestingClusterOption.builder()
136      .masterClass(MyMaster.class).rsClass(MyRegionServer.class).build();
137    TEST_UTIL.startMiniCluster(option);
138    cluster = TEST_UTIL.getHBaseCluster();
139    LOG.info("Waiting for active/ready master");
140    cluster.waitForActiveAndReadyMaster();
141    master = cluster.getMaster();
142  }
143
144  @AfterAll
145  public static void after() throws Exception {
146    master.stopMaster();
147    TEST_UTIL.shutdownMiniCluster();
148  }
149
150  @Test
151  public void testClusterRequests() throws Exception {
152    // sending fake request to master to see how metric value has changed
153    RegionServerStatusProtos.RegionServerReportRequest.Builder request =
154      RegionServerStatusProtos.RegionServerReportRequest.newBuilder();
155    ServerName serverName = cluster.getMaster(0).getServerName();
156    request.setServer(ProtobufUtil.toServerName(serverName));
157    long expectedRequestNumber = 10000;
158
159    MetricsMasterSource masterSource = master.getMasterMetrics().getMetricsSource();
160    ClusterStatusProtos.ServerLoad sl = ClusterStatusProtos.ServerLoad.newBuilder()
161      .setTotalNumberOfRequests(expectedRequestNumber).setReadRequestsCount(expectedRequestNumber)
162      .setWriteRequestsCount(expectedRequestNumber).build();
163    request.setLoad(sl);
164
165    master.getMasterRpcServices().regionServerReport(null, request.build());
166    metricsHelper.assertCounter("cluster_requests", expectedRequestNumber, masterSource);
167    metricsHelper.assertCounter("cluster_read_requests", expectedRequestNumber, masterSource);
168    metricsHelper.assertCounter("cluster_write_requests", expectedRequestNumber, masterSource);
169
170    expectedRequestNumber = 15000;
171
172    sl = ClusterStatusProtos.ServerLoad.newBuilder().setTotalNumberOfRequests(expectedRequestNumber)
173      .setReadRequestsCount(expectedRequestNumber).setWriteRequestsCount(expectedRequestNumber)
174      .build();
175    request.setLoad(sl);
176
177    master.getMasterRpcServices().regionServerReport(null, request.build());
178    metricsHelper.assertCounter("cluster_requests", expectedRequestNumber, masterSource);
179    metricsHelper.assertCounter("cluster_read_requests", expectedRequestNumber, masterSource);
180    metricsHelper.assertCounter("cluster_write_requests", expectedRequestNumber, masterSource);
181  }
182
183  @Test
184  public void testDefaultMasterMetrics() throws Exception {
185    MetricsMasterSource masterSource = master.getMasterMetrics().getMetricsSource();
186    metricsHelper.assertGauge("numRegionServers", 1, masterSource);
187    metricsHelper.assertGauge("averageLoad", 1, masterSource);
188    metricsHelper.assertGauge("numDeadRegionServers", 0, masterSource);
189    metricsHelper.assertGauge("numDrainingRegionServers", 0, masterSource);
190
191    metricsHelper.assertGauge("masterStartTime", master.getMasterStartTime(), masterSource);
192    metricsHelper.assertGauge("masterActiveTime", master.getMasterActiveTime(), masterSource);
193
194    metricsHelper.assertTag("isActiveMaster", "true", masterSource);
195    metricsHelper.assertTag("serverName", master.getServerName().toString(), masterSource);
196    metricsHelper.assertTag("clusterId", master.getClusterId(), masterSource);
197    metricsHelper.assertTag("zookeeperQuorum", master.getZooKeeper().getQuorum(), masterSource);
198
199    metricsHelper.assertCounter(MetricsMasterSource.SERVER_CRASH_METRIC_PREFIX + "SubmittedCount",
200      0, masterSource);
201    metricsHelper.assertGauge("oldWALsDirSize", master.getMasterWalManager().getOldWALsDirSize(),
202      masterSource);
203  }
204
205  @Test
206  public void testDefaultMasterProcMetrics() {
207    MetricsMasterProcSource masterSource = master.getMasterMetrics().getMetricsProcSource();
208    metricsHelper.assertGauge("numMasterWALs", master.getNumWALFiles(), masterSource);
209  }
210
211  // Verifies a foreign meta table does not show up in the table regions state
212  @Test
213  public void testClusterMetricsSkippingForeignMetaTable() throws Exception {
214    TableName replicaMetaTable = TableName.valueOf("hbase", "meta_replica");
215    TableDescriptor replicaMetaDescriptor = TableDescriptorBuilder.newBuilder(replicaMetaTable)
216      .setColumnFamily(ColumnFamilyDescriptorBuilder.of("info")).build();
217    master.getTableDescriptors().update(replicaMetaDescriptor, true);
218    try {
219      Map<TableName, RegionStatesCount> tableRegionStatesCount = getTableRegionStatesCount();
220
221      assertFalse(tableRegionStatesCount.containsKey(replicaMetaTable),
222        "Foreign meta table should not be present");
223      assertTrue(tableRegionStatesCount.containsKey(TableName.META_TABLE_NAME),
224        "Local meta should be present");
225
226    } finally {
227      master.getTableDescriptors().remove(replicaMetaTable);
228    }
229  }
230
231  // This test adds foreign file descriptors to the cluster's table descriptor cache. It then
232  // verifies the foreign tables do not show up in the table regions state.
233  @Test
234  public void testClusterMetricsSkippingCachedForeignTables() throws Exception {
235    List<TableName> allTables = new ArrayList<>();
236
237    // These tables, including the cluster's meta table, should not be foreign to the cluster.
238    // The cluster should be able to find their state.
239    allTables.add(TableName.META_TABLE_NAME);
240    List<TableName> familiarTables = new ArrayList<>();
241    familiarTables.add(TableName.valueOf(null, "familiarTable1"));
242    familiarTables.add(TableName.valueOf("", "familiarTable2"));
243    familiarTables.add(TableName.valueOf("default", "familiarTable3"));
244    familiarTables.add(TableName.valueOf("familiarNamespace", "familiarTable4"));
245    familiarTables.add(TableName.valueOf("familiarNamespace", "familiarTable5"));
246
247    // Create these "familiar" tables so their state can be found
248    TEST_UTIL.getAdmin().createNamespace(NamespaceDescriptor.create("familiarNamespace").build());
249    for (TableName familiarTable : familiarTables) {
250      TEST_UTIL.createTable(familiarTable, COLUMN_FAMILY);
251      allTables.add(familiarTable);
252    }
253
254    // hbase:meta is a familiar table that was created automatically
255    familiarTables.add(TableName.META_TABLE_NAME);
256
257    // These tables should be foreign to the cluster.
258    // The cluster should not be able to find their state.
259    allTables.add(TableName.valueOf("hbase", "meta_replica"));
260    allTables.add(TableName.valueOf(null, "defaultNamespaceTable1"));
261    allTables.add(TableName.valueOf("", "defaultNamespaceTable2"));
262    allTables.add(TableName.valueOf("default", "defaultNamespaceTable3"));
263    allTables.add(TableName.valueOf("customNamespace", "customNamespaceTable1"));
264    allTables.add(TableName.valueOf("customNamespace", "customNamespaceTable2"));
265    allTables.add(TableName.valueOf("anotherNamespace", "anotherNamespaceTable"));
266    allTables.add(TableName.valueOf("sharedNamespace", "sharedNamespaceTable1"));
267    allTables.add(TableName.valueOf("sharedNamespace", "sharedNamespaceTable2"));
268
269    // Update master's table descriptors to have all tables
270    TableDescriptor foreignTableDescriptor;
271    for (TableName tableName : allTables) {
272      foreignTableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
273        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(COLUMN_FAMILY)).build();
274      master.getTableDescriptors().update(foreignTableDescriptor, true);
275    }
276
277    // The state of the meta table and the familiar tables we created should exist.
278    // The other tables' state should not exist.
279    for (TableName tableName : allTables) {
280      try {
281        Map<TableName, RegionStatesCount> tableRegionStatesCount = getTableRegionStatesCount();
282
283        if (
284          tableName.equals(TableName.META_TABLE_NAME)
285            || tableName.getQualifierAsString().startsWith("familiar")
286        ) {
287          assertTrue(tableRegionStatesCount.containsKey(tableName),
288            "Expected this table's state to exist: " + tableName);
289        } else {
290          assertFalse(tableRegionStatesCount.containsKey(tableName),
291            "This foreign table's state should not exist: " + tableName);
292        }
293      } finally {
294        if (!TableName.META_TABLE_NAME.equals(tableName) && familiarTables.contains(tableName)) {
295          LOG.debug("Deleting table: {}", tableName);
296          TEST_UTIL.deleteTable(tableName);
297        } else if (!familiarTables.contains(tableName)) {
298          LOG.debug("Removing table descriptor for foreign table: {}", tableName);
299          master.getTableDescriptors().remove(tableName);
300        }
301      }
302    }
303  }
304
305  // This test creates foreign file descriptors on the filesystem in addition to updating the
306  // table descriptor cache. It then verifies the foreign tables do not show up in the
307  // table regions state.
308  @Test
309  public void testClusterMetricsSkippingForeignTablesOnFileSystem() throws IOException {
310    List<TableName> familiarTables = new ArrayList<>();
311    List<TableName> foreignTables = new ArrayList<>();
312    FileSystem fs = TEST_UTIL.getTestFileSystem();
313    Path testDir = TEST_UTIL.getDataTestDirOnTestFS();
314    LOG.info("The test dir is: {}", testDir);
315
316    // Create tables whose state are familiar to this cluster
317    familiarTables.add(TableName.valueOf("testTable1"));
318    familiarTables.add(TableName.valueOf("testTable2"));
319    TEST_UTIL.getAdmin().createNamespace(NamespaceDescriptor.create("myNamespace").build());
320    familiarTables.add(TableName.valueOf("myNamespace", "tableWithNamespace1"));
321    for (TableName tableName : familiarTables) {
322      TEST_UTIL.createTable(tableName, COLUMN_FAMILY);
323    }
324
325    // hbase:meta is a familiar table that was created automatically
326    familiarTables.add(TableName.META_TABLE_NAME);
327
328    // There should now be 4 tables, including hbase:meta
329    Map<String, TableDescriptor> tableDescriptorMap = master.getTableDescriptors().getAll();
330    assertEquals(4, tableDescriptorMap.size());
331    for (TableName tableName : familiarTables) {
332      assertTrue(
333        tableDescriptorMap
334          .containsKey(tableName.getNamespaceAsString() + ":" + tableName.getQualifierAsString()),
335        "Expected table descriptor map to contain table: " + tableName);
336    }
337
338    createTableDescriptorOnFileSystem("hbase", "meta_replica", foreignTables);
339    createTableDescriptorOnFileSystem("default", "foreignTable1", foreignTables);
340    createTableDescriptorOnFileSystem("customForeignNs", "customForeignNsTable1", foreignTables);
341
342    // Verify the table descriptors were created on the filesystem
343    for (TableName tableName : foreignTables) {
344      Path tableDescPath = new Path(testDir,
345        "data" + Path.SEPARATOR + tableName.getNamespaceAsString() + Path.SEPARATOR
346          + tableName.getQualifierAsString() + Path.SEPARATOR + FSTableDescriptors.TABLEINFO_DIR);
347      assertTrue(fs.exists(tableDescPath),
348        "Expected table descriptor directory to exist: " + tableDescPath);
349    }
350
351    Map<TableName, RegionStatesCount> tableRegionStatesCount = getTableRegionStatesCount();
352
353    // The foreign tables should not be in the table state
354    assertEquals(4, tableRegionStatesCount.size());
355    for (TableName tableName : familiarTables) {
356      assertTrue(tableRegionStatesCount.containsKey(tableName),
357        "Expected table regions state count to contain: " + tableName);
358      // Delete unneeded tables
359      if (!TableName.META_TABLE_NAME.equals(tableName)) {
360        LOG.debug("Deleting table: {}", tableName);
361        TEST_UTIL.deleteTable(tableName);
362      }
363    }
364    for (TableName tableName : foreignTables) {
365      assertFalse(tableRegionStatesCount.containsKey(tableName),
366        "Expected table regions state count to NOT contain: " + tableName);
367      // Remove unneeded table descriptors
368      LOG.debug("Removing table descriptor for foreign table: {}", tableName);
369      master.getTableDescriptors().remove(tableName);
370    }
371  }
372
373  private Map<TableName, RegionStatesCount> getTableRegionStatesCount()
374    throws InterruptedIOException {
375    ClusterMetrics metrics = master.getClusterMetricsWithoutCoprocessor(
376      EnumSet.of(ClusterMetrics.Option.TABLE_TO_REGIONS_COUNT));
377    return metrics.getTableRegionStatesCount();
378  }
379
380  private void createTableDescriptorOnFileSystem(String namespace, String qualifier,
381    List<TableName> tableNameList) throws IOException {
382    // Create a table descriptor on the filesystem that uses the provided namespace
383    TableName tableName = TableName.valueOf(namespace, qualifier);
384    TableDescriptor tableDescriptor = TableDescriptorBuilder.newBuilder(tableName)
385      .setColumnFamily(ColumnFamilyDescriptorBuilder.of(COLUMN_FAMILY)).build();
386    // Create the table descriptor on the filesystem and update the file descriptor cache
387    master.getTableDescriptors().update(tableDescriptor, false);
388
389    tableNameList.add(tableName);
390  }
391}