001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.jupiter.api.Assertions.fail;
021
022import java.io.IOException;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.hbase.CompatibilityFactory;
025import org.apache.hadoop.hbase.HBaseTestingUtil;
026import org.apache.hadoop.hbase.HConstants;
027import org.apache.hadoop.hbase.SingleProcessHBaseCluster;
028import org.apache.hadoop.hbase.TableName;
029import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
030import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
031import org.apache.hadoop.hbase.client.Put;
032import org.apache.hadoop.hbase.client.Table;
033import org.apache.hadoop.hbase.client.TableDescriptor;
034import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
035import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
036import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
037import org.apache.hadoop.hbase.test.MetricsAssertHelper;
038import org.apache.hadoop.hbase.testclassification.MasterTests;
039import org.apache.hadoop.hbase.testclassification.MediumTests;
040import org.apache.hadoop.hbase.util.Bytes;
041import org.apache.hadoop.hbase.util.TableDescriptorChecker;
042import org.junit.jupiter.api.AfterAll;
043import org.junit.jupiter.api.BeforeAll;
044import org.junit.jupiter.api.BeforeEach;
045import org.junit.jupiter.api.Tag;
046import org.junit.jupiter.api.Test;
047import org.junit.jupiter.api.TestInfo;
048import org.slf4j.Logger;
049import org.slf4j.LoggerFactory;
050
051@Tag(MasterTests.TAG)
052@Tag(MediumTests.TAG)
053public class TestAssignmentManagerMetrics {
054
055  private static final Logger LOG = LoggerFactory.getLogger(TestAssignmentManagerMetrics.class);
056  private static final MetricsAssertHelper METRICS_HELPER =
057    CompatibilityFactory.getInstance(MetricsAssertHelper.class);
058
059  private static SingleProcessHBaseCluster CLUSTER;
060  private static HMaster MASTER;
061  private static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil();
062  private static final int MSG_INTERVAL = 1000;
063  private String testMethodName;
064
065  @BeforeEach
066  public void setTestMethod(TestInfo testInfo) {
067    testMethodName = testInfo.getTestMethod().get().getName();
068  }
069
070  @BeforeAll
071  public static void startCluster() throws Exception {
072    LOG.info("Starting cluster");
073    Configuration conf = TEST_UTIL.getConfiguration();
074
075    // Enable sanity check for coprocessor, so that region reopen fails on the RS
076    conf.setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, true);
077
078    // set RIT stuck warning threshold to a small value
079    conf.setInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 20);
080
081    // set msgInterval to 1 second
082    conf.setInt("hbase.regionserver.msginterval", MSG_INTERVAL);
083
084    // set tablesOnMaster to none
085    conf.set("hbase.balancer.tablesOnMaster", "none");
086
087    // set client sync wait timeout to 5sec
088    conf.setInt("hbase.client.sync.wait.timeout.msec", 5000);
089    conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
090    conf.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, 2500);
091    // set a small interval for updating rit metrics
092    conf.setInt(AssignmentManager.RIT_CHORE_INTERVAL_MSEC_CONF_KEY, MSG_INTERVAL);
093    // set a small assign attempts for avoiding assert when retrying. (HBASE-20533)
094    conf.setInt(AssignmentManager.ASSIGN_MAX_ATTEMPTS, 3);
095
096    // keep rs online so it can report the failed opens.
097    conf.setBoolean(CoprocessorHost.ABORT_ON_ERROR_KEY, false);
098    TEST_UTIL.startMiniCluster(1);
099    CLUSTER = TEST_UTIL.getHBaseCluster();
100    MASTER = CLUSTER.getMaster();
101    // Disable sanity check for coprocessor, so that modify table runs on the HMaster
102    MASTER.getConfiguration().setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, false);
103  }
104
105  @AfterAll
106  public static void after() throws Exception {
107    LOG.info("AFTER {} <= IS THIS NULL?", TEST_UTIL);
108    TEST_UTIL.shutdownMiniCluster();
109  }
110
111  @Test
112  public void testRITAssignmentManagerMetrics() throws Exception {
113    final TableName TABLENAME = TableName.valueOf(testMethodName);
114    final byte[] FAMILY = Bytes.toBytes("family");
115    try (Table table = TEST_UTIL.createTable(TABLENAME, FAMILY)) {
116      final byte[] row = Bytes.toBytes("row");
117      final byte[] qualifier = Bytes.toBytes("qualifier");
118      final byte[] value = Bytes.toBytes("value");
119
120      Put put = new Put(row);
121      put.addColumn(FAMILY, qualifier, value);
122      table.put(put);
123
124      // Sleep 3 seconds, wait for doMetrics chore catching up
125      Thread.sleep(MSG_INTERVAL * 3);
126
127      // check the RIT is 0
128      MetricsAssignmentManagerSource amSource =
129        MASTER.getAssignmentManager().getAssignmentManagerMetrics().getMetricsProcSource();
130
131      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 0, amSource);
132      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 0,
133        amSource);
134
135      // alter table with a non-existing coprocessor
136      TableDescriptor htd = TableDescriptorBuilder.newBuilder(TABLENAME)
137        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY))
138        .setCoprocessor(CoprocessorDescriptorBuilder.newBuilder("com.foo.FooRegionObserver")
139          .setJarPath("hdfs:///foo.jar").setPriority(1001).setProperty("arg1", "1")
140          .setProperty("arg2", "2").build())
141        .build();
142      try {
143        TEST_UTIL.getAdmin().modifyTable(htd);
144        fail("Expected region failed to open");
145      } catch (IOException e) {
146        // expected, the RS will crash and the assignment will spin forever waiting for a RS
147        // to assign the region. the region will not go to FAILED_OPEN because in this case
148        // we have just one RS and it will do one retry.
149        LOG.info("Expected error", e);
150      }
151
152      // Sleep 5 seconds, wait for doMetrics chore catching up
153      // the rit count consists of rit and failed opens. see RegionInTransitionStat#update
154      // Waiting for the completion of rit makes the assert stable.
155      TEST_UTIL.waitUntilNoRegionTransitScheduled();
156      Thread.sleep(MSG_INTERVAL * 5);
157      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1, amSource);
158      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 1,
159        amSource);
160      METRICS_HELPER.assertCounter(
161        MetricsAssignmentManagerSource.ASSIGN_METRIC_PREFIX + "SubmittedCount", 2, amSource);
162    }
163  }
164}