001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.Assert.fail;
021
022import java.io.IOException;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.hbase.CompatibilityFactory;
025import org.apache.hadoop.hbase.HBaseClassTestRule;
026import org.apache.hadoop.hbase.HBaseTestingUtility;
027import org.apache.hadoop.hbase.HConstants;
028import org.apache.hadoop.hbase.MiniHBaseCluster;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
031import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
032import org.apache.hadoop.hbase.client.Put;
033import org.apache.hadoop.hbase.client.Table;
034import org.apache.hadoop.hbase.client.TableDescriptor;
035import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
036import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
037import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
038import org.apache.hadoop.hbase.test.MetricsAssertHelper;
039import org.apache.hadoop.hbase.testclassification.MasterTests;
040import org.apache.hadoop.hbase.testclassification.MediumTests;
041import org.apache.hadoop.hbase.util.Bytes;
042import org.apache.hadoop.hbase.util.TableDescriptorChecker;
043import org.junit.AfterClass;
044import org.junit.BeforeClass;
045import org.junit.ClassRule;
046import org.junit.Rule;
047import org.junit.Test;
048import org.junit.experimental.categories.Category;
049import org.junit.rules.TestName;
050import org.slf4j.Logger;
051import org.slf4j.LoggerFactory;
052
053@Category({ MasterTests.class, MediumTests.class })
054public class TestAssignmentManagerMetrics {
055
056  @ClassRule
057  public static final HBaseClassTestRule CLASS_RULE =
058    HBaseClassTestRule.forClass(TestAssignmentManagerMetrics.class);
059
060  private static final Logger LOG = LoggerFactory.getLogger(TestAssignmentManagerMetrics.class);
061  private static final MetricsAssertHelper METRICS_HELPER =
062    CompatibilityFactory.getInstance(MetricsAssertHelper.class);
063
064  private static MiniHBaseCluster CLUSTER;
065  private static HMaster MASTER;
066  private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
067  private static final int MSG_INTERVAL = 1000;
068
069  @Rule
070  public TestName name = new TestName();
071
072  @BeforeClass
073  public static void startCluster() throws Exception {
074    LOG.info("Starting cluster");
075    Configuration conf = TEST_UTIL.getConfiguration();
076
077    // Disable sanity check for coprocessor
078    conf.setBoolean(TableDescriptorChecker.TABLE_SANITY_CHECKS, false);
079
080    // set RIT stuck warning threshold to a small value
081    conf.setInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 20);
082
083    // set msgInterval to 1 second
084    conf.setInt("hbase.regionserver.msginterval", MSG_INTERVAL);
085
086    // set tablesOnMaster to none
087    conf.set("hbase.balancer.tablesOnMaster", "none");
088
089    // set client sync wait timeout to 5sec
090    conf.setInt("hbase.client.sync.wait.timeout.msec", 5000);
091    conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
092    conf.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, 2500);
093    // set a small interval for updating rit metrics
094    conf.setInt(AssignmentManager.RIT_CHORE_INTERVAL_MSEC_CONF_KEY, MSG_INTERVAL);
095    // set a small assign attempts for avoiding assert when retrying. (HBASE-20533)
096    conf.setInt(AssignmentManager.ASSIGN_MAX_ATTEMPTS, 3);
097
098    // keep rs online so it can report the failed opens.
099    conf.setBoolean(CoprocessorHost.ABORT_ON_ERROR_KEY, false);
100    TEST_UTIL.startMiniCluster(1);
101    CLUSTER = TEST_UTIL.getHBaseCluster();
102    MASTER = CLUSTER.getMaster();
103  }
104
105  @AfterClass
106  public static void after() throws Exception {
107    LOG.info("AFTER {} <= IS THIS NULL?", TEST_UTIL);
108    TEST_UTIL.shutdownMiniCluster();
109  }
110
111  @Test
112  public void testRITAssignmentManagerMetrics() throws Exception {
113    final TableName TABLENAME = TableName.valueOf(name.getMethodName());
114    final byte[] FAMILY = Bytes.toBytes("family");
115    try (Table table = TEST_UTIL.createTable(TABLENAME, FAMILY)) {
116      final byte[] row = Bytes.toBytes("row");
117      final byte[] qualifier = Bytes.toBytes("qualifier");
118      final byte[] value = Bytes.toBytes("value");
119
120      Put put = new Put(row);
121      put.addColumn(FAMILY, qualifier, value);
122      table.put(put);
123
124      // Sleep 3 seconds, wait for doMetrics chore catching up
125      Thread.sleep(MSG_INTERVAL * 3);
126
127      // check the RIT is 0
128      MetricsAssignmentManagerSource amSource =
129        MASTER.getAssignmentManager().getAssignmentManagerMetrics().getMetricsProcSource();
130
131      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 0, amSource);
132      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 0,
133        amSource);
134
135      // alter table with a non-existing coprocessor
136
137      TableDescriptor htd = TableDescriptorBuilder.newBuilder(TABLENAME)
138        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY))
139        .setCoprocessor(CoprocessorDescriptorBuilder.newBuilder("com.foo.FooRegionObserver")
140          .setJarPath("hdfs:///foo.jar").setPriority(1001).setProperty("arg1", "1")
141          .setProperty("arg2", "2").build())
142        .build();
143      try {
144        TEST_UTIL.getAdmin().modifyTable(htd);
145        fail("Expected region failed to open");
146      } catch (IOException e) {
147        // expected, the RS will crash and the assignment will spin forever waiting for a RS
148        // to assign the region. the region will not go to FAILED_OPEN because in this case
149        // we have just one RS and it will do one retry.
150        LOG.info("Expected error", e);
151      }
152
153      // Sleep 5 seconds, wait for doMetrics chore catching up
154      // the rit count consists of rit and failed opens. see RegionInTransitionStat#update
155      // Waiting for the completion of rit makes the assert stable.
156      TEST_UTIL.waitUntilNoRegionsInTransition();
157      Thread.sleep(MSG_INTERVAL * 5);
158      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1, amSource);
159      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 1,
160        amSource);
161      METRICS_HELPER.assertCounter(
162        MetricsAssignmentManagerSource.ASSIGN_METRIC_PREFIX + "SubmittedCount", 3, amSource);
163    }
164  }
165}