001/**
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master;
019
020import static org.junit.Assert.fail;
021
022import java.io.IOException;
023import org.apache.hadoop.conf.Configuration;
024import org.apache.hadoop.hbase.CompatibilityFactory;
025import org.apache.hadoop.hbase.HBaseClassTestRule;
026import org.apache.hadoop.hbase.HBaseTestingUtility;
027import org.apache.hadoop.hbase.HConstants;
028import org.apache.hadoop.hbase.MiniHBaseCluster;
029import org.apache.hadoop.hbase.TableName;
030import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
031import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
032import org.apache.hadoop.hbase.client.Put;
033import org.apache.hadoop.hbase.client.Table;
034import org.apache.hadoop.hbase.client.TableDescriptor;
035import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
036import org.apache.hadoop.hbase.coprocessor.CoprocessorHost;
037import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
038import org.apache.hadoop.hbase.test.MetricsAssertHelper;
039import org.apache.hadoop.hbase.testclassification.MasterTests;
040import org.apache.hadoop.hbase.testclassification.MediumTests;
041import org.apache.hadoop.hbase.util.Bytes;
042import org.junit.AfterClass;
043import org.junit.BeforeClass;
044import org.junit.ClassRule;
045import org.junit.Rule;
046import org.junit.Test;
047import org.junit.experimental.categories.Category;
048import org.junit.rules.TestName;
049import org.slf4j.Logger;
050import org.slf4j.LoggerFactory;
051
052@Category({ MasterTests.class, MediumTests.class })
053public class TestAssignmentManagerMetrics {
054
055  @ClassRule
056  public static final HBaseClassTestRule CLASS_RULE =
057      HBaseClassTestRule.forClass(TestAssignmentManagerMetrics.class);
058
059  private static final Logger LOG = LoggerFactory.getLogger(TestAssignmentManagerMetrics.class);
060  private static final MetricsAssertHelper METRICS_HELPER = CompatibilityFactory
061      .getInstance(MetricsAssertHelper.class);
062
063  private static MiniHBaseCluster CLUSTER;
064  private static HMaster MASTER;
065  private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
066  private static final int MSG_INTERVAL = 1000;
067
068  @Rule
069  public TestName name = new TestName();
070
071  @BeforeClass
072  public static void startCluster() throws Exception {
073    LOG.info("Starting cluster");
074    Configuration conf = TEST_UTIL.getConfiguration();
075
076    // Disable sanity check for coprocessor
077    conf.setBoolean("hbase.table.sanity.checks", false);
078
079    // set RIT stuck warning threshold to a small value
080    conf.setInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 20);
081
082    // set msgInterval to 1 second
083    conf.setInt("hbase.regionserver.msginterval", MSG_INTERVAL);
084
085    // set tablesOnMaster to none
086    conf.set("hbase.balancer.tablesOnMaster", "none");
087
088    // set client sync wait timeout to 5sec
089    conf.setInt("hbase.client.sync.wait.timeout.msec", 5000);
090    conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1);
091    conf.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, 2500);
092    // set a small interval for updating rit metrics
093    conf.setInt(AssignmentManager.RIT_CHORE_INTERVAL_MSEC_CONF_KEY, MSG_INTERVAL);
094    // set a small assign attempts for avoiding assert when retrying. (HBASE-20533)
095    conf.setInt(AssignmentManager.ASSIGN_MAX_ATTEMPTS, 3);
096
097    // keep rs online so it can report the failed opens.
098    conf.setBoolean(CoprocessorHost.ABORT_ON_ERROR_KEY, false);
099    TEST_UTIL.startMiniCluster(1);
100    CLUSTER = TEST_UTIL.getHBaseCluster();
101    MASTER = CLUSTER.getMaster();
102  }
103
104  @AfterClass
105  public static void after() throws Exception {
106    LOG.info("AFTER {} <= IS THIS NULL?", TEST_UTIL);
107    TEST_UTIL.shutdownMiniCluster();
108  }
109
110  @Test
111  public void testRITAssignmentManagerMetrics() throws Exception {
112    final TableName TABLENAME = TableName.valueOf(name.getMethodName());
113    final byte[] FAMILY = Bytes.toBytes("family");
114    try (Table table = TEST_UTIL.createTable(TABLENAME, FAMILY)){
115      final byte[] row = Bytes.toBytes("row");
116      final byte[] qualifier = Bytes.toBytes("qualifier");
117      final byte[] value = Bytes.toBytes("value");
118
119      Put put = new Put(row);
120      put.addColumn(FAMILY, qualifier, value);
121      table.put(put);
122
123      // Sleep 3 seconds, wait for doMetrics chore catching up
124      Thread.sleep(MSG_INTERVAL * 3);
125
126      // check the RIT is 0
127      MetricsAssignmentManagerSource amSource =
128          MASTER.getAssignmentManager().getAssignmentManagerMetrics().getMetricsProcSource();
129
130      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 0, amSource);
131      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 0,
132          amSource);
133
134      // alter table with a non-existing coprocessor
135
136      TableDescriptor htd = TableDescriptorBuilder.newBuilder(TABLENAME)
137        .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY))
138        .setCoprocessor(CoprocessorDescriptorBuilder.newBuilder("com.foo.FooRegionObserver")
139          .setJarPath("hdfs:///foo.jar")
140          .setPriority(1001)
141          .setProperty("arg1", "1")
142          .setProperty("arg2", "2")
143          .build())
144        .build();
145      try {
146        TEST_UTIL.getAdmin().modifyTable(htd);
147        fail("Expected region failed to open");
148      } catch (IOException e) {
149        // expected, the RS will crash and the assignment will spin forever waiting for a RS
150        // to assign the region. the region will not go to FAILED_OPEN because in this case
151        // we have just one RS and it will do one retry.
152        LOG.info("Expected error", e);
153      }
154
155      // Sleep 5 seconds, wait for doMetrics chore catching up
156      // the rit count consists of rit and failed opens. see RegionInTransitionStat#update
157      // Waiting for the completion of rit makes the assert stable.
158      TEST_UTIL.waitUntilNoRegionsInTransition();
159      Thread.sleep(MSG_INTERVAL * 5);
160      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1, amSource);
161      METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 1,
162        amSource);
163    }
164  }
165}