001/** 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.master; 019 020import static org.junit.Assert.fail; 021 022import java.io.IOException; 023import org.apache.hadoop.conf.Configuration; 024import org.apache.hadoop.hbase.CompatibilityFactory; 025import org.apache.hadoop.hbase.HBaseClassTestRule; 026import org.apache.hadoop.hbase.HBaseTestingUtility; 027import org.apache.hadoop.hbase.HConstants; 028import org.apache.hadoop.hbase.MiniHBaseCluster; 029import org.apache.hadoop.hbase.TableName; 030import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; 031import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder; 032import org.apache.hadoop.hbase.client.Put; 033import org.apache.hadoop.hbase.client.Table; 034import org.apache.hadoop.hbase.client.TableDescriptor; 035import org.apache.hadoop.hbase.client.TableDescriptorBuilder; 036import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; 037import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 038import org.apache.hadoop.hbase.test.MetricsAssertHelper; 039import org.apache.hadoop.hbase.testclassification.MasterTests; 040import org.apache.hadoop.hbase.testclassification.MediumTests; 041import org.apache.hadoop.hbase.util.Bytes; 042import org.junit.AfterClass; 043import org.junit.BeforeClass; 044import org.junit.ClassRule; 045import org.junit.Rule; 046import org.junit.Test; 047import org.junit.experimental.categories.Category; 048import org.junit.rules.TestName; 049import org.slf4j.Logger; 050import org.slf4j.LoggerFactory; 051 052@Category({ MasterTests.class, MediumTests.class }) 053public class TestAssignmentManagerMetrics { 054 055 @ClassRule 056 public static final HBaseClassTestRule CLASS_RULE = 057 HBaseClassTestRule.forClass(TestAssignmentManagerMetrics.class); 058 059 private static final Logger LOG = LoggerFactory.getLogger(TestAssignmentManagerMetrics.class); 060 private static final MetricsAssertHelper METRICS_HELPER = CompatibilityFactory 061 .getInstance(MetricsAssertHelper.class); 062 063 private static MiniHBaseCluster CLUSTER; 064 private static HMaster MASTER; 065 private static HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); 066 private static final int MSG_INTERVAL = 1000; 067 068 @Rule 069 public TestName name = new TestName(); 070 071 @BeforeClass 072 public static void startCluster() throws Exception { 073 LOG.info("Starting cluster"); 074 Configuration conf = TEST_UTIL.getConfiguration(); 075 076 // Disable sanity check for coprocessor 077 conf.setBoolean("hbase.table.sanity.checks", false); 078 079 // set RIT stuck warning threshold to a small value 080 conf.setInt(HConstants.METRICS_RIT_STUCK_WARNING_THRESHOLD, 20); 081 082 // set msgInterval to 1 second 083 conf.setInt("hbase.regionserver.msginterval", MSG_INTERVAL); 084 085 // set tablesOnMaster to none 086 conf.set("hbase.balancer.tablesOnMaster", "none"); 087 088 // set client sync wait timeout to 5sec 089 conf.setInt("hbase.client.sync.wait.timeout.msec", 5000); 090 conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 1); 091 conf.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT, 2500); 092 // set a small interval for updating rit metrics 093 conf.setInt(AssignmentManager.RIT_CHORE_INTERVAL_MSEC_CONF_KEY, MSG_INTERVAL); 094 // set a small assign attempts for avoiding assert when retrying. (HBASE-20533) 095 conf.setInt(AssignmentManager.ASSIGN_MAX_ATTEMPTS, 3); 096 097 // keep rs online so it can report the failed opens. 098 conf.setBoolean(CoprocessorHost.ABORT_ON_ERROR_KEY, false); 099 TEST_UTIL.startMiniCluster(1); 100 CLUSTER = TEST_UTIL.getHBaseCluster(); 101 MASTER = CLUSTER.getMaster(); 102 } 103 104 @AfterClass 105 public static void after() throws Exception { 106 LOG.info("AFTER {} <= IS THIS NULL?", TEST_UTIL); 107 TEST_UTIL.shutdownMiniCluster(); 108 } 109 110 @Test 111 public void testRITAssignmentManagerMetrics() throws Exception { 112 final TableName TABLENAME = TableName.valueOf(name.getMethodName()); 113 final byte[] FAMILY = Bytes.toBytes("family"); 114 try (Table table = TEST_UTIL.createTable(TABLENAME, FAMILY)){ 115 final byte[] row = Bytes.toBytes("row"); 116 final byte[] qualifier = Bytes.toBytes("qualifier"); 117 final byte[] value = Bytes.toBytes("value"); 118 119 Put put = new Put(row); 120 put.addColumn(FAMILY, qualifier, value); 121 table.put(put); 122 123 // Sleep 3 seconds, wait for doMetrics chore catching up 124 Thread.sleep(MSG_INTERVAL * 3); 125 126 // check the RIT is 0 127 MetricsAssignmentManagerSource amSource = 128 MASTER.getAssignmentManager().getAssignmentManagerMetrics().getMetricsProcSource(); 129 130 METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 0, amSource); 131 METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 0, 132 amSource); 133 134 // alter table with a non-existing coprocessor 135 136 TableDescriptor htd = TableDescriptorBuilder.newBuilder(TABLENAME) 137 .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY)) 138 .setCoprocessor(CoprocessorDescriptorBuilder.newBuilder("com.foo.FooRegionObserver") 139 .setJarPath("hdfs:///foo.jar") 140 .setPriority(1001) 141 .setProperty("arg1", "1") 142 .setProperty("arg2", "2") 143 .build()) 144 .build(); 145 try { 146 TEST_UTIL.getAdmin().modifyTable(htd); 147 fail("Expected region failed to open"); 148 } catch (IOException e) { 149 // expected, the RS will crash and the assignment will spin forever waiting for a RS 150 // to assign the region. the region will not go to FAILED_OPEN because in this case 151 // we have just one RS and it will do one retry. 152 LOG.info("Expected error", e); 153 } 154 155 // Sleep 5 seconds, wait for doMetrics chore catching up 156 // the rit count consists of rit and failed opens. see RegionInTransitionStat#update 157 // Waiting for the completion of rit makes the assert stable. 158 TEST_UTIL.waitUntilNoRegionsInTransition(); 159 Thread.sleep(MSG_INTERVAL * 5); 160 METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_NAME, 1, amSource); 161 METRICS_HELPER.assertGauge(MetricsAssignmentManagerSource.RIT_COUNT_OVER_THRESHOLD_NAME, 1, 162 amSource); 163 } 164 } 165}