/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import java.io.Closeable;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.yetus.audience.InterfaceAudience;

/**
 * Hbck fixup tool APIs. Obtain an instance from {@link Connection#getHbck()} and call
 * {@link #close()} when done.
 * <p>
 * WARNING: the below methods can damage the cluster. They may leave the cluster in an
 * indeterminate state, e.g. region not assigned, or some hdfs files left behind. After running any
 * of the below, operators may have to do some clean up on hdfs or schedule some assign procedures
 * to get regions back online. DO AT YOUR OWN RISK. For experienced users only.
 * @see ConnectionFactory
 * @since 2.0.2, 2.1.1
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.HBCK)
public interface Hbck extends Abortable, Closeable {
  /**
   * Update table state in Meta only. No procedures are submitted to open/assign or close/unassign
   * regions of the table.
   * @param state table state
   * @return previous state of the table in Meta
   * @throws IOException if the update fails
   */
  TableState setTableStateInMeta(TableState state) throws IOException;

  /**
   * Update region state in Meta only. No procedures are submitted to manipulate the given region or
   * any other region from same table.
   * @param nameOrEncodedName2State region states to be updated in meta; as the parameter name
   *                                suggests, each key is a region name or an encoded region name
   *                                and each value is the state to set
   * @return previous states of the regions in Meta
   * @throws IOException if the update fails
   */
  Map<String, RegionState.State>
    setRegionStateInMeta(Map<String, RegionState.State> nameOrEncodedName2State) throws IOException;

  /**
   * Like {@link Admin#assign(byte[])} but 'raw' in that it can do more than one Region at a time --
   * good if many Regions to online -- and it will schedule the assigns even in the case where
   * Master is initializing (as long as the ProcedureExecutor is up). Does NOT call Coprocessor
   * hooks.
   * @param encodedRegionNames Region encoded names; e.g. 1588230740 is the hard-coded encoding for
   *                           hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an example
   *                           of what a random user-space encoded Region name looks like.
   * @param override           You need to add override for unset of the procedure from
   *                           RegionStateNode without byPassing preTransitCheck
   * @param force              You need to add force for case where a region has previously been
   *                           bypassed. When a Procedure has been bypassed, a Procedure will have
   *                           completed but no other Procedure will be able to make progress on the
   *                           target entity (intentionally). Skips preTransitCheck only when
   *                           selected along with override option
   * @return ids of the scheduled procedures (NOTE(review): presumably one entry per requested
   *         region -- confirm against the Master-side implementation)
   * @throws IOException if the request fails
   */
  List<Long> assigns(List<String> encodedRegionNames, boolean override, boolean force)
    throws IOException;

  /**
   * Same as {@link #assigns(List, boolean, boolean)} with {@code force} set to {@code true}.
   * @param encodedRegionNames Region encoded names
   * @param override           see {@link #assigns(List, boolean, boolean)}
   * @return see {@link #assigns(List, boolean, boolean)}
   * @throws IOException if the request fails
   */
  default List<Long> assigns(List<String> encodedRegionNames, boolean override) throws IOException {
    return assigns(encodedRegionNames, override, true);
  }

  /**
   * Same as {@link #assigns(List, boolean, boolean)} with {@code override} set to {@code false} and
   * {@code force} set to {@code true}.
   * @param encodedRegionNames Region encoded names
   * @return see {@link #assigns(List, boolean, boolean)}
   * @throws IOException if the request fails
   */
  default List<Long> assigns(List<String> encodedRegionNames) throws IOException {
    // Use the same 'force' default as the other convenience overloads (assigns(List, boolean),
    // unassigns(List, boolean) and unassigns(List)). 'force' is documented to take effect only
    // together with 'override', which stays false here.
    return assigns(encodedRegionNames, false, true);
  }

  /**
   * Like {@link Admin#unassign(byte[], boolean)} but 'raw' in that it can do more than one Region
   * at a time -- good if many Regions to offline -- and it will schedule the unassigns even in the
   * case where Master is initializing (as long as the ProcedureExecutor is up). Does NOT call
   * Coprocessor hooks.
   * @param encodedRegionNames Region encoded names; e.g. 1588230740 is the hard-coded encoding for
   *                           hbase:meta region and de00010733901a05f5a2a3a382e27dd4 is an example
   *                           of what a random user-space encoded Region name looks like.
   * @param override           You need to add override for unset of the procedure from
   *                           RegionStateNode without byPassing preTransitCheck
   * @param force              You need to add force for case where a region has previously been
   *                           bypassed. When a Procedure has been bypassed, a Procedure will have
   *                           completed but no other Procedure will be able to make progress on the
   *                           target entity (intentionally). Skips preTransitCheck only when
   *                           selected along with override option
   * @return ids of the scheduled procedures (NOTE(review): presumably one entry per requested
   *         region -- confirm against the Master-side implementation)
   * @throws IOException if the request fails
   */
  List<Long> unassigns(List<String> encodedRegionNames, boolean override, boolean force)
    throws IOException;

  /**
   * Same as {@link #unassigns(List, boolean, boolean)} with {@code force} set to {@code true}.
   * @param encodedRegionNames Region encoded names
   * @param override           see {@link #unassigns(List, boolean, boolean)}
   * @return see {@link #unassigns(List, boolean, boolean)}
   * @throws IOException if the request fails
   */
  default List<Long> unassigns(List<String> encodedRegionNames, boolean override)
    throws IOException {
    return unassigns(encodedRegionNames, override, true);
  }

  /**
   * Same as {@link #unassigns(List, boolean, boolean)} with {@code override} set to {@code false}
   * and {@code force} set to {@code true}.
   * @param encodedRegionNames Region encoded names
   * @return see {@link #unassigns(List, boolean, boolean)}
   * @throws IOException if the request fails
   */
  default List<Long> unassigns(List<String> encodedRegionNames) throws IOException {
    return unassigns(encodedRegionNames, false, true);
  }

  /**
   * Bypass specified procedure and move it to completion. Procedure is marked completed but no
   * actual work is done from the current state/step onwards. Parents of the procedure are also
   * marked for bypass.
   * @param pids      of procedures to complete.
   * @param waitTime  wait time in ms for acquiring lock for a procedure
   * @param override  if override set to true, we will bypass the procedure even if it is executing.
   *                  This is for procedures which can't break out during execution (bugs?).
   * @param recursive If set, if a parent procedure, we will find and bypass children and then the
   *                  parent procedure (Dangerous but useful in case where child procedure has been
   *                  'lost'). Does not always work. Experimental.
   * @return list of results: {@code true} if a procedure is marked for bypass successfully,
   *         {@code false} otherwise (NOTE(review): presumably one entry per pid, in the order
   *         given -- confirm)
   * @throws IOException if the request fails
   */
  List<Boolean> bypassProcedure(List<Long> pids, long waitTime, boolean override, boolean recursive)
    throws IOException;

  /**
   * Schedule ServerCrashProcedures (SCPs) for the given servers.
   * @param serverNames servers to schedule a ServerCrashProcedure for
   * @return ids of the scheduled procedures (NOTE(review): exact contents not visible from this
   *         interface -- confirm against the implementation)
   * @throws IOException if the request fails
   */
  List<Long> scheduleServerCrashProcedures(List<ServerName> serverNames) throws IOException;

  /**
   * Schedule ServerCrashProcedures (SCPs) for unknown servers. NOTE(review): which servers count
   * as "unknown" is decided by the implementation and is not visible here.
   * @return ids of the scheduled procedures (NOTE(review): confirm against the implementation)
   * @throws IOException if the request fails
   */
  List<Long> scheduleSCPsForUnknownServers() throws IOException;

  /**
   * Request HBCK chore to run at master side.
   * @return <code>true</code> if HBCK chore ran, <code>false</code> if HBCK chore already running
   * @throws IOException if a remote or network exception occurs
   */
  boolean runHbckChore() throws IOException;

  /**
   * Fix Meta.
   * @throws IOException if the request fails
   */
  void fixMeta() throws IOException;
}