001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.util.compaction;
019
020import java.io.IOException;
021import java.util.Arrays;
022import java.util.Collection;
023import java.util.Collections;
024import java.util.List;
025import java.util.Map;
026import java.util.Optional;
027import java.util.Set;
028import java.util.concurrent.ConcurrentHashMap;
029import java.util.concurrent.ExecutorService;
030import java.util.concurrent.Executors;
031import java.util.concurrent.Future;
032import java.util.concurrent.TimeUnit;
033import org.apache.hadoop.conf.Configuration;
034import org.apache.hadoop.conf.Configured;
035import org.apache.hadoop.hbase.HBaseConfiguration;
036import org.apache.hadoop.hbase.HBaseInterfaceAudience;
037import org.apache.hadoop.hbase.HConstants;
038import org.apache.hadoop.hbase.HRegionLocation;
039import org.apache.hadoop.hbase.NotServingRegionException;
040import org.apache.hadoop.hbase.ServerName;
041import org.apache.hadoop.hbase.TableName;
042import org.apache.hadoop.hbase.client.Admin;
043import org.apache.hadoop.hbase.client.CompactionState;
044import org.apache.hadoop.hbase.client.Connection;
045import org.apache.hadoop.hbase.client.ConnectionFactory;
046import org.apache.hadoop.hbase.client.RegionInfo;
047import org.apache.hadoop.hbase.util.Bytes;
048import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
049import org.apache.hadoop.util.Tool;
050import org.apache.hadoop.util.ToolRunner;
051import org.apache.yetus.audience.InterfaceAudience;
052import org.slf4j.Logger;
053import org.slf4j.LoggerFactory;
054
055import org.apache.hbase.thirdparty.com.google.common.base.Joiner;
056import org.apache.hbase.thirdparty.com.google.common.base.Splitter;
057import org.apache.hbase.thirdparty.com.google.common.collect.Iterables;
058import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
059import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
060import org.apache.hbase.thirdparty.com.google.common.collect.Sets;
061import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine;
062import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLineParser;
063import org.apache.hbase.thirdparty.org.apache.commons.cli.DefaultParser;
064import org.apache.hbase.thirdparty.org.apache.commons.cli.HelpFormatter;
065import org.apache.hbase.thirdparty.org.apache.commons.cli.Option;
066import org.apache.hbase.thirdparty.org.apache.commons.cli.Options;
067import org.apache.hbase.thirdparty.org.apache.commons.cli.ParseException;
068
069@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.TOOLS)
070public class MajorCompactor extends Configured implements Tool {
071
072  private static final Logger LOG = LoggerFactory.getLogger(MajorCompactor.class);
073  protected static final Set<MajorCompactionRequest> ERRORS = ConcurrentHashMap.newKeySet();
074
075  protected ClusterCompactionQueues clusterCompactionQueues;
076  private long timestamp;
077  protected Set<String> storesToCompact;
078  protected ExecutorService executor;
079  protected long sleepForMs;
080  protected Connection connection;
081  protected TableName tableName;
082  private int numServers = -1;
083  private int numRegions = -1;
084  private boolean skipWait = false;
085
086  MajorCompactor() {
087  }
088
089  public MajorCompactor(Configuration conf, TableName tableName, Set<String> storesToCompact,
090    int concurrency, long timestamp, long sleepForMs) throws IOException {
091    this.connection = ConnectionFactory.createConnection(conf);
092    this.tableName = tableName;
093    this.timestamp = timestamp;
094    this.storesToCompact = storesToCompact;
095    this.executor = Executors.newFixedThreadPool(concurrency);
096    this.clusterCompactionQueues = new ClusterCompactionQueues(concurrency);
097    this.sleepForMs = sleepForMs;
098  }
099
100  public void compactAllRegions() throws Exception {
101    List<Future<?>> futures = Lists.newArrayList();
102    while (clusterCompactionQueues.hasWorkItems() || !futuresComplete(futures)) {
103      while (clusterCompactionQueues.atCapacity()) {
104        LOG.debug("Waiting for servers to complete Compactions");
105        Thread.sleep(sleepForMs);
106      }
107      Optional<ServerName> serverToProcess =
108        clusterCompactionQueues.getLargestQueueFromServersNotCompacting();
109      if (serverToProcess.isPresent() && clusterCompactionQueues.hasWorkItems()) {
110        ServerName serverName = serverToProcess.get();
111        // check to see if the region has moved... if so we have to enqueue it again with
112        // the proper serverName
113        MajorCompactionRequest request = clusterCompactionQueues.reserveForCompaction(serverName);
114
115        ServerName currentServer = connection.getRegionLocator(tableName)
116          .getRegionLocation(request.getRegion().getStartKey()).getServerName();
117
118        if (!currentServer.equals(serverName)) {
119          // add it back to the queue with the correct server it should be picked up in the future.
120          LOG.info("Server changed for region: " + request.getRegion().getEncodedName() + " from: "
121            + serverName + " to: " + currentServer + " re-queuing request");
122          clusterCompactionQueues.addToCompactionQueue(currentServer, request);
123          clusterCompactionQueues.releaseCompaction(serverName);
124        } else {
125          LOG.info("Firing off compaction request for server: " + serverName + ", " + request
126            + " total queue size left: "
127            + clusterCompactionQueues.getCompactionRequestsLeftToFinish());
128          futures.add(executor.submit(new Compact(serverName, request)));
129        }
130      } else {
131        // haven't assigned anything so we sleep.
132        Thread.sleep(sleepForMs);
133      }
134    }
135    LOG.info("All compactions have completed");
136  }
137
138  private boolean futuresComplete(List<Future<?>> futures) {
139    futures.removeIf(Future::isDone);
140    return futures.isEmpty();
141  }
142
143  public void shutdown() throws Exception {
144    executor.shutdown();
145    executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS);
146    if (!ERRORS.isEmpty()) {
147      StringBuilder builder = new StringBuilder().append("Major compaction failed, there were: ")
148        .append(ERRORS.size()).append(" regions / stores that failed compacting\n")
149        .append("Failed compaction requests\n").append("--------------------------\n")
150        .append(Joiner.on("\n").join(ERRORS));
151      LOG.error(builder.toString());
152    }
153    if (connection != null) {
154      connection.close();
155    }
156    LOG.info("All regions major compacted successfully");
157  }
158
159  @InterfaceAudience.Private
160  void initializeWorkQueues() throws IOException {
161    if (storesToCompact.isEmpty()) {
162      connection.getTable(tableName).getDescriptor().getColumnFamilyNames()
163        .forEach(a -> storesToCompact.add(Bytes.toString(a)));
164      LOG.info("No family specified, will execute for all families");
165    }
166    LOG.info(
167      "Initializing compaction queues for table:  " + tableName + " with cf: " + storesToCompact);
168
169    Map<ServerName, List<RegionInfo>> snRegionMap = getServerRegionsMap();
170    /*
171     * If numservers is specified, stop inspecting regions beyond the numservers, it will serve to
172     * throttle and won't end up scanning all the regions in the event there are not many regions to
173     * compact based on the criteria.
174     */
175    for (ServerName sn : getServersToCompact(snRegionMap.keySet())) {
176      List<RegionInfo> regions = snRegionMap.get(sn);
177      LOG.debug("Table: " + tableName + " Server: " + sn + " No of regions: " + regions.size());
178
179      /*
180       * If the tool is run periodically, then we could shuffle the regions and provide some random
181       * order to select regions. Helps if numregions is specified.
182       */
183      Collections.shuffle(regions);
184      int regionsToCompact = numRegions;
185      for (RegionInfo hri : regions) {
186        if (numRegions > 0 && regionsToCompact <= 0) {
187          LOG.debug("Reached region limit for server: " + sn);
188          break;
189        }
190
191        Optional<MajorCompactionRequest> request = getMajorCompactionRequest(hri);
192        if (request.isPresent()) {
193          LOG.debug("Adding region " + hri + " to queue " + sn + " for compaction");
194          clusterCompactionQueues.addToCompactionQueue(sn, request.get());
195          if (numRegions > 0) {
196            regionsToCompact--;
197          }
198        }
199      }
200    }
201  }
202
203  protected Optional<MajorCompactionRequest> getMajorCompactionRequest(RegionInfo hri)
204    throws IOException {
205    return MajorCompactionRequest.newRequest(connection, hri, storesToCompact, timestamp);
206  }
207
208  private Collection<ServerName> getServersToCompact(Set<ServerName> snSet) {
209    if (numServers < 0 || snSet.size() <= numServers) {
210      return snSet;
211
212    } else {
213      List<ServerName> snList = Lists.newArrayList(snSet);
214      Collections.shuffle(snList);
215      return snList.subList(0, numServers);
216    }
217  }
218
219  private Map<ServerName, List<RegionInfo>> getServerRegionsMap() throws IOException {
220    Map<ServerName, List<RegionInfo>> snRegionMap = Maps.newHashMap();
221    List<HRegionLocation> regionLocations =
222      connection.getRegionLocator(tableName).getAllRegionLocations();
223    for (HRegionLocation regionLocation : regionLocations) {
224      ServerName sn = regionLocation.getServerName();
225      RegionInfo hri = regionLocation.getRegion();
226      if (!snRegionMap.containsKey(sn)) {
227        snRegionMap.put(sn, Lists.newArrayList());
228      }
229      snRegionMap.get(sn).add(hri);
230    }
231    return snRegionMap;
232  }
233
234  public void setNumServers(int numServers) {
235    this.numServers = numServers;
236  }
237
238  public void setNumRegions(int numRegions) {
239    this.numRegions = numRegions;
240  }
241
242  public void setSkipWait(boolean skipWait) {
243    this.skipWait = skipWait;
244  }
245
246  class Compact implements Runnable {
247
248    private final ServerName serverName;
249    private final MajorCompactionRequest request;
250
251    Compact(ServerName serverName, MajorCompactionRequest request) {
252      this.serverName = serverName;
253      this.request = request;
254    }
255
256    @Override
257    public void run() {
258      try {
259        compactAndWait(request);
260      } catch (NotServingRegionException e) {
261        // this region has split or merged
262        LOG.warn("Region is invalid, requesting updated regions", e);
263        // lets updated the cluster compaction queues with these newly created regions.
264        addNewRegions();
265      } catch (Exception e) {
266        LOG.warn("Error compacting:", e);
267      } finally {
268        clusterCompactionQueues.releaseCompaction(serverName);
269      }
270    }
271
272    void compactAndWait(MajorCompactionRequest request) throws Exception {
273      Admin admin = connection.getAdmin();
274      try {
275        // only make the request if the region is not already major compacting
276        if (!isCompacting(request)) {
277          Set<String> stores = getStoresRequiringCompaction(request);
278          if (!stores.isEmpty()) {
279            request.setStores(stores);
280            for (String store : request.getStores()) {
281              compactRegionOnServer(request, admin, store);
282            }
283          }
284        }
285
286        /*
287         * In some scenarios like compacting TTLed regions, the compaction itself won't take time
288         * and hence we can skip the wait. An external tool will also be triggered frequently and
289         * the next run can identify region movements and compact them.
290         */
291        if (!skipWait) {
292          while (isCompacting(request)) {
293            Thread.sleep(sleepForMs);
294            LOG.debug("Waiting for compaction to complete for region: "
295              + request.getRegion().getEncodedName());
296          }
297        }
298      } finally {
299        if (!skipWait) {
300          // Make sure to wait for the CompactedFileDischarger chore to do its work
301          int waitForArchive = connection.getConfiguration()
302            .getInt("hbase.hfile.compaction.discharger.interval", 2 * 60 * 1000);
303          Thread.sleep(waitForArchive);
304          // check if compaction completed successfully, otherwise put that request back in the
305          // proper queue
306          Set<String> storesRequiringCompaction = getStoresRequiringCompaction(request);
307          if (!storesRequiringCompaction.isEmpty()) {
308            // this happens, when a region server is marked as dead, flushes a store file and
309            // the new regionserver doesn't pick it up because its accounted for in the WAL replay,
310            // thus you have more store files on the filesystem than the regionserver knows about.
311            boolean regionHasNotMoved = connection.getRegionLocator(tableName)
312              .getRegionLocation(request.getRegion().getStartKey()).getServerName()
313              .equals(serverName);
314            if (regionHasNotMoved) {
315              LOG.error(
316                "Not all store files were compacted, this may be due to the regionserver not "
317                  + "being aware of all store files.  Will not reattempt compacting, " + request);
318              ERRORS.add(request);
319            } else {
320              request.setStores(storesRequiringCompaction);
321              clusterCompactionQueues.addToCompactionQueue(serverName, request);
322              LOG.info("Compaction failed for the following stores: " + storesRequiringCompaction
323                + " region: " + request.getRegion().getEncodedName());
324            }
325          } else {
326            LOG.info("Compaction complete for region: " + request.getRegion().getEncodedName()
327              + " -> cf(s): " + request.getStores());
328          }
329        }
330      }
331    }
332
333    private void compactRegionOnServer(MajorCompactionRequest request, Admin admin, String store)
334      throws IOException {
335      admin.majorCompactRegion(request.getRegion().getEncodedNameAsBytes(), Bytes.toBytes(store));
336    }
337  }
338
339  private boolean isCompacting(MajorCompactionRequest request) throws Exception {
340    CompactionState compactionState = connection.getAdmin()
341      .getCompactionStateForRegion(request.getRegion().getEncodedNameAsBytes());
342    return compactionState.equals(CompactionState.MAJOR)
343      || compactionState.equals(CompactionState.MAJOR_AND_MINOR);
344  }
345
346  private void addNewRegions() {
347    try {
348      List<HRegionLocation> locations =
349        connection.getRegionLocator(tableName).getAllRegionLocations();
350      for (HRegionLocation location : locations) {
351        if (location.getRegion().getRegionId() > timestamp) {
352          Optional<MajorCompactionRequest> compactionRequest = MajorCompactionRequest
353            .newRequest(connection, location.getRegion(), storesToCompact, timestamp);
354          compactionRequest.ifPresent(request -> clusterCompactionQueues
355            .addToCompactionQueue(location.getServerName(), request));
356        }
357      }
358    } catch (IOException e) {
359      throw new RuntimeException(e);
360    }
361  }
362
363  protected Set<String> getStoresRequiringCompaction(MajorCompactionRequest request)
364    throws IOException {
365    return request.getStoresRequiringCompaction(storesToCompact, timestamp);
366  }
367
368  protected Options getCommonOptions() {
369    Options options = new Options();
370
371    options.addOption(
372      Option.builder("servers").required().desc("Concurrent servers compacting").hasArg().build());
373    options.addOption(Option.builder("minModTime")
374      .desc("Compact if store files have modification time < minModTime").hasArg().build());
375    options.addOption(Option.builder("zk").optionalArg(true).desc("zk quorum").hasArg().build());
376    options.addOption(
377      Option.builder("rootDir").optionalArg(true).desc("hbase.rootDir").hasArg().build());
378    options.addOption(Option.builder("sleep")
379      .desc("Time to sleepForMs (ms) for checking compaction status per region and available "
380        + "work queues: default 30s")
381      .hasArg().build());
382    options.addOption(Option.builder("retries")
383      .desc("Max # of retries for a compaction request," + " defaults to 3").hasArg().build());
384    options.addOption(Option.builder("dryRun")
385      .desc("Dry run, will just output a list of regions that require compaction based on "
386        + "parameters passed")
387      .hasArg(false).build());
388
389    options.addOption(Option.builder("skipWait").desc("Skip waiting after triggering compaction.")
390      .hasArg(false).build());
391
392    options.addOption(Option.builder("numservers").optionalArg(true)
393      .desc("Number of servers to compact in this run, defaults to all").hasArg().build());
394
395    options.addOption(Option.builder("numregions").optionalArg(true)
396      .desc("Number of regions to compact per server, defaults to all").hasArg().build());
397    return options;
398  }
399
400  @Override
401  public int run(String[] args) throws Exception {
402    Options options = getCommonOptions();
403    options.addOption(Option.builder("table").required().desc("table name").hasArg().build());
404    options.addOption(Option.builder("cf").optionalArg(true)
405      .desc("column families: comma separated eg: a,b,c").hasArg().build());
406
407    final CommandLineParser cmdLineParser = new DefaultParser();
408    CommandLine commandLine = null;
409    try {
410      commandLine = cmdLineParser.parse(options, args);
411    } catch (ParseException parseException) {
412      System.out.println("ERROR: Unable to parse command-line arguments " + Arrays.toString(args)
413        + " due to: " + parseException);
414      printUsage(options);
415      return -1;
416    }
417    if (commandLine == null) {
418      System.out.println("ERROR: Failed parse, empty commandLine; " + Arrays.toString(args));
419      printUsage(options);
420      return -1;
421    }
422    String tableName = commandLine.getOptionValue("table");
423    String cf = commandLine.getOptionValue("cf", null);
424    Set<String> families = Sets.newHashSet();
425    if (cf != null) {
426      Iterables.addAll(families, Splitter.on(",").split(cf));
427    }
428
429    Configuration configuration = getConf();
430    int concurrency = Integer.parseInt(commandLine.getOptionValue("servers"));
431    long minModTime = Long.parseLong(commandLine.getOptionValue("minModTime",
432      String.valueOf(EnvironmentEdgeManager.currentTime())));
433    String quorum =
434      commandLine.getOptionValue("zk", configuration.get(HConstants.ZOOKEEPER_QUORUM));
435    String rootDir = commandLine.getOptionValue("rootDir", configuration.get(HConstants.HBASE_DIR));
436    long sleep = Long.parseLong(commandLine.getOptionValue("sleep", Long.toString(30000)));
437
438    int numServers = Integer.parseInt(commandLine.getOptionValue("numservers", "-1"));
439    int numRegions = Integer.parseInt(commandLine.getOptionValue("numregions", "-1"));
440
441    configuration.set(HConstants.HBASE_DIR, rootDir);
442    configuration.set(HConstants.ZOOKEEPER_QUORUM, quorum);
443
444    MajorCompactor compactor = new MajorCompactor(configuration, TableName.valueOf(tableName),
445      families, concurrency, minModTime, sleep);
446    compactor.setNumServers(numServers);
447    compactor.setNumRegions(numRegions);
448    compactor.setSkipWait(commandLine.hasOption("skipWait"));
449
450    compactor.initializeWorkQueues();
451    if (!commandLine.hasOption("dryRun")) {
452      compactor.compactAllRegions();
453    }
454    compactor.shutdown();
455    return ERRORS.size();
456  }
457
458  protected static void printUsage(final Options options) {
459    String header = "\nUsage instructions\n\n";
460    String footer = "\n";
461    HelpFormatter formatter = new HelpFormatter();
462    formatter.printHelp(MajorCompactor.class.getSimpleName(), header, options, footer, true);
463  }
464
465  public static void main(String[] args) throws Exception {
466    ToolRunner.run(HBaseConfiguration.create(), new MajorCompactor(), args);
467  }
468}