View Javadoc

1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.Comparator;
24  import java.util.HashSet;
25  import java.util.Map;
26  import java.util.TreeMap;
27  import java.util.concurrent.atomic.AtomicBoolean;
28  import java.util.concurrent.atomic.AtomicInteger;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.fs.FileSystem;
33  import org.apache.hadoop.fs.Path;
34  import org.apache.hadoop.hbase.Chore;
35  import org.apache.hadoop.hbase.HColumnDescriptor;
36  import org.apache.hadoop.hbase.HConstants;
37  import org.apache.hadoop.hbase.HRegionInfo;
38  import org.apache.hadoop.hbase.HTableDescriptor;
39  import org.apache.hadoop.hbase.MetaTableAccessor;
40  import org.apache.hadoop.hbase.Server;
41  import org.apache.hadoop.hbase.TableName;
42  import org.apache.hadoop.hbase.backup.HFileArchiver;
43  import org.apache.hadoop.hbase.classification.InterfaceAudience;
44  import org.apache.hadoop.hbase.client.Connection;
45  import org.apache.hadoop.hbase.client.MetaScanner;
46  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
47  import org.apache.hadoop.hbase.client.Result;
48  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.FSUtils;
51  import org.apache.hadoop.hbase.util.Pair;
52  import org.apache.hadoop.hbase.util.PairOfSameType;
53  import org.apache.hadoop.hbase.util.Triple;
54  
55  /**
56   * A janitor for the catalog tables.  Scans the <code>hbase:meta</code> catalog
57   * table on a period looking for unused regions to garbage collect.
58   */
59  @InterfaceAudience.Private
60  public class CatalogJanitor extends Chore {
61    private static final Log LOG = LogFactory.getLog(CatalogJanitor.class.getName());
62    private final Server server;
63    private final MasterServices services;
64    private AtomicBoolean enabled = new AtomicBoolean(true);
65    private AtomicBoolean alreadyRunning = new AtomicBoolean(false);
66    private final Connection connection;
67  
68    CatalogJanitor(final Server server, final MasterServices services) {
69      super("CatalogJanitor-" + server.getServerName().toShortString(),
70        server.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000),
71        server);
72      this.server = server;
73      this.services = services;
74      this.connection = server.getConnection();
75    }
76  
77    @Override
78    protected boolean initialChore() {
79      try {
80        if (this.enabled.get()) scan();
81      } catch (IOException e) {
82        LOG.warn("Failed initial scan of catalog table", e);
83        return false;
84      }
85      return true;
86    }
87  
88    /**
89     * @param enabled
90     */
91    public boolean setEnabled(final boolean enabled) {
92      return this.enabled.getAndSet(enabled);
93    }
94  
95    boolean getEnabled() {
96      return this.enabled.get();
97    }
98  
99    @Override
100   protected void chore() {
101     try {
102       if (this.enabled.get()) {
103         scan();
104       } else {
105         LOG.warn("CatalogJanitor disabled! Not running scan.");
106       }
107     } catch (IOException e) {
108       LOG.warn("Failed scan of catalog table", e);
109     }
110   }
111 
112   /**
113    * Scans hbase:meta and returns a number of scanned rows, and a map of merged
114    * regions, and an ordered map of split parents.
115    * @return triple of scanned rows, map of merged regions and map of split
116    *         parent regioninfos
117    * @throws IOException
118    */
119   Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> getMergedRegionsAndSplitParents()
120       throws IOException {
121     return getMergedRegionsAndSplitParents(null);
122   }
123 
124   /**
125    * Scans hbase:meta and returns a number of scanned rows, and a map of merged
126    * regions, and an ordered map of split parents. if the given table name is
127    * null, return merged regions and split parents of all tables, else only the
128    * specified table
129    * @param tableName null represents all tables
130    * @return triple of scanned rows, and map of merged regions, and map of split
131    *         parent regioninfos
132    * @throws IOException
133    */
134   Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> getMergedRegionsAndSplitParents(
135       final TableName tableName) throws IOException {
136     final boolean isTableSpecified = (tableName != null);
137     // TODO: Only works with single hbase:meta region currently.  Fix.
138     final AtomicInteger count = new AtomicInteger(0);
139     // Keep Map of found split parents.  There are candidates for cleanup.
140     // Use a comparator that has split parents come before its daughters.
141     final Map<HRegionInfo, Result> splitParents =
142       new TreeMap<HRegionInfo, Result>(new SplitParentFirstComparator());
143     final Map<HRegionInfo, Result> mergedRegions = new TreeMap<HRegionInfo, Result>();
144     // This visitor collects split parents and counts rows in the hbase:meta table
145 
146     MetaScannerVisitor visitor = new MetaScanner.MetaScannerVisitorBase() {
147       @Override
148       public boolean processRow(Result r) throws IOException {
149         if (r == null || r.isEmpty()) return true;
150         count.incrementAndGet();
151         HRegionInfo info = HRegionInfo.getHRegionInfo(r);
152         if (info == null) return true; // Keep scanning
153         if (isTableSpecified
154             && info.getTable().compareTo(tableName) > 0) {
155           // Another table, stop scanning
156           return false;
157         }
158         if (info.isSplitParent()) splitParents.put(info, r);
159         if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null) {
160           mergedRegions.put(info, r);
161         }
162         // Returning true means "keep scanning"
163         return true;
164       }
165     };
166 
167     // Run full scan of hbase:meta catalog table passing in our custom visitor with
168     // the start row
169     MetaScanner.metaScan(server.getConfiguration(), this.connection, visitor, tableName);
170 
171     return new Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>>(
172         count.get(), mergedRegions, splitParents);
173   }
174 
175   /**
176    * If merged region no longer holds reference to the merge regions, archive
177    * merge region on hdfs and perform deleting references in hbase:meta
178    * @param mergedRegion
179    * @param regionA
180    * @param regionB
181    * @return true if we delete references in merged region on hbase:meta and archive
182    *         the files on the file system
183    * @throws IOException
184    */
185   boolean cleanMergeRegion(final HRegionInfo mergedRegion,
186       final HRegionInfo regionA, final HRegionInfo regionB) throws IOException {
187     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
188     Path rootdir = this.services.getMasterFileSystem().getRootDir();
189     Path tabledir = FSUtils.getTableDir(rootdir, mergedRegion.getTable());
190     HTableDescriptor htd = getTableDescriptor(mergedRegion.getTable());
191     HRegionFileSystem regionFs = null;
192     try {
193       regionFs = HRegionFileSystem.openRegionFromFileSystem(
194           this.services.getConfiguration(), fs, tabledir, mergedRegion, true);
195     } catch (IOException e) {
196       LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
197     }
198     if (regionFs == null || !regionFs.hasReferences(htd)) {
199       LOG.debug("Deleting region " + regionA.getRegionNameAsString() + " and "
200           + regionB.getRegionNameAsString()
201           + " from fs because merged region no longer holds references");
202       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionA);
203       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionB);
204       MetaTableAccessor.deleteMergeQualifiers(server.getConnection(),
205         mergedRegion);
206       return true;
207     }
208     return false;
209   }
210 
211   /**
212    * Run janitorial scan of catalog <code>hbase:meta</code> table looking for
213    * garbage to collect.
214    * @return number of cleaned regions
215    * @throws IOException
216    */
217   int scan() throws IOException {
218     try {
219       if (!alreadyRunning.compareAndSet(false, true)) {
220         return 0;
221       }
222       Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> scanTriple =
223         getMergedRegionsAndSplitParents();
224       int count = scanTriple.getFirst();
225       /**
226        * clean merge regions first
227        */
228       int mergeCleaned = 0;
229       Map<HRegionInfo, Result> mergedRegions = scanTriple.getSecond();
230       for (Map.Entry<HRegionInfo, Result> e : mergedRegions.entrySet()) {
231         HRegionInfo regionA = HRegionInfo.getHRegionInfo(e.getValue(),
232             HConstants.MERGEA_QUALIFIER);
233         HRegionInfo regionB = HRegionInfo.getHRegionInfo(e.getValue(),
234             HConstants.MERGEB_QUALIFIER);
235         if (regionA == null || regionB == null) {
236           LOG.warn("Unexpected references regionA="
237               + (regionA == null ? "null" : regionA.getRegionNameAsString())
238               + ",regionB="
239               + (regionB == null ? "null" : regionB.getRegionNameAsString())
240               + " in merged region " + e.getKey().getRegionNameAsString());
241         } else {
242           if (cleanMergeRegion(e.getKey(), regionA, regionB)) {
243             mergeCleaned++;
244           }
245         }
246       }
247       /**
248        * clean split parents
249        */
250       Map<HRegionInfo, Result> splitParents = scanTriple.getThird();
251 
252       // Now work on our list of found parents. See if any we can clean up.
253       int splitCleaned = 0;
254       // regions whose parents are still around
255       HashSet<String> parentNotCleaned = new HashSet<String>();
256       for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) {
257         if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
258             cleanParent(e.getKey(), e.getValue())) {
259           splitCleaned++;
260         } else {
261           // We could not clean the parent, so it's daughters should not be cleaned either (HBASE-6160)
262           PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(e.getValue());
263           parentNotCleaned.add(daughters.getFirst().getEncodedName());
264           parentNotCleaned.add(daughters.getSecond().getEncodedName());
265         }
266       }
267       if ((mergeCleaned + splitCleaned) != 0) {
268         LOG.info("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned
269             + " unreferenced merged region(s) and " + splitCleaned
270             + " unreferenced parent region(s)");
271       } else if (LOG.isTraceEnabled()) {
272         LOG.trace("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned
273             + " unreferenced merged region(s) and " + splitCleaned
274             + " unreferenced parent region(s)");
275       }
276       return mergeCleaned + splitCleaned;
277     } finally {
278       alreadyRunning.set(false);
279     }
280   }
281 
282   /**
283    * Compare HRegionInfos in a way that has split parents sort BEFORE their
284    * daughters.
285    */
286   static class SplitParentFirstComparator implements Comparator<HRegionInfo> {
287     Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
288     @Override
289     public int compare(HRegionInfo left, HRegionInfo right) {
290       // This comparator differs from the one HRegionInfo in that it sorts
291       // parent before daughters.
292       if (left == null) return -1;
293       if (right == null) return 1;
294       // Same table name.
295       int result = left.getTable().compareTo(right.getTable());
296       if (result != 0) return result;
297       // Compare start keys.
298       result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
299       if (result != 0) return result;
300       // Compare end keys, but flip the operands so parent comes first
301       result = rowEndKeyComparator.compare(right.getEndKey(), left.getEndKey());
302 
303       return result;
304     }
305   }
306 
307   /**
308    * If daughters no longer hold reference to the parents, delete the parent.
309    * @param parent HRegionInfo of split offlined parent
310    * @param rowContent Content of <code>parent</code> row in
311    * <code>metaRegionName</code>
312    * @return True if we removed <code>parent</code> from meta table and from
313    * the filesystem.
314    * @throws IOException
315    */
316   boolean cleanParent(final HRegionInfo parent, Result rowContent)
317   throws IOException {
318     boolean result = false;
319     // Check whether it is a merged region and not clean reference
320     // No necessary to check MERGEB_QUALIFIER because these two qualifiers will
321     // be inserted/deleted together
322     if (rowContent.getValue(HConstants.CATALOG_FAMILY,
323         HConstants.MERGEA_QUALIFIER) != null) {
324       // wait cleaning merge region first
325       return result;
326     }
327     // Run checks on each daughter split.
328     PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(rowContent);
329     Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst());
330     Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond());
331     if (hasNoReferences(a) && hasNoReferences(b)) {
332       LOG.debug("Deleting region " + parent.getRegionNameAsString() +
333         " because daughter splits no longer hold references");
334       FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
335       if (LOG.isTraceEnabled()) LOG.trace("Archiving parent region: " + parent);
336       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, parent);
337       MetaTableAccessor.deleteRegion(this.connection, parent);
338       result = true;
339     }
340     return result;
341   }
342 
343   /**
344    * @param p A pair where the first boolean says whether or not the daughter
345    * region directory exists in the filesystem and then the second boolean says
346    * whether the daughter has references to the parent.
347    * @return True the passed <code>p</code> signifies no references.
348    */
349   private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
350     return !p.getFirst() || !p.getSecond();
351   }
352 
353   /**
354    * Checks if a daughter region -- either splitA or splitB -- still holds
355    * references to parent.
356    * @param parent Parent region
357    * @param daughter Daughter region
358    * @return A pair where the first boolean says whether or not the daughter
359    * region directory exists in the filesystem and then the second boolean says
360    * whether the daughter has references to the parent.
361    * @throws IOException
362    */
363   Pair<Boolean, Boolean> checkDaughterInFs(final HRegionInfo parent, final HRegionInfo daughter)
364   throws IOException {
365     if (daughter == null)  {
366       return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
367     }
368 
369     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
370     Path rootdir = this.services.getMasterFileSystem().getRootDir();
371     Path tabledir = FSUtils.getTableDir(rootdir, daughter.getTable());
372 
373     HRegionFileSystem regionFs = null;
374     try {
375       regionFs = HRegionFileSystem.openRegionFromFileSystem(
376           this.services.getConfiguration(), fs, tabledir, daughter, true);
377     } catch (IOException e) {
378       LOG.warn("Daughter region does not exist: " + daughter.getEncodedName()
379         + ", parent is: " + parent.getEncodedName());
380       return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
381     }
382 
383     boolean references = false;
384     HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTable());
385     for (HColumnDescriptor family: parentDescriptor.getFamilies()) {
386       if ((references = regionFs.hasReferences(family.getNameAsString()))) {
387         break;
388       }
389     }
390     return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.valueOf(references));
391   }
392 
393   private HTableDescriptor getTableDescriptor(final TableName tableName)
394       throws FileNotFoundException, IOException {
395     return this.services.getTableDescriptors().get(tableName);
396   }
397 
398   /**
399    * Checks if the specified region has merge qualifiers, if so, try to clean
400    * them
401    * @param region
402    * @return true if the specified region doesn't have merge qualifier now
403    * @throws IOException
404    */
405   public boolean cleanMergeQualifier(final HRegionInfo region)
406       throws IOException {
407     // Get merge regions if it is a merged region and already has merge
408     // qualifier
409     Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaTableAccessor
410         .getRegionsFromMergeQualifier(this.services.getConnection(),
411           region.getRegionName());
412     if (mergeRegions == null
413         || (mergeRegions.getFirst() == null && mergeRegions.getSecond() == null)) {
414       // It doesn't have merge qualifier, no need to clean
415       return true;
416     }
417     // It shouldn't happen, we must insert/delete these two qualifiers together
418     if (mergeRegions.getFirst() == null || mergeRegions.getSecond() == null) {
419       LOG.error("Merged region " + region.getRegionNameAsString()
420           + " has only one merge qualifier in META.");
421       return false;
422     }
423     return cleanMergeRegion(region, mergeRegions.getFirst(),
424         mergeRegions.getSecond());
425   }
426 }