1   /**
2    *
3    * Licensed to the Apache Software Foundation (ASF) under one
4    * or more contributor license agreements.  See the NOTICE file
5    * distributed with this work for additional information
6    * regarding copyright ownership.  The ASF licenses this file
7    * to you under the Apache License, Version 2.0 (the
8    * "License"); you may not use this file except in compliance
9    * with the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package org.apache.hadoop.hbase.master;
20  
21  import java.io.FileNotFoundException;
22  import java.io.IOException;
23  import java.util.Comparator;
24  import java.util.HashSet;
25  import java.util.Map;
26  import java.util.TreeMap;
27  import java.util.concurrent.atomic.AtomicBoolean;
28  import java.util.concurrent.atomic.AtomicInteger;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.classification.InterfaceAudience;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.hbase.Chore;
36  import org.apache.hadoop.hbase.HColumnDescriptor;
37  import org.apache.hadoop.hbase.HConstants;
38  import org.apache.hadoop.hbase.HRegionInfo;
39  import org.apache.hadoop.hbase.HTableDescriptor;
40  import org.apache.hadoop.hbase.Server;
41  import org.apache.hadoop.hbase.backup.HFileArchiver;
42  import org.apache.hadoop.hbase.catalog.MetaEditor;
43  import org.apache.hadoop.hbase.catalog.MetaReader;
44  import org.apache.hadoop.hbase.client.MetaScanner;
45  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
46  import org.apache.hadoop.hbase.client.Result;
47  import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
48  import org.apache.hadoop.hbase.util.Bytes;
49  import org.apache.hadoop.hbase.util.Pair;
50  import org.apache.hadoop.hbase.util.PairOfSameType;
51  import org.apache.hadoop.hbase.util.Triple;
52  
/**
 * A janitor for the catalog tables.  Periodically scans the <code>.META.</code>
 * catalog table looking for unused regions to garbage collect.
 */
57  @InterfaceAudience.Private
58  public class CatalogJanitor extends Chore {
59    private static final Log LOG = LogFactory.getLog(CatalogJanitor.class.getName());
60    private final Server server;
61    private final MasterServices services;
62    private AtomicBoolean enabled = new AtomicBoolean(true);
63    private AtomicBoolean alreadyRunning = new AtomicBoolean(false);
64  
/**
 * Builds the janitor chore. The chore is named after the server, and its
 * period is read from the server configuration.
 * @param server server whose name and configuration are used, and which is
 *        handed to the {@link Chore} superclass
 * @param services master services used by the clean-up routines
 */
CatalogJanitor(final Server server, final MasterServices services) {
  super(server.getServerName() + "-CatalogJanitor", readScanInterval(server), server);
  this.services = services;
  this.server = server;
}

/**
 * Reads <code>hbase.catalogjanitor.interval</code> from the server
 * configuration, falling back to 300000 when it is not set.
 */
private static int readScanInterval(final Server server) {
  return server.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000);
}
72  
73    @Override
74    protected boolean initialChore() {
75      try {
76        if (this.enabled.get()) scan();
77      } catch (IOException e) {
78        LOG.warn("Failed initial scan of catalog table", e);
79        return false;
80      }
81      return true;
82    }
83  
84    /**
85     * @param enabled
86     */
87    public boolean setEnabled(final boolean enabled) {
88      return this.enabled.getAndSet(enabled);
89    }
90  
91    boolean getEnabled() {
92      return this.enabled.get();
93    }
94  
95    @Override
96    protected void chore() {
97      try {
98        if (this.enabled.get()) {
99          scan();
100       } else {
101         LOG.warn("CatalogJanitor disabled! Not running scan.");
102       }
103     } catch (IOException e) {
104       LOG.warn("Failed scan of catalog table", e);
105     }
106   }
107 
108   /**
109    * Scans META and returns a number of scanned rows, and a map of merged
110    * regions, and an ordered map of split parents.
111    * @return triple of scanned rows, map of merged regions and map of split
112    *         parent regioninfos
113    * @throws IOException
114    */
115   Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> getMergedRegionsAndSplitParents()
116       throws IOException {
117     return getMergedRegionsAndSplitParents(null);
118   }
119 
120   /**
121    * Scans META and returns a number of scanned rows, and a map of merged
122    * regions, and an ordered map of split parents. if the given table name is
123    * null, return merged regions and split parents of all tables, else only the
124    * specified table
125    * @param tableName null represents all tables
126    * @return triple of scanned rows, and map of merged regions, and map of split
127    *         parent regioninfos
128    * @throws IOException
129    */
130   Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> getMergedRegionsAndSplitParents(
131       final byte[] tableName) throws IOException {
132     final boolean isTableSpecified = (tableName != null && tableName.length != 0);
133     // TODO: Only works with single .META. region currently.  Fix.
134     final AtomicInteger count = new AtomicInteger(0);
135     // Keep Map of found split parents.  There are candidates for cleanup.
136     // Use a comparator that has split parents come before its daughters.
137     final Map<HRegionInfo, Result> splitParents =
138       new TreeMap<HRegionInfo, Result>(new SplitParentFirstComparator());
139     final Map<HRegionInfo, Result> mergedRegions = new TreeMap<HRegionInfo, Result>();
140     // This visitor collects split parents and counts rows in the .META. table
141 
142     MetaScannerVisitor visitor = new MetaScanner.MetaScannerVisitorBase() {
143       @Override
144       public boolean processRow(Result r) throws IOException {
145         if (r == null || r.isEmpty()) return true;
146         count.incrementAndGet();
147         HRegionInfo info = HRegionInfo.getHRegionInfo(r);
148         if (info == null) return true; // Keep scanning
149         if (isTableSpecified
150             && Bytes.compareTo(info.getTableName(), tableName) > 0) {
151           // Another table, stop scanning
152           return false;
153         }
154         if (info.isSplitParent()) splitParents.put(info, r);
155         if (r.getValue(HConstants.CATALOG_FAMILY, HConstants.MERGEA_QUALIFIER) != null) {
156           mergedRegions.put(info, r);
157         }
158         // Returning true means "keep scanning"
159         return true;
160       }
161     };
162 
163     // Run full scan of .META. catalog table passing in our custom visitor with
164     // the start row
165     MetaScanner.metaScan(server.getConfiguration(), visitor, tableName);
166 
167     return new Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>>(
168         count.get(), mergedRegions, splitParents);
169   }
170 
171   /**
172    * If merged region no longer holds reference to the merge regions, archive
173    * merge region on hdfs and perform deleting references in .META.
174    * @param mergedRegion
175    * @param regionA
176    * @param regionB
177    * @return true if we delete references in merged region on .META. and archive
178    *         the files on the file system
179    * @throws IOException
180    */
181   boolean cleanMergeRegion(final HRegionInfo mergedRegion,
182       final HRegionInfo regionA, final HRegionInfo regionB) throws IOException {
183     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
184     Path rootdir = this.services.getMasterFileSystem().getRootDir();
185     Path tabledir = HTableDescriptor.getTableDir(rootdir,
186         mergedRegion.getTableName());
187     HTableDescriptor htd = getTableDescriptor(mergedRegion
188         .getTableNameAsString());
189     HRegionFileSystem regionFs = null;
190     try {
191       regionFs = HRegionFileSystem.openRegionFromFileSystem(
192           this.services.getConfiguration(), fs, tabledir, mergedRegion, true);
193     } catch (IOException e) {
194       LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
195     }
196     if (regionFs == null || !regionFs.hasReferences(htd)) {
197       LOG.debug("Deleting region " + regionA.getRegionNameAsString() + " and "
198           + regionB.getRegionNameAsString()
199           + " from fs because merged region no longer holds references");
200       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionA);
201       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionB);
202       MetaEditor.deleteMergeQualifiers(server.getCatalogTracker(), mergedRegion);
203       return true;
204     }
205     return false;
206   }
207 
208   /**
209    * Run janitorial scan of catalog <code>.META.</code> table looking for
210    * garbage to collect.
211    * @return number of cleaned regions
212    * @throws IOException
213    */
214   int scan() throws IOException {
215     try {
216       if (!alreadyRunning.compareAndSet(false, true)) {
217         return 0;
218       }
219       Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> scanTriple =
220         getMergedRegionsAndSplitParents();
221       int count = scanTriple.getFirst();
222       /**
223        * clean merge regions first
224        */
225       int mergeCleaned = 0;
226       Map<HRegionInfo, Result> mergedRegions = scanTriple.getSecond();
227       for (Map.Entry<HRegionInfo, Result> e : mergedRegions.entrySet()) {
228         HRegionInfo regionA = HRegionInfo.getHRegionInfo(e.getValue(),
229             HConstants.MERGEA_QUALIFIER);
230         HRegionInfo regionB = HRegionInfo.getHRegionInfo(e.getValue(),
231             HConstants.MERGEB_QUALIFIER);
232         if (regionA == null || regionB == null) {
233           LOG.warn("Unexpected references regionA="
234               + (regionA == null ? "null" : regionA.getRegionNameAsString())
235               + ",regionB="
236               + (regionB == null ? "null" : regionB.getRegionNameAsString())
237               + " in merged region " + e.getKey().getRegionNameAsString());
238         } else {
239           if (cleanMergeRegion(e.getKey(), regionA, regionB)) {
240             mergeCleaned++;
241           }
242         }
243       }
244       /**
245        * clean split parents
246        */
247       Map<HRegionInfo, Result> splitParents = scanTriple.getThird();
248 
249       // Now work on our list of found parents. See if any we can clean up.
250       int splitCleaned = 0;
251       // regions whose parents are still around
252       HashSet<String> parentNotCleaned = new HashSet<String>();
253       for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) {
254         if (!parentNotCleaned.contains(e.getKey().getEncodedName()) &&
255             cleanParent(e.getKey(), e.getValue())) {
256           splitCleaned++;
257         } else {
258           // We could not clean the parent, so it's daughters should not be cleaned either (HBASE-6160)
259           PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(e.getValue());
260           parentNotCleaned.add(daughters.getFirst().getEncodedName());
261           parentNotCleaned.add(daughters.getSecond().getEncodedName());
262         }
263       }
264       if ((mergeCleaned + splitCleaned) != 0) {
265         LOG.info("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned
266             + " unreferenced merged region(s) and " + splitCleaned
267             + " unreferenced parent region(s)");
268       } else if (LOG.isDebugEnabled()) {
269         LOG.debug("Scanned " + count + " catalog row(s), gc'd " + mergeCleaned
270             + " unreferenced merged region(s) and " + splitCleaned
271             + " unreferenced parent region(s)");
272       }
273       return mergeCleaned + splitCleaned;
274     } finally {
275       alreadyRunning.set(false);
276     }
277   }
278 
279   /**
280    * Compare HRegionInfos in a way that has split parents sort BEFORE their
281    * daughters.
282    */
283   static class SplitParentFirstComparator implements Comparator<HRegionInfo> {
284     Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
285     @Override
286     public int compare(HRegionInfo left, HRegionInfo right) {
287       // This comparator differs from the one HRegionInfo in that it sorts
288       // parent before daughters.
289       if (left == null) return -1;
290       if (right == null) return 1;
291       // Same table name.
292       int result = Bytes.compareTo(left.getTableName(),
293           right.getTableName());
294       if (result != 0) return result;
295       // Compare start keys.
296       result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
297       if (result != 0) return result;
298       // Compare end keys.
299       result = rowEndKeyComparator.compare(left.getEndKey(), right.getEndKey());
300 
301       return -result; // Flip the result so parent comes first.
302     }
303   }
304 
305   /**
306    * If daughters no longer hold reference to the parents, delete the parent.
307    * @param parent HRegionInfo of split offlined parent
308    * @param rowContent Content of <code>parent</code> row in
309    * <code>metaRegionName</code>
310    * @return True if we removed <code>parent</code> from meta table and from
311    * the filesystem.
312    * @throws IOException
313    */
314   boolean cleanParent(final HRegionInfo parent, Result rowContent)
315   throws IOException {
316     boolean result = false;
317     // Check whether it is a merged region and not clean reference
318     // No necessary to check MERGEB_QUALIFIER because these two qualifiers will
319     // be inserted/deleted together
320     if (rowContent.getValue(HConstants.CATALOG_FAMILY,
321         HConstants.MERGEA_QUALIFIER) != null) {
322       // wait cleaning merge region first
323       return result;
324     }
325     // Run checks on each daughter split.
326     PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(rowContent);
327     Pair<Boolean, Boolean> a = checkDaughterInFs(parent, daughters.getFirst());
328     Pair<Boolean, Boolean> b = checkDaughterInFs(parent, daughters.getSecond());
329     if (hasNoReferences(a) && hasNoReferences(b)) {
330       LOG.debug("Deleting region " + parent.getRegionNameAsString() +
331         " because daughter splits no longer hold references");
332 
333       // This latter regionOffline should not be necessary but is done for now
334       // until we let go of regionserver to master heartbeats.  See HBASE-3368.
335       if (this.services.getAssignmentManager() != null) {
336         // The mock used in testing catalogjanitor returns null for getAssignmnetManager.
337         // Allow for null result out of getAssignmentManager.
338         this.services.getAssignmentManager().regionOffline(parent);
339       }
340       FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
341       LOG.debug("Archiving parent region:" + parent);
342       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, parent);
343       MetaEditor.deleteRegion(this.server.getCatalogTracker(), parent);
344       result = true;
345     }
346     return result;
347   }
348 
349   /**
350    * @param p A pair where the first boolean says whether or not the daughter
351    * region directory exists in the filesystem and then the second boolean says
352    * whether the daughter has references to the parent.
353    * @return True the passed <code>p</code> signifies no references.
354    */
355   private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
356     return !p.getFirst() || !p.getSecond();
357   }
358 
359   /**
360    * Checks if a daughter region -- either splitA or splitB -- still holds
361    * references to parent.
362    * @param parent Parent region
363    * @param daughter Daughter region
364    * @return A pair where the first boolean says whether or not the daughter
365    * region directory exists in the filesystem and then the second boolean says
366    * whether the daughter has references to the parent.
367    * @throws IOException
368    */
369   Pair<Boolean, Boolean> checkDaughterInFs(final HRegionInfo parent, final HRegionInfo daughter)
370   throws IOException {
371     if (daughter == null)  {
372       return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
373     }
374 
375     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
376     Path rootdir = this.services.getMasterFileSystem().getRootDir();
377     Path tabledir = HTableDescriptor.getTableDir(rootdir, daughter.getTableName());
378 
379     HRegionFileSystem regionFs = null;
380     try {
381       regionFs = HRegionFileSystem.openRegionFromFileSystem(
382           this.services.getConfiguration(), fs, tabledir, daughter, true);
383     } catch (IOException e) {
384       LOG.warn("Daughter region does not exist: " + daughter.getEncodedName());
385       return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
386     }
387 
388     boolean references = false;
389     HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTableNameAsString());
390     for (HColumnDescriptor family: parentDescriptor.getFamilies()) {
391       if ((references = regionFs.hasReferences(family.getNameAsString()))) {
392         break;
393       }
394     }
395     return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.valueOf(references));
396   }
397 
398   private HTableDescriptor getTableDescriptor(final String tableName)
399       throws FileNotFoundException, IOException {
400     return this.services.getTableDescriptors().get(tableName);
401   }
402 
403   /**
404    * Checks if the specified region has merge qualifiers, if so, try to clean
405    * them
406    * @param region
407    * @return true if the specified region doesn't have merge qualifier now
408    * @throws IOException
409    */
410   public boolean cleanMergeQualifier(final HRegionInfo region)
411       throws IOException {
412     // Get merge regions if it is a merged region and already has merge
413     // qualifier
414     Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
415         .getRegionsFromMergeQualifier(this.services.getCatalogTracker(),
416             region.getRegionName());
417     if (mergeRegions == null
418         || (mergeRegions.getFirst() == null && mergeRegions.getSecond() == null)) {
419       // It doesn't have merge qualifier, no need to clean
420       return true;
421     }
422     // It shouldn't happen, we must insert/delete these two qualifiers together
423     if (mergeRegions.getFirst() == null || mergeRegions.getSecond() == null) {
424       LOG.error("Merged region " + region.getRegionNameAsString()
425           + " has only one merge qualifier in META.");
426       return false;
427     }
428     return cleanMergeRegion(region, mergeRegions.getFirst(),
429         mergeRegions.getSecond());
430   }
431 }