View Javadoc

1   /**
2    * Copyright 2008 The Apache Software Foundation
3    *
4    * Licensed to the Apache Software Foundation (ASF) under one
5    * or more contributor license agreements.  See the NOTICE file
6    * distributed with this work for additional information
7    * regarding copyright ownership.  The ASF licenses this file
8    * to you under the Apache License, Version 2.0 (the
9    * "License"); you may not use this file except in compliance
10   * with the License.  You may obtain a copy of the License at
11   *
12   *     http://www.apache.org/licenses/LICENSE-2.0
13   *
14   * Unless required by applicable law or agreed to in writing, software
15   * distributed under the License is distributed on an "AS IS" BASIS,
16   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17   * See the License for the specific language governing permissions and
18   * limitations under the License.
19   */
20  package org.apache.hadoop.hbase.master;
21  
22  import java.io.FileNotFoundException;
23  import java.io.IOException;
24  import java.util.Comparator;
25  import java.util.HashSet;
26  import java.util.Map;
27  import java.util.TreeMap;
28  import java.util.concurrent.atomic.AtomicInteger;
29  
30  import org.apache.commons.logging.Log;
31  import org.apache.commons.logging.LogFactory;
32  import org.apache.hadoop.fs.FileStatus;
33  import org.apache.hadoop.fs.FileSystem;
34  import org.apache.hadoop.fs.Path;
35  import org.apache.hadoop.fs.PathFilter;
36  import org.apache.hadoop.hbase.Chore;
37  import org.apache.hadoop.hbase.HColumnDescriptor;
38  import org.apache.hadoop.hbase.HConstants;
39  import org.apache.hadoop.hbase.HRegionInfo;
40  import org.apache.hadoop.hbase.HTableDescriptor;
41  import org.apache.hadoop.hbase.Server;
42  import org.apache.hadoop.hbase.backup.HFileArchiver;
43  import org.apache.hadoop.hbase.catalog.MetaEditor;
44  import org.apache.hadoop.hbase.client.MetaScanner;
45  import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
46  import org.apache.hadoop.hbase.client.Result;
47  import org.apache.hadoop.hbase.regionserver.Store;
48  import org.apache.hadoop.hbase.regionserver.StoreFile;
49  import org.apache.hadoop.hbase.util.Bytes;
50  import org.apache.hadoop.hbase.util.FSUtils;
51  import org.apache.hadoop.hbase.util.Pair;
52  import org.apache.hadoop.hbase.util.Writables;
53  
54  /**
55   * A janitor for the catalog tables.  Scans the <code>.META.</code> catalog
56   * table on a period looking for unused regions to garbage collect.
57   */
58  class CatalogJanitor extends Chore {
59    private static final Log LOG = LogFactory.getLog(CatalogJanitor.class.getName());
60    private final Server server;
61    private final MasterServices services;
62    private boolean enabled = true;
63  
64    CatalogJanitor(final Server server, final MasterServices services) {
65      super(server.getServerName() + "-CatalogJanitor",
66        server.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000),
67        server);
68      this.server = server;
69      this.services = services;
70    }
71  
72    @Override
73    protected boolean initialChore() {
74      try {
75        if (this.enabled) scan();
76      } catch (IOException e) {
77        LOG.warn("Failed initial scan of catalog table", e);
78        return false;
79      }
80      return true;
81    }
82  
83    /**
84     * @param enabled
85     */
86    public void setEnabled(final boolean enabled) {
87      this.enabled = enabled;
88    }
89  
90    @Override
91    protected void chore() {
92      try {
93        scan();
94      } catch (IOException e) {
95        LOG.warn("Failed scan of catalog table", e);
96      }
97    }
98  
99    /**
100    * Scans META and returns a number of scanned rows, and
101    * an ordered map of split parents.
102    */
103   Pair<Integer, Map<HRegionInfo, Result>> getSplitParents() throws IOException {
104     // TODO: Only works with single .META. region currently.  Fix.
105     final AtomicInteger count = new AtomicInteger(0);
106     // Keep Map of found split parents.  There are candidates for cleanup.
107     // Use a comparator that has split parents come before its daughters.
108     final Map<HRegionInfo, Result> splitParents =
109       new TreeMap<HRegionInfo, Result>(new SplitParentFirstComparator());
110     // This visitor collects split parents and counts rows in the .META. table
111 
112     MetaScannerVisitor visitor = new MetaScanner.BlockingMetaScannerVisitor(server.getConfiguration()) {
113       @Override
114       public boolean processRowInternal(Result r) throws IOException {
115         if (r == null || r.isEmpty()) return true;
116         count.incrementAndGet();
117         HRegionInfo info = getHRegionInfo(r);
118         if (info == null) return true; // Keep scanning
119         if (info.isSplitParent()) splitParents.put(info, r);
120         // Returning true means "keep scanning"
121         return true;
122       }
123     };
124 
125     // Run full scan of .META. catalog table passing in our custom visitor
126     MetaScanner.metaScan(server.getConfiguration(), visitor);
127 
128     return new Pair<Integer, Map<HRegionInfo, Result>>(count.get(), splitParents);
129   }
130 
131   /**
132    * Run janitorial scan of catalog <code>.META.</code> table looking for
133    * garbage to collect.
134    * @throws IOException
135    */
136   int scan() throws IOException {
137     Pair<Integer, Map<HRegionInfo, Result>> pair = getSplitParents();
138     int count = pair.getFirst();
139     Map<HRegionInfo, Result> splitParents = pair.getSecond();
140 
141     // Now work on our list of found parents. See if any we can clean up.
142     int cleaned = 0;
143     HashSet<String> parentNotCleaned = new HashSet<String>(); //regions whose parents are still around
144     for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) {
145       if (!parentNotCleaned.contains(e.getKey().getEncodedName()) && cleanParent(e.getKey(), e.getValue())) {
146         cleaned++;
147       } else {
148         // We could not clean the parent, so it's daughters should not be cleaned either (HBASE-6160)
149         parentNotCleaned.add(getDaughterRegionInfo(
150               e.getValue(), HConstants.SPLITA_QUALIFIER).getEncodedName());
151         parentNotCleaned.add(getDaughterRegionInfo(
152               e.getValue(), HConstants.SPLITB_QUALIFIER).getEncodedName());
153       }
154     }
155     if (cleaned != 0) {
156       LOG.info("Scanned " + count + " catalog row(s) and gc'd " + cleaned +
157         " unreferenced parent region(s)");
158     } else if (LOG.isDebugEnabled()) {
159       LOG.debug("Scanned " + count + " catalog row(s) and gc'd " + cleaned +
160       " unreferenced parent region(s)");
161     }
162     return cleaned;
163   }
164 
165   /**
166    * Compare HRegionInfos in a way that has split parents sort BEFORE their
167    * daughters.
168    */
169   static class SplitParentFirstComparator implements Comparator<HRegionInfo> {
170     Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();
171     @Override
172     public int compare(HRegionInfo left, HRegionInfo right) {
173       // This comparator differs from the one HRegionInfo in that it sorts
174       // parent before daughters.
175       if (left == null) return -1;
176       if (right == null) return 1;
177       // Same table name.
178       int result = Bytes.compareTo(left.getTableName(),
179           right.getTableName());
180       if (result != 0) return result;
181       // Compare start keys.
182       result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
183       if (result != 0) return result;
184       // Compare end keys.
185       result = rowEndKeyComparator.compare(left.getEndKey(), right.getEndKey());
186 
187       return -result; // Flip the result so parent comes first.
188     }
189   }
190 
191   /**
192    * Get HRegionInfo from passed Map of row values.
193    * @param result Map to do lookup in.
194    * @return Null if not found (and logs fact that expected COL_REGIONINFO
195    * was missing) else deserialized {@link HRegionInfo}
196    * @throws IOException
197    */
198   static HRegionInfo getHRegionInfo(final Result result)
199   throws IOException {
200     byte [] bytes =
201       result.getValue(HConstants.CATALOG_FAMILY, HConstants.REGIONINFO_QUALIFIER);
202     if (bytes == null) {
203       LOG.warn("REGIONINFO_QUALIFIER is empty in " + result);
204       return null;
205     }
206     return Writables.getHRegionInfo(bytes);
207   }
208 
209   /**
210    * If daughters no longer hold reference to the parents, delete the parent.
211    * @param server HRegionInterface of meta server to talk to
212    * @param parent HRegionInfo of split offlined parent
213    * @param rowContent Content of <code>parent</code> row in
214    * <code>metaRegionName</code>
215    * @return True if we removed <code>parent</code> from meta table and from
216    * the filesystem.
217    * @throws IOException
218    */
219   boolean cleanParent(final HRegionInfo parent, Result rowContent)
220   throws IOException {
221     boolean result = false;
222     // Run checks on each daughter split.
223     HRegionInfo a_region = getDaughterRegionInfo(rowContent, HConstants.SPLITA_QUALIFIER);
224     HRegionInfo b_region = getDaughterRegionInfo(rowContent, HConstants.SPLITB_QUALIFIER);
225     Pair<Boolean, Boolean> a =
226       checkDaughterInFs(parent, a_region, HConstants.SPLITA_QUALIFIER);
227     Pair<Boolean, Boolean> b =
228       checkDaughterInFs(parent, b_region, HConstants.SPLITB_QUALIFIER);
229     if (hasNoReferences(a) && hasNoReferences(b)) {
230       LOG.debug("Deleting region " + parent.getRegionNameAsString() +
231         " because daughter splits no longer hold references");
232 
233       // This latter regionOffline should not be necessary but is done for now
234       // until we let go of regionserver to master heartbeats.  See HBASE-3368.
235       if (this.services.getAssignmentManager() != null) {
236         // The mock used in testing catalogjanitor returns null for getAssignmnetManager.
237         // Allow for null result out of getAssignmentManager.
238         this.services.getAssignmentManager().regionOffline(parent);
239       }
240       FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
241       HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, parent);
242       MetaEditor.deleteRegion(this.server.getCatalogTracker(), parent);
243       result = true;
244     }
245     return result;
246   }
247 
248   /**
249    * @param p A pair where the first boolean says whether or not the daughter
250    * region directory exists in the filesystem and then the second boolean says
251    * whether the daughter has references to the parent.
252    * @return True the passed <code>p</code> signifies no references.
253    */
254   private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
255     return !p.getFirst() || !p.getSecond();
256   }
257 
258   /**
259    * Get daughter HRegionInfo out of parent info:splitA/info:splitB columns.
260    * @param result
261    * @param which Whether "info:splitA" or "info:splitB" column
262    * @return Deserialized content of the info:splitA or info:splitB as a
263    * HRegionInfo
264    * @throws IOException
265    */
266   private HRegionInfo getDaughterRegionInfo(final Result result,
267     final byte [] which)
268   throws IOException {
269     byte [] bytes = result.getValue(HConstants.CATALOG_FAMILY, which);
270     return Writables.getHRegionInfoOrNull(bytes);
271   }
272 
273   /**
274    * Checks if a daughter region -- either splitA or splitB -- still holds
275    * references to parent.
276    * @param parent Parent region name.
277    * @param split Which column family.
278    * @param qualifier Which of the daughters to look at, splitA or splitB.
279    * @return A pair where the first boolean says whether or not the daughter
280    * region directory exists in the filesystem and then the second boolean says
281    * whether the daughter has references to the parent.
282    * @throws IOException
283    */
284   Pair<Boolean, Boolean> checkDaughterInFs(final HRegionInfo parent,
285     final HRegionInfo split,
286     final byte [] qualifier)
287   throws IOException {
288     boolean references = false;
289     boolean exists = false;
290     if (split == null)  {
291       return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
292     }
293     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
294     Path rootdir = this.services.getMasterFileSystem().getRootDir();
295     Path tabledir = new Path(rootdir, split.getTableNameAsString());
296     Path regiondir = new Path(tabledir, split.getEncodedName());
297     exists = fs.exists(regiondir);
298     if (!exists) {
299       LOG.warn("Daughter regiondir does not exist: " + regiondir.toString());
300       return new Pair<Boolean, Boolean>(exists, Boolean.FALSE);
301     }
302     HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTableName());
303 
304     for (HColumnDescriptor family: parentDescriptor.getFamilies()) {
305       Path p = Store.getStoreHomedir(tabledir, split.getEncodedName(),
306         family.getName());
307       if (!fs.exists(p)) continue;
308       // Look for reference files.  Call listStatus with anonymous instance of PathFilter.
309       FileStatus [] ps = FSUtils.listStatus(fs, p,
310           new PathFilter () {
311             public boolean accept(Path path) {
312               return StoreFile.isReference(path);
313             }
314           }
315       );
316 
317       if (ps != null && ps.length > 0) {
318         references = true;
319         break;
320       }
321     }
322     return new Pair<Boolean, Boolean>(Boolean.valueOf(exists),
323       Boolean.valueOf(references));
324   }
325 
326   private HTableDescriptor getTableDescriptor(byte[] tableName)
327   throws FileNotFoundException, IOException {
328     return this.services.getTableDescriptors().get(Bytes.toString(tableName));
329   }
330 }