001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.region;
019
020import static org.apache.hadoop.hbase.HConstants.HREGION_LOGDIR_NAME;
021
022import com.google.errorprone.annotations.RestrictedApi;
023import java.io.IOException;
024import java.util.List;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.FileStatus;
027import org.apache.hadoop.fs.FileSystem;
028import org.apache.hadoop.fs.Path;
029import org.apache.hadoop.hbase.HBaseIOException;
030import org.apache.hadoop.hbase.Server;
031import org.apache.hadoop.hbase.TableName;
032import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
033import org.apache.hadoop.hbase.client.Get;
034import org.apache.hadoop.hbase.client.RegionInfo;
035import org.apache.hadoop.hbase.client.RegionInfoBuilder;
036import org.apache.hadoop.hbase.client.Result;
037import org.apache.hadoop.hbase.client.ResultScanner;
038import org.apache.hadoop.hbase.client.Scan;
039import org.apache.hadoop.hbase.client.TableDescriptor;
040import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
041import org.apache.hadoop.hbase.regionserver.HRegion;
042import org.apache.hadoop.hbase.regionserver.HRegion.FlushResult;
043import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
044import org.apache.hadoop.hbase.regionserver.RegionScanner;
045import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
046import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
047import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
048import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.CommonFSUtils;
051import org.apache.hadoop.hbase.util.FSTableDescriptors;
052import org.apache.hadoop.hbase.util.FSUtils;
053import org.apache.hadoop.hbase.util.HFileArchiveUtil;
054import org.apache.hadoop.hbase.util.RecoverLeaseFSUtils;
055import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
056import org.apache.hadoop.hbase.wal.WAL;
057import org.apache.hadoop.hbase.wal.WALFactory;
058import org.apache.yetus.audience.InterfaceAudience;
059import org.slf4j.Logger;
060import org.slf4j.LoggerFactory;
061
062import org.apache.hbase.thirdparty.com.google.common.math.IntMath;
063
064/**
065 * A region that stores data in a separated directory, which can be used to store master local data.
066 * <p/>
067 * FileSystem layout:
068 *
069 * <pre>
070 * hbase
071 *   |
072 *   --&lt;region dir&gt;
073 *       |
074 *       --data
075 *       |  |
076 *       |  --/&lt;ns&gt/&lt;table&gt/&lt;encoded-region-name&gt; <---- The region data
077 *       |      |
078 *       |      --replay <---- The edits to replay
079 *       |
080 *       --WALs
081 *          |
082 *          --&lt;master-server-name&gt; <---- The WAL dir for active master
083 *          |
084 *          --&lt;master-server-name&gt;-dead <---- The WAL dir for dead master
085 * </pre>
086 *
087 * Notice that, you can use different root file system and WAL file system. Then the above directory
088 * will be on two file systems, the root file system will have the data directory while the WAL
089 * filesystem will have the WALs directory. The archived HFile will be moved to the global HFile
090 * archived directory with the {@link MasterRegionParams#archivedHFileSuffix()} suffix. The archived
091 * WAL will be moved to the global WAL archived directory with the
092 * {@link MasterRegionParams#archivedWalSuffix()} suffix.
093 */
094@InterfaceAudience.Private
095public final class MasterRegion {
096
097  private static final Logger LOG = LoggerFactory.getLogger(MasterRegion.class);
098
099  private static final String REPLAY_EDITS_DIR = "recovered.wals";
100
101  private static final String DEAD_WAL_DIR_SUFFIX = "-dead";
102
103  static final String INITIALIZING_FLAG = ".initializing";
104
105  static final String INITIALIZED_FLAG = ".initialized";
106
107  private static final int REGION_ID = 1;
108
109  private final WALFactory walFactory;
110
111  final HRegion region;
112
113  final MasterRegionFlusherAndCompactor flusherAndCompactor;
114
115  private MasterRegionWALRoller walRoller;
116
117  private MasterRegion(HRegion region, WALFactory walFactory,
118    MasterRegionFlusherAndCompactor flusherAndCompactor, MasterRegionWALRoller walRoller) {
119    this.region = region;
120    this.walFactory = walFactory;
121    this.flusherAndCompactor = flusherAndCompactor;
122    this.walRoller = walRoller;
123  }
124
125  private void closeRegion(boolean abort) {
126    try {
127      region.close(abort);
128    } catch (IOException e) {
129      LOG.warn("Failed to close region", e);
130    }
131  }
132
133  private void shutdownWAL() {
134    try {
135      walFactory.shutdown();
136    } catch (IOException e) {
137      LOG.warn("Failed to shutdown WAL", e);
138    }
139  }
140
141  public void update(UpdateMasterRegion action) throws IOException {
142    action.update(region);
143    flusherAndCompactor.onUpdate();
144  }
145
146  public Result get(Get get) throws IOException {
147    return region.get(get);
148  }
149
150  public ResultScanner getScanner(Scan scan) throws IOException {
151    return new RegionScannerAsResultScanner(region.getScanner(scan));
152  }
153
154  public RegionScanner getRegionScanner(Scan scan) throws IOException {
155    return region.getScanner(scan);
156  }
157
158  public FlushResult flush(boolean force) throws IOException {
159    flusherAndCompactor.resetChangesAfterLastFlush();
160    FlushResult flushResult = region.flush(force);
161    flusherAndCompactor.recordLastFlushTime();
162    return flushResult;
163  }
164
165  @RestrictedApi(explanation = "Should only be called in tests", link = "",
166      allowedOnPath = ".*/src/test/.*")
167  public void requestRollAll() {
168    walRoller.requestRollAll();
169  }
170
171  @RestrictedApi(explanation = "Should only be called in tests", link = "",
172      allowedOnPath = ".*/src/test/.*")
173  public void waitUntilWalRollFinished() throws InterruptedException {
174    walRoller.waitUntilWalRollFinished();
175  }
176
177  public void close(boolean abort) {
178    LOG.info("Closing local region {}, isAbort={}", region.getRegionInfo(), abort);
179    if (flusherAndCompactor != null) {
180      flusherAndCompactor.close();
181    }
182    // if abort, we shutdown wal first to fail the ongoing updates to the region, and then close the
183    // region, otherwise there will be dead lock.
184    if (abort) {
185      shutdownWAL();
186      closeRegion(true);
187    } else {
188      closeRegion(false);
189      shutdownWAL();
190    }
191
192    if (walRoller != null) {
193      walRoller.close();
194    }
195  }
196
197  private static WAL createWAL(WALFactory walFactory, MasterRegionWALRoller walRoller,
198    String serverName, FileSystem walFs, Path walRootDir, RegionInfo regionInfo)
199    throws IOException {
200    String logName = AbstractFSWALProvider.getWALDirectoryName(serverName);
201    Path walDir = new Path(walRootDir, logName);
202    LOG.debug("WALDir={}", walDir);
203    if (walFs.exists(walDir)) {
204      throw new HBaseIOException(
205        "Already created wal directory at " + walDir + " for local region " + regionInfo);
206    }
207    if (!walFs.mkdirs(walDir)) {
208      throw new IOException(
209        "Can not create wal directory " + walDir + " for local region " + regionInfo);
210    }
211    WAL wal = walFactory.getWAL(regionInfo);
212    walRoller.addWAL(wal);
213    return wal;
214  }
215
216  private static HRegion bootstrap(Configuration conf, TableDescriptor td, FileSystem fs,
217    Path rootDir, FileSystem walFs, Path walRootDir, WALFactory walFactory,
218    MasterRegionWALRoller walRoller, String serverName, boolean touchInitializingFlag)
219    throws IOException {
220    TableName tn = td.getTableName();
221    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tn).setRegionId(REGION_ID).build();
222    Path tableDir = CommonFSUtils.getTableDir(rootDir, tn);
223    // persist table descriptor
224    FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, td, true);
225    HRegion.createHRegion(conf, regionInfo, fs, tableDir, td).close();
226    Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG);
227    if (!fs.mkdirs(initializedFlag)) {
228      throw new IOException("Can not touch initialized flag: " + initializedFlag);
229    }
230    Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG);
231    if (!fs.delete(initializingFlag, true)) {
232      LOG.warn("failed to clean up initializing flag: " + initializingFlag);
233    }
234    WAL wal = createWAL(walFactory, walRoller, serverName, walFs, walRootDir, regionInfo);
235    return HRegion.openHRegionFromTableDir(conf, fs, tableDir, regionInfo, td, wal, null, null);
236  }
237
238  private static RegionInfo loadRegionInfo(FileSystem fs, Path tableDir) throws IOException {
239    Path regionDir =
240      fs.listStatus(tableDir, p -> RegionInfo.isEncodedRegionName(Bytes.toBytes(p.getName())))[0]
241        .getPath();
242    return HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
243  }
244
245  private static HRegion open(Configuration conf, TableDescriptor td, RegionInfo regionInfo,
246    FileSystem fs, Path rootDir, FileSystem walFs, Path walRootDir, WALFactory walFactory,
247    MasterRegionWALRoller walRoller, String serverName) throws IOException {
248    Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
249    Path walRegionDir = FSUtils.getRegionDirFromRootDir(walRootDir, regionInfo);
250    Path replayEditsDir = new Path(walRegionDir, REPLAY_EDITS_DIR);
251    if (!walFs.exists(replayEditsDir) && !walFs.mkdirs(replayEditsDir)) {
252      throw new IOException("Failed to create replay directory: " + replayEditsDir);
253    }
254
255    // Replay any WALs for the Master Region before opening it.
256    Path walsDir = new Path(walRootDir, HREGION_LOGDIR_NAME);
257    // In open(...), we expect that the WAL directory for the MasterRegion to already exist.
258    // This is in contrast to bootstrap() where we create the MasterRegion data and WAL dir.
259    // However, it's possible that users directly remove the WAL directory. We expect walsDir
260    // to always exist in normal situations, but we should guard against users changing the
261    // filesystem outside of HBase's line of sight.
262    if (walFs.exists(walsDir)) {
263      replayWALs(conf, walFs, walRootDir, walsDir, regionInfo, serverName, replayEditsDir);
264    } else {
265      LOG.error(
266        "UNEXPECTED: WAL directory for MasterRegion is missing." + " {} is unexpectedly missing.",
267        walsDir);
268    }
269
270    // Create a new WAL
271    WAL wal = createWAL(walFactory, walRoller, serverName, walFs, walRootDir, regionInfo);
272    conf.set(HRegion.SPECIAL_RECOVERED_EDITS_DIR,
273      replayEditsDir.makeQualified(walFs.getUri(), walFs.getWorkingDirectory()).toString());
274    // we do not do WAL splitting here so it is possible to have uncleanly closed WAL files, so we
275    // need to ignore EOFException.
276    conf.setBoolean(HRegion.RECOVERED_EDITS_IGNORE_EOF, true);
277    return HRegion.openHRegionFromTableDir(conf, fs, tableDir, regionInfo, td, wal, null, null);
278  }
279
280  private static void replayWALs(Configuration conf, FileSystem walFs, Path walRootDir,
281    Path walsDir, RegionInfo regionInfo, String serverName, Path replayEditsDir)
282    throws IOException {
283    for (FileStatus walDir : walFs.listStatus(walsDir)) {
284      if (!walDir.isDirectory()) {
285        continue;
286      }
287      if (walDir.getPath().getName().startsWith(serverName)) {
288        LOG.warn("This should not happen in real production as we have not created our WAL "
289          + "directory yet, ignore if you are running a local region related UT");
290      }
291      Path deadWALDir;
292      if (!walDir.getPath().getName().endsWith(DEAD_WAL_DIR_SUFFIX)) {
293        deadWALDir =
294          new Path(walDir.getPath().getParent(), walDir.getPath().getName() + DEAD_WAL_DIR_SUFFIX);
295        if (!walFs.rename(walDir.getPath(), deadWALDir)) {
296          throw new IOException("Can not rename " + walDir + " to " + deadWALDir
297            + " when recovering lease of proc store");
298        }
299        LOG.info("Renamed {} to {} as it is dead", walDir.getPath(), deadWALDir);
300      } else {
301        deadWALDir = walDir.getPath();
302        LOG.info("{} is already marked as dead", deadWALDir);
303      }
304      for (FileStatus walFile : walFs.listStatus(deadWALDir)) {
305        Path replayEditsFile = new Path(replayEditsDir, walFile.getPath().getName());
306        RecoverLeaseFSUtils.recoverFileLease(walFs, walFile.getPath(), conf);
307        if (!walFs.rename(walFile.getPath(), replayEditsFile)) {
308          throw new IOException("Can not rename " + walFile.getPath() + " to " + replayEditsFile
309            + " when recovering lease for local region");
310        }
311        LOG.info("Renamed {} to {}", walFile.getPath(), replayEditsFile);
312      }
313      LOG.info("Delete empty local region wal dir {}", deadWALDir);
314      walFs.delete(deadWALDir, true);
315    }
316  }
317
318  private static void tryMigrate(Configuration conf, FileSystem fs, Path tableDir,
319    RegionInfo regionInfo, TableDescriptor oldTd, TableDescriptor newTd) throws IOException {
320    Class<? extends StoreFileTracker> oldSft =
321      StoreFileTrackerFactory.getTrackerClass(oldTd.getValue(StoreFileTrackerFactory.TRACKER_IMPL));
322    Class<? extends StoreFileTracker> newSft =
323      StoreFileTrackerFactory.getTrackerClass(newTd.getValue(StoreFileTrackerFactory.TRACKER_IMPL));
324    if (oldSft.equals(newSft)) {
325      LOG.debug("old store file tracker {} is the same with new store file tracker, skip migration",
326        StoreFileTrackerFactory.getStoreFileTrackerName(oldSft));
327      if (!oldTd.equals(newTd)) {
328        // we may change other things such as adding a new family, so here we still need to persist
329        // the new table descriptor
330        LOG.info("Update table descriptor from {} to {}", oldTd, newTd);
331        FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, newTd, true);
332      }
333      return;
334    }
335    LOG.info("Migrate store file tracker from {} to {}", oldSft.getSimpleName(),
336      newSft.getSimpleName());
337    HRegionFileSystem hfs =
338      HRegionFileSystem.openRegionFromFileSystem(conf, fs, tableDir, regionInfo, false);
339    for (ColumnFamilyDescriptor oldCfd : oldTd.getColumnFamilies()) {
340      StoreFileTracker oldTracker = StoreFileTrackerFactory.create(conf, oldTd, oldCfd, hfs);
341      StoreFileTracker newTracker = StoreFileTrackerFactory.create(conf, oldTd, oldCfd, hfs);
342      List<StoreFileInfo> files = oldTracker.load();
343      LOG.debug("Store file list for {}: {}", oldCfd.getNameAsString(), files);
344      newTracker.set(oldTracker.load());
345    }
346    // persist the new table descriptor after migration
347    LOG.info("Update table descriptor from {} to {}", oldTd, newTd);
348    FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, newTd, true);
349  }
350
351  public static MasterRegion create(MasterRegionParams params) throws IOException {
352    TableDescriptor td = params.tableDescriptor();
353    LOG.info("Create or load local region for table " + td);
354    Server server = params.server();
355    Configuration baseConf = server.getConfiguration();
356    FileSystem fs = CommonFSUtils.getRootDirFileSystem(baseConf);
357    FileSystem walFs = CommonFSUtils.getWALFileSystem(baseConf);
358    Path globalRootDir = CommonFSUtils.getRootDir(baseConf);
359    Path globalWALRootDir = CommonFSUtils.getWALRootDir(baseConf);
360    Path rootDir = new Path(globalRootDir, params.regionDirName());
361    Path walRootDir = new Path(globalWALRootDir, params.regionDirName());
362    // we will override some configurations so create a new one.
363    Configuration conf = new Configuration(baseConf);
364    CommonFSUtils.setRootDir(conf, rootDir);
365    CommonFSUtils.setWALRootDir(conf, walRootDir);
366    MasterRegionFlusherAndCompactor.setupConf(conf, params.flushSize(), params.flushPerChanges(),
367      params.flushIntervalMs());
368    conf.setInt(AbstractFSWAL.MAX_LOGS, params.maxWals());
369    if (params.useHsync() != null) {
370      conf.setBoolean(HRegion.WAL_HSYNC_CONF_KEY, params.useHsync());
371    }
372    if (params.useMetaCellComparator() != null) {
373      conf.setBoolean(HRegion.USE_META_CELL_COMPARATOR, params.useMetaCellComparator());
374    }
375    conf.setInt(AbstractFSWAL.RING_BUFFER_SLOT_COUNT,
376      IntMath.ceilingPowerOfTwo(params.ringBufferSlotCount()));
377
378    MasterRegionWALRoller walRoller = MasterRegionWALRoller.create(
379      td.getTableName() + "-WAL-Roller", conf, server, walFs, walRootDir, globalWALRootDir,
380      params.archivedWalSuffix(), params.rollPeriodMs(), params.flushSize());
381    walRoller.start();
382
383    WALFactory walFactory = new WALFactory(conf, server.getServerName(), server);
384    Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
385    Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG);
386    Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG);
387    HRegion region;
388    if (!fs.exists(tableDir)) {
389      // bootstrap, no doubt
390      if (!fs.mkdirs(initializedFlag)) {
391        throw new IOException("Can not touch initialized flag");
392      }
393      region = bootstrap(conf, td, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
394        server.getServerName().toString(), true);
395    } else {
396      if (!fs.exists(initializedFlag)) {
397        if (!fs.exists(initializingFlag)) {
398          // should be old style, where we do not have the initializing or initialized file, persist
399          // the table descriptor, touch the initialized flag and then open the region.
400          // the store file tracker must be DEFAULT
401          LOG.info("No {} or {} file, try upgrading", INITIALIZING_FLAG, INITIALIZED_FLAG);
402          TableDescriptor oldTd =
403            TableDescriptorBuilder.newBuilder(td).setValue(StoreFileTrackerFactory.TRACKER_IMPL,
404              StoreFileTrackerFactory.Trackers.DEFAULT.name()).build();
405          FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, oldTd, true);
406          if (!fs.mkdirs(initializedFlag)) {
407            throw new IOException("Can not touch initialized flag: " + initializedFlag);
408          }
409          RegionInfo regionInfo = loadRegionInfo(fs, tableDir);
410          tryMigrate(conf, fs, tableDir, regionInfo, oldTd, td);
411          region = open(conf, td, regionInfo, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
412            server.getServerName().toString());
413        } else {
414          // delete all contents besides the initializing flag, here we can make sure tableDir
415          // exists(unless someone delete it manually...), so we do not do null check here.
416          for (FileStatus status : fs.listStatus(tableDir)) {
417            if (!status.getPath().getName().equals(INITIALIZING_FLAG)) {
418              fs.delete(status.getPath(), true);
419            }
420          }
421          region = bootstrap(conf, td, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
422            server.getServerName().toString(), false);
423        }
424      } else {
425        if (fs.exists(initializingFlag) && !fs.delete(initializingFlag, true)) {
426          LOG.warn("failed to clean up initializing flag: " + initializingFlag);
427        }
428        // open it, make sure to load the table descriptor from fs
429        TableDescriptor oldTd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
430        RegionInfo regionInfo = loadRegionInfo(fs, tableDir);
431        tryMigrate(conf, fs, tableDir, regionInfo, oldTd, td);
432        region = open(conf, td, regionInfo, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
433          server.getServerName().toString());
434      }
435    }
436
437    Path globalArchiveDir = HFileArchiveUtil.getArchivePath(baseConf);
438    MasterRegionFlusherAndCompactor flusherAndCompactor = new MasterRegionFlusherAndCompactor(conf,
439      server, region, params.flushSize(), params.flushPerChanges(), params.flushIntervalMs(),
440      params.compactMin(), globalArchiveDir, params.archivedHFileSuffix());
441    walRoller.setFlusherAndCompactor(flusherAndCompactor);
442    Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
443    if (!fs.mkdirs(archiveDir)) {
444      LOG.warn("Failed to create archive directory {}. Usually this should not happen but it will"
445        + " be created again when we actually archive the hfiles later, so continue", archiveDir);
446    }
447    return new MasterRegion(region, walFactory, flusherAndCompactor, walRoller);
448  }
449}