001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.region;
019
020import static org.apache.hadoop.hbase.HConstants.HREGION_LOGDIR_NAME;
021
022import com.google.errorprone.annotations.RestrictedApi;
023import java.io.IOException;
024import java.util.List;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.FileStatus;
027import org.apache.hadoop.fs.FileSystem;
028import org.apache.hadoop.fs.Path;
029import org.apache.hadoop.hbase.HBaseIOException;
030import org.apache.hadoop.hbase.Server;
031import org.apache.hadoop.hbase.TableName;
032import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
033import org.apache.hadoop.hbase.client.Get;
034import org.apache.hadoop.hbase.client.RegionInfo;
035import org.apache.hadoop.hbase.client.RegionInfoBuilder;
036import org.apache.hadoop.hbase.client.Result;
037import org.apache.hadoop.hbase.client.ResultScanner;
038import org.apache.hadoop.hbase.client.Scan;
039import org.apache.hadoop.hbase.client.TableDescriptor;
040import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
041import org.apache.hadoop.hbase.regionserver.HRegion;
042import org.apache.hadoop.hbase.regionserver.HRegion.FlushResult;
043import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
044import org.apache.hadoop.hbase.regionserver.RegionScanner;
045import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
046import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
047import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
048import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.CommonFSUtils;
051import org.apache.hadoop.hbase.util.FSTableDescriptors;
052import org.apache.hadoop.hbase.util.FSUtils;
053import org.apache.hadoop.hbase.util.HFileArchiveUtil;
054import org.apache.hadoop.hbase.util.RecoverLeaseFSUtils;
055import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
056import org.apache.hadoop.hbase.wal.WAL;
057import org.apache.hadoop.hbase.wal.WALFactory;
058import org.apache.yetus.audience.InterfaceAudience;
059import org.slf4j.Logger;
060import org.slf4j.LoggerFactory;
061
062import org.apache.hbase.thirdparty.com.google.common.math.IntMath;
063
064/**
065 * A region that stores data in a separated directory, which can be used to store master local data.
066 * <p/>
067 * FileSystem layout:
068 *
069 * <pre>
070 * hbase
071 *   |
072 *   --&lt;region dir&gt;
073 *       |
074 *       --data
075 *       |  |
 *       |  --/&lt;ns&gt;/&lt;table&gt;/&lt;encoded-region-name&gt; <---- The region data
077 *       |      |
078 *       |      --replay <---- The edits to replay
079 *       |
080 *       --WALs
081 *          |
082 *          --&lt;master-server-name&gt; <---- The WAL dir for active master
083 *          |
084 *          --&lt;master-server-name&gt;-dead <---- The WAL dir for dead master
085 * </pre>
086 *
087 * Notice that, you can use different root file system and WAL file system. Then the above directory
088 * will be on two file systems, the root file system will have the data directory while the WAL
089 * filesystem will have the WALs directory. The archived HFile will be moved to the global HFile
090 * archived directory with the {@link MasterRegionParams#archivedHFileSuffix()} suffix. The archived
091 * WAL will be moved to the global WAL archived directory with the
092 * {@link MasterRegionParams#archivedWalSuffix()} suffix.
093 */
094@InterfaceAudience.Private
095public final class MasterRegion {
096
097  private static final Logger LOG = LoggerFactory.getLogger(MasterRegion.class);
098
099  private static final String REPLAY_EDITS_DIR = "recovered.wals";
100
101  private static final String DEAD_WAL_DIR_SUFFIX = "-dead";
102
103  static final String INITIALIZING_FLAG = ".initializing";
104
105  static final String INITIALIZED_FLAG = ".initialized";
106
107  private static final int REGION_ID = 1;
108
109  private final WALFactory walFactory;
110
111  final HRegion region;
112
113  final MasterRegionFlusherAndCompactor flusherAndCompactor;
114
115  private MasterRegionWALRoller walRoller;
116
117  private MasterRegion(HRegion region, WALFactory walFactory,
118    MasterRegionFlusherAndCompactor flusherAndCompactor, MasterRegionWALRoller walRoller) {
119    this.region = region;
120    this.walFactory = walFactory;
121    this.flusherAndCompactor = flusherAndCompactor;
122    this.walRoller = walRoller;
123  }
124
125  private void closeRegion(boolean abort) {
126    try {
127      region.close(abort);
128    } catch (IOException e) {
129      LOG.warn("Failed to close region", e);
130    }
131  }
132
133  private void shutdownWAL() {
134    try {
135      walFactory.shutdown();
136    } catch (IOException e) {
137      LOG.warn("Failed to shutdown WAL", e);
138    }
139  }
140
141  public void update(UpdateMasterRegion action) throws IOException {
142    action.update(region);
143    flusherAndCompactor.onUpdate();
144  }
145
146  /**
147   * The design for master region is to only load all the data to memory at once when starting, so
148   * typically you should not use the get method to get a single row of data at runtime.
149   */
150  @RestrictedApi(explanation = "Should only be called in tests", link = "",
151      allowedOnPath = ".*/src/test/.*")
152  public Result get(Get get) throws IOException {
153    return region.get(get);
154  }
155
156  public ResultScanner getScanner(Scan scan) throws IOException {
157    return new RegionScannerAsResultScanner(region.getScanner(scan));
158  }
159
160  public RegionScanner getRegionScanner(Scan scan) throws IOException {
161    return region.getScanner(scan);
162  }
163
164  public FlushResult flush(boolean force) throws IOException {
165    flusherAndCompactor.resetChangesAfterLastFlush();
166    FlushResult flushResult = region.flush(force);
167    flusherAndCompactor.recordLastFlushTime();
168    return flushResult;
169  }
170
171  @RestrictedApi(explanation = "Should only be called in tests", link = "",
172      allowedOnPath = ".*/src/test/.*")
173  public void requestRollAll() {
174    walRoller.requestRollAll();
175  }
176
177  @RestrictedApi(explanation = "Should only be called in tests", link = "",
178      allowedOnPath = ".*/src/test/.*")
179  public void waitUntilWalRollFinished() throws InterruptedException {
180    walRoller.waitUntilWalRollFinished();
181  }
182
183  public void close(boolean abort) {
184    LOG.info("Closing local region {}, isAbort={}", region.getRegionInfo(), abort);
185    if (flusherAndCompactor != null) {
186      flusherAndCompactor.close();
187    }
188    // if abort, we shutdown wal first to fail the ongoing updates to the region, and then close the
189    // region, otherwise there will be dead lock.
190    if (abort) {
191      shutdownWAL();
192      closeRegion(true);
193    } else {
194      closeRegion(false);
195      shutdownWAL();
196    }
197
198    if (walRoller != null) {
199      walRoller.close();
200    }
201  }
202
203  private static WAL createWAL(WALFactory walFactory, MasterRegionWALRoller walRoller,
204    String serverName, FileSystem walFs, Path walRootDir, RegionInfo regionInfo)
205    throws IOException {
206    String logName = AbstractFSWALProvider.getWALDirectoryName(serverName);
207    Path walDir = new Path(walRootDir, logName);
208    LOG.debug("WALDir={}", walDir);
209    if (walFs.exists(walDir)) {
210      throw new HBaseIOException(
211        "Already created wal directory at " + walDir + " for local region " + regionInfo);
212    }
213    if (!walFs.mkdirs(walDir)) {
214      throw new IOException(
215        "Can not create wal directory " + walDir + " for local region " + regionInfo);
216    }
217    WAL wal = walFactory.getWAL(regionInfo);
218    walRoller.addWAL(wal);
219    return wal;
220  }
221
222  private static HRegion bootstrap(Configuration conf, TableDescriptor td, FileSystem fs,
223    Path rootDir, FileSystem walFs, Path walRootDir, WALFactory walFactory,
224    MasterRegionWALRoller walRoller, String serverName, boolean touchInitializingFlag)
225    throws IOException {
226    TableName tn = td.getTableName();
227    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tn).setRegionId(REGION_ID).build();
228    Path tableDir = CommonFSUtils.getTableDir(rootDir, tn);
229    // persist table descriptor
230    FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, td, true);
231    HRegion.createHRegion(conf, regionInfo, fs, tableDir, td).close();
232    Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG);
233    if (!fs.mkdirs(initializedFlag)) {
234      throw new IOException("Can not touch initialized flag: " + initializedFlag);
235    }
236    Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG);
237    if (!fs.delete(initializingFlag, true)) {
238      LOG.warn("failed to clean up initializing flag: " + initializingFlag);
239    }
240    WAL wal = createWAL(walFactory, walRoller, serverName, walFs, walRootDir, regionInfo);
241    return HRegion.openHRegionFromTableDir(conf, fs, tableDir, regionInfo, td, wal, null, null);
242  }
243
244  private static RegionInfo loadRegionInfo(FileSystem fs, Path tableDir) throws IOException {
245    Path regionDir =
246      fs.listStatus(tableDir, p -> RegionInfo.isEncodedRegionName(Bytes.toBytes(p.getName())))[0]
247        .getPath();
248    return HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
249  }
250
251  private static HRegion open(Configuration conf, TableDescriptor td, RegionInfo regionInfo,
252    FileSystem fs, Path rootDir, FileSystem walFs, Path walRootDir, WALFactory walFactory,
253    MasterRegionWALRoller walRoller, String serverName) throws IOException {
254    Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
255    Path walRegionDir = FSUtils.getRegionDirFromRootDir(walRootDir, regionInfo);
256    Path replayEditsDir = new Path(walRegionDir, REPLAY_EDITS_DIR);
257    if (!walFs.exists(replayEditsDir) && !walFs.mkdirs(replayEditsDir)) {
258      throw new IOException("Failed to create replay directory: " + replayEditsDir);
259    }
260
261    // Replay any WALs for the Master Region before opening it.
262    Path walsDir = new Path(walRootDir, HREGION_LOGDIR_NAME);
263    // In open(...), we expect that the WAL directory for the MasterRegion to already exist.
264    // This is in contrast to bootstrap() where we create the MasterRegion data and WAL dir.
265    // However, it's possible that users directly remove the WAL directory. We expect walsDir
266    // to always exist in normal situations, but we should guard against users changing the
267    // filesystem outside of HBase's line of sight.
268    if (walFs.exists(walsDir)) {
269      replayWALs(conf, walFs, walRootDir, walsDir, regionInfo, serverName, replayEditsDir);
270    } else {
271      LOG.error(
272        "UNEXPECTED: WAL directory for MasterRegion is missing." + " {} is unexpectedly missing.",
273        walsDir);
274    }
275
276    // Create a new WAL
277    WAL wal = createWAL(walFactory, walRoller, serverName, walFs, walRootDir, regionInfo);
278    conf.set(HRegion.SPECIAL_RECOVERED_EDITS_DIR,
279      replayEditsDir.makeQualified(walFs.getUri(), walFs.getWorkingDirectory()).toString());
280    return HRegion.openHRegionFromTableDir(conf, fs, tableDir, regionInfo, td, wal, null, null);
281  }
282
283  private static void replayWALs(Configuration conf, FileSystem walFs, Path walRootDir,
284    Path walsDir, RegionInfo regionInfo, String serverName, Path replayEditsDir)
285    throws IOException {
286    for (FileStatus walDir : walFs.listStatus(walsDir)) {
287      if (!walDir.isDirectory()) {
288        continue;
289      }
290      if (walDir.getPath().getName().startsWith(serverName)) {
291        LOG.warn("This should not happen in real production as we have not created our WAL "
292          + "directory yet, ignore if you are running a local region related UT");
293      }
294      Path deadWALDir;
295      if (!walDir.getPath().getName().endsWith(DEAD_WAL_DIR_SUFFIX)) {
296        deadWALDir =
297          new Path(walDir.getPath().getParent(), walDir.getPath().getName() + DEAD_WAL_DIR_SUFFIX);
298        if (!walFs.rename(walDir.getPath(), deadWALDir)) {
299          throw new IOException("Can not rename " + walDir + " to " + deadWALDir
300            + " when recovering lease of proc store");
301        }
302        LOG.info("Renamed {} to {} as it is dead", walDir.getPath(), deadWALDir);
303      } else {
304        deadWALDir = walDir.getPath();
305        LOG.info("{} is already marked as dead", deadWALDir);
306      }
307      for (FileStatus walFile : walFs.listStatus(deadWALDir)) {
308        Path replayEditsFile = new Path(replayEditsDir, walFile.getPath().getName());
309        RecoverLeaseFSUtils.recoverFileLease(walFs, walFile.getPath(), conf);
310        if (!walFs.rename(walFile.getPath(), replayEditsFile)) {
311          throw new IOException("Can not rename " + walFile.getPath() + " to " + replayEditsFile
312            + " when recovering lease for local region");
313        }
314        LOG.info("Renamed {} to {}", walFile.getPath(), replayEditsFile);
315      }
316      LOG.info("Delete empty local region wal dir {}", deadWALDir);
317      walFs.delete(deadWALDir, true);
318    }
319  }
320
321  private static void tryMigrate(Configuration conf, FileSystem fs, Path tableDir,
322    RegionInfo regionInfo, TableDescriptor oldTd, TableDescriptor newTd) throws IOException {
323    Class<? extends StoreFileTracker> oldSft =
324      StoreFileTrackerFactory.getTrackerClass(oldTd.getValue(StoreFileTrackerFactory.TRACKER_IMPL));
325    Class<? extends StoreFileTracker> newSft =
326      StoreFileTrackerFactory.getTrackerClass(newTd.getValue(StoreFileTrackerFactory.TRACKER_IMPL));
327    if (oldSft.equals(newSft)) {
328      LOG.debug("old store file tracker {} is the same with new store file tracker, skip migration",
329        StoreFileTrackerFactory.getStoreFileTrackerName(oldSft));
330      if (!oldTd.equals(newTd)) {
331        // we may change other things such as adding a new family, so here we still need to persist
332        // the new table descriptor
333        LOG.info("Update table descriptor from {} to {}", oldTd, newTd);
334        FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, newTd, true);
335      }
336      return;
337    }
338    LOG.info("Migrate store file tracker from {} to {}", oldSft.getSimpleName(),
339      newSft.getSimpleName());
340    HRegionFileSystem hfs =
341      HRegionFileSystem.openRegionFromFileSystem(conf, fs, tableDir, regionInfo, false);
342    for (ColumnFamilyDescriptor oldCfd : oldTd.getColumnFamilies()) {
343      StoreFileTracker oldTracker = StoreFileTrackerFactory.create(conf, oldTd, oldCfd, hfs);
344      StoreFileTracker newTracker = StoreFileTrackerFactory.create(conf, oldTd, oldCfd, hfs);
345      List<StoreFileInfo> files = oldTracker.load();
346      LOG.debug("Store file list for {}: {}", oldCfd.getNameAsString(), files);
347      newTracker.set(oldTracker.load());
348    }
349    // persist the new table descriptor after migration
350    LOG.info("Update table descriptor from {} to {}", oldTd, newTd);
351    FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, newTd, true);
352  }
353
354  public static MasterRegion create(MasterRegionParams params) throws IOException {
355    TableDescriptor td = params.tableDescriptor();
356    LOG.info("Create or load local region for table " + td);
357    Server server = params.server();
358    Configuration baseConf = server.getConfiguration();
359    FileSystem fs = CommonFSUtils.getRootDirFileSystem(baseConf);
360    FileSystem walFs = CommonFSUtils.getWALFileSystem(baseConf);
361    Path globalRootDir = CommonFSUtils.getRootDir(baseConf);
362    Path globalWALRootDir = CommonFSUtils.getWALRootDir(baseConf);
363    Path rootDir = new Path(globalRootDir, params.regionDirName());
364    Path walRootDir = new Path(globalWALRootDir, params.regionDirName());
365    // we will override some configurations so create a new one.
366    Configuration conf = new Configuration(baseConf);
367    CommonFSUtils.setRootDir(conf, rootDir);
368    CommonFSUtils.setWALRootDir(conf, walRootDir);
369    MasterRegionFlusherAndCompactor.setupConf(conf, params.flushSize(), params.flushPerChanges(),
370      params.flushIntervalMs());
371    conf.setInt(AbstractFSWAL.MAX_LOGS, params.maxWals());
372    if (params.useHsync() != null) {
373      conf.setBoolean(HRegion.WAL_HSYNC_CONF_KEY, params.useHsync());
374    }
375    if (params.useMetaCellComparator() != null) {
376      conf.setBoolean(HRegion.USE_META_CELL_COMPARATOR, params.useMetaCellComparator());
377    }
378    conf.setInt(AbstractFSWAL.RING_BUFFER_SLOT_COUNT,
379      IntMath.ceilingPowerOfTwo(params.ringBufferSlotCount()));
380
381    MasterRegionWALRoller walRoller = MasterRegionWALRoller.create(
382      td.getTableName() + "-WAL-Roller", conf, server, walFs, walRootDir, globalWALRootDir,
383      params.archivedWalSuffix(), params.rollPeriodMs(), params.flushSize());
384    walRoller.start();
385
386    WALFactory walFactory = new WALFactory(conf, server.getServerName().toString(), server, false);
387    Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
388    Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG);
389    Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG);
390    HRegion region;
391    if (!fs.exists(tableDir)) {
392      // bootstrap, no doubt
393      if (!fs.mkdirs(initializedFlag)) {
394        throw new IOException("Can not touch initialized flag");
395      }
396      region = bootstrap(conf, td, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
397        server.getServerName().toString(), true);
398    } else {
399      if (!fs.exists(initializedFlag)) {
400        if (!fs.exists(initializingFlag)) {
401          // should be old style, where we do not have the initializing or initialized file, persist
402          // the table descriptor, touch the initialized flag and then open the region.
403          // the store file tracker must be DEFAULT
404          LOG.info("No {} or {} file, try upgrading", INITIALIZING_FLAG, INITIALIZED_FLAG);
405          TableDescriptor oldTd =
406            TableDescriptorBuilder.newBuilder(td).setValue(StoreFileTrackerFactory.TRACKER_IMPL,
407              StoreFileTrackerFactory.Trackers.DEFAULT.name()).build();
408          FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, oldTd, true);
409          if (!fs.mkdirs(initializedFlag)) {
410            throw new IOException("Can not touch initialized flag: " + initializedFlag);
411          }
412          RegionInfo regionInfo = loadRegionInfo(fs, tableDir);
413          tryMigrate(conf, fs, tableDir, regionInfo, oldTd, td);
414          region = open(conf, td, regionInfo, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
415            server.getServerName().toString());
416        } else {
417          // delete all contents besides the initializing flag, here we can make sure tableDir
418          // exists(unless someone delete it manually...), so we do not do null check here.
419          for (FileStatus status : fs.listStatus(tableDir)) {
420            if (!status.getPath().getName().equals(INITIALIZING_FLAG)) {
421              fs.delete(status.getPath(), true);
422            }
423          }
424          region = bootstrap(conf, td, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
425            server.getServerName().toString(), false);
426        }
427      } else {
428        if (fs.exists(initializingFlag) && !fs.delete(initializingFlag, true)) {
429          LOG.warn("failed to clean up initializing flag: " + initializingFlag);
430        }
431        // open it, make sure to load the table descriptor from fs
432        TableDescriptor oldTd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
433        RegionInfo regionInfo = loadRegionInfo(fs, tableDir);
434        tryMigrate(conf, fs, tableDir, regionInfo, oldTd, td);
435        region = open(conf, td, regionInfo, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
436          server.getServerName().toString());
437      }
438    }
439
440    Path globalArchiveDir = HFileArchiveUtil.getArchivePath(baseConf);
441    MasterRegionFlusherAndCompactor flusherAndCompactor = new MasterRegionFlusherAndCompactor(conf,
442      server, region, params.flushSize(), params.flushPerChanges(), params.flushIntervalMs(),
443      params.compactMin(), globalArchiveDir, params.archivedHFileSuffix());
444    walRoller.setFlusherAndCompactor(flusherAndCompactor);
445    Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
446    if (!fs.mkdirs(archiveDir)) {
447      LOG.warn("Failed to create archive directory {}. Usually this should not happen but it will"
448        + " be created again when we actually archive the hfiles later, so continue", archiveDir);
449    }
450    return new MasterRegion(region, walFactory, flusherAndCompactor, walRoller);
451  }
452}