001/*
002 * Licensed to the Apache Software Foundation (ASF) under one
003 * or more contributor license agreements.  See the NOTICE file
004 * distributed with this work for additional information
005 * regarding copyright ownership.  The ASF licenses this file
006 * to you under the Apache License, Version 2.0 (the
007 * "License"); you may not use this file except in compliance
008 * with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.apache.hadoop.hbase.master.region;
019
020import static org.apache.hadoop.hbase.HConstants.HREGION_LOGDIR_NAME;
021
022import com.google.errorprone.annotations.RestrictedApi;
023import java.io.IOException;
024import java.util.List;
025import org.apache.hadoop.conf.Configuration;
026import org.apache.hadoop.fs.FileStatus;
027import org.apache.hadoop.fs.FileSystem;
028import org.apache.hadoop.fs.Path;
029import org.apache.hadoop.hbase.HBaseIOException;
030import org.apache.hadoop.hbase.Server;
031import org.apache.hadoop.hbase.TableName;
032import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
033import org.apache.hadoop.hbase.client.Get;
034import org.apache.hadoop.hbase.client.RegionInfo;
035import org.apache.hadoop.hbase.client.RegionInfoBuilder;
036import org.apache.hadoop.hbase.client.Result;
037import org.apache.hadoop.hbase.client.ResultScanner;
038import org.apache.hadoop.hbase.client.Scan;
039import org.apache.hadoop.hbase.client.TableDescriptor;
040import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
041import org.apache.hadoop.hbase.regionserver.HRegion;
042import org.apache.hadoop.hbase.regionserver.HRegion.FlushResult;
043import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
044import org.apache.hadoop.hbase.regionserver.RegionScanner;
045import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
046import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
047import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
048import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
049import org.apache.hadoop.hbase.util.Bytes;
050import org.apache.hadoop.hbase.util.CommonFSUtils;
051import org.apache.hadoop.hbase.util.FSTableDescriptors;
052import org.apache.hadoop.hbase.util.FSUtils;
053import org.apache.hadoop.hbase.util.HFileArchiveUtil;
054import org.apache.hadoop.hbase.util.RecoverLeaseFSUtils;
055import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
056import org.apache.hadoop.hbase.wal.WAL;
057import org.apache.hadoop.hbase.wal.WALFactory;
058import org.apache.yetus.audience.InterfaceAudience;
059import org.slf4j.Logger;
060import org.slf4j.LoggerFactory;
061
062import org.apache.hbase.thirdparty.com.google.common.math.IntMath;
063
064/**
065 * A region that stores data in a separated directory, which can be used to store master local data.
066 * <p/>
067 * FileSystem layout:
068 *
069 * <pre>
070 * hbase
071 *   |
072 *   --&lt;region dir&gt;
073 *       |
074 *       --data
075 *       |  |
 *       |  --/&lt;ns&gt;/&lt;table&gt;/&lt;encoded-region-name&gt; <---- The region data
077 *       |      |
078 *       |      --replay <---- The edits to replay
079 *       |
080 *       --WALs
081 *          |
082 *          --&lt;master-server-name&gt; <---- The WAL dir for active master
083 *          |
084 *          --&lt;master-server-name&gt;-dead <---- The WAL dir for dead master
085 * </pre>
086 *
087 * Notice that, you can use different root file system and WAL file system. Then the above directory
088 * will be on two file systems, the root file system will have the data directory while the WAL
 * filesystem will have the WALs directory. The archived HFile will be moved to the global HFile
 * archived directory with the {@link MasterRegionParams#archivedHFileSuffix()} suffix. The
 * archived WAL will be moved to the global WAL archived directory with the
 * {@link MasterRegionParams#archivedWalSuffix()} suffix.
093 */
094@InterfaceAudience.Private
095public final class MasterRegion {
096
  private static final Logger LOG = LoggerFactory.getLogger(MasterRegion.class);

  /** Name of the directory, under the region's WAL directory, holding the WAL files to replay. */
  private static final String REPLAY_EDITS_DIR = "recovered.wals";

  /** Suffix appended to the name of a WAL directory once it has been marked as dead. */
  private static final String DEAD_WAL_DIR_SUFFIX = "-dead";

  /** Name of the flag under the table directory which is deleted at the end of bootstrap. */
  static final String INITIALIZING_FLAG = ".initializing";

  /** Name of the flag under the table directory which is created at the end of bootstrap. */
  static final String INITIALIZED_FLAG = ".initialized";

  /** The region id used when building the {@link RegionInfo} at bootstrap. */
  private static final int REGION_ID = 1;

  /** Supplies the WAL of the region, shut down when this master region is closed. */
  private final WALFactory walFactory;

  /** The underlying region all reads and updates are delegated to. */
  final HRegion region;

  /** Notified on every update and flush, closed first when this master region is closed. */
  final MasterRegionFlusherAndCompactor flusherAndCompactor;

  /** Rolls the WAL of the region, closed last when this master region is closed. */
  private MasterRegionWALRoller walRoller;
116
117  private MasterRegion(HRegion region, WALFactory walFactory,
118    MasterRegionFlusherAndCompactor flusherAndCompactor, MasterRegionWALRoller walRoller) {
119    this.region = region;
120    this.walFactory = walFactory;
121    this.flusherAndCompactor = flusherAndCompactor;
122    this.walRoller = walRoller;
123  }
124
125  private void closeRegion(boolean abort) {
126    try {
127      region.close(abort);
128    } catch (IOException e) {
129      LOG.warn("Failed to close region", e);
130    }
131  }
132
133  private void shutdownWAL() {
134    try {
135      walFactory.shutdown();
136    } catch (IOException e) {
137      LOG.warn("Failed to shutdown WAL", e);
138    }
139  }
140
  /**
   * Runs {@code action} against the underlying region and then notifies the flusher and compactor
   * that an update has happened.
   * @param action the update to apply to the region
   * @throws IOException if the action fails; the flusher and compactor is not notified then
   */
  public void update(UpdateMasterRegion action) throws IOException {
    action.update(region);
    flusherAndCompactor.onUpdate();
  }
145
  /**
   * Reads a single row directly from the underlying region.
   * <p/>
   * The design for master region is to only load all the data to memory at once when starting, so
   * typically you should not use the get method to get a single row of data at runtime. This is
   * why the method is restricted to test code.
   * @param get the get to execute
   * @return the result returned by the region
   * @throws IOException if the region fails to serve the get
   */
  @RestrictedApi(explanation = "Should only be called in tests", link = "",
      allowedOnPath = ".*/src/test/.*")
  public Result get(Get get) throws IOException {
    return region.get(get);
  }
155
156  public ResultScanner getScanner(Scan scan) throws IOException {
157    return new RegionScannerAsResultScanner(region.getScanner(scan));
158  }
159
  /**
   * Opens a scanner on the underlying region and returns it as is, without wrapping it.
   * @param scan the scan to execute
   * @return the scanner created by the region
   * @throws IOException if the region fails to open the scanner
   */
  public RegionScanner getRegionScanner(Scan scan) throws IOException {
    return region.getScanner(scan);
  }
163
  /**
   * Flushes the underlying region, keeping the bookkeeping of the flusher and compactor in sync.
   * @param force passed through to the region flush
   * @return the result reported by the region flush
   * @throws IOException if the region flush fails; the last flush time is not recorded then
   */
  public FlushResult flush(boolean force) throws IOException {
    // reset the change counter before flushing, and only record the flush time once the flush
    // has returned normally
    flusherAndCompactor.resetChangesAfterLastFlush();
    FlushResult flushResult = region.flush(force);
    flusherAndCompactor.recordLastFlushTime();
    return flushResult;
  }
170
  /**
   * Asks the WAL roller to roll all the WALs it takes care of. Test only.
   */
  @RestrictedApi(explanation = "Should only be called in tests", link = "",
      allowedOnPath = ".*/src/test/.*")
  public void requestRollAll() {
    walRoller.requestRollAll();
  }
176
  /**
   * Delegates to the WAL roller to wait until the WAL roll has finished. Test only.
   * @throws InterruptedException propagated from the WAL roller
   */
  @RestrictedApi(explanation = "Should only be called in tests", link = "",
      allowedOnPath = ".*/src/test/.*")
  public void waitUntilWalRollFinished() throws InterruptedException {
    walRoller.waitUntilWalRollFinished();
  }
182
  /**
   * Closes this master region: first the flusher and compactor, then the region and the WAL (in
   * an order which depends on {@code abort}), and finally the WAL roller. Failures while closing
   * the region or shutting down the WAL are logged and do not stop the sequence.
   * @param abort whether we are aborting; decides whether the WAL is shut down before or after
   *              the region is closed
   */
  public void close(boolean abort) {
    LOG.info("Closing local region {}, isAbort={}", region.getRegionInfo(), abort);
    // stop the flusher and compactor before touching the region
    if (flusherAndCompactor != null) {
      flusherAndCompactor.close();
    }
    // if abort, we shutdown wal first to fail the ongoing updates to the region, and then close the
    // region, otherwise there will be dead lock.
    if (abort) {
      shutdownWAL();
      closeRegion(true);
    } else {
      closeRegion(false);
      shutdownWAL();
    }

    // the roller is closed last, after the WAL has been shut down
    if (walRoller != null) {
      walRoller.close();
    }
  }
202
203  private static WAL createWAL(WALFactory walFactory, MasterRegionWALRoller walRoller,
204    String serverName, FileSystem walFs, Path walRootDir, RegionInfo regionInfo)
205    throws IOException {
206    String logName = AbstractFSWALProvider.getWALDirectoryName(serverName);
207    Path walDir = new Path(walRootDir, logName);
208    LOG.debug("WALDir={}", walDir);
209    if (walFs.exists(walDir)) {
210      throw new HBaseIOException(
211        "Already created wal directory at " + walDir + " for local region " + regionInfo);
212    }
213    if (!walFs.mkdirs(walDir)) {
214      throw new IOException(
215        "Can not create wal directory " + walDir + " for local region " + regionInfo);
216    }
217    WAL wal = walFactory.getWAL(regionInfo);
218    walRoller.addWAL(wal);
219    return wal;
220  }
221
222  private static HRegion bootstrap(Configuration conf, TableDescriptor td, FileSystem fs,
223    Path rootDir, FileSystem walFs, Path walRootDir, WALFactory walFactory,
224    MasterRegionWALRoller walRoller, String serverName, boolean touchInitializingFlag)
225    throws IOException {
226    TableName tn = td.getTableName();
227    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tn).setRegionId(REGION_ID).build();
228    Path tableDir = CommonFSUtils.getTableDir(rootDir, tn);
229    // persist table descriptor
230    FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, td, true);
231    HRegion.createHRegion(conf, regionInfo, fs, tableDir, td).close();
232    Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG);
233    if (!fs.mkdirs(initializedFlag)) {
234      throw new IOException("Can not touch initialized flag: " + initializedFlag);
235    }
236    Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG);
237    if (!fs.delete(initializingFlag, true)) {
238      LOG.warn("failed to clean up initializing flag: " + initializingFlag);
239    }
240    WAL wal = createWAL(walFactory, walRoller, serverName, walFs, walRootDir, regionInfo);
241    return HRegion.openHRegionFromTableDir(conf, fs, tableDir, regionInfo, td, wal, null, null);
242  }
243
244  private static RegionInfo loadRegionInfo(FileSystem fs, Path tableDir) throws IOException {
245    // on branch-2, the RegionInfo.isEncodedRegionName will returns true for .initializing and
246    // .initialized, see HBASE-25368. Since RegionInfo is IA.Public, changing the implementation may
247    // raise compatibility concerns, so here we just skip them by our own.
248    Path regionDir = fs.listStatus(tableDir, p -> !p.getName().startsWith(".")
249      && RegionInfo.isEncodedRegionName(Bytes.toBytes(p.getName())))[0].getPath();
250    return HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir);
251  }
252
253  private static HRegion open(Configuration conf, TableDescriptor td, RegionInfo regionInfo,
254    FileSystem fs, Path rootDir, FileSystem walFs, Path walRootDir, WALFactory walFactory,
255    MasterRegionWALRoller walRoller, String serverName) throws IOException {
256    Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
257    Path walRegionDir = FSUtils.getRegionDirFromRootDir(walRootDir, regionInfo);
258    Path replayEditsDir = new Path(walRegionDir, REPLAY_EDITS_DIR);
259    if (!walFs.exists(replayEditsDir) && !walFs.mkdirs(replayEditsDir)) {
260      throw new IOException("Failed to create replay directory: " + replayEditsDir);
261    }
262
263    // Replay any WALs for the Master Region before opening it.
264    Path walsDir = new Path(walRootDir, HREGION_LOGDIR_NAME);
265    // In open(...), we expect that the WAL directory for the MasterRegion to already exist.
266    // This is in contrast to bootstrap() where we create the MasterRegion data and WAL dir.
267    // However, it's possible that users directly remove the WAL directory. We expect walsDir
268    // to always exist in normal situations, but we should guard against users changing the
269    // filesystem outside of HBase's line of sight.
270    if (walFs.exists(walsDir)) {
271      replayWALs(conf, walFs, walRootDir, walsDir, regionInfo, serverName, replayEditsDir);
272    } else {
273      LOG.error(
274        "UNEXPECTED: WAL directory for MasterRegion is missing." + " {} is unexpectedly missing.",
275        walsDir);
276    }
277
278    // Create a new WAL
279    WAL wal = createWAL(walFactory, walRoller, serverName, walFs, walRootDir, regionInfo);
280    conf.set(HRegion.SPECIAL_RECOVERED_EDITS_DIR,
281      replayEditsDir.makeQualified(walFs.getUri(), walFs.getWorkingDirectory()).toString());
282    return HRegion.openHRegionFromTableDir(conf, fs, tableDir, regionInfo, td, wal, null, null);
283  }
284
285  private static void replayWALs(Configuration conf, FileSystem walFs, Path walRootDir,
286    Path walsDir, RegionInfo regionInfo, String serverName, Path replayEditsDir)
287    throws IOException {
288    for (FileStatus walDir : walFs.listStatus(walsDir)) {
289      if (!walDir.isDirectory()) {
290        continue;
291      }
292      if (walDir.getPath().getName().startsWith(serverName)) {
293        LOG.warn("This should not happen in real production as we have not created our WAL "
294          + "directory yet, ignore if you are running a local region related UT");
295      }
296      Path deadWALDir;
297      if (!walDir.getPath().getName().endsWith(DEAD_WAL_DIR_SUFFIX)) {
298        deadWALDir =
299          new Path(walDir.getPath().getParent(), walDir.getPath().getName() + DEAD_WAL_DIR_SUFFIX);
300        if (!walFs.rename(walDir.getPath(), deadWALDir)) {
301          throw new IOException("Can not rename " + walDir + " to " + deadWALDir
302            + " when recovering lease of proc store");
303        }
304        LOG.info("Renamed {} to {} as it is dead", walDir.getPath(), deadWALDir);
305      } else {
306        deadWALDir = walDir.getPath();
307        LOG.info("{} is already marked as dead", deadWALDir);
308      }
309      for (FileStatus walFile : walFs.listStatus(deadWALDir)) {
310        Path replayEditsFile = new Path(replayEditsDir, walFile.getPath().getName());
311        RecoverLeaseFSUtils.recoverFileLease(walFs, walFile.getPath(), conf);
312        if (!walFs.rename(walFile.getPath(), replayEditsFile)) {
313          throw new IOException("Can not rename " + walFile.getPath() + " to " + replayEditsFile
314            + " when recovering lease for local region");
315        }
316        LOG.info("Renamed {} to {}", walFile.getPath(), replayEditsFile);
317      }
318      LOG.info("Delete empty local region wal dir {}", deadWALDir);
319      walFs.delete(deadWALDir, true);
320    }
321  }
322
323  private static void tryMigrate(Configuration conf, FileSystem fs, Path tableDir,
324    RegionInfo regionInfo, TableDescriptor oldTd, TableDescriptor newTd) throws IOException {
325    Class<? extends StoreFileTracker> oldSft =
326      StoreFileTrackerFactory.getTrackerClass(oldTd.getValue(StoreFileTrackerFactory.TRACKER_IMPL));
327    Class<? extends StoreFileTracker> newSft =
328      StoreFileTrackerFactory.getTrackerClass(newTd.getValue(StoreFileTrackerFactory.TRACKER_IMPL));
329    if (oldSft.equals(newSft)) {
330      LOG.debug("old store file tracker {} is the same with new store file tracker, skip migration",
331        StoreFileTrackerFactory.getStoreFileTrackerName(oldSft));
332      if (!oldTd.equals(newTd)) {
333        // we may change other things such as adding a new family, so here we still need to persist
334        // the new table descriptor
335        LOG.info("Update table descriptor from {} to {}", oldTd, newTd);
336        FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, newTd, true);
337      }
338      return;
339    }
340    LOG.info("Migrate store file tracker from {} to {}", oldSft.getSimpleName(),
341      newSft.getSimpleName());
342    HRegionFileSystem hfs =
343      HRegionFileSystem.openRegionFromFileSystem(conf, fs, tableDir, regionInfo, false);
344    for (ColumnFamilyDescriptor oldCfd : oldTd.getColumnFamilies()) {
345      StoreFileTracker oldTracker = StoreFileTrackerFactory.create(conf, oldTd, oldCfd, hfs);
346      StoreFileTracker newTracker = StoreFileTrackerFactory.create(conf, oldTd, oldCfd, hfs);
347      List<StoreFileInfo> files = oldTracker.load();
348      LOG.debug("Store file list for {}: {}", oldCfd.getNameAsString(), files);
349      newTracker.set(oldTracker.load());
350    }
351    // persist the new table descriptor after migration
352    LOG.info("Update table descriptor from {} to {}", oldTd, newTd);
353    FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, newTd, true);
354  }
355
356  public static MasterRegion create(MasterRegionParams params) throws IOException {
357    TableDescriptor td = params.tableDescriptor();
358    LOG.info("Create or load local region for table " + td);
359    Server server = params.server();
360    Configuration baseConf = server.getConfiguration();
361    FileSystem fs = CommonFSUtils.getRootDirFileSystem(baseConf);
362    FileSystem walFs = CommonFSUtils.getWALFileSystem(baseConf);
363    Path globalRootDir = CommonFSUtils.getRootDir(baseConf);
364    Path globalWALRootDir = CommonFSUtils.getWALRootDir(baseConf);
365    Path rootDir = new Path(globalRootDir, params.regionDirName());
366    Path walRootDir = new Path(globalWALRootDir, params.regionDirName());
367    // we will override some configurations so create a new one.
368    Configuration conf = new Configuration(baseConf);
369    CommonFSUtils.setRootDir(conf, rootDir);
370    CommonFSUtils.setWALRootDir(conf, walRootDir);
371    MasterRegionFlusherAndCompactor.setupConf(conf, params.flushSize(), params.flushPerChanges(),
372      params.flushIntervalMs());
373    conf.setInt(AbstractFSWAL.MAX_LOGS, params.maxWals());
374    if (params.useHsync() != null) {
375      conf.setBoolean(HRegion.WAL_HSYNC_CONF_KEY, params.useHsync());
376    }
377    if (params.useMetaCellComparator() != null) {
378      conf.setBoolean(HRegion.USE_META_CELL_COMPARATOR, params.useMetaCellComparator());
379    }
380    conf.setInt(AbstractFSWAL.RING_BUFFER_SLOT_COUNT,
381      IntMath.ceilingPowerOfTwo(params.ringBufferSlotCount()));
382
383    MasterRegionWALRoller walRoller = MasterRegionWALRoller.create(
384      td.getTableName() + "-WAL-Roller", conf, server, walFs, walRootDir, globalWALRootDir,
385      params.archivedWalSuffix(), params.rollPeriodMs(), params.flushSize());
386    walRoller.start();
387
388    WALFactory walFactory = new WALFactory(conf, server.getServerName().toString());
389    Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
390    Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG);
391    Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG);
392    HRegion region;
393    if (!fs.exists(tableDir)) {
394      // bootstrap, no doubt
395      if (!fs.mkdirs(initializedFlag)) {
396        throw new IOException("Can not touch initialized flag");
397      }
398      region = bootstrap(conf, td, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
399        server.getServerName().toString(), true);
400    } else {
401      if (!fs.exists(initializedFlag)) {
402        if (!fs.exists(initializingFlag)) {
403          // should be old style, where we do not have the initializing or initialized file, persist
404          // the table descriptor, touch the initialized flag and then open the region.
405          // the store file tracker must be DEFAULT
406          LOG.info("No {} or {} file, try upgrading", INITIALIZING_FLAG, INITIALIZED_FLAG);
407          TableDescriptor oldTd =
408            TableDescriptorBuilder.newBuilder(td).setValue(StoreFileTrackerFactory.TRACKER_IMPL,
409              StoreFileTrackerFactory.Trackers.DEFAULT.name()).build();
410          FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, oldTd, true);
411          if (!fs.mkdirs(initializedFlag)) {
412            throw new IOException("Can not touch initialized flag: " + initializedFlag);
413          }
414          RegionInfo regionInfo = loadRegionInfo(fs, tableDir);
415          tryMigrate(conf, fs, tableDir, regionInfo, oldTd, td);
416          region = open(conf, td, regionInfo, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
417            server.getServerName().toString());
418        } else {
419          // delete all contents besides the initializing flag, here we can make sure tableDir
420          // exists(unless someone delete it manually...), so we do not do null check here.
421          for (FileStatus status : fs.listStatus(tableDir)) {
422            if (!status.getPath().getName().equals(INITIALIZING_FLAG)) {
423              fs.delete(status.getPath(), true);
424            }
425          }
426          region = bootstrap(conf, td, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
427            server.getServerName().toString(), false);
428        }
429      } else {
430        if (fs.exists(initializingFlag) && !fs.delete(initializingFlag, true)) {
431          LOG.warn("failed to clean up initializing flag: " + initializingFlag);
432        }
433        // open it, make sure to load the table descriptor from fs
434        TableDescriptor oldTd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
435        RegionInfo regionInfo = loadRegionInfo(fs, tableDir);
436        tryMigrate(conf, fs, tableDir, regionInfo, oldTd, td);
437        region = open(conf, td, regionInfo, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
438          server.getServerName().toString());
439      }
440    }
441
442    Path globalArchiveDir = HFileArchiveUtil.getArchivePath(baseConf);
443    MasterRegionFlusherAndCompactor flusherAndCompactor = new MasterRegionFlusherAndCompactor(conf,
444      server, region, params.flushSize(), params.flushPerChanges(), params.flushIntervalMs(),
445      params.compactMin(), globalArchiveDir, params.archivedHFileSuffix());
446    walRoller.setFlusherAndCompactor(flusherAndCompactor);
447    Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
448    if (!fs.mkdirs(archiveDir)) {
449      LOG.warn("Failed to create archive directory {}. Usually this should not happen but it will"
450        + " be created again when we actually archive the hfiles later, so continue", archiveDir);
451    }
452    return new MasterRegion(region, walFactory, flusherAndCompactor, walRoller);
453  }
454}