/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master.region;

import static org.apache.hadoop.hbase.HConstants.HREGION_LOGDIR_NAME;

import com.google.errorprone.annotations.RestrictedApi;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.HRegion.FlushResult;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.CommonFSUtils;
import org.apache.hadoop.hbase.util.FSTableDescriptors;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.hbase.util.RecoverLeaseFSUtils;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.wal.WAL;
import org.apache.hadoop.hbase.wal.WALFactory;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.math.IntMath;

/**
 * A region that stores data in a separated directory, which can be used to store master local data.
 * <p/>
 * FileSystem layout:
 *
 * <pre>
 * hbase
 *   |
 *   --&lt;region dir&gt;
 *       |
 *       --data
 *       |  |
 *       |  --/&lt;ns&gt;/&lt;table&gt;/&lt;encoded-region-name&gt; &lt;---- The region data
 *       |      |
 *       |      --replay &lt;---- The edits to replay
 *       |
 *       --WALs
 *          |
 *          --&lt;master-server-name&gt; &lt;---- The WAL dir for active master
 *          |
 *          --&lt;master-server-name&gt;-dead &lt;---- The WAL dir for dead master
 * </pre>
 *
 * Notice that, you can use different root file system and WAL file system. Then the above directory
 * will be on two file systems, the root file system will have the data directory while the WAL
 * filesystem will have the WALs directory. The archived HFile will be moved to the global HFile
 * archived directory with the {@link MasterRegionParams#archivedHFileSuffix()} suffix. The archived
 * WAL will be moved to the global WAL archived directory with the
 * {@link MasterRegionParams#archivedWalSuffix()} suffix.
 */
@InterfaceAudience.Private
public final class MasterRegion {

  private static final Logger LOG = LoggerFactory.getLogger(MasterRegion.class);

  /** Name of the directory, under the region's WAL dir, where WAL files to replay are moved. */
  private static final String REPLAY_EDITS_DIR = "recovered.wals";

  /** Suffix appended to a WAL directory once it has been claimed for replay. */
  private static final String DEAD_WAL_DIR_SUFFIX = "-dead";

  /** Marker under the table directory which exists while a bootstrap is in progress. */
  static final String INITIALIZING_FLAG = ".initializing";

  /** Marker under the table directory which exists once a bootstrap has completed. */
  static final String INITIALIZED_FLAG = ".initialized";

  private static final int REGION_ID = 1;

  private final WALFactory walFactory;

  final HRegion region;

  final MasterRegionFlusherAndCompactor flusherAndCompactor;

  private final MasterRegionWALRoller walRoller;

  private MasterRegion(HRegion region, WALFactory walFactory,
    MasterRegionFlusherAndCompactor flusherAndCompactor, MasterRegionWALRoller walRoller) {
    this.region = region;
    this.walFactory = walFactory;
    this.flusherAndCompactor = flusherAndCompactor;
    this.walRoller = walRoller;
  }

  /**
   * Close the underlying region. A failure is only logged, so that the remaining shutdown steps in
   * {@link #close(boolean)} can still run.
   */
  private void closeRegion(boolean abort) {
    try {
      region.close(abort);
    } catch (IOException e) {
      LOG.warn("Failed to close region", e);
    }
  }

  /**
   * Shutdown the WAL factory. A failure is only logged, so that the remaining shutdown steps in
   * {@link #close(boolean)} can still run.
   */
  private void shutdownWAL() {
    try {
      walFactory.shutdown();
    } catch (IOException e) {
      LOG.warn("Failed to shutdown WAL", e);
    }
  }

  /**
   * Apply the given action to the underlying region and then notify the flusher and compactor
   * that an update has happened.
   * @param action the update to apply
   * @throws IOException if the action fails
   */
  public void update(UpdateMasterRegion action) throws IOException {
    action.update(region);
    flusherAndCompactor.onUpdate();
  }

  /**
   * The design for master region is to only load all the data to memory at once when starting, so
   * typically you should not use the get method to get a single row of data at runtime.
   */
  @RestrictedApi(explanation = "Should only be called in tests", link = "",
      allowedOnPath = ".*/src/test/.*")
  public Result get(Get get) throws IOException {
    return region.get(get);
  }

  /**
   * Open a scanner on the underlying region and expose it through the client side
   * {@link ResultScanner} interface.
   */
  public ResultScanner getScanner(Scan scan) throws IOException {
    return new RegionScannerAsResultScanner(region.getScanner(scan));
  }

  /**
   * Open a scanner on the underlying region and return it as is.
   */
  public RegionScanner getRegionScanner(Scan scan) throws IOException {
    return region.getScanner(scan);
  }

  /**
   * Flush the underlying region. The flusher and compactor is told to reset its change tracking
   * before the flush, and to record the flush time after the flush has returned.
   * @param force passed through to {@link HRegion#flush(boolean)}
   * @return the result reported by the region
   * @throws IOException if the flush fails
   */
  public FlushResult flush(boolean force) throws IOException {
    flusherAndCompactor.resetChangesAfterLastFlush();
    FlushResult flushResult = region.flush(force);
    flusherAndCompactor.recordLastFlushTime();
    return flushResult;
  }

  @RestrictedApi(explanation = "Should only be called in tests", link = "",
      allowedOnPath = ".*/src/test/.*")
  public void requestRollAll() {
    walRoller.requestRollAll();
  }

  @RestrictedApi(explanation = "Should only be called in tests", link = "",
      allowedOnPath = ".*/src/test/.*")
  public void waitUntilWalRollFinished() throws InterruptedException {
    walRoller.waitUntilWalRollFinished();
  }

  /**
   * Close the flusher and compactor, the region, the WAL and finally the WAL roller. Failures when
   * closing the region or shutting down the WAL are logged and not propagated.
   * @param abort whether this is an abort; it changes the order in which the region and the WAL
   *              are closed, see the comment in the method body
   */
  public void close(boolean abort) {
    LOG.info("Closing local region {}, isAbort={}", region.getRegionInfo(), abort);
    if (flusherAndCompactor != null) {
      flusherAndCompactor.close();
    }
    // if abort, we shutdown wal first to fail the ongoing updates to the region, and then close the
    // region, otherwise there will be dead lock.
    if (abort) {
      shutdownWAL();
      closeRegion(true);
    } else {
      closeRegion(false);
      shutdownWAL();
    }

    if (walRoller != null) {
      walRoller.close();
    }
  }

  /**
   * Create the WAL directory for the given server name, obtain a WAL for the region from the given
   * factory and register it with the WAL roller.
   * @throws HBaseIOException if the WAL directory already exists
   * @throws IOException      if the WAL directory can not be created
   */
  private static WAL createWAL(WALFactory walFactory, MasterRegionWALRoller walRoller,
    String serverName, FileSystem walFs, Path walRootDir, RegionInfo regionInfo)
    throws IOException {
    String logName = AbstractFSWALProvider.getWALDirectoryName(serverName);
    Path walDir = new Path(walRootDir, logName);
    LOG.debug("WALDir={}", walDir);
    if (walFs.exists(walDir)) {
      throw new HBaseIOException(
        "Already created wal directory at " + walDir + " for local region " + regionInfo);
    }
    if (!walFs.mkdirs(walDir)) {
      throw new IOException(
        "Can not create wal directory " + walDir + " for local region " + regionInfo);
    }
    WAL wal = walFactory.getWAL(regionInfo);
    walRoller.addWAL(wal);
    return wal;
  }

  /**
   * Create the region from scratch: persist the table descriptor, create the region on the file
   * system, touch the initialized flag, remove the initializing flag, create a WAL and finally open
   * the region.
   * @param touchInitializingFlag not read by this method; the callers in
   *                              {@link #create(MasterRegionParams)} make sure the initializing
   *                              flag is already in place before calling in
   */
  private static HRegion bootstrap(Configuration conf, TableDescriptor td, FileSystem fs,
    Path rootDir, FileSystem walFs, Path walRootDir, WALFactory walFactory,
    MasterRegionWALRoller walRoller, String serverName, boolean touchInitializingFlag)
    throws IOException {
    TableName tn = td.getTableName();
    RegionInfo regionInfo = RegionInfoBuilder.newBuilder(tn).setRegionId(REGION_ID).build();
    Path tableDir = CommonFSUtils.getTableDir(rootDir, tn);
    // persist table descriptor
    FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, td, true);
    HRegion.createHRegion(conf, regionInfo, fs, tableDir, td).close();
    // only now the on disk state is complete, so mark it as initialized before dropping the
    // initializing flag
    Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG);
    if (!fs.mkdirs(initializedFlag)) {
      throw new IOException("Can not touch initialized flag: " + initializedFlag);
    }
    Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG);
    if (!fs.delete(initializingFlag, true)) {
      LOG.warn("failed to clean up initializing flag: " + initializingFlag);
    }
    WAL wal = createWAL(walFactory, walRoller, serverName, walFs, walRootDir, regionInfo);
    return HRegion.openHRegionFromTableDir(conf, fs, tableDir, regionInfo, td, wal, null, null);
  }

  /**
   * Locate the region directory under the given table directory and load the region info stored in
   * it.
   * @throws IOException if there is no region directory under the table directory, or the region
   *                     info can not be loaded
   */
  private static RegionInfo loadRegionInfo(FileSystem fs, Path tableDir) throws IOException {
    // on branch-2, the RegionInfo.isEncodedRegionName will returns true for .initializing and
    // .initialized, see HBASE-25368. Since RegionInfo is IA.Public, changing the implementation may
    // raise compatibility concerns, so here we just skip them by our own.
    FileStatus[] regionDirs = fs.listStatus(tableDir, p -> !p.getName().startsWith(".")
      && RegionInfo.isEncodedRegionName(Bytes.toBytes(p.getName())));
    if (regionDirs == null || regionDirs.length == 0) {
      // fail with a message which tells what is missing, instead of an index out of bounds error
      throw new IOException("Can not find region directory under " + tableDir);
    }
    return HRegionFileSystem.loadRegionInfoFileContent(fs, regionDirs[0].getPath());
  }

  /**
   * Open an existing region: move the WAL files left under the WALs directory to the replay
   * directory, create a new WAL, point {@link HRegion#SPECIAL_RECOVERED_EDITS_DIR} in the given
   * configuration to the replay directory and then open the region.
   */
  private static HRegion open(Configuration conf, TableDescriptor td, RegionInfo regionInfo,
    FileSystem fs, Path rootDir, FileSystem walFs, Path walRootDir, WALFactory walFactory,
    MasterRegionWALRoller walRoller, String serverName) throws IOException {
    Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
    Path walRegionDir = FSUtils.getRegionDirFromRootDir(walRootDir, regionInfo);
    Path replayEditsDir = new Path(walRegionDir, REPLAY_EDITS_DIR);
    if (!walFs.exists(replayEditsDir) && !walFs.mkdirs(replayEditsDir)) {
      throw new IOException("Failed to create replay directory: " + replayEditsDir);
    }

    // Replay any WALs for the Master Region before opening it.
    Path walsDir = new Path(walRootDir, HREGION_LOGDIR_NAME);
    // In open(...), we expect that the WAL directory for the MasterRegion to already exist.
    // This is in contrast to bootstrap() where we create the MasterRegion data and WAL dir.
    // However, it's possible that users directly remove the WAL directory. We expect walsDir
    // to always exist in normal situations, but we should guard against users changing the
    // filesystem outside of HBase's line of sight.
    if (walFs.exists(walsDir)) {
      replayWALs(conf, walFs, walRootDir, walsDir, regionInfo, serverName, replayEditsDir);
    } else {
      LOG.error(
        "UNEXPECTED: WAL directory for MasterRegion is missing." + " {} is unexpectedly missing.",
        walsDir);
    }

    // Create a new WAL
    WAL wal = createWAL(walFactory, walRoller, serverName, walFs, walRootDir, regionInfo);
    conf.set(HRegion.SPECIAL_RECOVERED_EDITS_DIR,
      replayEditsDir.makeQualified(walFs.getUri(), walFs.getWorkingDirectory()).toString());
    return HRegion.openHRegionFromTableDir(conf, fs, tableDir, regionInfo, td, wal, null, null);
  }

  /**
   * Move all the WAL files found in the directories under {@code walsDir} to
   * {@code replayEditsDir}. Each WAL directory is first renamed with the
   * {@link #DEAD_WAL_DIR_SUFFIX} suffix if it does not carry it yet, then the lease of every file
   * in it is recovered and the file is renamed into the replay directory, and finally the WAL
   * directory is deleted.
   * @throws IOException if a directory or a file can not be renamed
   */
  private static void replayWALs(Configuration conf, FileSystem walFs, Path walRootDir,
    Path walsDir, RegionInfo regionInfo, String serverName, Path replayEditsDir)
    throws IOException {
    for (FileStatus walDir : walFs.listStatus(walsDir)) {
      if (!walDir.isDirectory()) {
        continue;
      }
      if (walDir.getPath().getName().startsWith(serverName)) {
        LOG.warn("This should not happen in real production as we have not created our WAL "
          + "directory yet, ignore if you are running a local region related UT");
      }
      Path deadWALDir;
      if (!walDir.getPath().getName().endsWith(DEAD_WAL_DIR_SUFFIX)) {
        deadWALDir =
          new Path(walDir.getPath().getParent(), walDir.getPath().getName() + DEAD_WAL_DIR_SUFFIX);
        if (!walFs.rename(walDir.getPath(), deadWALDir)) {
          throw new IOException("Can not rename " + walDir.getPath() + " to " + deadWALDir
            + " when recovering lease for local region");
        }
        LOG.info("Renamed {} to {} as it is dead", walDir.getPath(), deadWALDir);
      } else {
        deadWALDir = walDir.getPath();
        LOG.info("{} is already marked as dead", deadWALDir);
      }
      for (FileStatus walFile : walFs.listStatus(deadWALDir)) {
        Path replayEditsFile = new Path(replayEditsDir, walFile.getPath().getName());
        RecoverLeaseFSUtils.recoverFileLease(walFs, walFile.getPath(), conf);
        if (!walFs.rename(walFile.getPath(), replayEditsFile)) {
          throw new IOException("Can not rename " + walFile.getPath() + " to " + replayEditsFile
            + " when recovering lease for local region");
        }
        LOG.info("Renamed {} to {}", walFile.getPath(), replayEditsFile);
      }
      LOG.info("Delete empty local region wal dir {}", deadWALDir);
      walFs.delete(deadWALDir, true);
    }
  }

  /**
   * Bring the on disk state in line with the new table descriptor. If the store file tracker
   * implementation is unchanged, the new table descriptor is persisted only when it differs from
   * the old one. Otherwise, for each column family of the old table descriptor, the store file list
   * loaded by the old tracker is handed over to the new tracker, and then the new table descriptor
   * is persisted.
   */
  private static void tryMigrate(Configuration conf, FileSystem fs, Path tableDir,
    RegionInfo regionInfo, TableDescriptor oldTd, TableDescriptor newTd) throws IOException {
    Class<? extends StoreFileTracker> oldSft =
      StoreFileTrackerFactory.getTrackerClass(oldTd.getValue(StoreFileTrackerFactory.TRACKER_IMPL));
    Class<? extends StoreFileTracker> newSft =
      StoreFileTrackerFactory.getTrackerClass(newTd.getValue(StoreFileTrackerFactory.TRACKER_IMPL));
    if (oldSft.equals(newSft)) {
      LOG.debug("old store file tracker {} is the same with new store file tracker, skip migration",
        StoreFileTrackerFactory.getStoreFileTrackerName(oldSft));
      if (!oldTd.equals(newTd)) {
        // we may change other things such as adding a new family, so here we still need to persist
        // the new table descriptor
        LOG.info("Update table descriptor from {} to {}", oldTd, newTd);
        FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, newTd, true);
      }
      return;
    }
    LOG.info("Migrate store file tracker from {} to {}", oldSft.getSimpleName(),
      newSft.getSimpleName());
    HRegionFileSystem hfs =
      HRegionFileSystem.openRegionFromFileSystem(conf, fs, tableDir, regionInfo, false);
    for (ColumnFamilyDescriptor oldCfd : oldTd.getColumnFamilies()) {
      StoreFileTracker oldTracker = StoreFileTrackerFactory.create(conf, oldTd, oldCfd, hfs);
      // the new tracker must be created from the new table descriptor, otherwise we would just
      // create the old tracker implementation again and nothing would be migrated
      StoreFileTracker newTracker = StoreFileTrackerFactory.create(conf, newTd, oldCfd, hfs);
      List<StoreFileInfo> files = oldTracker.load();
      LOG.debug("Store file list for {}: {}", oldCfd.getNameAsString(), files);
      newTracker.set(files);
    }
    // persist the new table descriptor after migration
    LOG.info("Update table descriptor from {} to {}", oldTd, newTd);
    FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, newTd, true);
  }

  /**
   * Create or load the master local region described by the given parameters.
   * <p/>
   * The state of the table directory decides what to do:
   * <ul>
   * <li>The table directory does not exist: touch the initializing flag and bootstrap.</li>
   * <li>Neither the initialized flag nor the initializing flag exists: treated as the old layout,
   * persist a table descriptor with the DEFAULT store file tracker, touch the initialized flag,
   * migrate if necessary and open the region.</li>
   * <li>Only the initializing flag exists: a previous bootstrap did not finish, delete everything
   * besides the initializing flag and bootstrap again.</li>
   * <li>The initialized flag exists: clean up a left over initializing flag if any, migrate if
   * necessary and open the region.</li>
   * </ul>
   * @param params the parameters describing the region to create or load
   * @return the master region, with its WAL roller started
   * @throws IOException if the region can not be bootstrapped or opened
   */
  public static MasterRegion create(MasterRegionParams params) throws IOException {
    TableDescriptor td = params.tableDescriptor();
    LOG.info("Create or load local region for table " + td);
    Server server = params.server();
    Configuration baseConf = server.getConfiguration();
    FileSystem fs = CommonFSUtils.getRootDirFileSystem(baseConf);
    FileSystem walFs = CommonFSUtils.getWALFileSystem(baseConf);
    Path globalRootDir = CommonFSUtils.getRootDir(baseConf);
    Path globalWALRootDir = CommonFSUtils.getWALRootDir(baseConf);
    Path rootDir = new Path(globalRootDir, params.regionDirName());
    Path walRootDir = new Path(globalWALRootDir, params.regionDirName());
    // we will override some configurations so create a new one.
    Configuration conf = new Configuration(baseConf);
    CommonFSUtils.setRootDir(conf, rootDir);
    CommonFSUtils.setWALRootDir(conf, walRootDir);
    MasterRegionFlusherAndCompactor.setupConf(conf, params.flushSize(), params.flushPerChanges(),
      params.flushIntervalMs());
    conf.setInt(AbstractFSWAL.MAX_LOGS, params.maxWals());
    if (params.useHsync() != null) {
      conf.setBoolean(HRegion.WAL_HSYNC_CONF_KEY, params.useHsync());
    }
    if (params.useMetaCellComparator() != null) {
      conf.setBoolean(HRegion.USE_META_CELL_COMPARATOR, params.useMetaCellComparator());
    }
    conf.setInt(AbstractFSWAL.RING_BUFFER_SLOT_COUNT,
      IntMath.ceilingPowerOfTwo(params.ringBufferSlotCount()));

    MasterRegionWALRoller walRoller = MasterRegionWALRoller.create(
      td.getTableName() + "-WAL-Roller", conf, server, walFs, walRootDir, globalWALRootDir,
      params.archivedWalSuffix(), params.rollPeriodMs(), params.flushSize());
    walRoller.start();

    WALFactory walFactory = new WALFactory(conf, server.getServerName().toString());
    Path tableDir = CommonFSUtils.getTableDir(rootDir, td.getTableName());
    Path initializingFlag = new Path(tableDir, INITIALIZING_FLAG);
    Path initializedFlag = new Path(tableDir, INITIALIZED_FLAG);
    HRegion region;
    if (!fs.exists(tableDir)) {
      // bootstrap, no doubt. Touch the initializing flag first, so that if we fail in the middle,
      // the next start will find it and bootstrap again instead of opening a half created region.
      // The initialized flag is touched by bootstrap once everything is in place.
      if (!fs.mkdirs(initializingFlag)) {
        throw new IOException("Can not touch initializing flag: " + initializingFlag);
      }
      region = bootstrap(conf, td, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
        server.getServerName().toString(), true);
    } else {
      if (!fs.exists(initializedFlag)) {
        if (!fs.exists(initializingFlag)) {
          // should be old style, where we do not have the initializing or initialized file, persist
          // the table descriptor, touch the initialized flag and then open the region.
          // the store file tracker must be DEFAULT
          LOG.info("No {} or {} file, try upgrading", INITIALIZING_FLAG, INITIALIZED_FLAG);
          TableDescriptor oldTd =
            TableDescriptorBuilder.newBuilder(td).setValue(StoreFileTrackerFactory.TRACKER_IMPL,
              StoreFileTrackerFactory.Trackers.DEFAULT.name()).build();
          FSTableDescriptors.createTableDescriptorForTableDirectory(fs, tableDir, oldTd, true);
          if (!fs.mkdirs(initializedFlag)) {
            throw new IOException("Can not touch initialized flag: " + initializedFlag);
          }
          RegionInfo regionInfo = loadRegionInfo(fs, tableDir);
          tryMigrate(conf, fs, tableDir, regionInfo, oldTd, td);
          region = open(conf, td, regionInfo, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
            server.getServerName().toString());
        } else {
          // delete all contents besides the initializing flag, here we can make sure tableDir
          // exists(unless someone delete it manually...), so we do not do null check here.
          for (FileStatus status : fs.listStatus(tableDir)) {
            if (!status.getPath().getName().equals(INITIALIZING_FLAG)) {
              fs.delete(status.getPath(), true);
            }
          }
          region = bootstrap(conf, td, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
            server.getServerName().toString(), false);
        }
      } else {
        if (fs.exists(initializingFlag) && !fs.delete(initializingFlag, true)) {
          LOG.warn("failed to clean up initializing flag: " + initializingFlag);
        }
        // open it, make sure to load the table descriptor from fs
        TableDescriptor oldTd = FSTableDescriptors.getTableDescriptorFromFs(fs, tableDir);
        RegionInfo regionInfo = loadRegionInfo(fs, tableDir);
        tryMigrate(conf, fs, tableDir, regionInfo, oldTd, td);
        region = open(conf, td, regionInfo, fs, rootDir, walFs, walRootDir, walFactory, walRoller,
          server.getServerName().toString());
      }
    }

    Path globalArchiveDir = HFileArchiveUtil.getArchivePath(baseConf);
    MasterRegionFlusherAndCompactor flusherAndCompactor = new MasterRegionFlusherAndCompactor(conf,
      server, region, params.flushSize(), params.flushPerChanges(), params.flushIntervalMs(),
      params.compactMin(), globalArchiveDir, params.archivedHFileSuffix());
    walRoller.setFlusherAndCompactor(flusherAndCompactor);
    Path archiveDir = HFileArchiveUtil.getArchivePath(conf);
    if (!fs.mkdirs(archiveDir)) {
      LOG.warn("Failed to create archive directory {}. Usually this should not happen but it will"
        + " be created again when we actually archive the hfiles later, so continue", archiveDir);
    }
    return new MasterRegion(region, walFactory, flusherAndCompactor, walRoller);
  }
}