/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.wal;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Pattern;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.regionserver.wal.AbstractFSWAL;
import org.apache.hadoop.hbase.regionserver.wal.WALActionsListener;
import org.apache.hadoop.hbase.util.CancelableProgressable;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.LeaseNotRecoveredException;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * Base class of a WAL Provider that returns a single thread safe WAL that writes to Hadoop FS. By
 * default, this implementation picks a directory in Hadoop FS based on a combination of
 * <ul>
 * <li>the HBase root directory
 * <li>HConstants.HREGION_LOGDIR_NAME
 * <li>the given factory's factoryId (usually identifying the regionserver by host:port)
 * </ul>
 * It also uses the providerId to differentiate among files.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public abstract class AbstractFSWALProvider<T extends AbstractFSWAL<?>> implements WALProvider {

  private static final Logger LOG = LoggerFactory.getLogger(AbstractFSWALProvider.class);

  /** Separate old log into different dir by regionserver name **/
  public static final String SEPARATE_OLDLOGDIR = "hbase.separate.oldlogdir.by.regionserver";
  public static final boolean DEFAULT_SEPARATE_OLDLOGDIR = false;

  // Only public so classes back in regionserver.wal can access
  public interface Reader extends WAL.Reader {
    /**
     * @param fs File system.
     * @param path Path.
     * @param c Configuration.
     * @param s Input stream that may have been pre-opened by the caller; may be null.
     */
    void init(FileSystem fs, Path path, Configuration c, FSDataInputStream s) throws IOException;
  }

  /** The single WAL handed out by this provider; created lazily by {@link #getWAL(RegionInfo)}. */
  protected volatile T wal;
  protected WALFactory factory;
  protected Configuration conf;
  protected List<WALActionsListener> listeners = new ArrayList<>();
  protected String providerId;
  /** Flipped to true by the first call to {@link #init}; guards against double initialization. */
  protected AtomicBoolean initialized = new AtomicBoolean(false);
  // for default wal provider, logPrefix won't change
  protected String logPrefix;

  /**
   * We use walCreateLock to prevent wal recreation in different threads, and also prevent getWALs
   * missing the newly created WAL, see HBASE-21503 for more details.
   */
  private final ReadWriteLock walCreateLock = new ReentrantReadWriteLock();

  /**
   * Initializes this provider and computes the log prefix as
   * {@code factoryId + WAL_FILE_NAME_DELIMITER + providerId} (the delimiter is not repeated when
   * the providerId already starts with it), then delegates to {@link #doInit(Configuration)}.
   * @param factory factory that made us, identity used for FS layout. may not be null
   * @param conf may not be null
   * @param providerId differentiate between providers from one factory, used for FS layout. may be
   *          null
   * @throws IllegalStateException if this provider has already been initialized
   * @throws IOException if {@link #doInit(Configuration)} fails
   */
  @Override
  public void init(WALFactory factory, Configuration conf, String providerId) throws IOException {
    if (!initialized.compareAndSet(false, true)) {
      throw new IllegalStateException("WALProvider.init should only be called once.");
    }
    this.factory = factory;
    this.conf = conf;
    this.providerId = providerId;
    // get log prefix
    StringBuilder sb = new StringBuilder().append(factory.factoryId);
    if (providerId != null) {
      if (providerId.startsWith(WAL_FILE_NAME_DELIMITER)) {
        sb.append(providerId);
      } else {
        sb.append(WAL_FILE_NAME_DELIMITER).append(providerId);
      }
    }
    logPrefix = sb.toString();
    doInit(conf);
  }

  /**
   * Returns a new list holding the WAL of this provider, or an empty list if no WAL has been
   * created yet.
   */
  @Override
  public List<WAL> getWALs() {
    // Fast path: the WAL is already published through the volatile field.
    if (wal != null) {
      return Lists.newArrayList(wal);
    }
    // Slow path: take the read lock so that we wait for a creation that may be in progress under
    // the write lock in getWAL, instead of missing the newly created WAL.
    walCreateLock.readLock().lock();
    try {
      if (wal == null) {
        return Collections.emptyList();
      } else {
        return Lists.newArrayList(wal);
      }
    } finally {
      walCreateLock.readLock().unlock();
    }
  }

  /**
   * Returns the WAL of this provider, creating it through {@link #createWAL()} on first use. The
   * {@code region} argument is not consulted; the same WAL is returned for every region.
   * @throws IOException if the WAL cannot be created
   */
  @Override
  public T getWAL(RegionInfo region) throws IOException {
    T walCopy = wal;
    if (walCopy != null) {
      return walCopy;
    }
    walCreateLock.writeLock().lock();
    try {
      // Re-check under the lock: another thread may have created the WAL in the meantime.
      walCopy = wal;
      if (walCopy != null) {
        return walCopy;
      }
      walCopy = createWAL();
      wal = walCopy;
      return walCopy;
    } finally {
      walCreateLock.writeLock().unlock();
    }
  }

  /** Creates the WAL instance; invoked by {@link #getWAL(RegionInfo)} while holding the write lock. */
  protected abstract T createWAL() throws IOException;

  /** Subclass initialization hook; invoked at the end of {@link #init}. */
  protected abstract void doInit(Configuration conf) throws IOException;

  /** Shuts down the WAL if one has been created; otherwise does nothing. */
  @Override
  public void shutdown() throws IOException {
    T log = this.wal;
    if (log != null) {
      log.shutdown();
    }
  }

  /** Closes the WAL if one has been created; otherwise does nothing. */
  @Override
  public void close() throws IOException {
    T log = this.wal;
    if (log != null) {
      log.close();
    }
  }

  /**
   * iff the given WALFactory is using the DefaultWALProvider for meta and/or non-meta, count the
   * number of files (rolled and active). if either of them aren't, count 0 for that provider.
   */
  @Override
  public long getNumLogFiles() {
    T log = this.wal;
    return log == null ? 0 : log.getNumLogFiles();
  }

  /**
   * iff the given WALFactory is using the DefaultWALProvider for meta and/or non-meta, count the
   * size of files (only rolled). if either of them aren't, count 0 for that provider.
   */
  @Override
  public long getLogFileSize() {
    T log = this.wal;
    return log == null ? 0 : log.getLogFileSize();
  }

  /**
   * returns the number of rolled WAL files.
   */
  @VisibleForTesting
  public static int getNumRolledLogFiles(WAL wal) {
    return ((AbstractFSWAL<?>) wal).getNumRolledLogFiles();
  }

  /**
   * returns the size of rolled WAL files.
   */
  @VisibleForTesting
  public static long getLogFileSize(WAL wal) {
    return ((AbstractFSWAL<?>) wal).getLogFileSize();
  }

  /**
   * return the current filename from the current wal.
   */
  @VisibleForTesting
  public static Path getCurrentFileName(final WAL wal) {
    return ((AbstractFSWAL<?>) wal).getCurrentFileName();
  }

  /**
   * request a log roll, but don't actually do it.
   */
  @VisibleForTesting
  static void requestLogRoll(final WAL wal) {
    ((AbstractFSWAL<?>) wal).requestLogRoll();
  }

  // should be package private; more visible for use in AbstractFSWAL
  public static final String WAL_FILE_NAME_DELIMITER = ".";
  /** The hbase:meta region's WAL filename extension */
  @VisibleForTesting
  public static final String META_WAL_PROVIDER_ID = ".meta";
  static final String DEFAULT_PROVIDER_ID = "default";

  // Implementation details that currently leak in tests or elsewhere follow
  /** File Extension used while splitting an WAL into regions (HBASE-2312) */
  public static final String SPLITTING_EXT = "-splitting";

  /**
   * It returns the file create timestamp from the file name. For name format see
   * {@link #validateWALFilename(String)} public until remaining tests move to o.a.h.h.wal
   * @param wal must not be null
   * @return the file number that is part of the WAL file name
   * @throws IllegalArgumentException if the WAL has no current file name
   */
  @VisibleForTesting
  public static long extractFileNumFromWAL(final WAL wal) {
    final Path walName = ((AbstractFSWAL<?>) wal).getCurrentFileName();
    if (walName == null) {
      throw new IllegalArgumentException("The WAL path couldn't be null");
    }
    final String[] walPathStrs = walName.toString().split("\\" + WAL_FILE_NAME_DELIMITER);
    // For a meta WAL the last component is the "meta" suffix, so the number is second to last.
    return Long.parseLong(walPathStrs[walPathStrs.length - (isMetaFile(walName) ? 2 : 1)]);
  }

  /**
   * Pattern used to validate a WAL file name see {@link #validateWALFilename(String)} for
   * description. The meta suffix is quoted so that its leading '.' is matched literally rather
   * than as the regex "any character" wildcard.
   */
  private static final Pattern pattern =
    Pattern.compile(".*\\.\\d*(" + Pattern.quote(META_WAL_PROVIDER_ID) + ")*");

  /**
   * A WAL file name is of the format: &lt;wal-name&gt;{@link #WAL_FILE_NAME_DELIMITER}
   * &lt;file-creation-timestamp&gt;[.meta]. provider-name is usually made up of a server-name and a
   * provider-id
   * @param filename name of the file to validate
   * @return {@code true} if the filename matches an WAL, {@code false} otherwise
   */
  public static boolean validateWALFilename(String filename) {
    return pattern.matcher(filename).matches();
  }

  /**
   * Construct the directory name for all WALs on a given server. Dir names currently look like this
   * for WALs: <code>hbase//WALs/kalashnikov.att.net,61634,1486865297088</code>.
   * @param serverName Server name formatted as described in {@link ServerName}
   * @return the relative WAL directory name, e.g. <code>.logs/1.example.org,60030,12345</code> if
   *         <code>serverName</code> passed is <code>1.example.org,60030,12345</code>
   */
  public static String getWALDirectoryName(final String serverName) {
    StringBuilder dirName = new StringBuilder(HConstants.HREGION_LOGDIR_NAME);
    dirName.append("/");
    dirName.append(serverName);
    return dirName.toString();
  }

  /**
   * Construct the directory name for all old WALs on a given server. The default old WALs dir looks
   * like: <code>hbase/oldWALs</code>. If you config hbase.separate.oldlogdir.by.regionserver to
   * true, it looks like <code>hbase//oldWALs/kalashnikov.att.net,61634,1486865297088</code>.
   * @param conf configuration consulted for {@link #SEPARATE_OLDLOGDIR}
   * @param serverName Server name formatted as described in {@link ServerName}
   * @return the relative WAL directory name
   */
  public static String getWALArchiveDirectoryName(Configuration conf, final String serverName) {
    StringBuilder dirName = new StringBuilder(HConstants.HREGION_OLDLOGDIR_NAME);
    if (conf.getBoolean(SEPARATE_OLDLOGDIR, DEFAULT_SEPARATE_OLDLOGDIR)) {
      dirName.append(Path.SEPARATOR);
      dirName.append(serverName);
    }
    return dirName.toString();
  }

  /**
   * Pulls a ServerName out of a Path generated according to our layout rules. In the below layouts,
   * this method ignores the format of the logfile component. Current format: [base directory for
   * hbase]/hbase/.logs/ServerName/logfile or [base directory for
   * hbase]/hbase/.logs/ServerName-splitting/logfile Expected to work for individual log files and
   * server-specific directories.
   * @param conf configuration holding the HBase root dir; must not be null
   * @param path the path to examine; may be null
   * @return null if it's not a log file. Returns the ServerName of the region server that created
   *         this log file otherwise.
   * @throws IllegalArgumentException if {@code conf} is null or has no HBase root dir configured
   * @throws IOException if the file system cannot be obtained
   */
  public static ServerName getServerNameFromWALDirectoryName(Configuration conf, String path)
    throws IOException {
    if (path == null || path.length() <= HConstants.HREGION_LOGDIR_NAME.length()) {
      return null;
    }

    if (conf == null) {
      throw new IllegalArgumentException("parameter conf must be set");
    }

    final String rootDir = conf.get(HConstants.HBASE_DIR);
    if (rootDir == null || rootDir.isEmpty()) {
      throw new IllegalArgumentException(HConstants.HBASE_DIR + " key not found in conf.");
    }

    // Build "<rootDir>/<logDirName>/", inserting separators only where they are missing.
    final StringBuilder startPathSB = new StringBuilder(rootDir);
    if (!rootDir.endsWith("/")) {
      startPathSB.append('/');
    }
    startPathSB.append(HConstants.HREGION_LOGDIR_NAME);
    if (!HConstants.HREGION_LOGDIR_NAME.endsWith("/")) {
      startPathSB.append('/');
    }
    final String startPath = startPathSB.toString();

    String fullPath;
    try {
      fullPath = FileSystem.get(conf).makeQualified(new Path(path)).toString();
    } catch (IllegalArgumentException e) {
      LOG.info("Call to makeQualified failed on {} {}", path, e.getMessage());
      return null;
    }

    if (!fullPath.startsWith(startPath)) {
      return null;
    }

    final String serverNameAndFile = fullPath.substring(startPath.length());

    if (serverNameAndFile.indexOf('/') < "a,0,0".length()) {
      // Either it's a file (not a directory) or it's not a ServerName format
      return null;
    }

    Path p = new Path(path);
    return getServerNameFromWALDirectoryName(p);
  }

  /**
   * This function returns region server name from a log file name which is in one of the following
   * formats:
   * <ul>
   * <li>hdfs://&lt;name node&gt;/hbase/.logs/&lt;server name&gt;-splitting/...</li>
   * <li>hdfs://&lt;name node&gt;/hbase/.logs/&lt;server name&gt;/...</li>
   * </ul>
   * @param logFile a WAL file, or the server-specific directory containing WAL files
   * @return null if the passed in logFile isn't a valid WAL file path
   */
  public static ServerName getServerNameFromWALDirectoryName(Path logFile) {
    Path parent = logFile.getParent();
    if (parent == null) {
      // A path without a parent cannot live under a server-specific WAL directory.
      return null;
    }
    String logDirName = parent.getName();
    // We were passed the directory and not a file in it.
    if (logDirName.equals(HConstants.HREGION_LOGDIR_NAME)) {
      logDirName = logFile.getName();
    }
    ServerName serverName = null;
    if (logDirName.endsWith(SPLITTING_EXT)) {
      logDirName = logDirName.substring(0, logDirName.length() - SPLITTING_EXT.length());
    }
    try {
      serverName = ServerName.parseServerName(logDirName);
    } catch (IllegalArgumentException | IllegalStateException ex) {
      serverName = null;
      LOG.warn("Cannot parse a server name from path={}; {}", logFile, ex.getMessage());
    }
    if (serverName != null && serverName.getStartcode() < 0) {
      LOG.warn("Invalid log file path={}", logFile);
      serverName = null;
    }
    return serverName;
  }

  /**
   * @param p path whose final component is checked
   * @return true if the file name ends with {@link #META_WAL_PROVIDER_ID}
   */
  public static boolean isMetaFile(Path p) {
    return isMetaFile(p.getName());
  }

  /**
   * @param p file name to check; may be null
   * @return true if {@code p} is non-null and ends with {@link #META_WAL_PROVIDER_ID}
   */
  public static boolean isMetaFile(String p) {
    return p != null && p.endsWith(META_WAL_PROVIDER_ID);
  }

  /**
   * @param p path to check
   * @return true if the path string contains the old-WALs directory as a path component
   */
  public static boolean isArchivedLogFile(Path p) {
    String oldLog = Path.SEPARATOR + HConstants.HREGION_OLDLOGDIR_NAME + Path.SEPARATOR;
    return p.toString().contains(oldLog);
  }

  /**
   * Get the archived WAL file path
   * @param path - active WAL file path
   * @param conf - configuration
   * @return archived path if exists, path - otherwise
   * @throws IOException if the WAL root dir or file system cannot be accessed
   */
  public static Path getArchivedLogPath(Path path, Configuration conf) throws IOException {
    Path walRootDir = FSUtils.getWALRootDir(conf);
    Path oldLogDir = new Path(walRootDir, HConstants.HREGION_OLDLOGDIR_NAME);
    if (conf.getBoolean(SEPARATE_OLDLOGDIR, DEFAULT_SEPARATE_OLDLOGDIR)) {
      // Archived WALs are grouped per region server, so the server name is needed to find them.
      ServerName serverName = getServerNameFromWALDirectoryName(path);
      if (serverName == null) {
        LOG.error("Couldn't locate log: {}", path);
        return path;
      }
      oldLogDir = new Path(oldLogDir, serverName.getServerName());
    }
    Path archivedLogLocation = new Path(oldLogDir, path.getName());
    final FileSystem fs = FSUtils.getWALFileSystem(conf);

    if (fs.exists(archivedLogLocation)) {
      LOG.info("Log {} was moved to {}", path, archivedLogLocation);
      return archivedLogLocation;
    } else {
      LOG.error("Couldn't locate log: {}", path);
      return path;
    }
  }

  /**
   * Opens WAL reader with retries and additional exception handling. Up to 30 attempts are made,
   * waiting 2 seconds between attempts. If the file is not found, the archived location is tried
   * instead.
   * @param path path to WAL file
   * @param conf configuration
   * @return WAL Reader instance
   * @throws FileNotFoundException if the WAL exists neither at {@code path} nor in the archive
   * @throws InterruptedIOException if the calling thread is interrupted while waiting to retry; the
   *           thread's interrupt status is restored before throwing
   * @throws IOException if the reader could not be opened after all attempts
   */
  public static org.apache.hadoop.hbase.wal.WAL.Reader openReader(Path path, Configuration conf)
    throws IOException {
    final long retryInterval = 2000; // 2 sec
    final int maxAttempts = 30;
    Exception ee = null;
    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        // Detect if this is a new file, if so get a new reader else
        // reset the current reader so that we see the new data
        return WALFactory.createReader(path.getFileSystem(conf), path, conf);
      } catch (FileNotFoundException fnfe) {
        // If the log was archived, continue reading from there
        Path archivedLog = AbstractFSWALProvider.getArchivedLogPath(path, conf);
        if (!Objects.equals(path, archivedLog)) {
          return openReader(archivedLog, conf);
        } else {
          throw fnfe;
        }
      } catch (LeaseNotRecoveredException lnre) {
        // HBASE-15019 the WAL was not closed due to some hiccup.
        LOG.warn("Try to recover the WAL lease {}", path, lnre);
        recoverLease(conf, path);
        ee = lnre;
      } catch (NullPointerException npe) {
        // Workaround for race condition in HDFS-4380
        // which throws a NPE if we open a file before any data node has the most recent block
        // Just sleep and retry. Will require re-reading compressed WALs for compressionContext.
        LOG.warn("Got NPE opening reader, will retry.");
        ee = npe;
      }
      // sleep before next attempt; pointless after the final one
      if (attempt < maxAttempts) {
        try {
          Thread.sleep(retryInterval);
        } catch (InterruptedException ie) {
          // Do not swallow the interrupt: restore the flag for callers and stop retrying.
          Thread.currentThread().interrupt();
          InterruptedIOException iioe =
            new InterruptedIOException("Interrupted while waiting to open reader for " + path);
          iioe.initCause(ie);
          throw iioe;
        }
      }
    }
    throw new IOException("Could not open reader", ee);
  }

  /**
   * Best-effort attempt to recover the lease of the given WAL file (for HBASE-15019). Failures are
   * logged and not propagated.
   */
  private static void recoverLease(final Configuration conf, final Path path) {
    try {
      final FileSystem dfs = FSUtils.getCurrentFileSystem(conf);
      FSUtils fsUtils = FSUtils.getInstance(dfs, conf);
      fsUtils.recoverFileLease(dfs, path, conf, new CancelableProgressable() {
        @Override
        public boolean progress() {
          LOG.debug("Still trying to recover WAL lease: {}", path);
          return true;
        }
      });
    } catch (IOException e) {
      LOG.warn("unable to recover lease for WAL: {}", path, e);
    }
  }

  /** Appends the given listener to this provider's {@link #listeners} list. */
  @Override
  public void addWALActionsListener(WALActionsListener listener) {
    listeners.add(listener);
  }

  /**
   * Get prefix of the log from its name, assuming WAL name in format of
   * log_prefix.filenumber.log_suffix
   * <p>
   * Only a trailing {@link #META_WAL_PROVIDER_ID} suffix is ignored when locating the file number;
   * occurrences of that text elsewhere in the name are left alone.
   * @param name Name of the WAL to parse
   * @return prefix of the log
   * @throws StringIndexOutOfBoundsException if the name contains no delimiter before the file
   *           number
   * @see AbstractFSWAL#getCurrentFileName()
   */
  public static String getWALPrefixFromWALName(String name) {
    String nameWithoutMetaSuffix = name;
    if (name.endsWith(META_WAL_PROVIDER_ID)) {
      nameWithoutMetaSuffix = name.substring(0, name.length() - META_WAL_PROVIDER_ID.length());
    }
    // Only a suffix was removed, so the index is valid for the original name as well.
    int endIndex = nameWithoutMetaSuffix.lastIndexOf(WAL_FILE_NAME_DELIMITER);
    return name.substring(0, endIndex);
  }
}