@InterfaceAudience.Private public class HStoreFile extends Object implements StoreFile
To create one, instantiate a writer using StoreFileWriter.Builder
and append data. Be sure to add any metadata before calling close on the
Writer (Use the appendMetadata convenience methods). On close, a StoreFile
is sitting in the Filesystem. To refer to it, create a StoreFile instance
passing filesystem and path. To read, call initReader().
StoreFiles may also reference store files in another Store. The reason for this weird pattern where you use a different instance for the writer and a reader is that we write once but read a lot more.
Modifier and Type | Field and Description |
---|---|
static byte[] |
BLOOM_FILTER_PARAM_KEY
Bloom filter param in FileInfo
|
static byte[] |
BLOOM_FILTER_TYPE_KEY
Bloom filter Type in FileInfo
|
static byte[] |
BULKLOAD_TASK_KEY
Meta key set when store file is a result of a bulk load
|
static byte[] |
BULKLOAD_TIME_KEY |
private CacheConfig |
cacheConf |
private BloomType |
cfBloomType
Bloom filter type specified in column family configuration.
|
private boolean |
compactedAway |
private Set<String> |
compactedStoreFiles |
static byte[] |
COMPACTION_EVENT_KEY
Key for compaction event which contains the compacted storefiles in FileInfo
|
private CellComparator |
comparator |
private static boolean |
DEFAULT_STORE_FILE_READER_NO_READAHEAD |
static byte[] |
DELETE_FAMILY_COUNT
Delete Family Count in FileInfo
|
static byte[] |
EARLIEST_PUT_TS
Key for timestamp of earliest-put in metadata
|
static byte[] |
EXCLUDE_FROM_MINOR_COMPACTION_KEY
Minor compaction flag in FileInfo
|
private boolean |
excludeFromMinorCompaction |
private StoreFileInfo |
fileInfo |
private Optional<Cell> |
firstKey |
private org.apache.hadoop.fs.FileSystem |
fs |
static byte[] |
LAST_BLOOM_KEY
Last Bloom filter key in FileInfo
|
private Optional<Cell> |
lastKey |
private static org.slf4j.Logger |
LOG |
static byte[] |
MAJOR_COMPACTION_KEY
Major compaction flag in FileInfo
|
private AtomicBoolean |
majorCompaction |
static byte[] |
MAX_SEQ_ID_KEY
Max Sequence ID in FileInfo
|
private long |
maxMemstoreTS |
private Map<byte[],byte[]> |
metadataMap
Map of the metadata entries in the corresponding HFile.
|
static byte[] |
MOB_CELLS_COUNT
Key for the number of mob cells in metadata
|
private boolean |
noReadahead |
private boolean |
primaryReplica |
private StoreFileReader |
reader |
private AtomicInteger |
refCount |
private long |
sequenceid |
static byte[] |
SKIP_RESET_SEQ_ID
Key for skipping resetting sequence id in metadata.
|
static String |
STORE_FILE_READER_NO_READAHEAD |
static byte[] |
TIMERANGE_KEY
Key for Timerange information in metadata
|
Constructor and Description |
---|
HStoreFile(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path p,
org.apache.hadoop.conf.Configuration conf,
CacheConfig cacheConf,
BloomType cfBloomType,
boolean primaryReplica)
Constructor, loads a reader and its indices, etc.
|
HStoreFile(org.apache.hadoop.fs.FileSystem fs,
StoreFileInfo fileInfo,
org.apache.hadoop.conf.Configuration conf,
CacheConfig cacheConf,
BloomType cfBloomType,
boolean primaryReplica)
Constructor, loads a reader and its indices, etc.
|
Modifier and Type | Method and Description |
---|---|
void |
closeStoreFile(boolean evictOnClose) |
private StoreFileReader |
createStreamReader(boolean canUseDropBehind) |
void |
deleteStoreFile()
Delete this file
|
boolean |
excludeFromMinorCompaction() |
OptionalLong |
getBulkLoadTimestamp()
Return the timestamp at which this bulk load file was generated.
|
CacheConfig |
getCacheConf() |
(package private) Set<String> |
getCompactedStoreFiles() |
CellComparator |
getComparator()
Get the comparator for comparing two cells.
|
org.apache.hadoop.fs.Path |
getEncodedPath() |
StoreFileInfo |
getFileInfo() |
Optional<Cell> |
getFirstKey()
Get the first key in this store file.
|
HDFSBlocksDistribution |
getHDFSBlockDistribution() |
Optional<Cell> |
getLastKey()
Get the last key in this store file.
|
OptionalLong |
getMaximumTimestamp()
Get the max timestamp of all the cells in the store file.
|
long |
getMaxMemStoreTS()
Get max of the MemstoreTS in the KVs in this store file.
|
long |
getMaxSequenceId() |
byte[] |
getMetadataValue(byte[] key)
Only used by the Striped Compaction Policy
|
OptionalLong |
getMinimumTimestamp()
Get the min timestamp of all the cells in the store file.
|
long |
getModificationTimestamp()
Get the modification time of this store file.
|
long |
getModificationTimeStamp()
Get the modification time of this store file.
|
org.apache.hadoop.fs.Path |
getPath() |
StoreFileScanner |
getPreadScanner(boolean cacheBlocks,
long readPt,
long scannerOrder,
boolean canOptimizeForNonNullColumn)
Get a scanner which uses pread.
|
org.apache.hadoop.fs.Path |
getQualifiedPath() |
StoreFileReader |
getReader() |
int |
getRefCount() |
StoreFileScanner |
getStreamScanner(boolean canUseDropBehind,
boolean cacheBlocks,
boolean isCompaction,
long readPt,
long scannerOrder,
boolean canOptimizeForNonNullColumn)
Get a scanner which uses streaming read.
|
void |
initReader()
Initialize the reader used for pread.
|
boolean |
isBulkLoadResult()
Check if this storefile was created by bulk load.
|
boolean |
isCompactedAway() |
boolean |
isHFile() |
boolean |
isMajorCompactionResult() |
boolean |
isReference() |
boolean |
isReferencedInReads() |
private boolean |
isSkipResetSeqId(byte[] skipResetSeqId)
Gets whether to skip resetting the sequence id for cells.
|
void |
markCompactedAway() |
private void |
open()
Opens reader on this store file.
|
String |
toString() |
String |
toStringDetailed() |
private static final org.slf4j.Logger LOG
public static final String STORE_FILE_READER_NO_READAHEAD
private static final boolean DEFAULT_STORE_FILE_READER_NO_READAHEAD
public static final byte[] MAX_SEQ_ID_KEY
public static final byte[] MAJOR_COMPACTION_KEY
public static final byte[] EXCLUDE_FROM_MINOR_COMPACTION_KEY
public static final byte[] COMPACTION_EVENT_KEY
public static final byte[] BLOOM_FILTER_TYPE_KEY
public static final byte[] BLOOM_FILTER_PARAM_KEY
public static final byte[] DELETE_FAMILY_COUNT
public static final byte[] LAST_BLOOM_KEY
public static final byte[] TIMERANGE_KEY
public static final byte[] EARLIEST_PUT_TS
public static final byte[] MOB_CELLS_COUNT
public static final byte[] BULKLOAD_TASK_KEY
public static final byte[] BULKLOAD_TIME_KEY
public static final byte[] SKIP_RESET_SEQ_ID
private final StoreFileInfo fileInfo
private final org.apache.hadoop.fs.FileSystem fs
private final CacheConfig cacheConf
private final AtomicInteger refCount
private final boolean noReadahead
private final boolean primaryReplica
private volatile boolean compactedAway
private long sequenceid
private long maxMemstoreTS
private CellComparator comparator
private AtomicBoolean majorCompaction
private boolean excludeFromMinorCompaction
private final Set<String> compactedStoreFiles
private Map<byte[],byte[]> metadataMap
private volatile StoreFileReader reader
private final BloomType cfBloomType
public HStoreFile(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path p, org.apache.hadoop.conf.Configuration conf, CacheConfig cacheConf, BloomType cfBloomType, boolean primaryReplica) throws IOException
fs - The current file system to use.
p - The path of the file.
conf - The current configuration.
cacheConf - The cache configuration and block cache reference.
cfBloomType - The bloom type to use for this store file as specified by column family
configuration. This may or may not be the same as the Bloom filter type actually
present in the HFile, because column family configuration might change. If this is
BloomType.NONE, the existing Bloom filter is ignored.
primaryReplica - true if this is a store file for primary replica, otherwise false.
IOException
public HStoreFile(org.apache.hadoop.fs.FileSystem fs, StoreFileInfo fileInfo, org.apache.hadoop.conf.Configuration conf, CacheConfig cacheConf, BloomType cfBloomType, boolean primaryReplica)
fs - The current file system to use.
fileInfo - The store file information.
conf - The current configuration.
cacheConf - The cache configuration and block cache reference.
cfBloomType - The bloom type to use for this store file as specified by column
family configuration. This may or may not be the same as the Bloom filter type
actually present in the HFile, because column family configuration might change. If
this is BloomType.NONE, the existing Bloom filter is ignored.
primaryReplica - true if this is a store file for primary replica, otherwise false.
public CacheConfig getCacheConf()
public Optional<Cell> getFirstKey()
StoreFile
getFirstKey
in interface StoreFile
public Optional<Cell> getLastKey()
StoreFile
getLastKey
in interface StoreFile
public CellComparator getComparator()
StoreFile
getComparator
in interface StoreFile
public long getMaxMemStoreTS()
StoreFile
getMaxMemStoreTS
in interface StoreFile
public StoreFileInfo getFileInfo()
public org.apache.hadoop.fs.Path getPath()
public org.apache.hadoop.fs.Path getEncodedPath()
getEncodedPath
in interface StoreFile
public org.apache.hadoop.fs.Path getQualifiedPath()
getQualifiedPath
in interface StoreFile
public boolean isReference()
isReference
in interface StoreFile
public boolean isHFile()
public boolean isMajorCompactionResult()
isMajorCompactionResult
in interface StoreFile
public boolean excludeFromMinorCompaction()
excludeFromMinorCompaction
in interface StoreFile
public long getMaxSequenceId()
getMaxSequenceId
in interface StoreFile
public long getModificationTimeStamp() throws IOException
StoreFile
getModificationTimeStamp
in interface StoreFile
IOException
StoreFile.getModificationTimestamp()
public long getModificationTimestamp() throws IOException
StoreFile
getModificationTimestamp
in interface StoreFile
IOException
public byte[] getMetadataValue(byte[] key)
key - The metadata key.
public boolean isBulkLoadResult()
StoreFile
Check if this storefile was created by bulk load. When an hfile is bulk loaded,
'_SeqId_<id-when-loaded>' is appended
to the hfile name, unless
"hbase.mapreduce.bulkload.assign.sequenceNumbers" is explicitly turned off. If
"hbase.mapreduce.bulkload.assign.sequenceNumbers" is turned off, fall back to
BULKLOAD_TIME_KEY.
isBulkLoadResult
in interface StoreFile
public boolean isCompactedAway()
public int getRefCount()
public boolean isReferencedInReads()
public OptionalLong getBulkLoadTimestamp()
StoreFile
getBulkLoadTimestamp
in interface StoreFile
public HDFSBlocksDistribution getHDFSBlockDistribution()
private void open() throws IOException
IOException
closeStoreFile(boolean)
public void initReader() throws IOException
IOException
private StoreFileReader createStreamReader(boolean canUseDropBehind) throws IOException
IOException
public StoreFileScanner getPreadScanner(boolean cacheBlocks, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn)
Must be called after initReader.
public StoreFileScanner getStreamScanner(boolean canUseDropBehind, boolean cacheBlocks, boolean isCompaction, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) throws IOException
Must be called after initReader.
IOException
public StoreFileReader getReader()
initReader()
public void closeStoreFile(boolean evictOnClose) throws IOException
evictOnClose - whether to evict blocks belonging to this file
IOException
public void deleteStoreFile() throws IOException
IOException
public void markCompactedAway()
public String toStringDetailed()
toStringDetailed
in interface StoreFile
private boolean isSkipResetSeqId(byte[] skipResetSeqId)
skipResetSeqId - The byte array of boolean.
public OptionalLong getMinimumTimestamp()
StoreFile
getMinimumTimestamp
in interface StoreFile
public OptionalLong getMaximumTimestamp()
StoreFile
getMaximumTimestamp
in interface StoreFile
Set<String> getCompactedStoreFiles()
Copyright © 2007–2020 The Apache Software Foundation. All rights reserved.