@InterfaceAudience.Private public class HStoreFile extends Object implements StoreFile
To create, instantiate a writer using StoreFileWriter.Builder and append data. Be sure to add any
metadata before calling close on the Writer (use the appendMetadata convenience methods). On
close, a StoreFile is sitting in the Filesystem. To refer to it, create a StoreFile instance
passing filesystem and path. To read, call initReader().
StoreFiles may also reference store files in another Store. The reason for this weird pattern where you use a different instance for the writer and a reader is that we write once but read a lot more.
Modifier and Type | Field and Description |
---|---|
static byte[] |
BLOOM_FILTER_PARAM_KEY
Bloom filter param in FileInfo
|
static byte[] |
BLOOM_FILTER_TYPE_KEY
Bloom filter Type in FileInfo
|
static byte[] |
BULKLOAD_TASK_KEY
Meta key set when store file is a result of a bulk load
|
static byte[] |
BULKLOAD_TIME_KEY |
private CacheConfig |
cacheConf |
private BloomType |
cfBloomType
Bloom filter type specified in column family configuration.
|
private boolean |
compactedAway |
private Set<String> |
compactedStoreFiles |
static byte[] |
COMPACTION_EVENT_KEY
Key for compaction event which contains the compacted storefiles in FileInfo
|
private CellComparator |
comparator |
static byte[] |
DELETE_FAMILY_COUNT
Delete Family Count in FileInfo
|
static byte[] |
EARLIEST_PUT_TS
Key for timestamp of earliest-put in metadata
|
static byte[] |
EXCLUDE_FROM_MINOR_COMPACTION_KEY
Minor compaction flag in FileInfo
|
private boolean |
excludeFromMinorCompaction |
private StoreFileInfo |
fileInfo |
private Optional<Cell> |
firstKey |
private StoreFileReader |
initialReader |
private InputStreamBlockDistribution |
initialReaderBlockDistribution |
static byte[] |
LAST_BLOOM_KEY
Last Bloom filter key in FileInfo
|
private Optional<Cell> |
lastKey |
private static org.slf4j.Logger |
LOG |
static byte[] |
MAJOR_COMPACTION_KEY
Major compaction flag in FileInfo
|
private AtomicBoolean |
majorCompaction |
static byte[] |
MAX_SEQ_ID_KEY
Max Sequence ID in FileInfo
|
private long |
maxMemstoreTS |
private Map<byte[],byte[]> |
metadataMap
Map of the metadata entries in the corresponding HFile.
|
private BloomFilterMetrics |
metrics |
static byte[] |
MOB_CELLS_COUNT
Key for the number of mob cells in metadata
|
static byte[] |
MOB_FILE_REFS
Key for the list of MOB file references
|
static byte[] |
NULL_VALUE
Null data
|
private long |
sequenceid |
static byte[] |
SKIP_RESET_SEQ_ID
Key for skipping resetting sequence id in metadata.
|
static byte[] |
TIMERANGE_KEY
Key for Timerange information in metadata
|
Constructor and Description |
---|
HStoreFile(org.apache.hadoop.fs.FileSystem fs,
org.apache.hadoop.fs.Path p,
org.apache.hadoop.conf.Configuration conf,
CacheConfig cacheConf,
BloomType cfBloomType,
boolean primaryReplica)
Constructor, loads a reader and its indices, etc.
|
HStoreFile(StoreFileInfo fileInfo,
BloomType cfBloomType,
CacheConfig cacheConf)
Constructor, loads a reader and its indices, etc.
|
HStoreFile(StoreFileInfo fileInfo,
BloomType cfBloomType,
CacheConfig cacheConf,
BloomFilterMetrics metrics)
Constructor, loads a reader and its indices, etc.
|
Modifier and Type | Method and Description |
---|---|
void |
closeStoreFile(boolean evictOnClose) |
private StoreFileReader |
createStreamReader(boolean canUseDropBehind) |
void |
deleteStoreFile()
Delete this file.
|
boolean |
excludeFromMinorCompaction()
Returns True if this file should not be part of a minor compaction.
|
OptionalLong |
getBulkLoadTimestamp()
Return the timestamp at which this bulk load file was generated.
|
CacheConfig |
getCacheConf() |
(package private) Set<String> |
getCompactedStoreFiles() |
CellComparator |
getComparator()
Get the comparator for comparing two cells.
|
org.apache.hadoop.fs.Path |
getEncodedPath()
Returns Encoded Path if this StoreFile was made with a Stream.
|
StoreFileInfo |
getFileInfo() |
Optional<Cell> |
getFirstKey()
Get the first key in this store file.
|
HDFSBlocksDistribution |
getHDFSBlockDistribution() |
Optional<Cell> |
getLastKey()
Get the last key in this store file.
|
OptionalLong |
getMaximumTimestamp()
Get the max timestamp of all the cells in the store file.
|
long |
getMaxMemStoreTS()
Get max of the MemstoreTS in the KV's in this store file.
|
long |
getMaxSequenceId()
Returns this file's maximum edit sequence id.
|
byte[] |
getMetadataValue(byte[] key) |
OptionalLong |
getMinimumTimestamp()
Get the min timestamp of all the cells in the store file.
|
long |
getModificationTimestamp()
Get the modification time of this store file.
|
long |
getModificationTimeStamp()
Get the modification time of this store file.
|
org.apache.hadoop.fs.Path |
getPath()
Returns Path or null if this StoreFile was made with a Stream.
|
StoreFileScanner |
getPreadScanner(boolean cacheBlocks,
long readPt,
long scannerOrder,
boolean canOptimizeForNonNullColumn)
Get a scanner which uses pread.
|
org.apache.hadoop.fs.Path |
getQualifiedPath()
Returns the qualified path of this StoreFile.
|
StoreFileReader |
getReader() |
int |
getRefCount() |
StoreFileScanner |
getStreamScanner(boolean canUseDropBehind,
boolean cacheBlocks,
boolean isCompaction,
long readPt,
long scannerOrder,
boolean canOptimizeForNonNullColumn)
Get a scanner which uses streaming read.
|
void |
initReader()
Initialize the reader used for pread.
|
boolean |
isBulkLoadResult()
Check if this storefile was created by bulk load.
|
boolean |
isCompactedAway() |
boolean |
isHFile()
Returns True if this is HFile.
|
boolean |
isMajorCompactionResult()
Returns True if this file was made by a major compaction.
|
boolean |
isReference()
Returns True if this is a StoreFile Reference.
|
boolean |
isReferencedInReads()
Returns true if the file is still used in reads
|
private boolean |
isSkipResetSeqId(byte[] skipResetSeqId)
Gets whether to skip resetting the sequence id for cells.
|
void |
markCompactedAway() |
private void |
open()
Opens reader on this store file.
|
String |
toString() |
String |
toStringDetailed()
Returns a lengthy description of this StoreFile, suitable for debug output.
|
private static final org.slf4j.Logger LOG
public static final byte[] MAX_SEQ_ID_KEY
public static final byte[] MAJOR_COMPACTION_KEY
public static final byte[] EXCLUDE_FROM_MINOR_COMPACTION_KEY
public static final byte[] COMPACTION_EVENT_KEY
public static final byte[] BLOOM_FILTER_TYPE_KEY
public static final byte[] BLOOM_FILTER_PARAM_KEY
public static final byte[] DELETE_FAMILY_COUNT
public static final byte[] LAST_BLOOM_KEY
public static final byte[] TIMERANGE_KEY
public static final byte[] EARLIEST_PUT_TS
public static final byte[] MOB_CELLS_COUNT
public static final byte[] NULL_VALUE
public static final byte[] MOB_FILE_REFS
public static final byte[] BULKLOAD_TASK_KEY
public static final byte[] BULKLOAD_TIME_KEY
public static final byte[] SKIP_RESET_SEQ_ID
private final StoreFileInfo fileInfo
private volatile StoreFileReader initialReader
private volatile InputStreamBlockDistribution initialReaderBlockDistribution
private final CacheConfig cacheConf
private final BloomFilterMetrics metrics
private volatile boolean compactedAway
private long sequenceid
private long maxMemstoreTS
private CellComparator comparator
private AtomicBoolean majorCompaction
private boolean excludeFromMinorCompaction
private final Set<String> compactedStoreFiles
private Map<byte[],byte[]> metadataMap
private final BloomType cfBloomType
public HStoreFile(org.apache.hadoop.fs.FileSystem fs, org.apache.hadoop.fs.Path p, org.apache.hadoop.conf.Configuration conf, CacheConfig cacheConf, BloomType cfBloomType, boolean primaryReplica) throws IOException
fs
- The current file system to use.
p
- The path of the file.
conf
- The current configuration.
cacheConf
- The cache configuration and block cache reference.
cfBloomType
- The bloom type to use for this store file as specified by column family
configuration. This may or may not be the same as the Bloom filter type
actually present in the HFile, because column family configuration might
change. If this is BloomType.NONE, the existing Bloom filter is
ignored.
primaryReplica
- true if this is a store file for primary replica, otherwise false.
IOException
public HStoreFile(StoreFileInfo fileInfo, BloomType cfBloomType, CacheConfig cacheConf)
fileInfo
- The store file information.
cfBloomType
- The bloom type to use for this store file as specified by column family
configuration. This may or may not be the same as the Bloom filter type
actually present in the HFile, because column family configuration might
change. If this is BloomType.NONE, the existing Bloom filter is
ignored.
cacheConf
- The cache configuration and block cache reference.
public HStoreFile(StoreFileInfo fileInfo, BloomType cfBloomType, CacheConfig cacheConf, BloomFilterMetrics metrics)
fileInfo
- The store file information.
cfBloomType
- The bloom type to use for this store file as specified by column family
configuration. This may or may not be the same as the Bloom filter type
actually present in the HFile, because column family configuration might
change. If this is BloomType.NONE, the existing Bloom filter is
ignored.
cacheConf
- The cache configuration and block cache reference.
metrics
- Tracks bloom filter requests and results. May be null.
public CacheConfig getCacheConf()
public Optional<Cell> getFirstKey()
StoreFile
getFirstKey
in interface StoreFile
public Optional<Cell> getLastKey()
StoreFile
getLastKey
in interface StoreFile
public CellComparator getComparator()
StoreFile
getComparator
in interface StoreFile
public long getMaxMemStoreTS()
StoreFile
getMaxMemStoreTS
in interface StoreFile
public StoreFileInfo getFileInfo()
public org.apache.hadoop.fs.Path getPath()
StoreFile
public org.apache.hadoop.fs.Path getEncodedPath()
StoreFile
getEncodedPath
in interface StoreFile
public org.apache.hadoop.fs.Path getQualifiedPath()
StoreFile
getQualifiedPath
in interface StoreFile
public boolean isReference()
StoreFile
isReference
in interface StoreFile
public boolean isHFile()
StoreFile
public boolean isMajorCompactionResult()
StoreFile
isMajorCompactionResult
in interface StoreFile
public boolean excludeFromMinorCompaction()
StoreFile
excludeFromMinorCompaction
in interface StoreFile
public long getMaxSequenceId()
StoreFile
getMaxSequenceId
in interface StoreFile
public long getModificationTimeStamp() throws IOException
StoreFile
getModificationTimeStamp
in interface StoreFile
IOException
StoreFile.getModificationTimestamp()
public long getModificationTimestamp() throws IOException
StoreFile
getModificationTimestamp
in interface StoreFile
IOException
public byte[] getMetadataValue(byte[] key)
key
- to look up
public boolean isBulkLoadResult()
StoreFile
Check if this storefile was created by bulk load. When an hfile is bulk loaded into HBase, we append
'_SeqId_<id-when-loaded>'
to the hfile name, unless
"hbase.mapreduce.bulkload.assign.sequenceNumbers" is explicitly turned off. If
"hbase.mapreduce.bulkload.assign.sequenceNumbers" is turned off, fall back to
BULKLOAD_TIME_KEY.
isBulkLoadResult
in interface StoreFile
public boolean isCompactedAway()
public int getRefCount()
public boolean isReferencedInReads()
public OptionalLong getBulkLoadTimestamp()
StoreFile
getBulkLoadTimestamp
in interface StoreFile
public HDFSBlocksDistribution getHDFSBlockDistribution()
private void open() throws IOException
IOException
closeStoreFile(boolean)
public void initReader() throws IOException
IOException
private StoreFileReader createStreamReader(boolean canUseDropBehind) throws IOException
IOException
public StoreFileScanner getPreadScanner(boolean cacheBlocks, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn)
Must be called after initReader.
public StoreFileScanner getStreamScanner(boolean canUseDropBehind, boolean cacheBlocks, boolean isCompaction, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) throws IOException
Must be called after initReader.
IOException
public StoreFileReader getReader()
initReader()
public void closeStoreFile(boolean evictOnClose) throws IOException
evictOnClose
- whether to evict blocks belonging to this file
IOException
public void deleteStoreFile() throws IOException
IOException
public void markCompactedAway()
public String toStringDetailed()
StoreFile
toStringDetailed
in interface StoreFile
private boolean isSkipResetSeqId(byte[] skipResetSeqId)
skipResetSeqId
- The byte array of boolean.
public OptionalLong getMinimumTimestamp()
StoreFile
getMinimumTimestamp
in interface StoreFile
public OptionalLong getMaximumTimestamp()
StoreFile
getMaximumTimestamp
in interface StoreFile
Set<String> getCompactedStoreFiles()
Copyright © 2007–2020 The Apache Software Foundation. All rights reserved.