@InterfaceAudience.Private public class TableSnapshotInputFormatImpl extends Object
| Modifier and Type | Class and Description | 
|---|---|
| static class  | TableSnapshotInputFormatImpl.InputSplit — Implementation class for InputSplit logic common between mapred and mapreduce. | 
| static class  | TableSnapshotInputFormatImpl.RecordReader — Implementation class for RecordReader logic common between mapred and mapreduce. | 
| Modifier and Type | Field and Description | 
|---|---|
| private static float | DEFAULT_LOCALITY_CUTOFF_MULTIPLIER | 
| private static String | LOCALITY_CUTOFF_MULTIPLIER | 
| static org.slf4j.Logger | LOG | 
| static String | NUM_SPLITS_PER_REGION — For MapReduce jobs running multiple mappers per region, determines
 the number of splits to generate per region. | 
| protected static String | RESTORE_DIR_KEY | 
| static boolean | SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT | 
| static String | SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_KEY — Whether to calculate the block location for splits. | 
| static String | SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT — In some scenarios, scan a limited number of rows on each InputSplit for sampling data extraction. | 
| private static String | SNAPSHOT_NAME_KEY | 
| static String | SPLIT_ALGO — For MapReduce jobs running multiple mappers per region, determines
 what split algorithm we should be using to find split points for scanners. | 
| Constructor and Description | 
|---|
| TableSnapshotInputFormatImpl() | 
| Modifier and Type | Method and Description | 
|---|---|
| private static List<String> | calculateLocationsForInputSplit(org.apache.hadoop.conf.Configuration conf,
                               TableDescriptor htd,
                               HRegionInfo hri,
                               org.apache.hadoop.fs.Path tableDir,
                               boolean localityEnabled) — Compute block locations for snapshot files (which will get the locations for referred hfiles)
 only when localityEnabled is true. | 
| static Scan | extractScanFromConf(org.apache.hadoop.conf.Configuration conf) | 
| static List<String> | getBestLocations(org.apache.hadoop.conf.Configuration conf,
                HDFSBlocksDistribution blockDistribution) | 
| private static List<String> | getBestLocations(org.apache.hadoop.conf.Configuration conf,
                HDFSBlocksDistribution blockDistribution,
                int numTopsAtMost) — This computes the locations to be passed from the InputSplit. | 
| static List<HRegionInfo> | getRegionInfosFromManifest(SnapshotManifest manifest) | 
| static SnapshotManifest | getSnapshotManifest(org.apache.hadoop.conf.Configuration conf,
                   String snapshotName,
                   org.apache.hadoop.fs.Path rootDir,
                   org.apache.hadoop.fs.FileSystem fs) | 
| private static String | getSnapshotName(org.apache.hadoop.conf.Configuration conf) | 
| static RegionSplitter.SplitAlgorithm | getSplitAlgo(org.apache.hadoop.conf.Configuration conf) | 
| static List<TableSnapshotInputFormatImpl.InputSplit> | getSplits(org.apache.hadoop.conf.Configuration conf) | 
| static List<TableSnapshotInputFormatImpl.InputSplit> | getSplits(Scan scan,
         SnapshotManifest manifest,
         List<HRegionInfo> regionManifests,
         org.apache.hadoop.fs.Path restoreDir,
         org.apache.hadoop.conf.Configuration conf) | 
| static List<TableSnapshotInputFormatImpl.InputSplit> | getSplits(Scan scan,
         SnapshotManifest manifest,
         List<HRegionInfo> regionManifests,
         org.apache.hadoop.fs.Path restoreDir,
         org.apache.hadoop.conf.Configuration conf,
         RegionSplitter.SplitAlgorithm sa,
         int numSplits) | 
| static void | setInput(org.apache.hadoop.conf.Configuration conf,
        String snapshotName,
        org.apache.hadoop.fs.Path restoreDir) — Configures the job to use TableSnapshotInputFormat to read from a snapshot. | 
| static void | setInput(org.apache.hadoop.conf.Configuration conf,
        String snapshotName,
        org.apache.hadoop.fs.Path restoreDir,
        RegionSplitter.SplitAlgorithm splitAlgo,
        int numSplitsPerRegion) — Configures the job to use TableSnapshotInputFormat to read from a snapshot. | 
public static final org.slf4j.Logger LOG
private static final String SNAPSHOT_NAME_KEY
protected static final String RESTORE_DIR_KEY
private static final String LOCALITY_CUTOFF_MULTIPLIER
private static final float DEFAULT_LOCALITY_CUTOFF_MULTIPLIER
public static final String SPLIT_ALGO
public static final String NUM_SPLITS_PER_REGION
public static final String SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_KEY
public static final boolean SNAPSHOT_INPUTFORMAT_LOCALITY_ENABLED_DEFAULT
public static final String SNAPSHOT_INPUTFORMAT_ROW_LIMIT_PER_INPUTSPLIT
public TableSnapshotInputFormatImpl()
public static List<TableSnapshotInputFormatImpl.InputSplit> getSplits(org.apache.hadoop.conf.Configuration conf) throws IOException
Throws: IOException
public static RegionSplitter.SplitAlgorithm getSplitAlgo(org.apache.hadoop.conf.Configuration conf) throws IOException
Throws: IOException
public static List<HRegionInfo> getRegionInfosFromManifest(SnapshotManifest manifest)
public static SnapshotManifest getSnapshotManifest(org.apache.hadoop.conf.Configuration conf, String snapshotName, org.apache.hadoop.fs.Path rootDir, org.apache.hadoop.fs.FileSystem fs) throws IOException
Throws: IOException
public static Scan extractScanFromConf(org.apache.hadoop.conf.Configuration conf) throws IOException
Throws: IOException
public static List<TableSnapshotInputFormatImpl.InputSplit> getSplits(Scan scan, SnapshotManifest manifest, List<HRegionInfo> regionManifests, org.apache.hadoop.fs.Path restoreDir, org.apache.hadoop.conf.Configuration conf) throws IOException
Throws: IOException
public static List<TableSnapshotInputFormatImpl.InputSplit> getSplits(Scan scan, SnapshotManifest manifest, List<HRegionInfo> regionManifests, org.apache.hadoop.fs.Path restoreDir, org.apache.hadoop.conf.Configuration conf, RegionSplitter.SplitAlgorithm sa, int numSplits) throws IOException
Throws: IOException
private static List<String> calculateLocationsForInputSplit(org.apache.hadoop.conf.Configuration conf, TableDescriptor htd, HRegionInfo hri, org.apache.hadoop.fs.Path tableDir, boolean localityEnabled) throws IOException
Throws: IOException
private static List<String> getBestLocations(org.apache.hadoop.conf.Configuration conf, HDFSBlocksDistribution blockDistribution, int numTopsAtMost)
public static List<String> getBestLocations(org.apache.hadoop.conf.Configuration conf, HDFSBlocksDistribution blockDistribution)
private static String getSnapshotName(org.apache.hadoop.conf.Configuration conf)
public static void setInput(org.apache.hadoop.conf.Configuration conf, String snapshotName, org.apache.hadoop.fs.Path restoreDir) throws IOException
Parameters:
conf - the job to configure
snapshotName - the name of the snapshot to read from
restoreDir - a temporary directory to restore the snapshot into. Current user should have
          write permissions to this directory, and this should not be a subdirectory of rootdir.
          After the job is finished, restoreDir can be deleted.
Throws: IOException - if an error occurs
public static void setInput(org.apache.hadoop.conf.Configuration conf, String snapshotName, org.apache.hadoop.fs.Path restoreDir, RegionSplitter.SplitAlgorithm splitAlgo, int numSplitsPerRegion) throws IOException
Parameters:
conf - the job to configure
snapshotName - the name of the snapshot to read from
restoreDir - a temporary directory to restore the snapshot into. Current user should have
          write permissions to this directory, and this should not be a subdirectory of rootdir.
          After the job is finished, restoreDir can be deleted.
numSplitsPerRegion - how many input splits to generate per region
splitAlgo - SplitAlgorithm to be used when generating InputSplits
Throws: IOException - if an error occurs

Copyright © 2007–2021 The Apache Software Foundation. All rights reserved.