@InterfaceAudience.Public @InterfaceStability.Stable public class TableMapReduceUtil extends Object
Utility for `TableMapper` and `TableReducer`.
Constructor and Description |
---|
TableMapReduceUtil() |
Modifier and Type | Method and Description |
---|---|
static void | `addDependencyJars(org.apache.hadoop.conf.Configuration conf, Class<?>... classes)` Add the jars containing the given classes to the job's configuration such that JobClient will ship them to the cluster and add them to the DistributedCache. |
static void | `addDependencyJars(org.apache.hadoop.mapreduce.Job job)` Add the HBase dependency jars as well as jars for any of the configured job classes to the job configuration, so that JobClient will ship them to the cluster and add them to the DistributedCache. |
static void | `initCredentials(org.apache.hadoop.mapreduce.Job job)` |
static void | `initTableMapperJob(byte[] table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job)` Use this before submitting a TableMap job. |
static void | `initTableMapperJob(byte[] table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job, boolean addDependencyJars)` Use this before submitting a TableMap job. |
static void | `initTableMapperJob(byte[] table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job, boolean addDependencyJars, Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormatClass)` Use this before submitting a TableMap job. |
static void | `initTableMapperJob(List<Scan> scans, Class<? extends TableMapper> mapper, Class<? extends org.apache.hadoop.io.WritableComparable> outputKeyClass, Class<? extends org.apache.hadoop.io.Writable> outputValueClass, org.apache.hadoop.mapreduce.Job job)` Use this before submitting a Multi TableMap job. |
static void | `initTableMapperJob(List<Scan> scans, Class<? extends TableMapper> mapper, Class<? extends org.apache.hadoop.io.WritableComparable> outputKeyClass, Class<? extends org.apache.hadoop.io.Writable> outputValueClass, org.apache.hadoop.mapreduce.Job job, boolean addDependencyJars)` Use this before submitting a Multi TableMap job. |
static void | `initTableMapperJob(String table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job)` Use this before submitting a TableMap job. |
static void | `initTableMapperJob(String table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job, boolean addDependencyJars)` Use this before submitting a TableMap job. |
static void | `initTableMapperJob(String table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job, boolean addDependencyJars, Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormatClass)` Use this before submitting a TableMap job. |
static void | `initTableReducerJob(String table, Class<? extends TableReducer> reducer, org.apache.hadoop.mapreduce.Job job)` Use this before submitting a TableReduce job. |
static void | `initTableReducerJob(String table, Class<? extends TableReducer> reducer, org.apache.hadoop.mapreduce.Job job, Class partitioner)` Use this before submitting a TableReduce job. |
static void | `initTableReducerJob(String table, Class<? extends TableReducer> reducer, org.apache.hadoop.mapreduce.Job job, Class partitioner, String quorumAddress, String serverClass, String serverImpl)` Use this before submitting a TableReduce job. |
static void | `initTableReducerJob(String table, Class<? extends TableReducer> reducer, org.apache.hadoop.mapreduce.Job job, Class partitioner, String quorumAddress, String serverClass, String serverImpl, boolean addDependencyJars)` Use this before submitting a TableReduce job. |
static void | `limitNumReduceTasks(String table, org.apache.hadoop.mapreduce.Job job)` Ensures that the given number of reduce tasks for the given job configuration does not exceed the number of regions for the given table. |
static void | `setNumReduceTasks(String table, org.apache.hadoop.mapreduce.Job job)` Sets the number of reduce tasks for the given job configuration to the number of regions the given table has. |
static void | `setScannerCaching(org.apache.hadoop.mapreduce.Job job, int batchSize)` Sets the number of rows to return and cache with each scanner iteration. |
`public static void initTableMapperJob(String table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job) throws IOException`

Parameters:
- `table` - The table name to read from.
- `scan` - The scan instance with the columns, time range etc.
- `mapper` - The mapper class to use.
- `outputKeyClass` - The class of the output key.
- `outputValueClass` - The class of the output value.
- `job` - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.

Throws:
- `IOException` - When setting up the details fails.

`public static void initTableMapperJob(byte[] table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job) throws IOException`

Parameters:
- `table` - Binary representation of the table name to read from.
- `scan` - The scan instance with the columns, time range etc.
- `mapper` - The mapper class to use.
- `outputKeyClass` - The class of the output key.
- `outputValueClass` - The class of the output value.
- `job` - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.

Throws:
- `IOException` - When setting up the details fails.

`public static void initTableMapperJob(String table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job, boolean addDependencyJars, Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormatClass) throws IOException`

Parameters:
- `table` - The table name to read from.
- `scan` - The scan instance with the columns, time range etc.
- `mapper` - The mapper class to use.
- `outputKeyClass` - The class of the output key.
- `outputValueClass` - The class of the output value.
- `job` - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.
- `addDependencyJars` - upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars).

Throws:
- `IOException` - When setting up the details fails.

`public static void initTableMapperJob(byte[] table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job, boolean addDependencyJars, Class<? extends org.apache.hadoop.mapreduce.InputFormat> inputFormatClass) throws IOException`

Parameters:
- `table` - Binary representation of the table name to read from.
- `scan` - The scan instance with the columns, time range etc.
- `mapper` - The mapper class to use.
- `outputKeyClass` - The class of the output key.
- `outputValueClass` - The class of the output value.
- `job` - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.
- `addDependencyJars` - upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars).
- `inputFormatClass` - The class of the input format.

Throws:
- `IOException` - When setting up the details fails.

`public static void initTableMapperJob(byte[] table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job, boolean addDependencyJars) throws IOException`

Parameters:
- `table` - Binary representation of the table name to read from.
- `scan` - The scan instance with the columns, time range etc.
- `mapper` - The mapper class to use.
- `outputKeyClass` - The class of the output key.
- `outputValueClass` - The class of the output value.
- `job` - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.
- `addDependencyJars` - upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars).

Throws:
- `IOException` - When setting up the details fails.

`public static void initTableMapperJob(String table, Scan scan, Class<? extends TableMapper> mapper, Class<?> outputKeyClass, Class<?> outputValueClass, org.apache.hadoop.mapreduce.Job job, boolean addDependencyJars) throws IOException`

Parameters:
- `table` - The table name to read from.
- `scan` - The scan instance with the columns, time range etc.
- `mapper` - The mapper class to use.
- `outputKeyClass` - The class of the output key.
- `outputValueClass` - The class of the output value.
- `job` - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.
- `addDependencyJars` - upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars).

Throws:
- `IOException` - When setting up the details fails.

`public static void initTableMapperJob(List<Scan> scans, Class<? extends TableMapper> mapper, Class<? extends org.apache.hadoop.io.WritableComparable> outputKeyClass, Class<? extends org.apache.hadoop.io.Writable> outputValueClass, org.apache.hadoop.mapreduce.Job job) throws IOException`

Parameters:
- `scans` - The list of `Scan` objects to read from.
- `mapper` - The mapper class to use.
- `outputKeyClass` - The class of the output key.
- `outputValueClass` - The class of the output value.
- `job` - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.

Throws:
- `IOException` - When setting up the details fails.

`public static void initTableMapperJob(List<Scan> scans, Class<? extends TableMapper> mapper, Class<? extends org.apache.hadoop.io.WritableComparable> outputKeyClass, Class<? extends org.apache.hadoop.io.Writable> outputValueClass, org.apache.hadoop.mapreduce.Job job, boolean addDependencyJars) throws IOException`

Parameters:
- `scans` - The list of `Scan` objects to read from.
- `mapper` - The mapper class to use.
- `outputKeyClass` - The class of the output key.
- `outputValueClass` - The class of the output value.
- `job` - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.
- `addDependencyJars` - upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars).

Throws:
- `IOException` - When setting up the details fails.

`public static void initCredentials(org.apache.hadoop.mapreduce.Job job) throws IOException`

Throws:
- `IOException`

`public static void initTableReducerJob(String table, Class<? extends TableReducer> reducer, org.apache.hadoop.mapreduce.Job job) throws IOException`

Parameters:
- `table` - The output table.
- `reducer` - The reducer class to use.
- `job` - The current job to adjust.

Throws:
- `IOException` - When determining the region count fails.

`public static void initTableReducerJob(String table, Class<? extends TableReducer> reducer, org.apache.hadoop.mapreduce.Job job, Class partitioner) throws IOException`

Parameters:
- `table` - The output table.
- `reducer` - The reducer class to use.
- `job` - The current job to adjust.
- `partitioner` - Partitioner to use. Pass `null` to use default partitioner.

Throws:
- `IOException` - When determining the region count fails.

`public static void initTableReducerJob(String table, Class<? extends TableReducer> reducer, org.apache.hadoop.mapreduce.Job job, Class partitioner, String quorumAddress, String serverClass, String serverImpl) throws IOException`

Parameters:
- `table` - The output table.
- `reducer` - The reducer class to use.
- `job` - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.
- `partitioner` - Partitioner to use. Pass `null` to use default partitioner.
- `quorumAddress` - Distant cluster to write to; default is null for output to the cluster that is designated in `hbase-site.xml`. Set this String to the zookeeper ensemble of an alternate remote cluster when you would have the reduce write to a cluster other than the default; e.g. when copying tables between clusters, the source would be designated by `hbase-site.xml` and this param would have the ensemble address of the remote cluster. The format to pass is particular. Pass `<hbase.zookeeper.quorum>:<hbase.zookeeper.client.port>:<zookeeper.znode.parent>` such as `server,server2,server3:2181:/hbase`.
- `serverClass` - redefined `hbase.regionserver.class`
- `serverImpl` - redefined `hbase.regionserver.impl`

Throws:
- `IOException` - When determining the region count fails.

`public static void initTableReducerJob(String table, Class<? extends TableReducer> reducer, org.apache.hadoop.mapreduce.Job job, Class partitioner, String quorumAddress, String serverClass, String serverImpl, boolean addDependencyJars) throws IOException`

Parameters:
- `table` - The output table.
- `reducer` - The reducer class to use.
- `job` - The current job to adjust. Make sure the passed job is carrying all necessary HBase configuration.
- `partitioner` - Partitioner to use. Pass `null` to use default partitioner.
- `quorumAddress` - Distant cluster to write to; default is null for output to the cluster that is designated in `hbase-site.xml`. Set this String to the zookeeper ensemble of an alternate remote cluster when you would have the reduce write to a cluster other than the default; e.g. when copying tables between clusters, the source would be designated by `hbase-site.xml` and this param would have the ensemble address of the remote cluster. The format to pass is particular. Pass `<hbase.zookeeper.quorum>:<hbase.zookeeper.client.port>:<zookeeper.znode.parent>` such as `server,server2,server3:2181:/hbase`.
- `serverClass` - redefined `hbase.regionserver.class`
- `serverImpl` - redefined `hbase.regionserver.impl`
- `addDependencyJars` - upload HBase jars and jars for any of the configured job classes via the distributed cache (tmpjars).

Throws:
- `IOException` - When determining the region count fails.

`public static void limitNumReduceTasks(String table, org.apache.hadoop.mapreduce.Job job) throws IOException`

Parameters:
- `table` - The table to get the region count for.
- `job` - The current job to adjust.

Throws:
- `IOException` - When retrieving the table details fails.

`public static void setNumReduceTasks(String table, org.apache.hadoop.mapreduce.Job job) throws IOException`

Parameters:
- `table` - The table to get the region count for.
- `job` - The current job to adjust.

Throws:
- `IOException` - When retrieving the table details fails.

`public static void setScannerCaching(org.apache.hadoop.mapreduce.Job job, int batchSize)`

Parameters:
- `job` - The current job to adjust.
- `batchSize` - The number of rows to return in batch with each scanner iteration.

`public static void addDependencyJars(org.apache.hadoop.mapreduce.Job job) throws IOException`

Throws:
- `IOException`

`public static void addDependencyJars(org.apache.hadoop.conf.Configuration conf, Class<?>... classes) throws IOException`

Throws:
- `IOException`

Copyright © 2013 The Apache Software Foundation. All rights reserved.