package demos;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class Grep {

    /** Configuration key under which run() passes the search word to the mappers. */
    private static final String WORD_CONFIG = "word";

    /**
     * Emits every input line that contains the configured word, keyed by the
     * line's byte offset in the input file.
     */
    public static class GrepMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, LongWritable, Text> {

        private final Text outputLine = new Text();
        private String wordToFind;

        @Override
        public void configure(JobConf conf) {
            // Read the search word that run() stored in the job configuration.
            wordToFind = conf.get(WORD_CONFIG);
        }

        @Override
        public void map(LongWritable key, Text value,
                OutputCollector<LongWritable, Text> output, Reporter reporter)
                throws IOException {
            String line = value.toString();
            if (line.contains(wordToFind)) {
                outputLine.set(line);
                output.collect(key, outputLine);
            }
        }
    }

    /**
     * Pass-through reducer: forwards each matching line unchanged, so the job
     * output contains one record per match. Defined here so the listing is
     * self-contained and compiles as-is.
     */
    public static class NoOpReducer extends MapReduceBase
            implements Reducer<LongWritable, Text, LongWritable, Text> {

        @Override
        public void reduce(LongWritable key, Iterator<Text> values,
                OutputCollector<LongWritable, Text> output, Reporter reporter)
                throws IOException {
            while (values.hasNext()) {
                output.collect(key, values.next());
            }
        }
    }

    public static void run(String word, String input, String output) {
        JobConf conf = new JobConf(Grep.class);
        conf.setJobName("grep");
        conf.set(WORD_CONFIG, word);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);
        conf.setMapperClass(GrepMapper.class);
        conf.setReducerClass(NoOpReducer.class);

        // Specify input and output directories.
        FileInputFormat.addInputPath(conf, new Path(input));
        FileOutputFormat.setOutputPath(conf, new Path(output));

        try {
            JobClient.runJob(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        if (args.length != 3) {
            System.err.println("Usage: grep <word> <input path> <output path>");
        } else {
            run(args[0], args[1], args[2]);
        }
    }
}
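
// Example invocation, as a sketch: the jar name "demos.jar" and the HDFS
// paths below are illustrative assumptions, not taken from the source.
//
//   hadoop jar demos.jar demos.Grep hadoop /user/me/books /user/me/grep-out
//
// Every line under /user/me/books containing the word "hadoop" is written to
// part files under /user/me/grep-out, keyed by the line's byte offset in its
// input file. The output directory must not already exist, or the job fails.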