這篇文章主要介紹了Hadoop中如何實現GenericWritable,具有一定借鑒價值,感興趣的朋友可以參考下,希望大家閱讀完這篇文章之後大有收穫,下面讓小編帶著大家一起了解一下。
package com.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.GenericWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Word count over two inputs whose words are delimited differently, demonstrating
 * {@link GenericWritable} as a single map-output value type that can carry more than one
 * concrete {@link Writable} class.
 *
 * <p>Scenario: two files use different word separators, and the job counts how often each
 * word occurs across both files.
 *
 * <pre>
 * Source 1, comma separated (text1.txt):
 *   hello,what
 *   you,haha
 * Source 2, tab separated (text2.txt):
 *   girl	boy
 *   father	mother
 * </pre>
 */
public class WordCountGenericWritable extends Configured implements Tool {

    /** Comma-separated input, read with {@link TextInputFormat} and mapped by {@link Map1}. */
    private static final String COMMA_SEPARATED_INPUT = "hdfs://grid131:9000/text1.txt";

    /** Tab-separated input, read with {@link KeyValueTextInputFormat} and mapped by {@link Map2}. */
    private static final String TAB_SEPARATED_INPUT = "hdfs://grid131:9000/text2.txt";

    /**
     * Mapper for the comma-separated file: emits every comma-delimited token of a line with a
     * count of 1 wrapped as a {@link LongWritable}.
     */
    public static class Map1 extends Mapper<LongWritable, Text, Text, MyGenericWritable> {

        // Reused across records; context.write() is handed the same instances each time.
        private final Text word = new Text();
        private final MyGenericWritable one = new MyGenericWritable(new LongWritable(1));

        /**
         * Splits the line on commas and writes one {@code (token, 1)} pair per token.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of the input file
         * @param context sink for the emitted pairs
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer tokens = new StringTokenizer(value.toString(), ",");
            while (tokens.hasMoreTokens()) {
                word.set(tokens.nextToken());
                context.write(word, one);
            }
        }
    }

    /**
     * Mapper for the tab-separated file: the input format delivers each line as a key/value
     * pair, and both halves are emitted as words with a count of "1" wrapped as a {@link Text}.
     */
    public static class Map2 extends Mapper<Text, Text, Text, MyGenericWritable> {

        // Reused across records; deliberately a Text so the reducer sees a second value type.
        private final MyGenericWritable one = new MyGenericWritable(new Text("1"));

        /**
         * Writes {@code (key, "1")} and {@code (value, "1")}.
         *
         * @param key     first half of the input record
         * @param value   second half of the input record
         * @param context sink for the emitted pairs
         */
        @Override
        public void map(Text key, Text value, Context context)
                throws IOException, InterruptedException {
            context.write(key, one);
            context.write(value, one);
        }
    }

    /**
     * Sums the counts for a word, unwrapping each {@link MyGenericWritable} according to the
     * concrete type it carries.
     */
    public static class Reduce extends Reducer<Text, MyGenericWritable, Text, IntWritable> {

        /**
         * Adds up all values for {@code key} and writes {@code (key, total)}.
         *
         * @param key     the word
         * @param values  wrapped counts; either a {@link LongWritable} or a {@link Text}
         *                holding a decimal number
         * @param context sink for the total
         * @throws NumberFormatException if a {@link Text} value is not a parsable long
         */
        @Override
        public void reduce(Text key, Iterable<MyGenericWritable> values, Context context)
                throws IOException, InterruptedException {
            int count = 0;
            for (MyGenericWritable wrapper : values) {
                Writable value = wrapper.get();
                // A wrapped value has exactly one concrete type, so the branches are exclusive.
                if (value instanceof LongWritable) {
                    count += (int) ((LongWritable) value).get();
                } else if (value instanceof Text) {
                    count += (int) Long.parseLong(value.toString());
                }
            }
            context.write(key, new IntWritable(count));
        }
    }

    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args command-line arguments; {@code args[1]} is the output path.
     *             {@code args[0]} is not read because both input paths are fixed.
     * @return 0 if the job succeeded, 1 if it failed, 2 if too few arguments were given
     */
    @Override
    public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        if (args == null || args.length < 2) {
            System.err.println("Usage: " + WordCountGenericWritable.class.getSimpleName()
                    + " <ignored> <output path>");
            return 2;
        }

        Configuration conf = getConf();
        Job job = Job.getInstance(conf);
        job.setJobName(WordCountGenericWritable.class.getSimpleName());
        job.setJarByClass(WordCountGenericWritable.class);

        // Each input gets its own input format and mapper.
        MultipleInputs.addInputPath(job, new Path(COMMA_SEPARATED_INPUT),
                TextInputFormat.class, Map1.class);
        MultipleInputs.addInputPath(job, new Path(TAB_SEPARATED_INPUT),
                KeyValueTextInputFormat.class, Map2.class);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.setReducerClass(Reduce.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // When the map output types differ from the reduce output types,
        // the map output types have to be set explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MyGenericWritable.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Command-line entry point; exits the JVM with the code returned by {@link #run(String[])}.
     *
     * @param args forwarded to {@link ToolRunner}
     */
    public static void main(String[] args) throws Exception {
        // The tool to run is this class; the file defines no class named "WordCount".
        int exit = ToolRunner.run(new WordCountGenericWritable(), args);
        System.exit(exit);
    }
}

/**
 * Map-output value wrapper that can hold either a {@link LongWritable} or a {@link Text}.
 */
class MyGenericWritable extends GenericWritable {

    // NOTE(review): per the GenericWritable documentation the position of a class in this
    // array identifies it in the serialized form, so keep the order stable.
    @SuppressWarnings("unchecked") // generic array creation is not expressible; elements are Writables
    private static final Class<? extends Writable>[] TYPES =
            new Class[] {LongWritable.class, Text.class};

    /** No-argument constructor, kept so the class can be instantiated without a value. */
    public MyGenericWritable() {
    }

    /** Wraps a {@link LongWritable}. */
    public MyGenericWritable(LongWritable longWritable) {
        super.set(longWritable);
    }

    /** Wraps a {@link Text}. */
    public MyGenericWritable(Text text) {
        super.set(text);
    }

    /** Returns the concrete classes this wrapper may carry. */
    @Override
    protected Class<? extends Writable>[] getTypes() {
        return TYPES;
    }
}
感謝你能夠認真閱讀完這篇文章,希望小編分享的“hadoop中如何實現GenericWritable”這篇文章對大家有幫助,同時也希望大家多多支持億速云,關注億速云行業資訊頻道,更多相關知識等著你來學習!
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯繫站長郵箱:is@yisu.com進行舉報,並提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。