Hadoop案例实现之计算单词出现的频数
Joanna.Y
2024-05-22 00:07:40
最佳回答
1 编写Java代码,实现map函数以及reduce函数

package com.paic.el**.test;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class WordCount {

    public static class WordCountMap extends
            Mapper<LongWritable, Text, Text, IntWritable> {

        private final IntWritable one = new IntWritable(1);
        private Text word = new Text();

        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            StringTokenizer token = new StringTokenizer(line);
            while (token.hasMoreTokens()) {
                word.set(token.nextToken());
                context.write(word, one);
            }
        }
    }

    public static class WordCountReduce extends
            Reducer<Text, IntWritable, Text, IntWritable> {

        public void reduce(Text key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = new Job(conf);
        job.setJarByClass(WordCount.class);
        job.setJobName("wordcount");
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setMapperClass(WordCountMap.class);
        job.setReducerClass(WordCountReduce.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.waitForCompletion(true);
    }
}

2 打包成jar文件并上传到远程云主机。
0 如何将java源码打包成可执行的jar文件win&linux
3 将文件通过ssh传到远程来
4 在hadoop中创建文件夹,并将linux 主机的内容上传到hdfs中。
5 查看是否上传成功。
6 执行。
7 执行过程输出
8 查看结果:
8 该信息非法爬取自百度经验
9 输入的文本信息:
end 20210311