## MapReduce入门案例——某东手机和电脑销售数据分析

MapReduce入门案例——某东手机和电脑销售数据分析

简介

利用Python爬虫从平台爬取部分时段的销售数据,按照一定格式存储在文本文档(input目录下的computerfinal.txt和Pone)中。然后通过MapReduce进行数据分析,并将处理结果输出到output目录下的part-r-00000文件(其余文件为crc校验信息等),最后利用前端页面展示分析结果。

工程目录

在这里插入图片描述

数据源

数据量大概数万条,手机电脑分开存储。

电脑数据computerfinal.txt 部分数据如下

8999.00#--惠普(HP)暗影精灵9 Intel 16.1英寸游戏本 #--电脑(13代i9-13900HX RTX4060 16G 1TBSSD 2.5K 240Hz)#--惠普(HP)OMEN暗影精灵京东自营旗舰店#--1#--2023-5-12
9999.00#--联想(Lenovo)拯救者Y9000P 2023 英特尔酷睿i9 16英寸游戏#--电脑(13代i9-13900HX 16G 1T RTX4060 2.5k 240Hz高色域)#--联想京东自营旗舰店#--2#--2023-5-12
6999.00#--联想(Lenovo)拯救者Y7000P 2023 英特尔酷睿 16英寸电竞游戏#--电脑(13代i5-13500H 16G 1T RTX4050 2.5K 165Hz高色域)#--联想京东自营旗舰店#--3#--2023-5-12
6499.00#--机械革命(MECHREVO)极光Pro 15.6英寸游戏本 #--电脑(i7-12650H 16G 512G RTX4060 165HZ 2.5K屏)#--机械革命京东自营官方旗舰店#--4#--2023-5-12
7999.00#--联想(Lenovo)拯救者Y7000P 2023 英特尔酷睿i7 16英寸电竞游戏#--电脑(13代i7-13700H 16G 1T RTX4060 2.5K高刷高色域)#--联想京东自营旗舰店#--5#--2023-5-12
7199.00#--AppleMacBookAir【教育优惠】13.3 8核M1芯片(7核图形处理器) 8G 256G SSD 深空灰 #--电脑 MGN63CH/A#--Apple产品京东自营旗舰店#--6#--2023-5-12
7098.00#--联想拯救者R9000P 16英寸游戏#--电脑(8核16线程R7-6800H 16G 512G RTX3060 2.5k 165Hz高色域)灰#--联想京东自营旗舰店#--7#--2023-5-12
12999.00#--ROG枪神7 Plus 第13代英特尔酷睿i9 18英寸 星云屏 电竞游戏本#--电脑(i9-13980HX 液金导热 16G 1T RTX4060 2.5K 240Hz P3广色域)#--玩家国度ROG京东自营官方旗舰店#--8#--2023-5-12

手机数据Pone 部分数据如下

6899.00#--Apple iPhone 14 (A2884) 支持移动联通电信5G 双卡双待#-- 星光色 256G#--大盛魁手机数码专营店#--22#--2023-5-20
7899.00#--Apple iPhone 14 Pro (A2892) 128GB 暗紫色 支持移动联通电信5G 双卡双待#--【大王卡】#--中国联通京东自营旗舰店#--28#--2023-5-20
2298.00#--荣耀80 1.6亿像素超清主摄 AI Vlog视频大师 全新Magic OS 7.0系统 5G#-- 8GB+256GB 墨玉青#--荣耀京东自营旗舰店#--30#--2023-5-20
8719.00#--Apple 苹果 iPhone 14 Pro Max(A2896) 全网通5G#-- 深空黑色 全网通 256G#--零疆旗舰店#--34#--2023-5-20
9899.00#--Apple iPhone 14 Pro Max (A2896) 256GB 暗紫色 支持移动联通电信5G 双卡双待#--【活动专享】#--中国移动京东自营官方旗舰店#--36#--2023-5-20
8899.00#--Apple iPhone 14 Pro (A2892) 支持移动联通电信5G 双卡双待#-- 银色 256G#--大盛魁手机数码专营店#--37#--2023-5-20
9899.00#--Apple iPhone 14 Pro Max(A2896)全网通智能5G#-- 暗紫色 256GB#--瑞思拜手机专营店#--40#--2023-5-20
7999.00#--Apple 苹果 iphone 14 pro 5G#-- 银色 256G#--创优数码专营店#--42#--2023-5-20
8654.00#--【12期俛息套餐可选】Apple 苹果 iPhone 14 Pro Max 5G 双卡双待#-- 深空黑色 256G#--头号卖家旗舰店#--44#--2023-5-20
8899.00#--Apple iPhone 14 Pro(A2892)全网通智能5G#-- 暗紫色 256GB#--瑞思拜手机专营店#--45#--2023-5-20

核心MapReduce代码

  • HomemadeOrNot:分别输出该时间段内平台售出的国产手机和非国产手机数量

    HomemadeOrNotDriver

    package com.csx.HomemadeOrNot;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    import java.io.IOException;
    public class HomemadeOrNotDriver {public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {Configuration conf = new Configuration();Job job = Job.getInstance(conf);job.setJarByClass(HomemadeOrNotDriver.class);job.setMapperClass(HomemadeOrNotMapper.class);job.setReducerClass(HomemadeOrNotReducer.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.setInputPaths(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt"));FileOutputFormat.setOutputPath(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx"));//输出路径不可以存在不可以存在boolean result = job.waitForCompletion(true);System.exit(result ? 0 : 1);}
    }

    HomemadeOrNotMapper

    package com.csx.HomemadeOrNot;import com.csx.Tools.Tools;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;import java.io.IOException;public class HomemadeOrNotMapper extends Mapper<LongWritable,Text, Text, IntWritable> {private Text outKey = new Text();private final static IntWritable outValue = new IntWritable(1);@Overrideprotected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {String line = value.toString();String words[] = line.split("#--");if (Tools.findBrand(words[1])) {outKey.set("国产");} else {outKey.set("非国产");}context.write(outKey, outValue);}
    }

    HomemadeOrNotReducer

    package com.csx.HomemadeOrNot;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    import java.io.IOException;public class HomemadeOrNotReducer extends Reducer<Text, IntWritable,Text,IntWritable> {private IntWritable outV = new IntWritable();@Overrideprotected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {int sum = 0;for (IntWritable value : values) {sum += value.get();}outV.set(sum);context.write(key,outV);}
    }

    输出结果:part-r-00000

    国产	16914
    非国产	24720
  • LaptopGPUdata:输出被分析数据源中不同型号主流显卡销售情况

    LaptopGPUdataDriver

    package com.csx.LaptopGPUdata;import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;public class LaptopGPUdataDriver {public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {Configuration conf = new Configuration();Job job = Job.getInstance(conf);job.setJarByClass(LaptopGPUdataDriver.class);job.setMapperClass(LaptopGPUdataMapper.class);job.setReducerClass(LaptopGPUdataReducer.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.setInputPaths(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt"));FileOutputFormat.setOutputPath(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx"));//输出路径不可以存在不可以存在boolean result = job.waitForCompletion(true);System.exit(result ? 0 : 1);}}

    LaptopGPUdataMapper

    package com.csx.LaptopGPUdata;import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;import java.io.IOException;public class LaptopGPUdataMapper extends Mapper<LongWritable, Text, Text, IntWritable> {private final static IntWritable outV = new IntWritable(1);private Text outK = new Text();@Overrideprotected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {String line = value.toString();String[] words = line.split("#--");if(words[2].contains("RTX4050") || words[2].contains("4050")){outK.set("RTX4050");} else if (words[2].contains("RTX4060") || words[2].contains("4060")) {outK.set("RTX4060");} else if (words[2].contains("RTX4070") || words[2].contains("4070")) {outK.set("RTX4070");} else if (words[2].contains("RTX4080") || words[2].contains("4080")) {outK.set("RTX4080");} else if (words[2].contains("RTX4090") || words[2].contains("4090")) {outK.set("RTX4090");} else if (words[2].contains("RTX3050") || words[2].contains("3050")) {outK.set("RTX3050");} else if (words[2].contains("RTX3060") || words[2].contains("3060")) {outK.set("RTX3060");} else if (words[2].contains("RTX3070") || words[2].contains("3070")) {outK.set("RTX3070");} else if (words[2].contains("RTX3080") || words[2].contains("3080")) {outK.set("RTX3080");} else if (words[2].contains("RTX3090") || words[2].contains("3090")) {outK.set("RTX3090");}else {outK.set("其他");}context.write(outK,outV);}
    }

    LaptopGPUdataReducer

    package com.csx.LaptopGPUdata;import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;import java.io.IOException;public class LaptopGPUdataReducer extends Reducer<Text, IntWritable, Text, IntWritable> {private IntWritable outV = new IntWritable();@Overrideprotected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {int sum = 0;for (IntWritable value:values) {sum += value.get();}outV.set(sum);context.write(key,outV);}
    }

    输出结果:part-r-00000

    RTX3050	3865
    RTX3060	1985
    RTX3070	319
    RTX3080	64
    RTX4050	1494
    RTX4060	4708
    RTX4070	984
    RTX4080	694
    RTX4090	584
    其他	26937
    
  • MostExpensiveLaptop:输出被分析时间段内笔记本电脑售价前10名的产品信息

    MostExpensiveLaptopDriver

    package com.csx.MostExpensiveLaptop;import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;public class MostExpensiveLaptopDriver {public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {Configuration conf = new Configuration();Job job = Job.getInstance(conf);job.setJarByClass(MostExpensiveLaptopDriver.class);job.setMapperClass(MostExpensiveLaptopMapper.class);job.setReducerClass(MostExpensiveLaptopReducer.class);job.setMapOutputKeyClass(LaptopInfoBean.class);job.setMapOutputValueClass(Text.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(LaptopInfoBean.class);FileInputFormat.setInputPaths(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt"));FileOutputFormat.setOutputPath(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx"));//输出路boolean result = job.waitForCompletion(true);System.exit(result ? 0 : 1);}}

    LaptopInfoBean:自定义Bean对象

    package com.csx.MostExpensiveLaptop;import org.apache.hadoop.io.Writable;
    import org.apache.hadoop.io.WritableComparable;import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;public class LaptopInfoBean implements Writable , WritableComparable<LaptopInfoBean> {private double price;private String information;public LaptopInfoBean() {}public LaptopInfoBean(double price, String information) {this.price = price;this.information = information;}public double getPrice() {return price;}public String getInformation() {return information;}public void setPrice(double price) {this.price = price;}public void setInformation(String information) {this.information = information;}@Overridepublic void write(DataOutput dataOutput) throws IOException {dataOutput.writeDouble(price);dataOutput.writeUTF(information);}@Overridepublic void readFields(DataInput dataInput) throws IOException {this.price = dataInput.readDouble();this.information = dataInput.readUTF();}@Overridepublic int compareTo(LaptopInfoBean o) {int result;// 按照总流量大小,倒序排列if (this.price > o.price) {result = -1;}else if (this.price < o.price) {result = 1;}else {//如果总流量一样的话按照上行流量再排result = 0;}return result;}@Overridepublic String toString() {return information+"------------"+price;}
    }

    MostExpensiveLaptopMapper

    package com.csx.MostExpensiveLaptop;import org.apache.hadoop.io.*;
    import org.apache.hadoop.mapreduce.Mapper;
    import java.io.IOException;
    import java.util.Iterator;
    import java.util.TreeMap;public class MostExpensiveLaptopMapper  extends Mapper<LongWritable, Text, LaptopInfoBean,Text> {LaptopInfoBean outK = new LaptopInfoBean();private Text outV = new Text();@Overrideprotected void map(LongWritable key, Text value, Mapper<LongWritable, Text, LaptopInfoBean, Text>.Context context) throws IOException, InterruptedException {String line = value.toString();String[] words = line.split("#--");outK.setPrice(Double.parseDouble(words[0]));outK.setInformation(words[1]+words[2]);context.write(outK,outV);}}

    MostExpensiveLaptopReducer

    package com.csx.MostExpensiveLaptop;import org.apache.hadoop.io.DoubleWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;import java.io.IOException;
    import java.util.Iterator;
    import java.util.TreeMap;public class MostExpensiveLaptopReducer extends Reducer<LaptopInfoBean, Text,Text,LaptopInfoBean> {private TreeMap<LaptopInfoBean,Text> tMap = new TreeMap<LaptopInfoBean, Text>();@Overrideprotected void reduce(LaptopInfoBean key, Iterable<Text> values, Reducer<LaptopInfoBean, Text, Text, LaptopInfoBean>.Context context) throws IOException, InterruptedException {for (Text val:values) {LaptopInfoBean bean = new LaptopInfoBean();bean.setInformation(key.getInformation());bean.setPrice(key.getPrice());tMap.put(bean,val);if(tMap.size()>10){tMap.remove(tMap.lastKey());}}}@Overrideprotected void cleanup(Reducer<LaptopInfoBean, Text, Text, LaptopInfoBean>.Context context) throws IOException, InterruptedException {Iterator<LaptopInfoBean> it =tMap.keySet().iterator();LaptopInfoBean k;while (it.hasNext()){k = it.next();Text val = tMap.get(k);context.write(val,k);}}
    }

    输出结果:part-r-00000

    	联想ThinkPad商用电脑P15 15.6英寸移动工作站(标配i9-11950H 64G 2T 独显16G A5000 Win11pro 4K屏)------------60999.0
    	联想电脑ThinkPad P1 2022(0FCD)16英寸高性能轻薄设计师工作站 i9-12900H 32G 2T A5500 600nit触摸4K------------54999.0
    	外星人(Alienware)全新 m18 18英寸高端游戏本i9-13980HX 64G 2T RTX4090 165Hz 高刷屏高性能电脑1996QB------------51999.0
    	外星人(alienware)m18 高端游戏本全新13代酷睿电脑18英寸电竞 1996:i9 64G 2TB 4090标配 13代处理器 Cherry键盘------------49999.0
    	外星人(Alienware)全新 m18 18英寸高端游戏本i9-13980HX 64G 2T RTX4090 165Hz 高刷屏高性能电脑1996QB------------49969.0
    	[新十三代i9]RazerBlade雷蛇灵刃18电竞游戏电脑2.5K-240Hz高刷IPS屏幕 i9-13980HX/RTX4090 官配64G内存/2T固态------------42999.0
    	雷蛇(Razer)[新十三代i9]RazerBlade雷蛇灵刃18电竞游戏电脑2.5K-240Hz高刷IPS屏幕 i9-13980HX/RTX4090 官配64G内存/2T固态------------41999.0
    	外星人(alienware)m18 高端游戏本全新13代酷睿电脑18英寸电竞 1986Q:i9HX/4080/2.5K 包鼠套装 官方标配------------41098.0
    	ROG冰刃7 双屏 16英寸 设计师高性能 游戏本电脑(R9 7945HX 32G 1T RTX4090 2.5K 240Hz MiniLED)------------40999.0
    	外星人(alienware)m18 高端游戏本全新13代酷睿电脑18英寸电竞 1986:标配加购包鼠套装 13代处理器 Cherry键盘------------40888.0
  • PhoneRAM:分析输出被分析时间段内各常见大小内存的手机售量

    PhoneRAMDriver

    package com.csx.PhoneRAM;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;public class PhoneRAMDriver {public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {Configuration conf = new Configuration();Job job = Job.getInstance(conf);job.setJarByClass(PhoneRAMDriver.class);job.setMapperClass(PhoneRAMMapper.class);job.setReducerClass(PhoneRAMReducer.class);job.setMapOutputKeyClass(Text.class);job.setMapOutputValueClass(IntWritable.class);job.setOutputKeyClass(Text.class);job.setOutputValueClass(IntWritable.class);FileInputFormat.setInputPaths(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt"));FileOutputFormat.setOutputPath(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx"));//输出路boolean result = job.waitForCompletion(true);System.exit(result ? 0 : 1);}}
    

    PhoneRAMMapper

    package com.csx.PhoneRAM;import com.csx.Tools.Tools;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;import java.io.IOException;public class PhoneRAMMapper extends Mapper <LongWritable, Text, Text, IntWritable>{private final static IntWritable outV = new IntWritable(1);private Text outK = new Text();@Overrideprotected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {String line = value.toString();String[] words = line.split("#--");if("4G"==Tools.findPhoneRAM(words[1]) || "4G"==Tools.findPhoneRAM(words[2])){outK.set("4G");} else if ("6G"== Tools.findPhoneRAM(words[1]) || "6G"==Tools.findPhoneRAM(words[2])) {outK.set("6G");} else if ("8G"==Tools.findPhoneRAM(words[1]) || "8G"==Tools.findPhoneRAM(words[2])) {outK.set("8G");} else if ("16G"==Tools.findPhoneRAM(words[1]) || "16G"==Tools.findPhoneRAM(words[2])) {outK.set("16G");}context.write(outK,outV);}
    }

    PhoneRAMReducer

    package com.csx.PhoneRAM;import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;import java.io.IOException;public class PhoneRAMReducer extends Reducer<Text, IntWritable, Text, IntWritable> {private IntWritable outV = new IntWritable();@Overrideprotected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {int sum = 0;for (IntWritable value:values) {sum += value.get();}outV.set(sum);context.write(key,outV);}
    }

    输出结果:part-r-00000

    16G	32571
    4G	3983
    6G	240
    8G	4840
    
  • MaxAndMin2:分析输出本分析时间段内,各主流手机品牌产品价格最低与最高值(数据爬取中不可避免地爬取到了一些脏数据,比如手机壳的销售信息,导致输出结果出现了很低的售价)

    MaxAndMin2Bean

    package com.csx.MaxAndMin2;import org.apache.hadoop.io.WritableComparable;import java.io.DataInput;
    import java.io.DataOutput;
    import java.io.IOException;public class MaxAndMin2Bean implements WritableComparable<MaxAndMin2Bean> {private int type;//类型表示品牌private double min;//最低价格private double max;//最高价格public MaxAndMin2Bean(int type, double min, double max) {this.type = type;this.min = min;this.max = max;}public MaxAndMin2Bean() {}public int getType() {return type;}public double getMin() {return min;}public double getMax() {return max;}public void setType(int type) {this.type = type;}public void setMin(double min) {this.min = min;}public void setMax(double max) {this.max = max;}@Overridepublic int compareTo(MaxAndMin2Bean o) {return 1;}@Overridepublic void write(DataOutput dataOutput) throws IOException {dataOutput.writeInt(type);dataOutput.writeDouble(min);dataOutput.writeDouble(max);}@Overridepublic void readFields(DataInput dataInput) throws IOException {type = dataInput.readInt();min = dataInput.readDouble();max = dataInput.readDouble();}@Overridepublic String toString() {String s = null;if (type == 0) s = "HUAWEI";else if (type == 1) s = "Apple";else if (type == 2) s = "OPPO";else if (type == 3) s = "Redmi";else if (type == 4) s = "荣耀";else if (type == 5) s = "魅族";else if (type == 6) s = "小米";else if (type == 7) s = "三星";else if (type == 8) s = "vivo";else if (type == 9) s = "realme";else if (type == 10) s = "其他";return s + "\t" + min + "\t" + max + "\t";}
    }

    MaxAndMin2Driver

    package com.csx.MaxAndMin2;import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;import java.io.IOException;public class MaxAndMin2Driver {public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {Configuration conf = new Configuration();Job job = Job.getInstance(conf);job.setJarByClass(MaxAndMin2Driver.class);job.setMapperClass(MaxAndMin2Mapper.class);job.setReducerClass(MaxAndMin2Reducer.class);job.setMapOutputKeyClass(MaxAndMin2Bean.class);job.setMapOutputValueClass(NullWritable.class);job.setOutputKeyClass(MaxAndMin2Bean.class);job.setOutputValueClass(NullWritable.class);FileInputFormat.setInputPaths(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\input\\computer\\computerfinal.txt"));FileOutputFormat.setOutputPath(job,new Path("D:\\Dream\\Hadoop编程\\学期项目\\JavaPrioject\\src\\main\\resources\\output\\csx"));//输出路输出路径不可以存在boolean result = job.waitForCompletion(true);System.exit(result ? 0 : 1);}
    }
    

    MaxAndMin2Mapper

    package com.csx.MaxAndMin2;import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;import java.io.IOException;public class MaxAndMin2Mapper extends Mapper<LongWritable, Text, MaxAndMin2Bean, NullWritable> {private double[] min = {Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE};private double[] max = {Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE};int type;@Overrideprotected void map(LongWritable key, Text value, Mapper<LongWritable, Text, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException {String line = value.toString();String[] words = line.split("#--");double price = Double.parseDouble(words[0]);if (words[1].contains("HUAWEI") || words[1].contains("华为")) {type = 0;if (price < min[type]) min[type] = price;if(price > max[type]) max[type] = price;}else if (words[1].contains("Apple") || words[1].contains("iphone") || words[1].contains("IPHONE")) {type = 1;if (price < min[type]) min[type] = price;if(price > max[type]) max[type] = price;}else if (words[1].contains("OPPO")) {type = 2;if (price < min[type]) min[type] = price;if(price > max[type]) max[type] = price;}else if (words[1].contains("Redmi")) {type = 3;if (price < min[type]) min[type] = price;if(price > max[type]) max[type] = price;}else if (words[1].contains("荣耀") || words[1].contains("HONOR")) {type = 4;if (price < min[type]) min[type] = price;if(price > max[type]) max[type] = price;}else if (words[1].contains("魅族") || words[1].contains("MEIZU")) {type = 5;if (price < min[type]) min[type] = price;if(price > max[type]) max[type] = price;}else if (words[1].contains("小米")) {type = 6;if (price < min[type]) min[type] = price;if(price > max[type]) max[type] = price;}else if (words[1].contains("三星") || 
words[1].contains("SAMSUNG")) {type = 7;if (price < min[type]) min[type] = price;if(price > max[type]) max[type] = price;}else if (words[1].contains("vivo")) {type = 8;if (price < min[type]) min[type] = price;if(price > max[type]) max[type] = price;}else if (words[1].contains("realme")) {type = 9;if (price < min[type]) min[type] = price;if(price > max[type]) max[type] = price;}else{type = 10;if (price < min[type]) min[type] = price;if(price > max[type]) max[type] = price;}}@Overrideprotected void cleanup(Mapper<LongWritable, Text, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException {for (int i = 0; i <= 10; i ++ ){MaxAndMin2Bean keyOut = new MaxAndMin2Bean(i, min[i], max[i]);context.write(keyOut, NullWritable.get());}}
    }
    

    MaxAndMin2Reducer

    package com.csx.MaxAndMin2;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Reducer;
    import java.io.IOException;public class MaxAndMin2Reducer extends Reducer<MaxAndMin2Bean, NullWritable, MaxAndMin2Bean, NullWritable> {private double[] min = {Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE, Double.MAX_VALUE};private double[] max = {Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE, Double.MIN_VALUE};@Overrideprotected void reduce(MaxAndMin2Bean key, Iterable<NullWritable> values, Reducer<MaxAndMin2Bean, NullWritable, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException {if (key.getMin() < min[key.getType()]) {min[key.getType()] = key.getMin();}if (key.getMax() > max[key.getType()]) {max[key.getType()] = key.getMax();}}@Overrideprotected void cleanup(Reducer<MaxAndMin2Bean, NullWritable, MaxAndMin2Bean, NullWritable>.Context context) throws IOException, InterruptedException {for (int i = 0; i <= 10; i ++ ){MaxAndMin2Bean keyOut = new MaxAndMin2Bean(i, min[i], max[i]);context.write(keyOut, NullWritable.get());}}
    }
    


本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!

相关文章

立即
投稿

微信公众账号

微信扫一扫加关注

返回
顶部