基于Node.js爬取数据的展示网站实现
一、实验要求
•基于第一个项目爬虫爬取的数据,完成数据展示网站。
•基本要求:
•1、用户可注册登录网站,非注册用户不可登录查看数据
•2、用户注册、登录、查询等操作记入数据库中的日志
•3、爬虫数据查询结果列表支持分页和排序
•4、用Echarts或者D3实现3个以上的数据分析图表展示在网站中
•5、实现一个管理端界面,可以查看(查看用户的操作记录)和管理(停用启用)注册用户。
二、实现过程
1.在mysql中创建三个张表格,包含:
- 新闻表
- 用户表
- 用户日志表
--之前的新闻数据表
CREATE TABLE `fetches` (`id_fetches` int(11) NOT NULL AUTO_INCREMENT,`url` varchar(200) DEFAULT NULL,`source_name` varchar(200) DEFAULT NULL,`source_encoding` varchar(45) DEFAULT NULL,`title` varchar(200) DEFAULT NULL,`keywords` varchar(200) DEFAULT NULL,`author` varchar(200) DEFAULT NULL,`publish_date` date DEFAULT NULL,`crawltime` datetime DEFAULT NULL,`content` longtext,`createtime` datetime DEFAULT CURRENT_TIMESTAMP,PRIMARY KEY (`id_fetches`),UNIQUE KEY `id_fetches_UNIQUE` (`id_fetches`),UNIQUE KEY `url_UNIQUE` (`url`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;--创建用户信息数据表
CREATE TABLE `crawl`.`user` (`id` INT UNSIGNED NOT NULL AUTO_INCREMENT,`username` VARCHAR(45) NOT NULL,`password` VARCHAR(45) NOT NULL,`registertime` datetime DEFAULT CURRENT_TIMESTAMP,`status` int(4),PRIMARY KEY (`id`),UNIQUE KEY `username_UNIQUE` (`username`))
ENGINE=InnoDB DEFAULT CHARSET=utf8;--记录用户的登陆,查询(具体查询语句)操作
CREATE TABLE `crawl`.`user_action` (`id` INT UNSIGNED NOT NULL AUTO_INCREMENT,`username` VARCHAR(45) NOT NULL,`request_time` VARCHAR(45) NOT NULL,`request_method` VARCHAR(20) NOT NULL,`request_url` VARCHAR(300) NOT NULL,`status` int(4),`remote_addr` VARCHAR(100) NOT NULL,PRIMARY KEY (`id`))
ENGINE=InnoDB DEFAULT CHARSET=utf8;
2.创建路由,当请求打过来时,会链接到对应的路由处理请求
这里包含了查询请求以及四张图标的绘制
var newsDAO = require('../dao/newsDAO');
var express = require('express');
var router = express.Router();var mywordcutModule = require('./wordcut.js');
var myfreqchangeModule = require('./freqchange.js');router.get('/search', function(request, response) {console.log(request.session['username']);//sql字符串和参数if (request.session['username']===undefined) {// response.redirect('/index.html')response.json({message:'url',result:'/index.html'});}else {var param = request.query;newsDAO.search(param,function (err, result, fields) {response.json({message:'data',result:result});})}
});router.get('/histogram', function(request, response) {//sql字符串和参数console.log(request.session['username']);//sql字符串和参数if (request.session['username']===undefined) {// response.redirect('/index.html')response.json({message:'url',result:'/index.html'});}else {var fetchSql = "select publish_date as x,count(publish_date) as y from fetches group by publish_date order by publish_date;";newsDAO.query_noparam(fetchSql, function (err, result, fields) {response.writeHead(200, {"Content-Type": "application/json","Cache-Control": "no-cache, no-store, must-revalidate","Pragma": "no-cache","Expires": 0});response.write(JSON.stringify({message:'data',result:result}));response.end();});}
});router.get('/pie', function(request, response) {//sql字符串和参数console.log(request.session['username']);//sql字符串和参数if (request.session['username']===undefined) {// response.redirect('/index.html')response.json({message:'url',result:'/index.html'});}else {var fetchSql = "select author as x,count(author) as y from fetches group by author;";newsDAO.query_noparam(fetchSql, function (err, result, fields) {response.writeHead(200, {"Content-Type": "application/json","Cache-Control": "no-cache, no-store, must-revalidate","Pragma": "no-cache","Expires": 0});// console.log(result);response.write(JSON.stringify({message:'data',result:result}));response.end();});}
});router.get('/line', function(request, response) {//sql字符串和参数console.log(request.session['username']);//sql字符串和参数if (request.session['username']===undefined) {// response.redirect('/index.html')response.json({message:'url',result:'/index.html'});}else {var keyword = '金融'; //也可以改进,接受前端提交传入的搜索词var fetchSql = "select content ,publish_date from fetches where content like'%" + keyword + "%' order by publish_date;";newsDAO.query_noparam(fetchSql, function (err, result, fields) {response.writeHead(200, {"Content-Type": "application/json","Cache-Control": "no-cache, no-store, must-revalidate","Pragma": "no-cache","Expires": 0});response.write(JSON.stringify({message:'data',result:myfreqchangeModule.freqchange(result, keyword)}));//result:myfreqchangeModule.freqchange(result, keyword)response.end();});}
});router.get('/wordcloud', function(request, response) {//sql字符串和参数console.log(request.session['username']);//sql字符串和参数if (request.session['username']===undefined) {// response.redirect('/index.html')response.json({message:'url',result:'/index.html'});}else {var fetchSql = "select content from fetches;";newsDAO.query_noparam(fetchSql, function (err, result, fields) {response.writeHead(200, {"Content-Type": "application/json","Cache-Control": "no-cache, no-store, must-revalidate","Pragma": "no-cache","Expires": 0});response.write(JSON.stringify({message:'data',result:mywordcutModule.wordcut(result)}));//返回处理过的数据response.end();});}
});module.exports = router;
用户路由如下,包含了用户登录、注册、登出以及管理
const { response } = require('express');
var express = require('express');
var router = express.Router();
var userDAO = require('../dao/userDAO');router.post('/login', function(req, res) {var username = req.body.username;var password = req.body.password;// var sess = req.session;userDAO.getByUsername(username, function (user) {if(user.length==0){res.json({msg:'用户不存在!请检查后输入'});}else {if(password===user[0].password ){if (username[0].status === 0){res.json({msg:'该用户已被禁用!'});}else{req.session['username'] = username;res.cookie('username', username);res.json({msg: 'ok'});}// res.json({msg:'ok'});}else{res.json({msg:'用户名或密码错误!请检查后输入'});}}});
});/* add users */
router.post('/register', function (req, res) {var add_user = req.body;// 先检查用户是否存在userDAO.getByUsername(add_user.username, function (user) {if (user.length != 0) {// res.render('index', {msg:'用户不存在!'});res.json({msg: '用户已存在!'});}else {userDAO.add(add_user, function (success) {res.json({msg: '成功注册!请登录'});})}});});// 退出登录
router.get('/logout', function(req, res, next){// 备注:这里用的 session-file-store 在destroy 方法里,并没有销毁cookie// 所以客户端的 cookie 还是存在,导致的问题 --> 退出登陆后,服务端检测到cookie// 然后去查找对应的 session 文件,报错// session-file-store 本身的bugreq.session.destroy(function(err) {if(err){res.json('退出登录失败');return;}// req.session.loginUser = null;res.clearCookie('username');res.json({result:'/index.html'});});
});router.get('/manage',function(req,res,next){var user = req.session["username"];if (user === undefined){response.json({message:'url',result:'/index.html'});}if (user != "shenxiaoqi"){console.log(user);res.json({msg:'你没有管理员权限',result:'/index.html'});}else{res.json({result:'/manage.html'});}})
module.exports = router;
管理路由如下,包含了管理员的查询、启动/禁用用户请求
- 我设置的管理员只有username = “shenxiaoqi” 一个用户,只有该用户有次权限操作。
- 从前端读取用户信息,得到其status,在注册时status设置为1,表示该用户正常,未被禁用,当管理员设置为禁用用户后,该用户的status被设置为0,不可登录。
var logDAO = require('../dao/logDAO');
var userDAO = require('../dao/userDAO');
var express = require('express');
var router = express.Router();router.get('/search', function(request, response) {if (request.session['username']===undefined) {response.json({message:'url',result:'/index.html'});} else {console.log(request.query.username); logDAO.search(request.query.username, function (result) {console.log(result);response.json({message:'data',result:result});});}
});router.get('/forbid', function(request, response){if (request.session["username"]===undefined) {response.json({message:'url',result:'/index.html'});} else {userDAO.getByUsername(request.session['username'], function (user) {if(user[0].username === "shenxiaoqi"){var usr = request.query.username;userDAO.getByUsername(usr, function(user){if(user.length == 0)response.json({message:'data',result:"该用户不存在!"})else{var new_state = 1 - user[0].status;userDAO.forbid(new_state, usr, function(result){//1是能运行,0则被禁用console.log("456789");});if(new_state===1) response.json({message:'data',result:"启动用户成功!"});else response.json({message:'data',result:"禁用用户成功"});}});} else {response.json({message:'url',result:'/index.html'});}});}
});module.exports = router;
3.JavaScript编写
当用户在前端界面发出请求,js监测到后会调用相应代码,接着链接到对应的路由处理请求,在主页包含了登录、注册、登出、管理功能。
const { response } = require('express');
var express = require('express');
var router = express.Router();
var userDAO = require('../dao/userDAO');router.post('/login', function(req, res) {var username = req.body.username;var password = req.body.password;// var sess = req.session;userDAO.getByUsername(username, function (user) {if(user.length==0){res.json({msg:'用户不存在!请检查后输入'});}else {if(password===user[0].password ){if (username[0].status === 0){res.json({msg:'该用户已被禁用!'});}else{req.session['username'] = username;res.cookie('username', username);res.json({msg: 'ok'});}// res.json({msg:'ok'});}else{res.json({msg:'用户名或密码错误!请检查后输入'});}}});
});/* add users */
router.post('/register', function (req, res) {var add_user = req.body;// 先检查用户是否存在userDAO.getByUsername(add_user.username, function (user) {if (user.length != 0) {// res.render('index', {msg:'用户不存在!'});res.json({msg: '用户已存在!'});}else {userDAO.add(add_user, function (success) {res.json({msg: '成功注册!请登录'});})}});});// 退出登录
router.get('/logout', function(req, res, next){// 备注:这里用的 session-file-store 在destroy 方法里,并没有销毁cookie// 所以客户端的 cookie 还是存在,导致的问题 --> 退出登陆后,服务端检测到cookie// 然后去查找对应的 session 文件,报错// session-file-store 本身的bugreq.session.destroy(function(err) {if(err){res.json('退出登录失败');return;}// req.session.loginUser = null;res.clearCookie('username');res.json({result:'/index.html'});});
});router.get('/manage',function(req,res,next){var user = req.session["username"];if (user === undefined){response.json({message:'url',result:'/index.html'});}if (user != "shenxiaoqi"){console.log(user);res.json({msg:'你没有管理员权限',result:'/index.html'});}else{res.json({result:'/manage.html'});}})
module.exports = router;
在manage.js中包含了查询数据、禁用/启用,分页
var app = angular.module('manage', []);
app.controller('manage_Ctrl', function ($scope, $http, $timeout) {// 控制查询页面是否显示$scope.showSearch = function () {$scope.isShow = true;$scope.isisshowresult = false;};$scope.logout = function () {// $http.get().then();$http.get("/users/logout").then(function (res) {window.location.href=res.data.result;},function (err) {$scope.msg = err.data;});};$scope.backToNews = function () {window.location.href="/news.html";};$scope.stopit = function () {console.log($scope.forbiduser);var fuser = $scope.forbiduser;var myurl = `/manage/forbid?username=${fuser}`;console.log(myurl);$http.get(myurl).then(function (res){console.log('123456');if(res.data.message=='data'){console.log("alert");alert(res.data.result);}else {window.location.href=res.data.result;}// alert(res.data);// alert(res.data.result);},function (err) {$scope.msg = err.data;console.log(err.data);});};// 查询数据$scope.search = function () {var user = $scope.user;console.log(user);var myurl = `/manage/search?username=${user}`;console.log(myurl);$http.get(myurl).then(function (res) {if(res.data.message=='data'){$scope.isisshowresult = true; //显示表格查询结果$scope.initPageSort(res.data.result)}else {window.location.href=res.data.result;}},function (err) {$scope.msg = err.data;});};// 分页$scope.initPageSort=function(item){$scope.pageSize=5; //每页显示的数据量,可以随意更改$scope.selPage = 1;$scope.data = item;$scope.pages = Math.ceil($scope.data.length / $scope.pageSize); //分页数$scope.pageList = [];//最多显示5页,后面6页之后不会全部列出页码来$scope.index = 1;var len = $scope.pages> 5 ? 5:$scope.pages;$scope.pageList = Array.from({length: len}, (x,i) => i+1);//设置表格数据源(分页)$scope.items = $scope.data.slice(0, $scope.pageSize);};//打印当前选中页$scope.selectPage = function (page) {//不能小于1大于最大(第一页不会有前一页,最后一页不会有后一页)if (page < 1 || page > $scope.pages) return;//最多显示分页数5,开始分页转换var pageList = [];if(page>2){for (var i = page-2; i <= $scope.pages && i < page+3; i++) {pageList.push(i);}}else {for (var i = page; i <= $scope.pages && i < page+5; i++) {pageList.push(i);}}$scope.index =(page-1)*$scope.pageSize+1;$scope.pageList = pageList;$scope.selPage = page;$scope.items = $scope.data.slice(($scope.pageSize * (page - 1)), (page * $scope.pageSize));//通过当前页数筛选出表格当前显示数据console.log("选择的页:" + page);};//设置当前选中页样式$scope.isActivePage = function (page) {return $scope.selPage == page;};//上一页$scope.Previous = function () {$scope.selectPage($scope.selPage - 1);};//下一页$scope.Next = function () {$scope.selectPage($scope.selPage + 1);};$scope.searchsortASC = function () {$scope.sorttime = '1';$scope.search();};$scope.searchsortDESC = function () {$scope.sorttime = '2';$scope.search();};});
4.前端展示
这里仅展示manage.html,该页面中,管理员可以通过搜索用户的名称来查询该用户的日志,也可以选择启动/禁用用户,由于启动或禁用用户我是通过设置state来完成的,因此我将其设置为一个按钮
<html ng-app="manage">
<head><meta charset="utf-8"><title>Administratortitle><link rel="stylesheet" href="https://cdn.staticfile.org/twitter-bootstrap/3.3.7/css/bootstrap.min.css"><script src="https://cdn.staticfile.org/jquery/2.1.1/jquery.min.js">script><script src="http://cdn.bootcss.com/bootstrap/3.3.4/js/bootstrap.min.js">script><script type="text/javascript" src="https://cdn.jsdelivr.net/npm/echarts@4.7.0/dist/echarts.min.js">script><script src='javascripts/dist/echarts-wordcloud.min.js'>script><script src="/angular/angular.min.js">script><script src="javascripts/manage.js" type="text/javascript">script>head>
<body ng-controller="manage_Ctrl" ng-init="isShow=false">
<nav class="navbar navbar-inverse navbar-fixed-top"><div class="container"><div class="navbar-header"><a class="navbar-brand" href="#">管理用户a>div><div id="navbar" class="navbar-collapse collapse"><ul class="nav navbar-nav"><li ><a ng-click="showSearch()">管理a>li><li ><a ng-click="backToNews()">返回Newsa>li><li><a href="#" class="dropdown-toggle" data-toggle="dropdown">账号管理<span class="caret">span>a><ul class="dropdown-menu"><li class="dropdown-header">账号li><li><a ng-click="logout()">退出登录a>li>ul>li>ul>div>div>nav>
<div ng-show="isShow" style="width: 1300px;position:relative; top:70px;left: 80px"><form class="form-horizontal" role="form"><div class="row" style="margin-bottom: 10px;"><label class="col-lg-2 control-label">查询用户label><div class="col-lg-3"><input type="text" class="form-control" placeholder="用户" ng-model="$parent.user">div>div><div class="form-group"><div class="col-md-offset-9"><button type="submit" class="btn btn-default" ng-click="search()">查询button>div>div>form><form class="form-horizontal" role="form"><div class="row" style="margin-bottom: 10px;"><label class="col-lg-2 control-label">启用/停用用户label><div class="col-lg-3"><input type="text" class="form-control" placeholder="用户" ng-model="$parent.forbiduser">div>div><div class="form-group"><div class="col-md-offset-9"><input type="reset" class="btn btn-default" ng-click="stopit()" value="启用/停用">input>div>div>form><div ng-show="isisshowresult"><table class="table table-striped"><thead><tr><td>序号td><td>用户td><td>操作时间td><td>methodtd><td>urltd><td>状态td>tr>thead><tbody><tr ng-repeat="(key, item) in items"><td>{{index+key}}td><td>{{item.username}}td><td>{{item.request_time}}td><td>{{item.request_method}}td><td>{{item.request_url}}td><td>{{item.status}}td>tr>tbody>table><div class="row"><div class="pull-left" style="margin-top: 12px;"><button type="submit" class="btn btn-primary" ng-click="searchsortASC()" >时间升序button><button type="submit" class="btn btn-primary" ng-click="searchsortDESC()">时间降序button>div><div class="pull-right"><nav><ul class="pagination"><li><a ng-click="Previous()" role="button"><span role="button">上一页span>a>li><li ng-repeat="page in pageList" ng-class="{active:isActivePage(page)}" role="button"><a ng-click="selectPage(page)" >{{ page }}a>li><li><a ng-click="Next()" role="button"><span role="button">下一页span>a>li>ul>nav>div>div>div>
div>
body>
html>
三、效果展示
1.用户注册

2.用户登录
登录成功后可以进行新闻搜索,这里列举的是标题关键词为“中国” 的搜索结果

并且可以进行分页

以及可以根据发布时间升序/降序

以及支持组合关键词查询

3.数据可视化
- 柱状图

- 饼状图

- 折线图

- 词云

4.数据库数据展示
- user表

- user_action表

5.管理界面
进入username=“shenxiaoqi” 的用户管理界面
管理员可以查询用户的操作日志以及选择启动or禁用其他用户

查询用户记录

禁用该用户

被禁用的用户登不上网站

再次启用后

该用户又可以重新登录啦
数据库变化

本文来自互联网用户投稿,文章观点仅代表作者本人,不代表本站立场,不承担相关法律责任。如若转载,请注明出处。 如若内容造成侵权/违法违规/事实不符,请点击【内容举报】进行投诉反馈!
