enron 数据可视化
思路
- 目的:统计enron数据集每周发的邮件总数
- 步骤:
- 下载数据集导入数据库
- 用nodejs查询出结果返回json
- 前端chartjs解析为图表
创建数据库
mysql -u root -p
create database enron
下载enron数据库并导入到数据库
wget http://www.ahschulz.de/pub/R/data/enron-mysqldump_v5.sql.gz
gzip -d enron-mysqldump_v5.sql.gz
mysql enron < enron-mysqldump_v5.sql
编写 sql 语句
select concat(year(date),"/",weekofyear(date)) as year_week,count(mid) from message where year(date) between 1998 and 2002 group by year_week order by date asc;
nodejs 代码
初始化项目
npm init
npm install express --save
npm install mysql --save
node app.js
代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37var express = require('express');
var mysql = require('mysql');
var config = require('./config.json');
var db = config.db;
var app = express();
var connection = mysql.createConnection({
host: db.host,
port: db.port,
user: db.user,
password: db.password,
database: db.database
});
connection.connect();
app.get('/', function(req, res){
res.header("Access-Control-Allow-Origin", "*");
var sql = 'select concat(year(date),"/",weekofyear(date)) as year_week,count(mid) from message where year(date) between 1998 and 2002 group by year_week order by date asc';
var weeklist = [];
var midlist = [];
connection.query(sql, function(err, results){
if(err){console.log("error: " + err.sqlMessage); return;}
results.forEach(function(item){
weeklist.push(item['year_week']);
midlist.push(item['count(mid)']);
});
res.json({
weeklist: weeklist,
midlist: midlist
})
});
});
app.listen(8083);
console.log('the application started at port 8083');
前端html
1 |
|
错误处理
enron message 数据表有很多错误数据,比如有的年份只有两位数,sql语句中用 where year(date) between 1998 and 2002 解决
爬取itunes某个歌手的部分单曲
利用itunes的api,访问https://itunes.apple.com/search?term=jack+johnson
term=歌手名,会返回一个json文件
编写nodejs代码
1 | var request = require('sync-request'); |
此代码会发送http请求,返回得到json数据,解析数据,提取歌手的单曲名,并存入mongodb数据库中