【校招VIP】java分析nginx网站日志

05月11日 收藏 0 评论 1 java开发

【校招VIP】java分析nginx网站日志

文章声明:转载来源:https://blog.csdn.net/weixin_44563213/article/details/108771787

自由开源,超高性能,免费的java分析nginx网站日志的工具,最终生成方便查看的网页版的网站监控报表。

自己用java分析nginx网站日志

先看效果图:

一,为什么不用第三方的网站日志分析工具

大家都熟悉的第三方的nginx网站日志分析工具,有收费的宝塔网站监控面板,有免费的goaccess和基于js的51la和百度网站统计等等。那为什么还要自己做一个网站日志分析工具呢?

第三方网站日志分析工具要么收费,要么不好用;
咱们自己做,可以根据自己的需要,对感兴趣的数据进行统计分析;
可以自己设计外观,自己喜欢什么风格就做成什么风格;
自己做不会泄露网站资料;

二,网站日志分析的基本原理

咱们nginx服务器生成的原始网站日志文件是一个文本文件,而且是非常有规律的格式化的文本文件,这样就便于我们对这个原始的网站日志文件进行处理。
第一步,就是生成结构化的数据:把原始日志文件的各个数据元素提取出来,放进sqlite数据库临时的网站统计表中。
第二步,在sqlite数据库的网站统计表中通过各种查询方法,统计出我们感兴趣的数据,再把统计出的数据存储到sqlite数据库的日志存档表中。
第三步,查询日志存档表,将查询结果以html网页的形式输出到网站根目录下,我们可以直接在浏览器中访问和查看。

三,下面介绍具体的算法和源代码

(一),创建sqlite数据库

package com.face1688;

import java.io.File;
import java.sql.*;

/**
 * Static helper around the single SQLite connection used by the log analyzer.
 *
 * <p>It opens the database, creates or resets the tables, and loads/stores the
 * name/value settings kept in table {@code pzb}. Every method catches its own
 * exceptions and reports them through {@code Log}, so callers never see a failure directly.
 */
public class SHelper {
    /** DDL of the temporary per-run statistics table. */
    private static final String CREATE_WZTJB =
            "Create table if not exists wztjb(id integer primary key autoincrement,date,time,ip,request,ret_code,ret_count,enter,client)";
    /** DDL of the index on the temporary statistics table. */
    private static final String CREATE_WZTJB_INDEX =
            "Create index if not exists index_wztjb on wztjb(id,date,ip,ret_code,ret_count,enter,client)";

    /** Path of the SQLite database file. */
    public static String dbFilePath = "site_record.db";
    /** Shared connection, assigned by {@link #connect()}; stays {@code null} if connecting failed. */
    public static Connection cn;
    /**
     * Result set last opened by {@link #readSettings()}. It is already closed when that method
     * returns; the field is only kept so existing references still compile.
     */
    public static ResultSet rs;

    /**
     * Opens the database. A database file that did not exist yet gets its tables created;
     * an existing one has its settings loaded into {@code ServerSettings}.
     */
    public static void connect() {
        try {
            File file = new File(dbFilePath);
            boolean exist = file.exists();
            if (!exist) {
                file.createNewFile();
            }
            Class.forName("org.sqlite.JDBC");
            cn = DriverManager.getConnection("jdbc:sqlite:" + dbFilePath);
            if (!exist) {
                createTables();
            } else {
                readSettings();
            }
        } catch (Exception e) {
            Log.error(e);
        }
    }

    /**
     * Drops and recreates all tables and indexes: settings ({@code pzb}), temporary
     * statistics ({@code wztjb}) and daily archive ({@code rzcdb}).
     *
     * <p>Note: column names must not start with a digit.
     */
    public static void createTables() {
        // try-with-resources closes the statement; the original leaked it.
        try (Statement st = cn.createStatement()) {
            // Remove any previous tables and indexes.
            st.executeUpdate("drop index if exists index_wztjb");
            st.executeUpdate("drop table if exists wztjb");
            st.executeUpdate("drop index if exists index_pzb");
            st.executeUpdate("drop table if exists pzb");
            st.executeUpdate("drop index if exists index_rzcdb");
            st.executeUpdate("drop table if exists rzcdb");
            // Settings table.
            st.executeUpdate("Create table pzb(name text primary key,myvalue blob)");
            // Temporary statistics table and its index.
            st.executeUpdate(CREATE_WZTJB);
            st.executeUpdate(CREATE_WZTJB_INDEX);
            // Daily archive table and its index.
            st.executeUpdate("Create table if not exists rzcdb(id integer primary key autoincrement,date,pv,uv,xzl,gjs,baiduspider,smspider,qihuspider,bingbot,googlebot)");
            st.executeUpdate("Create index if not exists index_rzcdb on rzcdb(id,date)");
        } catch (Exception e) {
            Log.error(e);
        }
    }

    /**
     * Resets the temporary statistics table {@code wztjb} by dropping and recreating it
     * together with its index.
     */
    public static void resetWztjb() {
        try (Statement st = cn.createStatement()) {
            st.executeUpdate("drop table if exists wztjb");
            st.executeUpdate("drop index if exists index_wztjb");
            st.executeUpdate(CREATE_WZTJB);
            st.executeUpdate(CREATE_WZTJB_INDEX);
        } catch (Exception e) {
            Log.error(e);
        }
    }

    //region settings

    /**
     * Loads the known settings ({@code lastRow}, {@code minutes}) from table {@code pzb}
     * into {@code ServerSettings}. Unknown names and rows with a missing name or value
     * are ignored; a stored interval of 0 minutes is replaced by 5.
     */
    public static void readSettings() {
        try (PreparedStatement ps = cn.prepareStatement("select name,myvalue from pzb");
             ResultSet result = ps.executeQuery()) {
            rs = result;
            while (result.next()) {
                String name = result.getString(1);
                byte[] b = result.getBytes(2);
                if (name == null || b == null) {
                    // A single incomplete row must not abort loading the remaining settings.
                    continue;
                }
                switch (name) {
                    case "lastRow": // number of log lines already processed
                        ServerSettings.lastRow = BitConverter.toInt(b, 0);
                        break;
                    case "minutes": // update interval in minutes
                        ServerSettings.minutes = BitConverter.toInt(b, 0);
                        if (ServerSettings.minutes == 0) {
                            ServerSettings.minutes = 5;
                        }
                        break;
                    default:
                        break;
                }
            }
        } catch (Exception e) {
            Log.error(e);
        }
    }

    /** Inserts or replaces one row of the settings table. */
    private static void UpdateSettingsByName(String name, byte[] value) {
        try (PreparedStatement ps = cn.prepareStatement("insert or replace into pzb values(?,?)")) {
            ps.setString(1, name);
            ps.setBytes(2, value);
            ps.executeUpdate();
        } catch (Exception e) {
            // Log the whole exception like the rest of this class (the message alone may be null).
            Log.error(e);
        }
    }

    /** Thin alias of {@link #UpdateSettingsByName(String, byte[])}. */
    private static void Write(String name, byte[] value) {
        UpdateSettingsByName(name, value);
    }

    /**
     * Stores a string setting.
     *
     * @param name  setting name (primary key of {@code pzb})
     * @param value setting value
     */
    public static void SaveSettingsByName(String name, String value) {
        // NOTE(review): encodes with the platform default charset, so the stored bytes
        // depend on the machine that wrote them; no reader of string settings is visible here.
        byte[] bv = value.getBytes();
        Write(name, bv);
    }

    /**
     * Stores an integer setting, encoded with {@code BitConverter.getBytes}.
     *
     * @param name  setting name (primary key of {@code pzb})
     * @param value setting value
     */
    public static void SaveSettingsByName(String name, int value) {
        byte[] bv = BitConverter.getBytes(value);
        Write(name, bv);
    }
    //endregion
}

(二)核心分析代码

package com.face1688;

import java.io.*;
import java.sql.CallableStatement;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.*;

public class Main {
public static boolean appRun = true;
public static int startTime;
public static boolean busy = false;
static PreparedStatement psInsert;

public static void main(String[] args) throws Exception {
SHelper.connect();
psInsert = SHelper.cn.prepareStatement("insert into wztjb(date,time,ip,request,ret_code,ret_count,enter,client) values(?,?,?,?,?,?,?,?)");
startTime = ServerTime.getShortTime();
Log.info("日志分析服务正在运行");
new Thread(new Runnable() {
public void run() {
while (appRun) {
try {
busy = true;
SHelper.cn.setAutoCommit(false);
readFromLine(ServerSettings.lastRow);
saveSiteLog();
updateHtmlReport();
SHelper.cn.commit();
Log.info("更新完成");
busy = false;
Thread.sleep(1000 * ServerSettings.minutes*60);//更新频率,10分钟,随便设置
//Thread.sleep(1000 * 10);//更新频率,10分钟,随便设置

} catch (Exception e) {
Log.info(e.getMessage());
busy = false;
break;
}
}
}
}).start();

Controller.run();//显示控制台
}

/**
* 从指定的行开始读取到结束,并且保持最后的行号。
*
* @param row 行号
*/
public static void readFromLine(int row) throws Exception {

Log.info("开始读取,行号"+row);
SHelper.resetWztjb();
String fileName = "/www/wwwlogs/face1688.com.log";
//String fileName = "face1688.com.log";
FileReader fileReader = new FileReader(fileName);
LineNumberReader lineNumberReader = new LineNumberReader(fileReader);
int n = 0;
String txt;

while (true) {
if (n < row)
lineNumberReader.readLine();
else {
txt = lineNumberReader.readLine();
if (txt == null) {
ServerSettings.lastRow = n;
SHelper.SaveSettingsByName("lastRow", n);//保存当前的行,下次从这里读取。

Log.info("读取结束,最新行号"+n);
break;
}

writeToTempTable(txt);
//Log.info("读取一行,行号"+n);
}
++n;
}

}

static PVItem pvItem = new PVItem();


/**
* 将读取的记录写入数据库临时表,以便统计分析。
*
* @param sitetxt
*/
public static void writeToTempTable(String sitetxt) {
//以下是一个pv的各个元素在字符串的索引
int ip1 = 0;
int ip2 = 0;
int date1 = 0;
int date2 = 0;
int time1 = 0;
int time2 = 0;
int request1 = 0;
int request2 = 0;
int ret_code1 = 0;
int ret_code2 = 0;
int ret_count1 = 0;
int ret_count2 = 0;
int enter1 = 0;
int enter2 = 0;
int client1 = 0;
int client2 = 0;
int index = 0;
int total = sitetxt.length();
while (index < total) {
char a = sitetxt.charAt(index);
if (a == '-') {
if (ip2 > 0) continue;//已经取到ip2,则这个-可能是url中的-连接符,需要忽略掉。
if (sitetxt.charAt(index + 2) == '-') {
char s = sitetxt.charAt(index + 33);
if (s != '"') break;//意料之外错误退出
//PVItem pvItem = new PVItem();
request1 = index + 34;//请求方法的开始位置也找到了。
ip2 = index - 1;
pvItem.ip = sitetxt.substring(ip1, ip2);
//取到ip2,则日期和时间也取到了,因为它们的格式的固定的。
date1 = index + 5;
date2 = index + 16;
pvItem.date = sitetxt.substring(date1, date2);
time1 = index + 17;
time2 = index + 25;
pvItem.time = sitetxt.substring(time1, time2);
index += 34;//直接跳过已经取到的ip和日期时间,直接到get和post区域。
//取get方法的下一个引号
ok:
while (index < total) {//查找请求方法结束位置
if (sitetxt.charAt(index) == '"') {
request2 = index;//请求方法的结束位置
pvItem.request = sitetxt.substring(request1, request2);
//查找服务器返回的结果位置
ret_code1 = index + 2;
ret_code2 = index + 5;
pvItem.ret_code = sitetxt.substring(ret_code1, ret_code2);
ret_count1 = index + 6;
//查找服务器返回字节数的结束位置
index += 6;
while (index < total) {
if (sitetxt.charAt(index) == ' ') {
ret_count2 = index;//
pvItem.ret_count = sitetxt.substring(ret_count1, ret_count2);
enter1 = ret_count2 + 2;
//查找来源网址的结束位置
index = enter1 + 1;
while (index < total) {
if (sitetxt.charAt(index) == '"') {
enter2 = index;
pvItem.enterPage = sitetxt.substring(enter1, enter2);
client1 = enter2 + 3;
index = client1 + 1;
//查找客户端浏览器的结束位置
while (index < total) {
if (sitetxt.charAt(index) == '"') {
client2 = index;
pvItem.client = sitetxt.substring(client1, client2);
ip1 = index + 2;
ip2 = 0;
//pvItems.add(pvItem);
index += 2;
break ok;
} else ++index;
}
} else ++index;
}
} else ++index;
}
} else ++index;
}
} else ++index;

} else ++index;

//++index;
}

try {
//CallableStatement psInsert= SHelper.cn.prepareCall("insert into wztjb(date,time,ip,request,ret_code,ret_count,enter,client) values(?,?,?,?,?,?,?,?)");

psInsert.setString(1, pvItem.date);
psInsert.setString(2, pvItem.time);
psInsert.setString(3, pvItem.ip);
psInsert.setString(4, pvItem.request);
psInsert.setString(5, pvItem.ret_code);
psInsert.setString(6, pvItem.ret_count);
psInsert.setString(7, pvItem.enterPage);
psInsert.setString(8, pvItem.client);
psInsert.executeUpdate();
//psInsert.close();
//SHelper.cn.commit();
} catch (Exception e) {
//Log.info(e.getMessage());
}


//System.out.println(pvItems.size());
}

/**
* 统计分析网站日志,并将结果存档。
*/
public static void saveSiteLog() throws Exception {
PreparedStatement ps;
ResultSet rs;
String date;
ArrayList<String> dates = new ArrayList<>();
dayLogHashMap.clear();
//分析pv
//分析uv
ps = SHelper.cn.prepareStatement("select date,count(ip), count(distinct ip) from wztjb group by date");
rs = ps.executeQuery();
while (rs.next()) {
date = rs.getString(1);
dates.add(date);
getDayLog(date);
dayLog.pv = rs.getInt(2);
dayLog.uv = rs.getInt(3);
}
rs.close();
ps.close();
//下载量
ps = SHelper.cn.prepareStatement("select date, count(id) from wztjb where request like '%baitanbao.apk%' group by date");
rs = ps.executeQuery();
while (rs.next()) {
date = rs.getString(1);
getDayLog(date);
dayLog.xzl = rs.getInt(2);
}
rs.close();
ps.close();
//攻击数
ps = SHelper.cn.prepareStatement("select date, count(distinct ip) from wztjb where request like '%wp-login%' group by date");
rs = ps.executeQuery();
while (rs.next()) {
date = rs.getString(1);
getDayLog(date);
dayLog.gjs = rs.getInt(2);
}
rs.close();
ps.close();
//百度蜘蛛
ps = SHelper.cn.prepareStatement("select date,count(ip) from wztjb where client like '%baiduspider%' group by date");
rs = ps.executeQuery();
while (rs.next()) {
date = rs.getString(1);
getDayLog(date);
dayLog.baiduspider = rs.getInt(2);
}
rs.close();
ps.close();
//神马蜘蛛
ps = SHelper.cn.prepareStatement("select date,count(ip) from wztjb where client like '%yisouspider%' group by date");
rs = ps.executeQuery();
while (rs.next()) {
date = rs.getString(1);
getDayLog(date);
dayLog.smspider = rs.getInt(2);
}
rs.close();
ps.close();
//360蜘蛛
ps = SHelper.cn.prepareStatement("select date,count(ip) from wztjb where client like '%360spider%' group by date");
rs = ps.executeQuery();
while (rs.next()) {
date = rs.getString(1);
getDayLog(date);
dayLog.qihuspider = rs.getInt(2);
}
rs.close();
ps.close();
//必应蜘蛛
ps = SHelper.cn.prepareStatement("select date,count(ip) from wztjb where client like '%Sogou web spider%' group by date");
rs = ps.executeQuery();
while (rs.next()) {
date = rs.getString(1);
getDayLog(date);
dayLog.bingbot = rs.getInt(2);
}
rs.close();
ps.close();
//谷歌蜘蛛
ps = SHelper.cn.prepareStatement("select date,count(ip) from wztjb where client like '%googlebot%' group by date");
rs = ps.executeQuery();
while (rs.next()) {
date = rs.getString(1);
getDayLog(date);
dayLog.googlebot = rs.getInt(2);
}
rs.close();
ps.close();
//存档
int id = 0;
for (String day : dates) {
DayLog dayLog = dayLogHashMap.get(day);
ps = SHelper.cn.prepareStatement("select id from rzcdb where date=?");
ps.setString(1, day);
rs = ps.executeQuery();
if (rs.next()) {
id = rs.getInt(1);
}
rs.close();
ps.close();
if (id > 0) {//更新
ps = SHelper.cn.prepareStatement("update rzcdb set pv=pv+?,uv=uv+?,xzl=xzl+?,gjs=gjs+?,baiduspider=baiduspider+?,smspider=smspider+?,qihuspider=qihuspider+?,bingbot=bingbot+?,googlebot=googlebot+? where date=?");
ps.setInt(1, dayLog.pv);
ps.setInt(2, dayLog.uv);
ps.setInt(3, dayLog.xzl);
ps.setInt(4, dayLog.gjs);
ps.setInt(5, dayLog.baiduspider);
ps.setInt(6, dayLog.smspider);
ps.setInt(7, dayLog.qihuspider);
ps.setInt(8, dayLog.bingbot);
ps.setInt(9, dayLog.googlebot);
ps.setString(10, day);
ps.executeUpdate();
} else {//插入
ps = SHelper.cn.prepareStatement("insert into rzcdb(date,pv,uv,xzl,gjs,baiduspider,smspider,qihuspider,bingbot,googlebot)values(?,?,?,?,?,?,?,?,?,?)");
ps.setString(1, day);
ps.setInt(2, dayLog.pv);
ps.setInt(3, dayLog.uv);
ps.setInt(4, dayLog.xzl);
ps.setInt(5, dayLog.gjs);
ps.setInt(6, dayLog.baiduspider);
ps.setInt(7, dayLog.smspider);
ps.setInt(8, dayLog.qihuspider);
ps.setInt(9, dayLog.bingbot);
ps.setInt(10, dayLog.googlebot);
ps.executeUpdate();
}
ps.close();
}

}

static HashMap<String, DayLog> dayLogHashMap = new HashMap<>();
static DayLog dayLog;

public static DayLog getDayLog(String date) {
if (dayLogHashMap.containsKey(date))
dayLog = dayLogHashMap.get(date);
else {
dayLog = new DayLog();
dayLogHashMap.put(date, dayLog);
}
return dayLog;
}

/**
* 更新网页报表,展示最近30天的数据
*/
public static void updateHtmlReport()throws Exception{
// File report = new File("report.html");
// if(!report.exists())report.createNewFile();
//FileReader fileReader = new FileReader(report);
StringBuilder sb = new StringBuilder();
//String reportFile="report.html";
String reportFile="/data/face1688/report.html";
PrintStream printStream = new PrintStream(new FileOutputStream(reportFile),true,"utf-8");

sb.append("<!DOCTYPE html>");
sb.append("<html><head>");
sb.append("<meta charset=\"utf-8\" />");
sb.append("<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"/>");
sb.append("<title>网站报表</title>");
sb.append("<style type=\"text/css\">");
sb.append("#th1{background-color:rgb(160, 160, 160);color:rgb(17, 17, 12);font-size:125%;border-bottom-style:dashed;}");
sb.append("#th2{background-color:rgb(203, 226, 170);color:rgb(8, 8, 8);font-family:\"宋体\";border-bottom-style:dashed;}");
sb.append("#th3{background-color:rgb(247, 247, 209);color:rgb(10, 10, 10);font-family:\"宋体\";border-bottom-style:dashed;}");
sb.append("#tr1{background-color:rgb(138, 156, 144);color:rgb(0, 0, 0);font-family:\"宋体\";}");
sb.append(".tr1{background-color:rgb(203, 226, 170);}");
sb.append(".tr2{background-color:rgb(247, 247, 209);}");

sb.append("TABLE{border-collapse:collapse;border-left:solid 1 #000000; border-top:solid 1 #000000;padding:5px;width:80%;}");
sb.append("TH{border-right:solid 1 #000000;border-bottom:solid 1 #000000;}");
sb.append("TD{font:normal;border-right:solid 1 #000000;border-bottom:solid 1 #000000;}");
sb.append("</style></head>");
sb.append("<body bgcolor=\"#FFF8DC\">");
sb.append("<div align=\"center\">");
sb.append("<table border=1");
sb.append("<tr\"><th id=\"th1\" colspan=10>168网站监控报表v1.02</th></tr>");
sb.append("<tr><th id=\"th2\"colspan=10>自由开源,超高性能,www.face1688.com</th></tr>");
sb.append("<tr><th id=\"th3\" colspan=10>"+ServerTime.getShortDateTime()+"——每"+ServerSettings.minutes+"分钟更新一次</th></tr>");

sb.append("<tr id=\"tr1\">");
sb.append("<th>日期</th>");
sb.append("<th>PV</th>");
sb.append("<th>UV</th>");
sb.append("<th>下载</th>");
sb.append("<th>攻击</th>");
sb.append("<th>百度</th>");
sb.append("<th>神马</th>");
sb.append("<th>奇虎</th>");
sb.append("<th>搜狗</th>");
sb.append("<th>谷歌</th>");
sb.append("</tr>");



PreparedStatement ps = SHelper.cn.prepareStatement("select date,pv,uv,xzl,gjs,baiduspider,smspider,qihuspider,bingbot,googlebot from rzcdb order by id desc limit 30");
ResultSet rs = ps.executeQuery();
int index = 0;
while(rs.next()){
if((index++&1)==1)
sb.append("<tr class=\"tr2\"><td>"+rs.getString(1)+"</td>");
else
sb.append("<tr class=\"tr1\"><td>"+rs.getString(1)+"</td>");

sb.append("<td>"+rs.getInt(2)+"</td>");
sb.append("<td>"+rs.getInt(3)+"</td>");
sb.append("<td>"+rs.getInt(4)+"</td>");
sb.append("<td>"+rs.getInt(5)+"</td>");
sb.append("<td>"+rs.getInt(6)+"</td>");
sb.append("<td>"+rs.getInt(7)+"</td>");
sb.append("<td>"+rs.getInt(8)+"</td>");
sb.append("<td>"+rs.getInt(9)+"</td>");
sb.append("<td>"+rs.getInt(10)+"</td></tr>");
}
rs.close();
ps.close();
sb.append("</table>");
sb.append("</div></body></html>");
printStream.println(sb.toString());



}

public static void readfile() throws Exception {

String fileName = "face1688.com.log";
FileReader fileReader = new FileReader(fileName);
int num = 0;
char c = ' ';
long t1 = System.currentTimeMillis();

t1 = System.currentTimeMillis();
LineNumberReader lineNumberReader = new LineNumberReader(fileReader);
lineNumberReader.skip(Long.MAX_VALUE);
int lines = lineNumberReader.getLineNumber();

System.out.println(lines + 1);
System.out.println(System.currentTimeMillis() - t1);


// BufferedReader bufferedReader = new BufferedReader(new FileReader(fileName));
// String s;
// while ((s = bufferedReader.readLine()) != null) {
// System.out.println(s);
// }

}

public static void readAll() throws Exception {
String sitetxt =
"112.87.85.237 - - [18/Sep/2020:20:20:18 +0800] \"POST /wp-admin/admin-ajax.php HTTP/1.1\" 200 58 \"http://www.face1688.com/wp-admin/admin.php?page=cs-framework\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:21:18 +0800] \"POST /wp-admin/admin-ajax.php HTTP/1.1\" 200 58 \"http://www.face1688.com/wp-admin/admin.php?page=cs-framework\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:21:22 +0800] \"POST /wp-admin/options.php HTTP/1.1\" 302 5 \"http://www.face1688.com/wp-admin/admin.php?page=cs-framework\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:21:23 +0800] \"GET /wp-admin/edit.php?post_type=page HTTP/1.1\" 200 29758 \"http://www.face1688.com/wp-admin/admin.php?page=cs-framework\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:21:23 +0800] \"GET /wp-admin/admin.php?page=cs-framework&settings-updated=true HTTP/1.1\" 200 42551 \"http://www.face1688.com/wp-admin/admin.php?page=cs-framework\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:21:24 +0800] \"POST /wp-admin/admin-ajax.php HTTP/1.1\" 200 58 \"http://www.face1688.com/wp-admin/edit.php?post_type=page\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:21:27 +0800] \"GET /wp-admin/post.php?post=3&action=edit HTTP/1.1\" 200 42268 \"http://www.face1688.com/wp-admin/edit.php?post_type=page\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:21:43 +0800] \"POST /wp-admin/admin-ajax.php HTTP/1.1\" 200 109 \"http://www.face1688.com/wp-admin/post.php?post=3&action=edit\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:21:57 +0800] \"POST /wp-admin/post.php HTTP/1.1\" 302 5 \"http://www.face1688.com/wp-admin/post.php?post=3&action=edit\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:21:58 +0800] \"GET /wp-admin/post.php?post=3&action=edit&message=1 HTTP/1.1\" 200 42479 \"http://www.face1688.com/wp-admin/post.php?post=3&action=edit\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:21:58 +0800] \"POST /wp-admin/admin-ajax.php HTTP/1.1\" 200 109 \"http://www.face1688.com/wp-admin/post.php?post=3&action=edit\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:21:59 +0800] \"GET /wp-admin/admin-ajax.php?action=oembed-cache&post=3 HTTP/1.1\" 200 32 \"http://www.face1688.com/wp-admin/post.php?post=3&action=edit\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:22:00 +0800] \"GET /privacy-policy HTTP/1.1\" 200 3873 \"http://www.face1688.com/336.html\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:23:21 +0800] \"POST /wp-admin/admin-ajax.php HTTP/1.1\" 200 109 \"http://www.face1688.com/wp-admin/post.php?post=3&action=edit\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:23:22 +0800] \"POST /wp-admin/admin-ajax.php HTTP/1.1\" 200 32 \"http://www.face1688.com/wp-admin/post.php?post=3&action=edit\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:23:26 +0800] \"GET / HTTP/1.1\" 200 6443 \"-\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"220.181.108.92 - - [18/Sep/2020:20:31:00 +0800] \"GET / HTTP/1.1\" 200 6440 \"-\" \"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)\"\n" +
"220.181.108.154 - - [18/Sep/2020:20:31:01 +0800] \"GET / HTTP/1.1\" 200 6440 \"-\" \"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)\"\n" +
"116.179.32.37 - - [18/Sep/2020:20:31:01 +0800] \"GET /dtmr HTTP/1.1\" 200 4014 \"-\" \"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:35:29 +0800] \"GET / HTTP/1.1\" 200 6443 \"-\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:35:58 +0800] \"GET /336.html HTTP/1.1\" 200 7474 \"http://www.face1688.com/\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:36:02 +0800] \"GET /wp-admin/post.php?post=336&action=edit HTTP/1.1\" 200 46647 \"http://www.face1688.com/336.html\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:36:08 +0800] \"GET /wp-admin/edit.php?post_type=page HTTP/1.1\" 200 29759 \"http://www.face1688.com/wp-admin/post.php?post=336&action=edit\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:36:08 +0800] \"POST /wp-admin/admin-ajax.php HTTP/1.1\" 200 32 \"http://www.face1688.com/wp-admin/post.php?post=336&action=edit\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:36:08 +0800] \"POST /wp-admin/admin-ajax.php HTTP/1.1\" 200 58 \"http://www.face1688.com/wp-admin/edit.php?post_type=page\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:36:12 +0800] \"GET /wp-admin/post.php?post=3&action=edit HTTP/1.1\" 200 42322 \"http://www.face1688.com/wp-admin/edit.php?post_type=page\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:36:22 +0800] \"GET /wp-admin/admin.php?page=cs-framework HTTP/1.1\" 200 42510 \"http://www.face1688.com/wp-admin/post.php?post=3&action=edit\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:36:22 +0800] \"POST /wp-admin/admin-ajax.php HTTP/1.1\" 200 32 \"http://www.face1688.com/wp-admin/post.php?post=3&action=edit\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:37:17 +0800] \"POST /wp-admin/options.php HTTP/1.1\" 302 5 \"http://www.face1688.com/wp-admin/admin.php?page=cs-framework\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:37:18 +0800] \"GET /wp-admin/admin.php?page=cs-framework&settings-updated=true HTTP/1.1\" 200 42554 \"http://www.face1688.com/wp-admin/admin.php?page=cs-framework\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:37:23 +0800] \"GET / HTTP/1.1\" 200 6444 \"-\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"114.99.20.167 - - [18/Sep/2020:20:38:40 +0800] \"GET / HTTP/1.1\" 200 6531 \"https://www.baidu.com/link?url=HrlLPxfvG8eSPt7Qa8kkTsdxNoEwSYcCEfxYvoox0uSJQYm8P6M0ZZa10FzwBDjH&wd=&eqid=fa6611f9000910bd000000065f64aa47\" \"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 Safari/537.36 Core/1.53.4482.400 QQBrowser/9.7.12906.400\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:54:04 +0800] \"GET / HTTP/1.1\" 200 6444 \"-\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:54:17 +0800] \"GET /download/goldface/baitanbao.apk HTTP/1.1\" 304 0 \"http://www.goldface.vip/\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:20:56:05 +0800] \"GET / HTTP/1.1\" 200 6531 \"-\" \"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"122.230.17.111 - - [18/Sep/2020:21:02:59 +0800] \"GET / HTTP/1.1\" 304 0 \"http://www.baidu.com/link?url=JOiVlowB30hTG6NHPE_y6CXpJmXWHCF4f2UQJwVbRcWo4qF6Spx8glBZilR3QCdg&wd=&eqid=f2b13eaf0003a118000000045f64aff8\" \"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; Creative AutoUpdate v1.41.09; SE 2.X MetaSr 1.0; rv:11.0) like Gecko\"\n" +
"49.7.21.114 - - [18/Sep/2020:21:18:50 +0800] \"GET / HTTP/1.1\" 301 162 \"-\" \"Sogou web spider/4.0(+http://www.sogou.com/docs/help/webmasters.htm#07)\"\n" +
"112.87.85.237 - - [18/Sep/2020:21:31:40 +0800] \"GET /btzj HTTP/1.1\" 200 5631 \"http://www.face1688.com/\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n" +
"112.87.85.237 - - [18/Sep/2020:21:31:41 +0800] \"GET /wp-admin/edit-tags.php?taxonomy=category HTTP/1.1\" 302 5 \"http://www.face1688.com/wp-admin/nav-menus.php\" \"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36\"\n";


//String[] txtItems = sitetxt.split("\n");
ArrayList<PVItem> pvItems = new ArrayList<>();
//以下是一个pv的各个元素在字符串的索引
int ip1 = 0;
int ip2 = 0;
int date1 = 0;
int date2 = 0;
int time1 = 0;
int time2 = 0;
int request1 = 0;
int request2 = 0;
int ret_code1 = 0;
int ret_code2 = 0;
int ret_count1 = 0;
int ret_count2 = 0;
int enter1 = 0;
int enter2 = 0;
int client1 = 0;
int client2 = 0;
int index = 0;
int total = sitetxt.length();

while (index < total) {
char a = sitetxt.charAt(index);
if (a == '-') {
if (ip2 > 0) continue;//已经取到ip2,则这个-可能是url中的-连接符,需要忽略掉。
if (sitetxt.charAt(index + 2) == '-') {
char s = sitetxt.charAt(index + 33);
if (s != '"') break;//意料之外错误退出
PVItem pvItem = new PVItem();
request1 = index + 34;//请求方法的开始位置也找到了。
ip2 = index - 1;
pvItem.ip = sitetxt.substring(ip1, ip2);
//取到ip2,则日期和时间也取到了,因为它们的格式的固定的。
date1 = index + 5;
date2 = index + 16;
pvItem.date = sitetxt.substring(date1, date2);
time1 = index + 17;
time2 = index + 25;
pvItem.time = sitetxt.substring(time1, time2);
index += 34;//直接跳过已经取到的ip和日期时间,直接到get和post区域。
//取get方法的下一个引号
ok:
while (index < total) {//查找请求方法结束位置
if (sitetxt.charAt(index) == '"') {
request2 = index;//请求方法的结束位置
pvItem.request = sitetxt.substring(request1, request2);
//查找服务器返回的结果位置
ret_code1 = index + 2;
ret_code2 = index + 5;
pvItem.ret_code = sitetxt.substring(ret_code1, ret_code2);
ret_count1 = index + 6;
//查找服务器返回字节数的结束位置
index += 6;
while (index < total) {
if (sitetxt.charAt(index) == ' ') {
ret_count2 = index;//
pvItem.ret_count = sitetxt.substring(ret_count1, ret_count2);
enter1 = ret_count2 + 2;
//查找来源网址的结束位置
index = enter1 + 1;
while (index < total) {
if (sitetxt.charAt(index) == '"') {
enter2 = index;
pvItem.enterPage = sitetxt.substring(enter1, enter2);
client1 = enter2 + 3;
index = client1 + 1;
//查找客户端浏览器的结束位置
while (index < total) {
if (sitetxt.charAt(index) == '"') {
client2 = index;
pvItem.client = sitetxt.substring(client1, client2);
ip1 = index + 2;
ip2 = 0;
pvItems.add(pvItem);
index += 2;
break ok;
} else ++index;
}
} else ++index;
}
} else ++index;
}
} else ++index;
}
} else ++index;

} else ++index;

//++index;
}
//PVItem pvItem = pvItems.get(0);
//SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd/m/yyyy HH:mm:ss");
//Date date = simpleDateFormat.parse(pvItem.date+" "+pvItem.time);
//long t = date.getTime();
for (PVItem pvItem : pvItems) {
try {
PreparedStatement ps = SHelper.cn.prepareStatement("insert into wztjb(date,time,ip,request,ret_code,ret_count,enter,client) values(?,?,?,?,?,?,?,?)");
ps.setString(1, pvItem.date);
ps.setString(2, pvItem.time);
ps.setString(3, pvItem.ip);
ps.setString(4, pvItem.request);
ps.setString(5, pvItem.ret_code);
ps.setString(6, pvItem.ret_count);
ps.setString(7, pvItem.enterPage);
ps.setString(8, pvItem.client);
ps.executeUpdate();
} catch (Exception e) {
}
}


System.out.println(pvItems.size());

//readfile();
//将默认的nginx日期格式转为普通的日期格式
/*SimpleDateFormat simpleDateFormat = new SimpleDateFormat("dd/MMMM/yyyy:HH:mm:ss", Locale.ENGLISH);
Date date = simpleDateFormat.parse("18/Sep/1998:20:21:18");
SimpleDateFormat s2 = new SimpleDateFormat("yy-MM-dd HH:mm:ss");
String sj = s2.format(date);
System.out.println(sj);*/
}

}

四,测试环境和部署

我是在Windows 7的电脑上开发的。在电脑上开发时,相关的日志文件和输出的网页报表文件可以直接放在项目文件夹下,这样便于调试。
部署环境是Linux服务器,宝塔面板,nginx环境。
如果linux服务器没有java运行环境,需要linux安装jdk1.8。
在电脑上测试好后,需要把相关文件路径改成你自己网站上相关文件的具体路径。然后编译输出为可执行的jar文件。
因为java是跨平台的,所以咱们最终得到的jar文件在linux和windows云服务器上都可以运行。
咱们这个分析的是nginx网站日志,有兴趣的可以分析apache的网站日志,原理是一样的。

好了,用java分析nginx网站日志,并生成网站监控报表,到此就全部讲完了。

C 1条回复 评论
清歌

这问题真不好答

发表于 2022-07-20 11:03:23
1 1
Alkali :

拉开距离拉土

发表于 2022-07-20 11:03:23
回复