在前面的文章中我们已经学习了怎样识别蜘蛛(搜索引擎的爬虫),现在我们来运用学到的知识写一个简单的统计程序。当然,你必须在需要统计的页面中引入 spider.php,否则是无法统计到的哦!
一、spider.php
<?php
/**
 * Identify which search-engine crawler (if any) sent the current request.
 *
 * The request's User-Agent header is lower-cased and scanned for known
 * substrings in priority order; the first signature that matches wins.
 *
 * @return string Display name of the crawler, or the string '0' when the
 *                request does not look like a crawler (or has no User-Agent).
 */
function spider(){
    // Some clients send no User-Agent at all; treat them as "not a spider".
    if (!isset($_SERVER['HTTP_USER_AGENT'])) {
        return '0';
    }
    $user_agent = strtolower($_SERVER['HTTP_USER_AGENT']);

    // Ordered signature => label table. All needles MUST be lower-case because
    // the user agent has been lower-cased above.
    $signatures = array(
        // Agents naming these scripts are explicitly excluded from the stats.
        'baidu.php'      => '0',
        'spider_log.php' => '0',
        'spider.php'     => '0',
        'google'         => '谷歌',
        'bing'           => '必应',
        'slurp'          => '雅虎',
        'ahrefs'         => '国外SEO蜘蛛',
        'baidu'          => '百度',
        'sogou'          => '搜狗',
        'yahoo'          => '雅虎',
        'search'         => '必应',
        'yodao'          => '有道',
        'mediabot'       => '谷歌广告',
        'soso'           => '360搜搜',
        'ia_archiver'    => 'Alexa',
        // Generic fallbacks. 'robot' and 'spiders' are covered by these two
        // needles ('bot' is a substring of 'robot', 'spider' of 'spiders').
        'bot'            => '其他蜘蛛',
        'spider'         => '其他蜘蛛',
    );

    foreach ($signatures as $needle => $name) {
        // strpos() returns 0 for a match at the very start of the string, so
        // the result must be compared against false, not tested with >= 1.
        if (strpos($user_agent, $needle) !== false) {
            return $name;
        }
    }
    return '0';
}
// Append one log record when the current request comes from a crawler.
// Record layout: name|ip|port|page|time, each record terminated by "<->".
// spider() keeps its result in a local variable, so capture the return value
// here instead of reading an undefined global $spider.
$spider = spider();
if ($spider != '0') {
    // The visited page is taken from the referer because this script is meant
    // to be requested from the page through a JS tag. If you include this file
    // directly from the page's PHP instead, use
    // $_SERVER['HTTP_HOST'] . $_SERVER['REQUEST_URI'] here.
    $page = isset($_SERVER['HTTP_REFERER']) ? $_SERVER['HTTP_REFERER'] : '';
    // The referer is client-controlled: neutralise the field and record
    // separators so one request cannot forge or break log records.
    $page = str_replace(array('<->', '|'), array('', '%7C'), $page);

    $html = array(
        $spider,                                                          // crawler name
        isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : '',    // client IP
        isset($_SERVER['SERVER_PORT']) ? $_SERVER['SERVER_PORT'] : '',    // server port
        $page,                                                            // visited page
        date('Y-m-d H:i:s'),                                              // crawl time
    );
    $html = implode('|', $html) . "<->";

    // One log file per day, stored next to the script's working directory.
    $file = date('Y-m-d') . '.txt';
    // Best effort: a failed write must never break the page being served.
    @file_put_contents($file, $html, FILE_APPEND);
}
?>
二、index.php
<?php
// Builds the variables consumed by the HTML template below:
//   $html    - rendered <tr> rows for the current page
//   $x       - total number of log records for today
//   $page    - current page number (1-based, clamped to a valid page)
//   $pages   - total number of pages (at least 1)
//   $upurl   - "previous page" link markup, or '' on the first page
//   $nexturl - "next page" link markup, or '' on the last page

// Load today's log. Records are "name|ip|port|page|time", each terminated by
// "<->", so explode() always yields one trailing empty element.
$logFile = date('Y-m-d') . '.txt';
$raw     = is_readable($logFile) ? file_get_contents($logFile) : '';
$file    = explode('<->', (string) $raw);
$x       = count($file) - 1;

// Pagination: sanitise the requested page and clamp it into [1, $pages].
$nums  = 50;
$pages = max(1, (int) ceil($x / $nums));
$page  = isset($_GET['page']) ? (int) $_GET['page'] : 1;
$page  = max(1, min($page, $pages));

// Index range of the records shown on this page; $max never runs past the
// last real record, so a short final page does not read undefined offsets.
$min = ($page - 1) * $nums;
$max = min($page * $nums, $x) - 1;

$baseUrl = 'http://spider.wensha.info/index.php?page=';
$upurl   = '';
$nexturl = '';
if ($page > 1) {
    $upurl = '<a href="' . $baseUrl . ($page - 1) . '">上一页</a>';
}
if ($page < $pages) {
    $nexturl = '<a href="' . $baseUrl . ($page + 1) . '">下一页</a>';
}

// Render the table rows. Every field is escaped because the log contains
// client-supplied data (the referer) and is therefore untrusted.
$html = '';
$b = 0; // row number within the current page
for ($i = $min; $i <= $max; $i++) {
    $b = $b + 1;
    // Pad so a truncated record cannot trigger undefined-offset notices.
    $log  = array_pad(explode('|', $file[$i]), 5, '');
    $name = htmlspecialchars($log[0], ENT_QUOTES, 'UTF-8');
    $ip   = htmlspecialchars($log[1], ENT_QUOTES, 'UTF-8');
    $time = htmlspecialchars($log[4], ENT_QUOTES, 'UTF-8');
    $url  = htmlspecialchars($log[3], ENT_QUOTES, 'UTF-8');
    // Only turn http(s) URLs into links; anything else (e.g. "javascript:")
    // is shown as plain text.
    $link = preg_match('#^https?://#i', $log[3])
        ? '<a href="' . $url . '" target="_blank">' . $url . '</a>'
        : $url;
    $html .= <<<EOT
        <tr>
            <td>
                {$b} 、 {$name}
            </td>
            <td>
                {$ip}
            </td>
            <td>
                {$time}
            </td>
            <td>
                {$link}
            </td>
        </tr>

EOT;
}
?>
<!DOCTYPE html>
<html>
<head>
    <!-- Crawl-log listing page. Expects the PHP above to have set:
         $html (table rows), $x (record count), $page, $pages,
         $upurl and $nexturl (pager link markup). -->
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>文煞php笔记网 - Spider蜘蛛爬取记录</title>
    <link rel="stylesheet" href="style/main.css" type="text/css" media="all"/>
</head>
<body>
    <header>
        <h1>文煞PHP笔记网</h1>
    </header>
    <nav>
        <a><strong>Spider蜘蛛爬取记录列表</strong> 时间:<?=date("Y年m月d日",time())?></a>
    </nav>
    <div>
        <table width="100%" class="table">
            <thead>
                <tr>
                    <th>
                        Spider名称
                    </th>
                    <th>
                        SpiderIP
                    </th>
                    <th>
                        访问时间
                    </th>
                    <th>
                        访问页面
                    </th>
                </tr>
            </thead>
<?=$html?>
        </table>
        <!-- Pager: total count, first / previous / next / last links, position. -->
        <center>共<?=(int)$x?>条记录 <a href="http://spider.wensha.info/index.php">首页</a> <?=$upurl?> <?=$nexturl?> <a href="http://spider.wensha.info/index.php?page=<?=(int)$pages?>">尾页</a> <?=(int)$page?>/<?=(int)$pages?></center>
    </div>
    <footer>
        <p><a rel="external nofollow" href="http://beian.miit.gov.cn/" target="_blank">黔ICP备2023001763号-1</a></p>
        <p>版权所有 &copy; 2023 文煞PHP笔记网蜘蛛统计代码v.0.0.1</p>
    </footer>
</body>
</html>
三、main.css
/* Page skeleton: body, header, nav, section and footer. */
body {
    margin: 0;
    padding: 0;
    font-family: Arial, sans-serif;
    background-color: #f2f2f2;
}

header {
    background-color: #333;
    color: #fff;
    padding: 10px;
    text-align: center;
}

nav {
    background-color: #ddd;
    padding: 10px;
    text-align: center;
}

nav a {
    display: inline-block;
    padding: 10px;
    color: #333;
    text-decoration: none;
}

section {
    padding: 20px;
    text-align: center;
}

footer {
    background-color: #333;
    color: #fff;
    padding: 20px;
    text-align: center;
}

/* Wider screens: roomier padding and flex-based nav / section layout. */
@media only screen and (min-width: 600px) {
    header {
        padding: 20px;
    }

    nav {
        display: flex;
        justify-content: space-around;
    }

    nav a {
        padding: 20px;
    }

    section {
        display: flex;
        flex-wrap: wrap;
        justify-content: space-around;
    }

    section article {
        flex-basis: 30%;
        margin-bottom: 20px;
    }
}
/* Crawl-log table: collapsed borders, centred, zebra-striped rows. */
table {
    border-collapse: collapse;
    margin: 0 auto;
}

table td,
table th {
    border: 1px solid #cad9ea;
    color: #666;
    height: 30px;
}

table thead th {
    background-color: #CCE8EB;
    width: 100px;
}

table tr:nth-child(odd) {
    background: #fff;
}

table tr:nth-child(even) {
    background: #F5FAFA;
}
/* Link colours: default, hover and active states. */
a {
    color: #999999;
}

a:hover {
    color: #009900;
}

a:active {
    color: #F707EE;
}
提示:这里的代码虽然本站已经测试过,但是在发布的时候做了修改,你应该根据错误提示修改相关代码。
更多学习资料请看博客: