跟昨晚的768.cc爬行差不多,因为昨晚已经爬了一些数据,所以插入数据库函数做了点修改
2个脚本其实大同小异,用到的函数并不多1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108'0768114.net网址大全爬行脚本
'For WebSpider2
'梦游的猫
'创建时间 2006年02月25日 01:11:01
'最后修改 2006年02月25日 02:29:31
'Option Explicit
Set conn = CreateObject("ADODB.Connection")
MyConn="Provider=SQLOLEDB.1;Persist Security InFso=true;Data Source=127.0.0.1;Initial Catalog='WebSpider';User ID='spider';Password='spider';CONNect Timeout=30"
Conn.Open MyConn
Sub Main
urlMain = "http://0768114.net/"
CLSLog()
CLSTMPXML()
Call Spider.OpenURL(Document,urlMain)
tempI = 0
For Each it in Document.getElementsByTagName("table")
if it.id = "AutoNumber7" then
if tempI = 1 then
For Each itA in it.getElementsByTagName("a")
if instr(itA.href,"sc.asp?classid") > 0 then
showlog trim(itA.innerHtml)
myUrl = myUrl & "||" & itA.href
myType = myType & "||" & trim(itA.innerHtml)
end if
next
exit for
end if
tempI = tempI + 1
end if
next
urlArr = split(myUrl,"||")
typeArr = split(myType,"||")
for i = 0 to ubound(urlArr)
if instr(urlArr(i) , "http://0768114.net/") > 0 then getPage urlArr(i),typeArr(i)
next
End Sub
Function getPage(myUrl,myType)
Call Spider.OpenURL(Document,myUrl)
html = document.body.innerHtml
type2 = trim(getStr(html,"本分类的下级分类:** ","**"))
' if type2 = "无下级分类" then type2 = null
page = trim(getStr(html,"页/共","页 总计"))
for i = 1 to page
if i > 1 then Call Spider.OpenURL(Document,myUrl & "&pn=" & i)
tempI = 0
For Each it in Document.getElementsByTagName("table")
if it.id = "AutoNumber7" then
if tempI = 3 then
For Each itA in it.getElementsByTagName("a")
writeTMPXML myType & " >> " & type2 & " : " & trim(itA.innerHtml) & chr(9) & itA.href & vbCrlf
saveDate myType,type2,trim(itA.innerHtml),itA.href
next
exit for
end if
tempI = tempI + 1
end if
next
next
End Function
Function getStr(str,str1,str2) '返回str1和str2之间的字符串
if isNull(str1) then str1 = ""
if isNull(str2) then str2 = ""
pStart = instr(str,str1) + len(str1)
if pStart > 0 then
pEnd = instr(pStart,str,str2)
if str2 = "" then
getStr = mid(str,pStart)
elseif pEnd > 0 then
getStr = mid(str,pStart,pEnd – pStart)
else
getStr = mid(str,pStart)
end if
else
pEnd = instr(str,str2)
if pEnd > 0 then
getStr = mid(str,0,pEnd – len(str))
else
getStr = getStr
end if
end if
end function
function saveDate(type1,type2,name,url)
url = replace(url,"'","")
if left(url,7) <> "http://" then url = "http://" & url
set rs = CreateObject("ADODB.RecordSet")
sql = "select * from czNetURL where url = '" & url & "'"
rs.open sql,conn,1,3
if rs.eof then
rs.addNew
end if
if isNull(rs("type1")) or isEmpty(rs("type1")) then rs("type1") = trim(type1)
if isNull(rs("type2")) or isEmpty(rs("type2")) then rs("type2") = trim(type2)
if isNull(rs("name")) or isEmpty(rs("name")) then rs("name") = trim(name)
rs("url") = trim(url)
rs.update
rs.close
set rs = Nothing
end Function