百度,不用介绍,大家都熟悉。
百度的搜索功能,也不用介绍,大家也都熟悉。
当我们需要对一批数据进行百度搜索时,可能我们会选择逐个输入到百度搜索框中进行搜索,这个是非常耗时而且低效的。
其实可以通过网抓的代码实现批量百度搜索。
本文介绍用WinHttpRequest对象进行百度搜索,返回百度搜索第一页的网页源代码:
Sub QQ1722187970() Dim oWK As Worksheet Set oWK = Sheet1 iRow = oWK.Range("a65536").End(xlUp).Row + 1 Dim oHtml As Object Set oHtml = VBA.CreateObject("WinHttp.WinHttpRequest.5.1") Dim sUrl As String Dim sCharset As String sCharset = "utf-8" For i = 2 To iRow 'A列为要查找的关键词 sText = oWK.Cells(i, "a") sUrl = "http://www.baidu.com/s?wd=" & sText With oHtml .Open "GET", sUrl, False .send '获取返回的字节数组 bResult = .ResponseBody '将源代码显示出来,同时获取源代码的文本 sResult = Byte2Txt(bResult, sCharset) End With Next i End Sub Function HtmlBasic(ByVal sVerb As String, ByVal sUrl As String, Optional ByVal sCharset As String = "utf-8", Optional ByVal sPostData As String = "") 'sVerb为发送的Html请求的方法,sUrl为具体的网址,sCharset为网址对应的字符集编码,sPostData为Post方法对应的发送body Dim oHtml As Object 'https://msdn.microsoft.com/en-us/library/windows/desktop/aa384106(v=vs.85).aspx Set oHtml = VBA.CreateObject("WinHttp.WinHttpRequest.5.1") With oHtml Select Case sVerb Case "GET" .Open "GET", sUrl, False Case "POST" .Open "POST", sUrl, False .SetRequestHeader "Content-Type", "application/x-www-form-urlencoded" End Select .send (sPostData) '获取返回的字节数组 bResult = .ResponseBody '按照指定的字符编码显示 sResult = Byte2String(bResult, sCharset) Debug.Print sResult HtmlBasic = sResult End With Set oHtml = Nothing End Function '将抓取网页的返回字节转变为文本 Function Byte2String(bContent, ByVal sCharset As String) Const adTypeBinary = 1 Const adTypeText = 2 Const adModeRead = 1 Const adModeWrite = 2 Const adModeReadWrite = 3 Dim oStream As Object '创建流对象 Set oStream = CreateObject("ADODB.Stream") With oStream '打开流 .Open '设置为字节模式 .Type = adTypeBinary '写入字节 .write bContent '将位置定位在第一个字节 .Position = 0 '设置为文本模式 .Type = adTypeText '设置编码的字符集 .Charset = sCharset '读取编码后的文本 Byte2String = .ReadText '关闭流对象 .Close End With End Function '将抓取的网页的源代码显示在文本文档中 Function Byte2Txt(bContent, ByVal sCharset As String) '添加容错语句,防止文本文件不存在时,Kill语句出错 On Error Resume Next Const adTypeBinary = 1 Const adTypeText = 2 Const adModeRead = 1 Const adModeWrite = 2 Const adModeReadWrite = 3 Dim oStream As Object '创建流对象 Set oStream = CreateObject("ADODB.Stream") With oStream '打开流 .Open '设置为字节模式 .Type = adTypeBinary '写入字节 .write bContent '将位置定位在第一个字节 .Position = 0 '设置为文本模式 .Type = adTypeText '设置编码的字符集 .Charset = sCharset '读取编码后的文本 Byte2Txt = .ReadText sPath = Excel.ThisWorkbook.Path sFilePath = sPath & "\view-source.txt" Kill sFilePath '将源代码保存到文本文件中 .SaveToFile sFilePath '打开文本文件 Shell ("notepad " & sFilePath), vbMaximizedFocus '关闭流对象 .Close End With End Function '提取网页表格的代码 Sub HtmlTable(ByVal sHtml As String) '网页html文档对象 Dim oHtmlDom As Object '网页表格对象 Dim oTable As Object '网页表格行对象 Dim oRows As Object '网页表格单元格对象 Dim oCells As Object '抓取的数据存放的excel表格对象 Dim oWK As Worksheet Set oWK = Sheet1 iRow = oWK.Range("a65536").End(xlUp).Row + 1 Set oHtmlDom = CreateObject("htmlfile") With oHtmlDom .body.innerHTML = sHtml Set oTable = .getElementsByTagName("table")(0) 'Set oTable = getElementById("id") With oTable Set oRows = .Rows For i = 1 To oRows.Length - 1 Set oCells = oRows(i).Cells For j = 0 To oCells.Length - 1 oWK.Cells(iRow, j + 1) = oCells(j).innertext Next j iRow = iRow + 1 Next i End With End With Set oHtmlDom = Nothing Set oTable = Nothing Set oRows = Nothing Set oCells = Nothing End Sub
发表评论