bedvit, по многопоточности - всё просто
Вот есть компонент WinHTTPRequest, их можно наплодить несколько штук (создавая динамически) используя модули класса и события компонента WinHTTPRequest
Ключевая строка модуля класса: Public WithEvents HTTP As WinHttpRequest
у меня код сильно завязан на функции и объекты других модулей надстройки, потому, рабочий пример делать лень (много времени нужно)
можете посмотреть немного урезанный код, для понимания принципа должно хватить:
Скрытый текст |
---|
Код |
---|
' Пример использования (код в стандартном модуле)
' функция, на входе принимающая коллекцию ссылок для загрузки
' возвращающая массив загруженных веб-страниц
Function LoadHTML_MultiThreading(ByRef coll As Collection, Optional ByVal MaxThreads&, Optional ByVal Encoding$) As Variant
On Error Resume Next
Dim sender As New HTTP_Requests
URLsAmount& = coll.Count
With sender
If Len(Encoding$) Then .Encoding = Encoding$
If MaxThreads& > 0 Then .MaxThreads = MaxThreads&
.ExecuteAll coll ' запуск загрузки
If .WaitForResponse() Then
' Debug.Print "Загрузка завершена успешно"
End If
LoadHTML_MultiThreading = .result.Items
End With
Set coll = Nothing
Set sender = Nothing
End Function
' ===========================================
' первый модуль класса: HTTP_Requests
' ===========================================
'---------------------------------------------------------------------------------------
' Class Module : HTTP_Requests Version: 1.1
' Author : Igor Vakhnenko Date: 13.06.2016
' Professional application development for Microsoft Excel
' https://excelvba.ru/programmes/Parser
'---------------------------------------------------------------------------------------
Option Compare Text: Option Explicit
Dim Requests As New Dictionary
Dim SourceURLs As New Collection
Public result As New Dictionary
Public SourceURLsAmount As Long
Public Encoding As String ' кодировка страниц
Public MaxThreads As Long ' количество потоков
Public Timeout As Long ' таймаут в секундах
Public HTML_Filters As String ' фильтры для пост-обработки ответа сервера
Public pi As ProgressIndicatorNew
' текущее состояние
Public RequestStarted As Long ' сколько запросов в работе
Public RequestFrozen As Long ' запрос подвис, истек таймаут
Public RequestFinished As Long ' сколько запросов завершено
Public RequestStatusOK As Long ' код ответа 2xx или 3xx
Public RequestStatusFailed As Long ' код ответа 4xx или 5xx
Private Sub Class_Initialize()
Me.MaxThreads = 50
Me.Timeout = DEFAULT_QUERY_TIMEOUT&
End Sub
Private Sub Class_Terminate()
On Error Resume Next
Dim h As New HTTP_Request
For Each h In Requests
Set h = Nothing
Next
Set Requests = Nothing
Set SourceURLs = Nothing
Set result = Nothing
End Sub
Function ExecuteAll(ByVal coll As Collection) As Variant
On Error Resume Next
Dim i&
' заносим все исходные ссылки в список загрузок - коллекцию SourceURLs
Set SourceURLs = coll
SourceURLsAmount = SourceURLs.Count
pi.ShowExtraBar "Многопоточная загрузка страниц (" & SourceURLsAmount & " URL, потоков: " & Me.MaxThreads & ", таймаут: " & Timeout & " сек.)", False
' запускаем максимальное количество потоков
For i = 1 To Application.Min(Me.MaxThreads, SourceURLs.Count)
'Debug.Print "ind=" & i&, Now
ExecuteNext
DoEvents
UpdateProgressBar
Next
End Function
Function WaitForResponse(Optional ByVal Timeout&) As Boolean
On Error Resume Next
If Timeout& <= 0 Then Timeout& = (SourceURLsAmount / Me.MaxThreads * 1.2 + 2) * Me.Timeout
Dim i&, TimeStamp As Double: TimeStamp = CDbl(Now) * 86400
While (Me.RequestFinished < SourceURLsAmount) And (CDbl(Now) * 86400 - TimeStamp < Timeout&)
If StopMacro Then pi.HideExtraBar: Exit Function
For i = 1 To 100: DoEvents: Next
UpdateProgressBar
Wend
'UpdateProgressBar
For i = 1 To 100: DoEvents: Next
'WaitForResponse = Me.RequestFinished = SourceURLsAmount
WaitForResponse = (Me.RequestFrozen + Me.RequestStatusFailed) = 0
pi.HideExtraBar
End Function
Sub UpdateProgressBar()
On Error Resume Next
Static LastPercent&, LastUpdateTime As Date
If Now - LastUpdateTime < 0.1 / 86400 Then Exit Sub
' ...
For Each k In Requests.Keys
Set h = Requests.Item(CStr(k))
If h.IsFrosen(Timeout) Then
Me.RequestFrozen = Me.RequestFrozen + 1
Me.Finished h
End If
Next
Set h = Nothing
' ...
LastUpdateTime = Now
DoEvents
End Sub
Sub ExecuteNext()
If StopMacro Then Exit Sub
If SourceURLs.Count = 0 Then Exit Sub
On Error Resume Next
Dim h As New HTTP_Request, ind&
ind& = SourceURLsAmount - SourceURLs.Count + 1
With h
Set .Parent = Me
h.ThreadID = CStr(ind&)
h.Encoding = Me.Encoding
h.URL = SourceURLs(1&)
If h.URL Like "*[А-Яа-яЁё]*" Then h.URL = ConvertURLtoPunycode(h.URL)
h.StartTime = Now
End With
SourceURLs.Remove 1&
Requests.Add h.ThreadID, h
With h.HTTP
.SetTimeouts Me.Timeout * 1000, Me.Timeout * 1000, Me.Timeout * 1000, Me.Timeout * 1000
.Open "GET", h.URL, True
If RunningParser.Options.UseClientCertificate Then .SetClientCertificate RunningParser.Options.ClientCertificateName
PageLoadRequested h.URL
AddRequestHeadersFromStore h.HTTP
If Not CookiesStore Is Nothing Then .SetRequestHeader "Cookie", GetCookiesFromStore
.Send
End With
End Sub
Sub Finished(ByRef h As HTTP_Request) ' срабатывает по окончании загрузки очередного запроса
On Error Resume Next: Dim ThreadID$, InfoTag$
ThreadID$ = h.ThreadID
With h
If .Encoding = "utf-8" Then .Response = DecodeUTF8(.Response)
InfoTag$ = "<info id='parser multithreading' status='" & .Status & "' encoding='" & .Encoding & "' href='" & .URL & "'></info>"
If Len(Me.HTML_Filters) Then
.Response = InfoTag$ & vbNewLine & ApplyHTMLFilters(.Response, Me.HTML_Filters)
Else
.Response = Replace(.Response, "</head>", vbNewLine & InfoTag$ & vbNewLine & "</head>", , 1)
End If
' если не добавился - добавляем в самое начало ответа сервера
If InStr(1, .Response, InfoTag$, vbTextCompare) = 0 Then .Response = InfoTag$ & vbNewLine & .Response
result.Add ThreadID$, .Response
End With
Set h = Nothing
Me.RequestFinished = Me.RequestFinished + 1
Requests.Remove ThreadID$
'Debug.Print "ThreadID " & ThreadID$ & " destroyed"
ExecuteNext
End Sub
' ===========================================
' второй модуль класса: HTTP_Request
' ===========================================
'---------------------------------------------------------------------------------------
' Class Module : HTTP_Request Version: 1.1
' Author : Igor Vakhnenko Date: 14.06.2016
' Professional application development for Microsoft Excel
' https://excelvba.ru/programmes/Parser
'---------------------------------------------------------------------------------------
Option Compare Text: Option Explicit
Public WithEvents HTTP As WinHttpRequest
Public Parent As HTTP_Requests
Public ThreadID As String
Public URL As String
Public Response As String
Public Encoding As String
Public Status As Long
Public StartTime As Date
Private Sub Class_Initialize()
Set HTTP = New WinHttpRequest
End Sub
Private Sub Class_Terminate()
'Debug.Print "ThreadID " & ThreadID & " destroyed"
Set HTTP = Nothing
Set Parent = Nothing
End Sub
Function IsFrosen(ByRef Timeout As Long) As Boolean
IsFrosen = ((Now - StartTime) * 86400 >= (Timeout * 1))
End Function
Private Sub http_OnResponseDataAvailable(data() As Byte)
On Error Resume Next
'Debug.Print "ThreadID=" & ThreadID, "DataAvailable", "size=" & UBound(Data)
Response = Response & StrConv(data, vbUnicode)
End Sub
Private Sub http_OnResponseFinished()
On Error Resume Next
Me.Parent.Finished Me
End Sub
Private Sub http_OnResponseStart(ByVal Status As Long, ByVal ContentType As String)
On Error Resume Next
If Me.Encoding = "" Then
If ContentType Like "*=utf-8*" Then Me.Encoding = "utf-8"
If ContentType Like "*1251*" Then Me.Encoding = "windows-1251"
End If
Me.Status = Status
With Me.Parent
.RequestStarted = .RequestStarted + 1
Select Case Status
Case 200 To 399: .RequestStatusOK = .RequestStatusOK + 1
Case 400 To 599: .RequestStatusFailed = .RequestStatusFailed + 1
Case Else: Debug.Print "Unsupported Status=" & Status
End Select
End With
'Debug.Print "ThreadID=" & ThreadID, "Status=" & Status, "ContentType=" & ContentType
End Sub |
|
количество потоков от сайта зависит
на быстрых сайтах и 200 потоков работает (если больше - Excel глючит), обычно использую 20-40 потоков