Function
LoadHTML_MultiThreading(
ByRef
coll
As
Collection,
Optional
ByVal
MaxThreads&,
Optional
ByVal
Encoding$)
As
Variant
On
Error
Resume
Next
Dim
sender
As
New
HTTP_Requests
URLsAmount& = coll.Count
With
sender
If
Len(Encoding$)
Then
.Encoding = Encoding$
If
MaxThreads& > 0
Then
.MaxThreads = MaxThreads&
.ExecuteAll coll
If
.WaitForResponse()
Then
End
If
LoadHTML_MultiThreading = .result.Items
End
With
Set
coll =
Nothing
Set
sender =
Nothing
End
Function
Option
Compare Text:
Option
Explicit
Dim
Requests
As
New
Dictionary
Dim
SourceURLs
As
New
Collection
Public
result
As
New
Dictionary
Public
SourceURLsAmount
As
Long
Public
Encoding
As
String
Public
MaxThreads
As
Long
Public
Timeout
As
Long
Public
HTML_Filters
As
String
Public
pi
As
ProgressIndicatorNew
Public
RequestStarted
As
Long
Public
RequestFrozen
As
Long
Public
RequestFinished
As
Long
Public
RequestStatusOK
As
Long
Public
RequestStatusFailed
As
Long
Private
Sub
Class_Initialize()
Me
.MaxThreads = 50
Me
.Timeout = DEFAULT_QUERY_TIMEOUT&
End
Sub
Private
Sub
Class_Terminate()
On
Error
Resume
Next
Dim
h
As
New
HTTP_Request
For
Each
h
In
Requests
Set
h =
Nothing
Next
Set
Requests =
Nothing
Set
SourceURLs =
Nothing
Set
result =
Nothing
End
Sub
Function
ExecuteAll(
ByVal
coll
As
Collection)
As
Variant
On
Error
Resume
Next
Dim
i&
Set
SourceURLs = coll
SourceURLsAmount = SourceURLs.Count
pi.ShowExtraBar
"Многопоточная загрузка страниц ("
& SourceURLsAmount &
" URL, потоков: "
&
Me
.MaxThreads &
", таймаут: "
& Timeout &
" сек.)"
,
False
For
i = 1
To
Application.Min(
Me
.MaxThreads, SourceURLs.Count)
ExecuteNext
DoEvents
UpdateProgressBar
Next
End
Function
Function
WaitForResponse(
Optional
ByVal
Timeout&)
As
Boolean
On
Error
Resume
Next
If
Timeout& <= 0
Then
Timeout& = (SourceURLsAmount /
Me
.MaxThreads * 1.2 + 2) *
Me
.Timeout
Dim
i&, TimeStamp
As
Double
: TimeStamp =
CDbl
(Now) * 86400
While
(
Me
.RequestFinished < SourceURLsAmount)
And
(
CDbl
(Now) * 86400 - TimeStamp < Timeout&)
If
StopMacro
Then
pi.HideExtraBar:
Exit
Function
For
i = 1
To
100: DoEvents:
Next
UpdateProgressBar
Wend
For
i = 1
To
100: DoEvents:
Next
WaitForResponse = (
Me
.RequestFrozen +
Me
.RequestStatusFailed) = 0
pi.HideExtraBar
End
Function
Sub
UpdateProgressBar()
On
Error
Resume
Next
Static
LastPercent&, LastUpdateTime
As
Date
If
Now - LastUpdateTime < 0.1 / 86400
Then
Exit
Sub
For
Each
k
In
Requests.Keys
Set
h = Requests.Item(
CStr
(k))
If
h.IsFrosen(Timeout)
Then
Me
.RequestFrozen =
Me
.RequestFrozen + 1
Me
.Finished h
End
If
Next
Set
h =
Nothing
LastUpdateTime = Now
DoEvents
End
Sub
Sub
ExecuteNext()
If
StopMacro
Then
Exit
Sub
If
SourceURLs.Count = 0
Then
Exit
Sub
On
Error
Resume
Next
Dim
h
As
New
HTTP_Request, ind&
ind& = SourceURLsAmount - SourceURLs.Count + 1
With
h
Set
.Parent =
Me
h.ThreadID =
CStr
(ind&)
h.Encoding =
Me
.Encoding
h.URL = SourceURLs(1&)
If
h.URL
Like
"*[А-Яа-яЁё]*"
Then
h.URL = ConvertURLtoPunycode(h.URL)
h.StartTime = Now
End
With
SourceURLs.Remove 1&
Requests.Add h.ThreadID, h
With
h.HTTP
.SetTimeouts
Me
.Timeout * 1000,
Me
.Timeout * 1000,
Me
.Timeout * 1000,
Me
.Timeout * 1000
.Open
"GET"
, h.URL,
True
If
RunningParser.Options.UseClientCertificate
Then
.SetClientCertificate RunningParser.Options.ClientCertificateName
PageLoadRequested h.URL
AddRequestHeadersFromStore h.HTTP
If
Not
CookiesStore
Is
Nothing
Then
.SetRequestHeader
"Cookie"
, GetCookiesFromStore
.Send
End
With
End
Sub
Sub
Finished(
ByRef
h
As
HTTP_Request)
On
Error
Resume
Next
:
Dim
ThreadID$, InfoTag$
ThreadID$ = h.ThreadID
With
h
If
.Encoding =
"utf-8"
Then
.Response = DecodeUTF8(.Response)
InfoTag$ =
"<info id='parser multithreading' status='"
& .Status &
"' encoding='"
& .Encoding &
"' href='"
& .URL &
"'></info>"
If
Len(
Me
.HTML_Filters)
Then
.Response = InfoTag$ & vbNewLine & ApplyHTMLFilters(.Response,
Me
.HTML_Filters)
Else
.Response = Replace(.Response,
"</head>"
, vbNewLine & InfoTag$ & vbNewLine &
"</head>"
, , 1)
End
If
If
InStr(1, .Response, InfoTag$, vbTextCompare) = 0
Then
.Response = InfoTag$ & vbNewLine & .Response
result.Add ThreadID$, .Response
End
With
Set
h =
Nothing
Me
.RequestFinished =
Me
.RequestFinished + 1
Requests.Remove ThreadID$
ExecuteNext
End
Sub
Option
Compare Text:
Option
Explicit
Public
WithEvents
HTTP
As
WinHttpRequest
Public
Parent
As
HTTP_Requests
Public
ThreadID
As
String
Public
URL
As
String
Public
Response
As
String
Public
Encoding
As
String
Public
Status
As
Long
Public
StartTime
As
Date
Private
Sub
Class_Initialize()
Set
HTTP =
New
WinHttpRequest
End
Sub
Private
Sub
Class_Terminate()
Set
HTTP =
Nothing
Set
Parent =
Nothing
End
Sub
Function
IsFrosen(
ByRef
Timeout
As
Long
)
As
Boolean
IsFrosen = ((Now - StartTime) * 86400 >= (Timeout * 1))
End
Function
Private
Sub
http_OnResponseDataAvailable(data()
As
Byte
)
On
Error
Resume
Next
Response = Response & StrConv(data, vbUnicode)
End
Sub
Private
Sub
http_OnResponseFinished()
On
Error
Resume
Next
Me
.Parent.Finished
Me
End
Sub
Private
Sub
http_OnResponseStart(
ByVal
Status
As
Long
,
ByVal
ContentType
As
String
)
On
Error
Resume
Next
If
Me
.Encoding =
""
Then
If
ContentType
Like
"*=utf-8*"
Then
Me
.Encoding =
"utf-8"
If
ContentType
Like
"*1251*"
Then
Me
.Encoding =
"windows-1251"
End
If
Me
.Status = Status
With
Me
.Parent
.RequestStarted = .RequestStarted + 1
Select
Case
Status
Case
200
To
399: .RequestStatusOK = .RequestStatusOK + 1
Case
400
To
599: .RequestStatusFailed = .RequestStatusFailed + 1
Case
Else
: Debug.Print
"Unsupported Status="
& Status
End
Select
End
With
End
Sub