''' <summary>
''' One collected video entry: its position in the playlist, its title and
''' the two video identifiers scraped from the player page.
''' </summary>
Structure VList
    ''' <summary>Zero-based episode index within the playlist.</summary>
    Public id As Integer
    ''' <summary>Episode title taken from the page's title element.</summary>
    Public title As String
    ''' <summary>Value of the page's <c>videoId</c> script variable.</summary>
    Public vid1 As String
    ''' <summary>Value of the page's <c>videoId2</c> script variable.</summary>
    Public vid2 As String

    ''' <summary>
    ''' Formats the entry as <c>id:&lt;title&gt; [vid1]</c>.
    ''' </summary>
    ''' <remarks>
    ''' Declared Overrides (not Overloads) so that calls made through Object,
    ''' such as String.Format("{0}", item) or list controls rendering the item,
    ''' also get this text instead of the type name.
    ''' </remarks>
    ''' <returns>The display text for this entry.</returns>
    Public Overrides Function ToString() As String
        Return String.Format("{0}:<{1}> [{2}]", id, title, vid1)
    End Function
End Structure
' Entries gathered by the current collection run: Button1_Click appends to this
' list and Button2_Click empties it after copying the entries into ListBox1.
Private myList As New List(Of VList)()
''' <summary>
''' Collects every video of the playlist whose URL is in TextBox1.
''' </summary>
''' <remarks>
''' Steps:
''' 1. Extract the numeric playlist id from the URL
'''    (e.g. http://www.youku.com/playlist_show/id_2350764.html).
''' 2. Download the playlist page and show the album name.
''' 3. Walk the per-episode player pages, starting at index 0, and record
'''    videoId, videoId2 and the title of each one in myList. The walk stops
'''    when a page has no videoId/videoId2, when the same videoId comes back
'''    twice in a row, or when a download fails.
''' 4. Report the number of collected videos and refresh the list box via
'''    Button2_Click.
''' </remarks>
''' <param name="sender">Event source; forwarded to Button2_Click.</param>
''' <param name="e">Event data; forwarded to Button2_Click.</param>
Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button1.Click
    Dim listUrl As String = TextBox1.Text

    ' The playlist id is the run of digits after "playlist_show/id_".
    Dim idMatch As Match = Regex.Match(listUrl, "playlist_show/id_(\d+).*\.html")
    If Not idMatch.Success Then
        MsgBox("专辑列表提取失败!")
        Exit Sub
    End If

    ' Player page template: {0} = episode index, {1} = sort order.
    Dim tarUrl As String = String.Format("http://v.youku.com/v_playlist/f{0}", idMatch.Groups(1).Value) & "o{1}p{0}.html"

    ' Download the playlist page itself to read the album name.
    Dim ret As String
    Try
        ret = FetchPageText(listUrl)
    Catch ex As Exception When TypeOf ex Is WebException OrElse TypeOf ex Is UriFormatException
        MsgBox("页面下载失败: " & ex.Message)
        Exit Sub
    End Try

    Dim albumMatch As Match = Regex.Match(ret, "<title>(.+) - 专辑 - 优酷视频</title>")
    If Not albumMatch.Success Then
        MsgBox("专辑名称提取失败!")
    Else
        MsgBox("专辑名:《" & albumMatch.Groups(1).Value & "》")
    End If

    Dim preVid As String = ""   ' videoId of the previously collected episode
    Dim nowid As Integer = 0    ' index of the episode being processed

    Do
        ' Sort order "0" (the original author's note calls this reverse order).
        Try
            ret = FetchPageText(String.Format(tarUrl, nowid, "0"))
        Catch ex As WebException
            ' Keep whatever has been collected so far and finish normally.
            MsgBox("页面下载失败: " & ex.Message)
            Exit Do
        End Try

        Dim nlist As New VList
        nlist.id = nowid

        ' videoId: numeric id assigned in the page's script.
        Dim vidMatch As Match = Regex.Match(ret, "var\s+videoId\s*=\s*""(\d+)""\s*;")
        If Not vidMatch.Success Then Exit Do
        ' The same id as the previous page means the index ran past the last episode.
        If vidMatch.Groups(1).Value = preVid Then Exit Do
        nlist.vid1 = vidMatch.Groups(1).Value

        ' videoId2: may contain '=' in addition to word characters.
        Dim vid2Match As Match = Regex.Match(ret, "var\s+videoId2\s*=\s*""([\w=]+)""\s*;")
        If Not vid2Match.Success Then Exit Do
        nlist.vid2 = vid2Match.Groups(1).Value

        ' Title: second " - " separated part of the page title.
        Dim titleMatch As Match = Regex.Match(ret, "<title>(.+) - (.+) - 视频 - 优酷视频 - 在线观看 - </title>")
        If titleMatch.Success Then
            nlist.title = titleMatch.Groups(2).Value
        Else
            nlist.title = "{名称查找错误}"
        End If

        myList.Add(nlist)
        preVid = nlist.vid1

        Me.Text = nowid & " : 处理完成!"
        nowid += 1
    Loop

    MsgBox(nowid & " 个视频全部采集处理完成!")
    Button2_Click(sender, e)
End Sub

''' <summary>
''' Downloads <paramref name="url"/> and returns the response body as text.
''' </summary>
''' <remarks>
''' The body is decoded with the charset announced by the server; when none is
''' announced, or the name is not a known encoding, UTF-8 is used. The response
''' and the reader are disposed before returning.
''' </remarks>
''' <param name="url">Absolute HTTP URL to download.</param>
''' <returns>The decoded response body.</returns>
Private Function FetchPageText(ByVal url As String) As String
    Dim request As HttpWebRequest = DirectCast(WebRequest.Create(url), HttpWebRequest)
    Using response As HttpWebResponse = DirectCast(request.GetResponse(), HttpWebResponse)
        Dim pageEncoding As Encoding = Encoding.UTF8
        If Not String.IsNullOrEmpty(response.CharacterSet) Then
            Try
                pageEncoding = Encoding.GetEncoding(response.CharacterSet)
            Catch ex As ArgumentException
                ' Unknown charset name: keep the UTF-8 fallback.
            End Try
        End If
        Using reader As New StreamReader(response.GetResponseStream(), pageEncoding)
            Return reader.ReadToEnd()
        End Using
    End Using
End Function
''' <summary>
''' Replaces the contents of ListBox1 with one line per entry of myList and
''' then empties myList.
''' </summary>
''' <param name="sender">Event source (unused).</param>
''' <param name="e">Event data (unused).</param>
Private Sub Button2_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles Button2.Click
    ListBox1.Items.Clear()

    For index As Integer = 0 To myList.Count - 1
        Dim entry As VList = myList(index)
        ' Line layout: id:<title> [videoId]
        Dim caption As String = String.Format("{0}:<{1}> [{2}]", entry.id, entry.title, entry.vid1)
        ListBox1.Items.Add(caption)
    Next

    ' The collected entries are discarded once they have been shown.
    myList.Clear()
End Sub