Anybody know how to convert this into a project and have a webform work with it instead of the console?
http://sourceforge.net/projects/imdbparse/
' Copyright (c) 2004, Andrew Brereton
' All rights reserved.
'
' Redistribution and use in source and binary forms, with or without
' modification, are permitted provided that the following conditions are met:
' * Redistributions of source code must retain the above copyright notice, this
' list of conditions and the following disclaimer.
' * Redistributions in binary form must reproduce the above copyright notice,
' this list of conditions and the following disclaimer in the documentation
' and/or other materials provided with the distribution.
' * Neither the name of the original developer nor the names of its contributors
' may be used to endorse or promote products derived from this software
' without specific prior written permission.
'
' THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
' ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
' WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
' DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
' ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
' (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
' LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
' ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
' (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
' SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Option Explicit On
Imports System
Imports System.IO
Imports System.Net
Imports System.Text.RegularExpressions

' Console utility: takes a seven-digit IMDB title number on the command line,
' downloads http://imdb.com/title/tt<number>/ and writes the title, year,
' director, genre(s), tagline, plot outline, running time and Australian
' certificate to standard output, one value per line (an empty line is written
' for a value that could not be located).
Module IMDBParse

    Sub Main()
        Dim cmd As String = Command()

        ' Nothing is fetched or printed when the argument is not a valid title number.
        If Not ValidCommand(cmd) Then
            Return
        End If

        Dim html As String = DownloadPage("http://imdb.com/title/tt" & cmd & "/")

        ' 1-based scan position. Each successfully extracted field moves it
        ' forward, so the fields are searched for in page order.
        Dim position As Integer = 1

        ' *** TITLE ***
        ' The <title> element is expected to look like "Name (yyyy)".
        Dim rawTitle As String = ExtractField(html, position, "<title>", "</title>", 0)
        Dim strTitle As String = rawTitle
        Dim lastSpace As Integer = InStrRev(rawTitle, " ") ' the space just before "(yyyy)"
        If lastSpace > 0 Then
            strTitle = Left(rawTitle, lastSpace - 1)
        End If
        Console.WriteLine(StripBlanks(strTitle))

        ' *** YEAR ***
        Dim strYear As String = ""
        Dim parenAt As Integer = InStrRev(rawTitle, "(")
        If parenAt > 0 Then
            strYear = Mid(rawTitle, parenAt + 1, 4)
        End If
        Console.WriteLine(strYear)

        ' *** DIRECTOR ***
        Console.WriteLine(StripBlanks(ExtractField(html, position, "/name/", "</a>", 12)))

        ' *** GENRE(s) ***
        Console.WriteLine(StripHTML(ExtractField(html, position, "Genre", "(more)", 1)))

        ' *** TAGLINE ***
        Console.WriteLine(StripBlanks(ExtractField(html, position, "Tagline", "<a", 6)))

        ' *** PLOT ***
        Console.WriteLine(StripBlanks(ExtractField(html, position, "Plot Outline", "<a", 6)))

        ' *** RUNNING TIME ***
        Console.WriteLine(StripBlanks(ExtractField(html, position, "Runtime", "min", 6)))

        ' *** AUDIENCE ***
        ' The end position is pulled back 17 characters from the "/" that follows the marker.
        Console.WriteLine(StripBlanks(ExtractField(html, position, ">Australia:", "/", 0, -17)))
    End Sub

    ' Download the page at the given URL and return its body as text.
    ' The response and its reader are closed even when reading fails.
    Private Function DownloadPage(ByVal url As String) As String
        Dim req As HttpWebRequest = CType(WebRequest.Create(url), HttpWebRequest)
        req.Accept = "text/html, application/xml;q=0.9, application/xhtml+xml;q=0.9, image/png, image/jpeg, image/gif, image/x-xbitmap, */*;q=0.1"
        req.UserAgent = "Opera/7.23 (Windows NT 5.0; U) [en]"
        req.Headers.Add("Accept-Charset", "windows-1252, utf-8, utf-16, iso-8859-1;q=0.6, *;q=0.1")
        req.Headers.Add("Accept-Language", "en")
        ' No Accept-Encoding header is sent: the body is read straight into a
        ' StreamReader with no decompression step, so asking the server for
        ' gzip/deflate could produce an unreadable page.

        Dim res As HttpWebResponse = Nothing
        Dim sr As StreamReader = Nothing
        Try
            res = CType(req.GetResponse(), HttpWebResponse)
            sr = New StreamReader(res.GetResponseStream())
            Return sr.ReadToEnd()
        Finally
            If Not sr Is Nothing Then sr.Close()
            If Not res Is Nothing Then res.Close()
        End Try
    End Function

    ' Return the text between startMarker and endMarker, searching html from
    ' position (1-based, case-insensitive).
    '   extraSpace - number of characters to skip after startMarker before the value begins
    '   endAdjust  - offset added to the position at which endMarker was found
    ' On success position is moved to the first character of the value.
    ' When either marker is missing, or the computed span is empty, an empty
    ' string is returned and position is left unchanged so that later fields
    ' can still be found.
    Private Function ExtractField(ByVal html As String, ByRef position As Integer, _
                                  ByVal startMarker As String, ByVal endMarker As String, _
                                  ByVal extraSpace As Integer, _
                                  Optional ByVal endAdjust As Integer = 0) As String
        If html Is Nothing OrElse position < 1 Then
            Return ""
        End If

        Dim markerAt As Integer = InStr(position, html, startMarker, CompareMethod.Text)
        If markerAt = 0 Then
            Return ""
        End If

        Dim valueStart As Integer = markerAt + Len(startMarker) + extraSpace

        ' The search for the end marker begins one marker-length past the end of
        ' the start marker (valueStart + Len(startMarker) - extraSpace).
        Dim searchFrom As Integer = markerAt + 2 * Len(startMarker)
        Dim endAt As Integer = InStr(searchFrom, html, endMarker, CompareMethod.Text)
        If endAt = 0 Then
            Return ""
        End If

        Dim length As Integer = (endAt + endAdjust) - valueStart
        If length <= 0 Then
            Return ""
        End If

        position = valueStart
        Return Mid(html, valueStart, length)
    End Function

    ' Strip the HTML tags from the given string
    Private Function StripHTML(ByVal strHTML As String) As String
        Return Regex.Replace(strHTML, "<[^>]*>", "")
    End Function

    ' Remove carriage returns and line feeds, then trim surrounding spaces
    Private Function StripBlanks(ByVal strText As String) As String
        strText = Replace(strText, vbCr, "")
        strText = Replace(strText, vbLf, "")
        Return Trim(strText)
    End Function

    ' Validate the commandline parameter: exactly seven decimal digits.
    ' (IsNumeric is not used because it also accepts values such as "123.456".)
    Private Function ValidCommand(ByVal cmd As String) As Boolean
        Return (Not cmd Is Nothing) AndAlso Regex.IsMatch(cmd, "^[0-9]{7}\z")
    End Function

End Module
![]() |
0 |
![]() |
There are a number of issues with this code, as it doesn't work for quite a large number of entries in the IMDB database. I'm not going to guarantee that mine is that much better a version, and the regex code could certainly be improved.
To use it, call InternetMovieDBParser.RetrieveRecord, passing in a string containing 7 numeric digits. This will either
(a) return you an InternetMovieDBRecord object with the fields that it knows filled in. You can then display these on a form, or
(b) throw an exception at you because something's wrong with the request, the server is down, etc.
Personally, I wouldn't use this code on any form of production server, but if it's just for fun then no problems. I will reiterate the disclaimer from before:
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Also, please don't forget to credit the original author of the code from which this is derived. If you intend to use the code, you MUST obey the terms and conditions of the copyright notice posted above.
Imports System
Imports System.IO
Imports System.Net
Imports System.Text.RegularExpressions

' Read-only holder for the values scraped from one IMDB title page.
' Instances are created by InternetMovieDBParser; a value that could not be
' found on the page is an empty string.
Public Class InternetMovieDBRecord
    Public ReadOnly Title As String
    Public ReadOnly Director As String
    Public ReadOnly Year As String
    Public ReadOnly Genre As String
    Public ReadOnly PlotOutline As String
    Public ReadOnly TagLine As String
    Public ReadOnly AustralianCertificate As String
    Public ReadOnly RunningTime As String

    Friend Sub New(ByVal title As String, ByVal director As String, ByVal year As String, ByVal genre As String, _
                   ByVal plotOutline As String, ByVal tagLine As String, ByVal ausCert As String, ByVal runTime As String)
        Me.Title = title
        Me.Director = director
        Me.Year = year
        Me.Genre = genre
        Me.PlotOutline = plotOutline
        Me.TagLine = tagLine
        Me.AustralianCertificate = ausCert
        Me.RunningTime = runTime
    End Sub
End Class

' Downloads an IMDB title page and extracts an InternetMovieDBRecord from it.
Public Class InternetMovieDBParser

    ' Fetch http://imdb.com/title/tt<cmd>/ and parse it.
    '   cmd - the title number: exactly seven decimal digits
    ' Returns the parsed record.
    ' Throws ArgumentException when cmd is not seven digits (it is placed
    ' directly into the request URL, so nothing else is let through); any
    ' exception raised by the web request or by reading the response is
    ' passed on to the caller.
    Public Shared Function RetrieveRecord(ByVal cmd As String) As InternetMovieDBRecord
        If cmd Is Nothing OrElse Not Regex.IsMatch(cmd, "^[0-9]{7}\z") Then
            Throw New ArgumentException("cmd must be exactly seven decimal digits.", "cmd")
        End If

        Dim res As HttpWebResponse = Nothing
        Dim sr As StreamReader = Nothing
        Try
            Dim req As HttpWebRequest = CType(WebRequest.Create("http://imdb.com/title/tt" & cmd & "/"), HttpWebRequest)
            res = CType(req.GetResponse(), HttpWebResponse)
            'read in the page
            sr = New StreamReader(res.GetResponseStream())
            Return ParseRecord(sr.ReadToEnd())
        Finally
            If Not sr Is Nothing Then sr.Close()
            If Not res Is Nothing Then res.Close()
        End Try
    End Function

    ' Extract the record fields from the text of a title page.
    ' Each field is located with a regular expression; a field whose pattern
    ' does not match is returned as an empty string.
    Friend Shared Function ParseRecord(ByVal html As String) As InternetMovieDBRecord
        If html Is Nothing Then
            Throw New ArgumentNullException("html")
        End If

        ' Drop non-breaking-space entities only. Ordinary spaces must stay,
        ' because the title, plot and runtime patterns below contain literal spaces.
        html = Regex.Replace(html, "&nbsp;", "")

        Dim m As Match = Regex.Match(html, "<title>(.*) \((\d{4})\)</title>")
        Dim title As String = m.Groups(1).Value
        Dim year As String = m.Groups(2).Value

        ' First /name/ link on the page; tags between several names become " / ".
        m = Regex.Match(html, "/name/nm\d{7}/"">(.*)</a>")
        Dim director As String = StripHTML(Regex.Replace(m.Groups(1).Value, "\(more\)", ""), " / ").TrimEnd(New Char() {"/"c, " "c})

        m = Regex.Match(html, "Genre:</b>\n(.*)(\(more\)|\n)")
        Dim genre As String = StripHTML(Regex.Replace(m.Groups(1).Value, "\(more\)", ""), "").Trim()

        ' Lazy match: stop at the first " <a" instead of running to the end of
        ' the line and dragging the trailing link markup into the value.
        m = Regex.Match(html, "Tagline:</b> ?(.*?)(( <a)|\n)")
        Dim tagLine As String = Regex.Replace(m.Groups(1).Value, "\(more\)", "").Trim()

        m = Regex.Match(html, "Plot (?:(?:Outline)|(?:Summary)):</b> ?([^<]*)(( <a)|\n)")
        Dim plotOutline As String = Regex.Replace(m.Groups(1).Value, "\(more\)", "")

        m = Regex.Match(html, "Runtime:</b>\n(\d*) min")
        Dim runTime As String = m.Groups(1).Value

        ' Take everything up to the next tag so that multi-letter certificates
        ' (for example "PG" or "MA") are captured as well as single letters.
        m = Regex.Match(html, ">Australia:([^<]+)<")
        Dim ausCert As String = m.Groups(1).Value

        Return New InternetMovieDBRecord(title, director, year, genre, plotOutline, tagLine, ausCert, runTime)
    End Function

    ' Replace every run of consecutive HTML tags in strHTML with the given text
    Private Shared Function StripHTML(ByVal strHTML As String, ByVal replace As String) As String
        Return Regex.Replace(strHTML, "(<[^>]*>)+", replace)
    End Function
End Class
Regards
Dave
![]() |
0 |
![]() |
I do believe that Amazon provides access to IMDB through their webservices API.
![]() |
0 |
![]() |
It's just for fun. I have another IMDB program from SourceForge that is Java-based, and I can't seem to get it to do what I want. I saw this while looking around and thought it was interesting. I am using a web application to access the Access database that my movies go into.
I tried using the Amazon web services API, but haven't gotten far on that yet.
Thanks.
![]() |
0 |
![]() |