Sunday, August 05, 2007

HTML Parser for VBA

'I successfully ran this program using Microsoft Excel to
'parse some html files and extract information from them
'into a text file

Option Explicit

'This program parses through html files
'and copies only the information of interest to a new file
Sub EditTheFiles()
Dim FSO As FileSystemObject
'need reference to Microsoft Scripting Runtime
Dim SourceFolder As Scripting.Folder, SubFolder As Scripting.Folder
Dim FileItem As Scripting.file
Dim SourceFolderName As String
SourceFolderName = "C:\Folder\SubFolder"
'SourceFolderName = BrowseFolder("Select A Folder")
Dim strLineParse As String
Dim bolKeepTheLine As Boolean
Dim strGet As String
Dim i As Long
Dim j As Long
Dim dblFileLength As Double
'a running variable that counts the number of bytes
'we've parsed; use double because it might get big

Set FSO = New Scripting.FileSystemObject
Set SourceFolder = FSO.GetFolder(SourceFolderName)

On Error Resume Next
Kill (SourceFolderName & "\Output_File.txt") 'start afresh

Open SourceFolderName & "\Output_File.txt" For Output As #2
'we only use one file to hold all the important information
'from all the files in the directory
For Each FileItem In SourceFolder.Files
'it seems to order the files by date/time
'with earliest first
If Right(FileItem.Name, 5) = ".html" Then

Open FileItem.Path For Binary As #1
'we use binary for the "Get" statement

'some initilizations
bolKeepTheLine = False
'by default, don't add the lines
'to our new file
dblFileLength = 0 'byte number
i = 0 'line number
j = 0
strLineParse = ""

While dblFileLength < LOF(1)
'LOF gives the size of the file in bytes
strLineParse = ""
'start it off with nothing so the first
'character will concatenate correctly
strGet = "A"
'dummy value to start the loop

'the html files we are parsing don't have 0D 0A
'at the end of the lines so we can't use the
'Line Input statment;
'they only have 0A at the ends, so we have to
'make our own "Line Input" statement
While Asc(strGet) <> 10 And dblFileLength < LOF(1)
'added the 2nd condition to make sure
'the loop ends
Get #1, , strGet
'runs through the file byte by byte
dblFileLength = dblFileLength + 1
strLineParse = strLineParse & strGet
Wend
strLineParse = Left(strLineParse, _
Len(strLineParse) - 1)
'take out the last LF character
i = i + 1

'in our case, the stuff we want to keep
'is two lines below this text
'so if we find this in the old file,
'go ahead and edit the new file
If strLineParse = "BEGIN TEXT" Then
'marks the beginning of our information
j = i + 2
'we don't start keeping the lines till two
'lines down
Print #2, ""
Print #2, ""
Print #2, Left(FileItem.Name, _
Len(FileItem.Name) - 5)
Print #2, ""
End If

If i = j Then
bolKeepTheLine = True
End If

If strLineParse = "END TEXT" Then
'marks the end of our information
bolKeepTheLine = False
End If

If bolKeepTheLine Then
Print #2, strLineParse
End If
Wend 'get next line in file
Close #1
End If
Next FileItem 'get the next file in the source directory
Close #2

'housekeeping
Set FileItem = Nothing
Set SourceFolder = Nothing
Set FSO = Nothing

End Sub

Labels:

Thursday, July 19, 2007

Drive Your Dog Crazy with VBA


Sub DriveTheDogCrazy()

Dim i As Integer
Dim j As Integer
Randomize (Timer)

For i = 1 To 500
j = Round(20 * Rnd()) 'sec. between doorbell dings
Application.Wait (Now + j / 86400)
Shell ("C:\Program Files\Winamp\winamp.exe c:\doorbell.mp3")
Next i

End Sub

Labels: