0% found this document useful (0 votes)
3 views8 pages

Genericfile

The document is a VBA script designed to convert Word documents into Excel files by extracting specific paragraphs based on defined rules. It initializes variables, processes each file listed in an Excel sheet, and logs activities while transferring data. The script includes functions for logging, adding output to Excel rows, and extracting content from Word paragraphs based on various conditions.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
0% found this document useful (0 votes)
3 views8 pages

Genericfile

The document is a VBA script designed to convert Word documents into Excel files by extracting specific paragraphs based on defined rules. It initializes variables, processes each file listed in an Excel sheet, and logs activities while transferring data. The script includes functions for logging, adding output to Excel rows, and extracting content from Word paragraphs based on various conditions.
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as TXT, PDF or read online on Scribd
You are on page 1/ 8

'Option Explicit
' NOTE(review): Option Explicit is left disabled because procedures in this
' module still rely on implicitly created variables (for example bNewExcelFile,
' nCol and bEngMatch). Enable it only after those are declared.

' ---------------------------------------------------------------------------
' Module-level state shared by all conversion procedures.
' ---------------------------------------------------------------------------

' Row/column/paragraph counters. Declared As Long (not Integer) so documents
' with more than 32767 paragraphs or log lines do not raise an overflow error.
Dim nLogLine As Long, nParaCnt As Long, nOutputRow As Long, nOutputCol As Long, TotalParas As Long

' Output workbook/sheet and the control sheets of the macro workbook.
Dim ExcelBook As Object, ExcelSheet As Object, FilesSheet As Object, LogSheet As Object, Rulesheet As Object

' Text of the paragraph being processed, its first space-delimited word, the
' per-file start marker, and the output workbook name of the following list row.
Dim strParaText As String, strFirstWord As String, strStartTxt As String, strNextExcelfile As String

' Current Word paragraph and its range. Each variable needs its own type:
' "Dim a, b As Object" would leave the first one an untyped Variant.
Dim currPara As Object, currRng As Object

' Rule tables loaded from the RULES sheet (2-D arrays) and the list of files.
Dim Keystrings As Variant, Actiontbl As Variant, FileList As ListObject

' Row index inside FileList.Range of the file currently being converted.
Dim FileNum As Long

' Last extracted number text and the pending cell texts of the output row.
Dim strSlokanum As String, strOutRowText(1 To 10) As String

Dim oWord As Object ' Late-bound Word.Application instance.
Dim oDoc As Object  ' Word document currently open in oWord.

' Marker word compared against the text following a leading paragraph number.
Dim strMoolam As String

' Entry point: converts every Word document flagged "Y" in the FILES_LIST table
' into a worksheet of its target Excel workbook.
'
' For each flagged row the Word file is opened in a hidden Word instance, the
' "Template" sheet of the target workbook is copied and renamed to the requested
' sheet name, and ExtractWord_To_Excel fills it. Consecutive rows that name the
' same workbook share one open workbook; it is saved and closed when the next
' row names a different workbook or the list ends.
'
' When a workbook is opened, every sheet except "Template" is deleted first, so
' each target workbook is rebuilt from scratch.
'
' Errors raised while converting one file are logged to the LOG sheet and the
' loop continues with the next row. Errors during the initial sheet/table
' lookup are not trapped and surface to the user.
Sub ConvertWordToExcelFiles()

    Dim bNewExcelFile As Boolean
    Dim strProcessFlg As String, strWordfile As String
    Dim strExcelfile As String, strExcelSheet As String
    Dim strFolderpath As String
    Dim nLastListRow As Long
    Dim iterator As Long

    ' Set the sheets and load file conversion table
    Set FilesSheet = ActiveWorkbook.Worksheets("FILES")
    Set LogSheet = ActiveWorkbook.Worksheets("LOG")
    Set Rulesheet = ActiveWorkbook.Worksheets("RULES")
    Set FileList = FilesSheet.ListObjects("FILES_LIST")

    ' initialize variables & log
    strNextExcelfile = ""
    bNewExcelFile = True
    Set oWord = Nothing
    Set oDoc = Nothing
    Set ExcelBook = Nothing
    ' Marker word built from Unicode code points so the module source stays ASCII.
    strMoolam = ChrW(2990) & ChrW(3010) & ChrW(2994) & ChrW(2990) & ChrW(3021)

    Call InitiateLog

    ' The rule tables and the folder path do not change per file: load them once.
    Keystrings = Rulesheet.ListObjects("Table1").DataBodyRange.Value
    Actiontbl = Rulesheet.ListObjects("Table2").DataBodyRange.Value
    strFolderpath = FilesSheet.Range("Folder_path").Text

    ' FileList.Range includes the header row, so data rows occupy 2 .. Count + 1.
    nLastListRow = FileList.ListRows.Count + 1

    ' Process each file for conversion
    For FileNum = 2 To nLastListRow

        ' load the values for the specific file row
        strProcessFlg = FileList.Range.Cells(FileNum, 1).Value
        strWordfile = FileList.Range.Cells(FileNum, 2).Value
        strExcelfile = FileList.Range.Cells(FileNum, 3).Value
        strExcelSheet = FileList.Range.Cells(FileNum, 4).Value
        strStartTxt = FileList.Range.Cells(FileNum, 5).Value

        If strProcessFlg = "Y" Then

            ' From here on a failure only aborts this file (see FileFailed).
            On Error GoTo FileFailed

            Set oWord = CreateObject("Word.Application")
            oWord.Visible = False ' Do not show the Word file.
            Set oDoc = oWord.Documents.Open(strFolderpath & "/" & strWordfile)

            If (bNewExcelFile = True) Then
                Set ExcelBook = Workbooks.Open(strFolderpath & "/" & strExcelfile)
                bNewExcelFile = False

                ' Start from a clean workbook: keep only the Template sheet.
                ' Iterate backwards because deleting shifts the sheet indexes.
                Application.DisplayAlerts = False
                For iterator = ExcelBook.Worksheets.Count To 1 Step -1
                    With ExcelBook.Worksheets(iterator)
                        If .Name <> "Template" Then .Delete
                    End With
                Next iterator
                Application.DisplayAlerts = True
            End If

            ' The workbook window is made visible around the sheet copy and hidden again.
            ExcelBook.Windows(1).Visible = True
            ExcelBook.Worksheets("Template").Copy _
                after:=ExcelBook.Worksheets(ExcelBook.Worksheets.Count)
            ExcelBook.Windows(1).Visible = False
            ExcelBook.Worksheets(ExcelBook.Worksheets.Count).Name = strExcelSheet
            Set ExcelSheet = ExcelBook.Worksheets(strExcelSheet)

            ' Call main routine to extract from word to populate excel
            Call ExtractWord_To_Excel

Closesection:
            ' Reached on success and, via Resume, after a per-file failure.
            ' Shutting Word down is best-effort: it may never have started.
            On Error Resume Next
            If Not oWord Is Nothing Then oWord.Quit
            On Error GoTo 0
            Set oWord = Nothing
            Set oDoc = Nothing
            ' A failure inside the sheet-deletion loop would leave alerts disabled.
            Application.DisplayAlerts = True

            If FileNum < nLastListRow Then
                strNextExcelfile = FileList.Range.Cells(FileNum + 1, 3).Value
            End If

            ' if new excel file, save & close the current Excel file, Also close the last file
            If Not ExcelBook Is Nothing Then
                If (strNextExcelfile <> strExcelfile) Or (FileNum = nLastListRow) Then
                    ExcelBook.Save
                    ExcelBook.Windows(1).Visible = True
                    ExcelBook.Close
                    bNewExcelFile = True
                    Set ExcelBook = Nothing
                End If
            End If

        End If

    Next FileNum

    ' A workbook can still be open when the final list rows were not flagged "Y".
    If Not ExcelBook Is Nothing Then
        ExcelBook.Save
        ExcelBook.Windows(1).Visible = True
        ExcelBook.Close
    End If

    Exit Sub

FileFailed:
    ' Record the failure, then resume at the clean-up label. Resume clears the
    ' active-handler state so a failure in a later file is trapped as well.
    Call LogToSheet("ERROR " & strWordfile, CStr(Err.Number) & " - " & Err.Description)
    Resume Closesection

End Sub

' Resets the logging state and rewrites the header row of the LOG sheet.
' Sets the default output column to 6 and the next free log line to 2,
' clears all cell contents of the sheet, writes the six column captions
' and stamps the start time into column 7 of the header row.
Private Sub InitiateLog()

    Dim vCaptions As Variant
    Dim nIdx As Long

    nOutputCol = 6
    nLogLine = 2

    vCaptions = Array("Para #", "Activity Log", "Output Row", _
                      "Output Col", "Details", "Paragraph text")

    With LogSheet
        .Cells.ClearContents

        ' Captions go into columns 1..6 regardless of the array's lower bound.
        For nIdx = LBound(vCaptions) To UBound(vCaptions)
            .Cells(1, nIdx - LBound(vCaptions) + 1).Value = vCaptions(nIdx)
        Next nIdx

        .Cells(1, 7).Value = Now()
    End With

End Sub
' Writes one entry to the next free line of the LOG sheet and advances the
' line counter.
'   strCategory - short label of the activity being logged (column 2)
'   strText1    - free-form detail text (column 5)
' Columns 1, 3, 4 and 6 are filled from the module-level paragraph counter,
' output row, output column and current paragraph text.
Private Sub LogToSheet(ByVal strCategory As String, ByVal strText1 As String)

    With LogSheet
        .Cells(nLogLine, 1).Value = nParaCnt
        .Cells(nLogLine, 2).Value = strCategory
        .Cells(nLogLine, 3).Value = Str(nOutputRow)
        .Cells(nLogLine, 4).Value = Str(nOutputCol)
        .Cells(nLogLine, 5).Value = strText1
        .Cells(nLogLine, 6).Value = strParaText
    End With

    nLogLine = nLogLine + 1

End Sub
' Flushes the pending row buffer (strOutRowText) into row nOutputRow of the
' output sheet, one array slot per column, and empties each slot afterwards
' so the buffer is ready for the next row.
Private Sub AddOutputToXLRow()

    Dim nSlot As Long

    For nSlot = LBound(strOutRowText) To UBound(strOutRowText)
        ExcelSheet.Cells(nOutputRow, nSlot).Value = strOutRowText(nSlot)
        strOutRowText(nSlot) = ""
    Next nSlot

    ' Call LogToSheet("AddOutputToXLRow", nOutputRow)

End Sub

' Walks every paragraph of the open Word document (oDoc) and distributes the
' paragraph text over the columns of the output sheet (ExcelSheet).
'
' Processing per paragraph:
'   1. Paragraphs are skipped until one starts with strStartTxt (no skipping
'      when strStartTxt is empty).
'   2. Keystrings rules: if the paragraph starts with the text in column 1 of a
'      rule, it goes to the output column in column 2; a 1 in column 3 first
'      flushes the pending row and starts a new one.
'   3. Otherwise the Actiontbl rules are tried in table order. Column 1 is the
'      action code (1..7, see the Select Case), column 2 the output column and
'      column 3 (used by action 5) the "start new row" flag.
'   4. If nothing matched, the text is appended to the column used last.
'
' Cell texts are collected in strOutRowText and written to the sheet by
' AddOutputToXLRow. Every decision is logged through LogToSheet, and the
' progress percentage is written to the "Status_Value" range.
Private Sub ExtractWord_To_Excel()

    'Application.ScreenUpdating = False

    Dim bstartflg As Boolean        ' True once the start text has been reached
    Dim bNumberPara As Boolean      ' True right after a "first numbered paragraph"
    Dim bParaProcessed As Boolean   ' True once a rule has consumed the paragraph
    ' Kept as Variant: nNumber receives CDec() results and is copied to nSlokCnt.
    Dim nSlokCnt As Variant, nNumber As Variant
    Dim nAlignParaNum As Long       ' paragraph number of the last centered row break
    Dim nCol As Long, nActn As Long
    Dim nPos1 As Long, nPos2 As Long, nposn As Long
    Dim strLastword As String, strNumb As String, strTextAfterNum As String

    Set currPara = oDoc.Paragraphs(1)
    TotalParas = oDoc.Paragraphs.Count
    nParaCnt = 0
    bstartflg = False
    nOutputRow = 1
    strSlokanum = "0"
    nSlokCnt = 1
    nAlignParaNum = 0
    bNumberPara = False

    If Len(strStartTxt) = 0 Then
        bstartflg = True
    End If
    Erase strOutRowText()

    ' Loop through the paragraphs in the document.
    For Each currPara In oDoc.Paragraphs
        ' Range of the current paragraph, so that the search is conducted
        ' paragraph by paragraph
        Set currRng = oDoc.Range(currPara.Range.Start, currPara.Range.End)
        strFirstWord = ""
        ' strParaText = Trim(Replace(Replace(Replace(currPara.Range.Text, Chr(10), ""), Chr(13), ""), Chr(9), ""))
        ' NOTE(review): the raw range text is used, so any paragraph mark that
        ' Word returns as part of Range.Text stays in strParaText - confirm
        ' whether the clean-up above was disabled on purpose.
        strParaText = currPara.Range.Text

        If strParaText <> "" Then
            strFirstWord = Split(strParaText, " ")(0)
        End If
        nParaCnt = nParaCnt + 1

        ' if starting text is not reached, check if is matching with current para
        If Not bstartflg Then
            If Left(strParaText, Len(strStartTxt)) = strStartTxt Then
                Call LogToSheet("Starting text ", 1)
                bstartflg = True
            End If
        End If

        If Not bstartflg Or strFirstWord = "" Then
            GoTo ContinueNextPara
        End If

        bParaProcessed = False
        'Match text from list of texts and output to the column
        For nCol = 1 To UBound(Keystrings)
            If Left(strParaText, Len(Keystrings(nCol, 1))) = Keystrings(nCol, 1) Then
                nOutputCol = Keystrings(nCol, 2)
                If Keystrings(nCol, 3) = 1 Then
                    Call AddOutputToXLRow
                    nOutputRow = nOutputRow + 1
                End If
                Call AppendParaToOutputCol(vbCrLf)
                Call LogToSheet(Keystrings(nCol, 1), strSlokanum)
                bParaProcessed = True
                bNumberPara = False
                Exit For
            End If
        Next nCol

        If bParaProcessed Then
            GoTo ContinueNextPara
        End If

        ' specific actions to be addressed
        For nActn = 1 To UBound(Actiontbl)
            Select Case Actiontbl(nActn, 1)

                Case 1 ' Font size GR 15
                    If currPara.Range.Words(1).Font.Size >= 15 Then
                        nOutputCol = Actiontbl(nActn, 2)
                        Call AppendParaToOutputCol(" ")
                        Call LogToSheet("Font-size GR15", strSlokanum)
                        bParaProcessed = True
                    End If

                Case 2 ' extract text in brackets
                    ' Try each "(...)" group from left to right until one
                    ' starts with a number.
                    nPos1 = InStr(1, strParaText, "(")
                    Do While nPos1 <> 0
                        nPos2 = InStr(nPos1, strParaText, ")")
                        If nPos2 = 0 Then Exit Do ' unmatched "(": leave strSlokanum as it is

                        ' the sloka number inside ()
                        strSlokanum = Mid(strParaText, nPos1 + 1, nPos2 - nPos1 - 1)
                        If IsNumeric(Left(Trim(strSlokanum), 2)) Then
                            nOutputCol = Actiontbl(nActn, 2)
                            nSlokCnt = nSlokCnt + 1
                            strOutRowText(nOutputCol) = strSlokanum
                            Call LogToSheet("In Brackets ", strSlokanum)
                            bParaProcessed = True
                            ' NOTE(review): the number is cleared even after a
                            ' successful match, as in the original - confirm.
                            strSlokanum = ""
                            Exit Do
                        End If

                        nPos1 = InStr(nPos2, strParaText, "(")
                        If nPos1 = 0 Then strSlokanum = ""
                    Loop

                Case 3 'English text
                    If IsEngText(strFirstWord) Then
                        ' earlier output was WBW meaning, consider this output
                        ' as Simple En column
                        If nOutputCol = 4 Then
                            nOutputCol = 5
                        Else ' Else use from table
                            nOutputCol = Actiontbl(nActn, 2)
                        End If
                        Call AppendParaToOutputCol(" ")
                        Call LogToSheet("English text ", strFirstWord)
                        bParaProcessed = True
                    End If

                Case 4 ' if Centered text alignment & bold, consider as sutra & extract x-x-x as sutra no.
                    If currPara.Alignment = 1 And currPara.Range.Font.Bold = True Then
                        ' Start a new row for the first centered line, or when
                        ' the previous row break is more than 5 paragraphs back.
                        If nAlignParaNum = 0 Or nParaCnt - nAlignParaNum > 5 Then
                            Call AddOutputToXLRow
                            nOutputRow = nOutputRow + 1 ' move to next row for first sutram line
                            nAlignParaNum = nParaCnt
                            Call LogToSheet("Centered text sutram ", currPara.Alignment)
                        End If

                        ' Text after the last space; used as the number when it
                        ' contains at least two hyphens (x-x-x).
                        strLastword = Right(strParaText, _
                                            Len(strParaText) - (InStrRev(strParaText, " ")))
                        nposn = InStr(1, strLastword, "-")
                        If nposn <> 0 Then
                            If InStr(nposn + 1, strLastword, "-") <> 0 Then
                                nOutputCol = 1
                                nSlokCnt = nSlokCnt + 1
                                strOutRowText(nOutputCol) = strLastword
                                Call LogToSheet("x-x-x format ", strLastword)
                            End If
                        End If

                        nOutputCol = Actiontbl(nActn, 2)
                        Call AppendParaToOutputCol(" ")

                        bParaProcessed = True
                    End If

                Case 5 'Bold or normal text starting with number
                    nPos1 = InStr(1, strParaText, ".")
                    ' In some cases number is not like 234. but 234)
                    If nPos1 = 0 Or nPos1 > 5 Then
                        nPos1 = InStr(1, strParaText, ")")
                    End If
                    If nPos1 <> 0 Then
                        ' NOTE(review): nPos1 is measured on the untrimmed text
                        ' but applied to the trimmed text - confirm for
                        ' paragraphs with leading spaces.
                        strNumb = Left(Trim(strParaText), nPos1 - 1)
                        nNumber = 0
                        If IsNumeric(strNumb) Then
                            nNumber = CDec(strNumb)
                        End If

                        ' para starts with number for first time. number should
                        ' be greater than the sloka count
                        If nNumber >= nSlokCnt And Not bNumberPara Then
                            nSlokCnt = nNumber
                            bNumberPara = True
                            bParaProcessed = True

                            If Actiontbl(nActn, 3) = 1 Then
                                Call AddOutputToXLRow
                                nOutputRow = nOutputRow + 1
                            End If

                            ' output number in first column (hardcoded)
                            nOutputCol = 1
                            strOutRowText(nOutputCol) = strNumb
                            strSlokanum = strNumb

                            Call LogToSheet("1st para starting with number ", strFirstWord)

                            nOutputCol = Actiontbl(nActn, 2)
                            Call AppendParaToOutputCol(" ")
                        End If
                    End If

                Case 6 'Bold or normal text starting with number and moolam
                    nPos1 = InStr(1, strParaText, ".")
                    ' In some cases number is not like 234. but 234)
                    If nPos1 = 0 Or nPos1 > 5 Then
                        nPos1 = InStr(1, strParaText, ")")
                    End If
                    If nPos1 <> 0 Then
                        strNumb = Left(Trim(strParaText), nPos1 - 1)
                        ' para starts with number
                        If IsNumeric(strNumb) Then
                            strTextAfterNum = Trim(Mid(Trim(strParaText), nPos1 + 1))
                            If (Left(strTextAfterNum, Len(strMoolam)) = strMoolam) Then
                                nOutputCol = Actiontbl(nActn, 2)
                                Call AppendParaToOutputCol(" ")
                                bParaProcessed = True
                                bNumberPara = False
                                Call LogToSheet("Text with number moolam ", strFirstWord)
                            End If
                        End If
                    End If

                Case 7 'number para after number text starting with number and not having 'Moolam'
                    If bNumberPara Then
                        bNumberPara = False
                        bParaProcessed = True
                        nOutputCol = Actiontbl(nActn, 2)
                        Call AppendParaToOutputCol(" ")
                        Call LogToSheet("2nd para starting with number ", strFirstWord)
                    End If

            End Select

            ' The first action that consumes the paragraph wins.
            If bParaProcessed = True Then
                Exit For
            End If
        Next nActn

        ' No rule matched: append to the column used last. A space is always
        ' inserted here, even when the column is still empty.
        If Not bParaProcessed Then
            strOutRowText(nOutputCol) = strOutRowText(nOutputCol) & " " & strParaText
            Call LogToSheet("Appending to column ", strSlokanum)
        End If

ContinueNextPara:
        FilesSheet.Range("Status_Value").Value = Str(nParaCnt / TotalParas * 100) & " % "

    Next currPara

    ' add last row to excel
    Call AddOutputToXLRow

    LogSheet.Cells(nLogLine, 7).Value = Now()

    Application.ScreenUpdating = True

End Sub

' Adds the current paragraph text (strParaText) to the pending cell text of
' column nOutputCol. strSeparator is inserted only when the cell already holds
' text; an empty cell simply receives the paragraph text.
Private Sub AppendParaToOutputCol(ByVal strSeparator As String)

    If strOutRowText(nOutputCol) <> "" Then
        strOutRowText(nOutputCol) = strOutRowText(nOutputCol) & strSeparator & strParaText
    Else
        strOutRowText(nOutputCol) = strParaText
    End If

End Sub

' Reports whether strValue begins with an English letter.
' Leading characters " ' ( ) - are skipped; the first other character decides:
' A-Z or a-z gives True, anything else gives False. An empty string, or one
' made up only of the skipped characters, gives False.
Function IsEngText(strValue As String) As Boolean

    Dim intPos As Integer
    Dim nCode As Integer

    IsEngText = False

    For intPos = 1 To Len(strValue)
        nCode = Asc(Mid(strValue, intPos, 1))

        If (nCode >= 65 And nCode <= 90) Or (nCode >= 97 And nCode <= 122) Then
            ' First significant character is a letter.
            IsEngText = True
            Exit Function
        End If

        Select Case nCode
            Case 34, 39, 40, 41, 45
                ' special characters " ' ( ) - : keep scanning
            Case Else
                ' Any other character settles the answer as False.
                Exit Function
        End Select
    Next intPos

End Function

You might also like