17 use-cases of extracting tables from PDF with UiPath Studio.
0:00 Intro
1:10 Install PDF Activities
2:00 GitHub free code for all the files
2:20 Logic of general workflow
4:40 File 1 simple PDF
9:50 File 2 PDF with a column with multiple lines
20:10 File 3 PDF with a column with multiple words ON the LAST column
27:00 File 5 PDF with a column with multiple words in an INSIDE column (2 columns)
31:40 File 6 PDF with a column with multiple lines
39:10 File 8 simple PDF
42:15 File 9 PDF with multiple spaces that need to be corrected
45:50 File 10 PDF with multiple columns that have multiple lines + multiple pages
55:50 File 11 simple PDF with protection empty Cells
58:35 File 12 Big PDF with an empty line and Empty columns and partial total
1:02:25 File 13 PDF with multiple columns that have multiple words and hard to define a rule
1:10:15 File 15 PDF with multiple columns that have multiple lines
1:12:50 File 17 simple PDF remove spaces from headers also remove space from Data
1:16:05 File 18 simple PDF
1:17:10 File 19 PDF with multiple pages and columns with multiple lines
1:22:10 File 20 PDF with multiple columns that have multiple lines
1:25:00 File 21 PDF with empty columns and subtotal
1:28:25 Subscribe to my channel
Blockquote
'FILE1
' Extracts the table text and the value that follows "Subtotal" from strin.
'   strin  (in)  : full text to parse; supplied from outside this snippet.
'   strout (out) : text from "Number" up to (not including) "Subtotal", trimmed,
'                  with every " " replaced by "|".
'   strpar (out) : the remainder of the line that follows "Subtotal", trimmed.
' Substring throws if either marker is missing or no line break follows "Subtotal".
Const subtotalMarker As String = "Subtotal"
Dim tableStart As Integer = strin.IndexOf("Number")
Dim subtotalAt As Integer = strin.IndexOf(subtotalMarker)

' Table region: header word "Number" through the character before "Subtotal".
Dim tableText As String = strin.Substring(tableStart, subtotalAt - tableStart).Trim()
' NOTE(review): the separator literal is a single space as shown here; if the
' post's formatting collapsed a run of spaces, confirm against the workflow.
strout = tableText.Replace(" ", "|")

' Everything after the marker (its length is 8), cut at the first line break.
Dim afterSubtotal As String = strin.Substring(subtotalAt + subtotalMarker.Length)
Dim lineEnd As Integer = afterSubtotal.IndexOf(Environment.NewLine)
strpar = afterSubtotal.Substring(0, lineEnd).Trim()
Blockquote
'FILE2
' Builds a pipe-delimited table in strout from the text in strin that follows
' the marker "Vacancies". strin and strout are supplied from outside this snippet.
' Lines that begin a row start a new output line; other lines are appended to
' the current output line.
Dim strtmp As String
Dim strout2 As String
' Fixed four-column header row.
strout = “Col1|Col2|Col3|Col4”
' Skip past the marker: "Vacancies" is 9 characters, so +11 also skips the two
' characters after it (presumably a line break - confirm).
strtmp = strin.Substring(strin.IndexOf(“Vacancies”) + 11).Trim
' Process line by line, ignoring empty lines.
For Each line As String In strtmp.Split(New String() {Environment.NewLine}, StringSplitOptions.RemoveEmptyEntries)
' Lines of three characters or fewer are dropped.
If (line.Length > 3) Then
' A numeric first character followed by two spaces marks the start of a new row.
If (IsNumeric(line(0))) And (line(1) = " ") And (line(2) = " ") Then
' NOTE(review): as written both Replace calls search for the same single space,
' so the second one finds nothing; the original literals were probably runs of
' spaces of different lengths that were collapsed when posted - confirm.
strout = strout + Environment.NewLine + line.Replace(" “, “”).Replace(” ", “|”).Trim
' NOTE(review): line(0) is compared with an empty string, which a single
' character cannot equal, so this branch looks unreachable as written; the
' literal was probably a space before whitespace was collapsed - confirm.
ElseIf (line(0) = “”) And (line(1) = " ") And (line(2) = " ") Then
' Appended to the current row with spaces turned into "$".
strout = strout + line.Replace(" ", “$”).Trim()
Else
' Any other line is appended to the current row after trimming.
strout = strout + line.Trim
End If
End If
Next
' Terminate the last row.
strout = strout + Environment.NewLine
' strout2 receives a copy of the result; it is not read again in this snippet.
strout2 = strout
Blockquote
'FILE3
' Builds a pipe-delimited table in strout from the region of strin between the
' markers "state name" and "authorized". strin and strout are supplied from
' outside this snippet.
' Each input line becomes one output row: the first 22 words are each followed
' by "|", and every later word is followed by "_" so that the remaining words
' stay together in the last (23rd) column. Each row keeps the trailing
' separator after its final word.
' Substring throws if either marker is missing or the computed length is negative.
Dim strtmp As String
Dim idx As Int32 = 0
' Header row with 23 generic column names.
strout = "col1|col2|col3|col4|col5|col6|col7|col8|col9|col10|col11|col12|col13|col14|col15|col16|col17|col18|col19|col20|col21|col22|col23" + Environment.NewLine
' Start 11 characters after the start of "state name" and stop 10 characters
' before "authorized".
strtmp = strin.Substring(strin.IndexOf("state name") + 11, strin.IndexOf("authorized") - 10 - strin.IndexOf("state name") - 11).Trim
For Each line As String In strtmp.Split(New String() {Environment.NewLine}, StringSplitOptions.RemoveEmptyEntries)
    For Each word As String In line.Split(" "c)
        If (idx < 22) Then
            strout = strout + word + "|"
        Else
            strout = strout + word + "_"
        End If
        ' Count the word. The original "idx = +1" assigned the constant 1 on
        ' every pass, so the "_" branch above could never be reached.
        idx += 1
    Next
    ' Append the row terminator. The original "strout = +Environment.NewLine"
    ' applied unary plus to a string and replaced the accumulated output
    ' instead of appending to it.
    strout = strout + Environment.NewLine
    ' Restart the word count for the next row.
    idx = 0
Next
Blockquote
'FILE5
' Builds a pipe-delimited table in strout and extracts the "Due Date" value
' into strpar. strin, strout and strpar are supplied from outside this snippet.
'   Table region : from "Hrs/Qty" to one character before the last "This is".
'   Header row   : every space becomes "|".
'   Data rows    : the first space and the last three spaces become "|", so
'                  spaces in between remain part of one column.
'   strpar       : the remainder of the line that follows "Due Date", trimmed.
' Substring/Remove throw if a marker is missing or a data row has too few spaces
' (blank lines are not filtered out by the split).
Dim tableStart As Integer = strin.IndexOf("Hrs/Qty")
Dim tableLength As Integer = strin.LastIndexOf("This is") - tableStart - 1
Dim tableText As String = strin.Substring(tableStart, tableLength).Trim()
' Keep "Sub Total" as a single token so its space is not turned into a separator.
tableText = tableText.Replace("Sub Total", "Sub_Total")

Dim rows As String() = tableText.Split(New String() {Environment.NewLine}, StringSplitOptions.None)
For rowNumber As Integer = 0 To rows.Length - 1
    Dim row As String = rows(rowNumber)

    ' The first row is the header: every space is a column boundary.
    If rowNumber = 0 Then
        strout = row.Replace(" ", "|") + Environment.NewLine
        Continue For
    End If

    ' First column boundary: the first space in the row.
    Dim cut As Integer = row.IndexOf(" "c)
    Dim piped As String = row.Remove(cut, 1).Insert(cut, "|").Trim()

    ' Remaining three boundaries are taken from the right-hand side.
    For n As Integer = 1 To 3
        cut = piped.LastIndexOf(" "c)
        piped = piped.Remove(cut, 1).Insert(cut, "|")
    Next

    strout = strout + piped + Environment.NewLine
Next

' Text after "Due Date" (8 characters), cut at the first line break.
Dim afterDueDate As String = strin.Substring(strin.IndexOf("Due Date") + 8)
Dim lineEnd As Integer = afterDueDate.IndexOf(Environment.NewLine)
strpar = afterDueDate.Substring(0, lineEnd).Trim()
Code: