I have two tables consists of some data both have the headers as customer name and we need to ckeck if the customer name of first table is present in one of the customers of second table. The names can be different like, in first table it can be ABCDEF while in second table it can be seen as ABCD .
Or in first table it can be as c.r.o and in second table it can be c.r.o.
We need a logic to implement fuzzy logic here.
You would need to use one of the many possible string comparison algorithms to do the comparison and get back a percentage match.
Here is the code I’ve used to implement these algorithms in a custom activity:
Levenshtein Distance
Jaro-Winkler Distance
Cosine Similarity
Try
Select Case algorithm
Case "Levenshtein Distance"
Dim n As Integer = s1.Length
Dim m As Integer = s2.Length
Dim d(n + 1, m + 1) As Integer
' Initializing the distance matrix
For i As Integer = 0 To n
d(i, 0) = i
Next
For j As Integer = 0 To m
d(0, j) = j
Next
' Calculating distances
For i As Integer = 1 To n
For j As Integer = 1 To m
Dim cost As Integer = If(s1(i - 1) = s2(j - 1), 0, 1)
d(i, j) = Math.Min(Math.Min(d(i - 1, j) + 1, d(i, j - 1) + 1), d(i - 1, j - 1) + cost)
Next
Next
Dim levenshteinDistance As Integer = d(n, m)
Dim maxLength As Integer = Math.Max(n, m)
similarity = ((maxLength - levenshteinDistance) / maxLength) * 100
Case "Jaro-Winkler Distance"
Dim matchWindow As Integer = Math.Max(s1.Length, s2.Length) \ 2 - 1
Dim matches1 As Boolean() = New Boolean(s1.Length - 1) {}
Dim matches2 As Boolean() = New Boolean(s2.Length - 1) {}
Dim matches As Integer = 0
Dim transpositions As Integer = 0
' Count matches & transpositions
For i As Integer = 0 To s1.Length - 1
For j As Integer = Math.Max(0, i - matchWindow) To Math.Min(s2.Length - 1, i + matchWindow)
If s1(i) = s2(j) AndAlso Not matches2(j) Then
matches1(i) = True
matches2(j) = True
matches += 1
Exit For
End If
Next
Next
Dim k As Integer = 0
For i As Integer = 0 To s1.Length - 1
If matches1(i) Then
While Not matches2(k)
k += 1
End While
If s1(i) <> s2(k) Then
transpositions += 1
End If
k += 1
End If
Next
If matches = 0 Then
similarity = 0
Else
Dim jaro As Double = (matches / s1.Length + matches / s2.Length + (matches - transpositions \ 2) / matches) / 3
Dim prefix As Integer = 0
For i As Integer = 0 To Math.Min(Math.Min(4, s1.Length), s2.Length) - 1
If s1(i) = s2(i) Then
prefix += 1
Else
Exit For
End If
Next
Dim p As Double = 0.1
similarity = (jaro + prefix * p * (1 - jaro)) * 100
End If
Case "Cosine Similarity"
Dim freq1 As Dictionary(Of Char, Integer) = s1.GroupBy(Function(c) c).ToDictionary(Function(g) g.Key, Function(g) g.Count())
Dim freq2 As Dictionary(Of Char, Integer) = s2.GroupBy(Function(c) c).ToDictionary(Function(g) g.Key, Function(g) g.Count())
Dim dotProduct As Integer = freq1.Keys.Union(freq2.Keys).Sum(Function(key) freq1.GetValueOrDefault(key, 0) * freq2.GetValueOrDefault(key, 0))
Dim magnitude1 As Double = Math.Sqrt(freq1.Values.Sum(Function(v) v * v))
Dim magnitude2 As Double = Math.Sqrt(freq2.Values.Sum(Function(v) v * v))
If magnitude1 = 0 OrElse magnitude2 = 0 Then
similarity = 0
Else
similarity = (dotProduct / (magnitude1 * magnitude2)) * 100
End If
End Select
Catch ex As Exception
Throw(New ApplicationException(ex.Message))
End Try