Below is the VB.NET code generated by GPT.
Let me know if it doesn't work.
Imports System.Data
Imports System.Linq
''' <summary>
''' Console demo: builds a sample DataTable, removes rows that share a
''' "DupeCol" value so that one row per value remains, and prints the result.
''' </summary>
Module Module1

    ''' <summary>
    ''' Entry point. Creates the sample table, de-duplicates it on "DupeCol"
    ''' (preferring rows whose "DateCol" is populated) and writes every
    ''' remaining row to the console.
    ''' </summary>
    Sub Main()
        ' Sample DataTable
        Dim dt As New DataTable()
        dt.Columns.Add("Name", GetType(String))
        dt.Columns.Add("DupeCol", GetType(String))
        dt.Columns.Add("DateCol", GetType(DateTime))

        ' Sample Data
        dt.Rows.Add("John", "A123", DBNull.Value)
        dt.Rows.Add("Alice", "A123", #2025-01-01#)
        dt.Rows.Add("Bob", "B456", DBNull.Value)
        dt.Rows.Add("Eve", "B456", DBNull.Value)
        dt.Rows.Add("Frank", "C789", #2025-05-01#)
        dt.Rows.Add("Grace", "C789", DBNull.Value)

        RemoveDuplicateRows(dt, "DupeCol", "DateCol")

        ' Display the updated DataTable
        Console.WriteLine("Updated DataTable:")
        For Each row As DataRow In dt.Rows
            Console.WriteLine($"{row("Name")}, {row("DupeCol")}, {row("DateCol")}")
        Next
    End Sub

    ''' <summary>
    ''' Removes duplicate rows from <paramref name="table"/> in place, leaving
    ''' exactly one row for each distinct value of <paramref name="keyColumn"/>.
    ''' </summary>
    ''' <param name="table">Table to de-duplicate; its row collection is modified.</param>
    ''' <param name="keyColumn">Name of the column whose repeated values mark duplicates.</param>
    ''' <param name="dateColumn">Name of the column used to choose which duplicate survives.</param>
    ''' <remarks>
    ''' Within each group of duplicates the survivor is the last row whose
    ''' <paramref name="dateColumn"/> is not null; if no row in the group has a
    ''' value there, the last row of the group survives. Groups of any size are
    ''' handled, not only pairs.
    ''' </remarks>
    Private Sub RemoveDuplicateRows(table As DataTable, keyColumn As String, dateColumn As String)
        Dim duplicateGroups = table.AsEnumerable() _
            .GroupBy(Function(row) row(keyColumn)) _
            .Where(Function(g) g.Count() > 1)

        ' Collect first, remove afterwards: the row collection must not be
        ' modified while the query above is still enumerating it.
        Dim rowsToRemove As New List(Of DataRow)

        For Each duplicateGroup In duplicateGroups
            Dim groupRows = duplicateGroup.ToList()

            ' Prefer a row with a populated date; otherwise fall back to the last row.
            Dim keeper = If(groupRows.LastOrDefault(Function(r) Not r.IsNull(dateColumn)),
                            groupRows.Last())

            rowsToRemove.AddRange(groupRows.Where(Function(r) r IsNot keeper))
        Next

        ' Remove the rows
        For Each row In rowsToRemove
            table.Rows.Remove(row)
        Next
    End Sub

End Module
The easiest solution I could think of is given below:
Sort the DataTable by column DateCol in descending order.
Use a LINQ query to remove duplicates based on a specific column:
' Keep one row per DupeCol value, preferring the row with the latest DateCol.
' The ordering is done inside the query so the result does not depend on the
' table's existing row order; null dates sort last when ordering descending.
Dim dedupedRows = dtTable.AsEnumerable() _
    .OrderByDescending(Function(r) r.Field(Of DateTime?)("DateCol")) _
    .GroupBy(Function(r) r.Field(Of String)("DupeCol")) _
    .Select(Function(g) g.First()) _
    .ToList()
' CopyToDataTable throws InvalidOperationException when there are no rows,
' so return an empty table with the same schema in that case.
dtTable = If(dedupedRows.Count > 0, dedupedRows.CopyToDataTable(), dtTable.Clone())