import win32com.client
import pandas as pd
from datetime import datetime
import pytz
import re
Function to extract table data from text
def extract_table_data(text):
lines = text.split(‘\n’)
# Identify the start and end of the table
start_row = next((i for i, line in enumerate(lines) if "Variant Number" in line), None)
end_row = next((i for i, line in enumerate(lines[start_row:]) if not line.strip()), len(lines))
# Extract table content
table_content = lines[start_row:end_row]
return "\n".join(table_content)
Set the date range
start_date = datetime(2024, 1, 28, tzinfo=pytz.UTC)
end_date = datetime(2024, 1, 29, tzinfo=pytz.UTC)
Create Outlook application object
outlook = win32com.client.Dispatch(“Outlook.Application”)
namespace = outlook.GetNamespace(“MAPI”)
inbox = namespace.GetDefaultFolder(6) # 6 refers to the inbox folder
Get all items in the inbox
items = inbox.Items
Filter items based on the date range
filtered_items = [item for item in items if start_date <= item.ReceivedTime <= end_date]
Initialize an empty list to store extracted data
data =
Define the columns for the DataFrame
columns = [“Number”, “Description”, “Name”]
Iterate through filtered items and extract table data
for item in filtered_items:
# Extract table data from the email body
table_data = extract_table_data(item.Body)
# If table data is found, parse it
if table_data:
# Create a list of dictionaries where each dictionary represents a row
rows = [dict(zip(columns, [value.strip() for value in line.split()])) for line in table_data.split('\n') if line.strip()]
# Append the data to the list
data.extend(rows)
Create a DataFrame using the extracted data and columns
df = pd.DataFrame(data, columns=columns)
Save the DataFrame to an Excel file
df.to_excel(“output1.xlsx”, index=False)
this is the code I have to extract email body which is a table data to excel.
But the code is extracting only headers which I already gave under columns list.