How to remove watermark from pdf

How to remove watermark from pdf

IR21AirtelIndia_INDAT_23 circles 10 March 2023IR.21_Document_in_RAEX_format.pdf (946.7 KB)

Thanks
Shaik

1 Like

Hi,

Check this:-

Thanks

Hi @shaik.muktharvalli1 ,

I don't think UiPath has a built-in activity for removing watermarks. You can use a Python script to remove the watermark instead:

Required Library: pip install PyPDF4

from PyPDF4 import PdfFileReader, PdfFileWriter
from PyPDF4.pdf import ContentStream
from PyPDF4.generic import TextStringObject, NameObject
from PyPDF4.utils import b_
import os
import time
import shutil

def remove_watermark(wmText, inputFile, outputFile):
    """Blank out watermark text in a PDF and write the result to a new file.

    Every ``Tj`` (show-text) operation whose string operand starts with one
    of the fragments in ``wmText`` has that operand replaced by an empty
    string. All pages are then written to ``outputFile``.

    Args:
        wmText: Iterable of text prefixes that identify the watermark.
            Empty fragments are ignored: every string starts with ``""``,
            so an empty fragment would otherwise blank all ``Tj`` text on
            every page.
        inputFile: Path of the PDF to read.
        outputFile: Path of the PDF to write.
    """
    # str.startswith accepts a tuple of prefixes, so build it once up front.
    prefixes = tuple(fragment for fragment in wmText if fragment)

    with open(inputFile, "rb") as f:
        # Only the stream is passed: the second positional parameter of
        # PdfFileReader is ``strict``, not a file mode.
        source = PdfFileReader(f)
        output = PdfFileWriter()

        for page_number in range(source.getNumPages()):
            page = source.getPage(page_number)

            # A page without a content stream has no text operations to
            # inspect; copy it through unchanged instead of raising KeyError.
            if "/Contents" in page:
                content_object = page["/Contents"].getObject()
                content = ContentStream(content_object, source)

                for operands, operator in content.operations:
                    if operator != b_("Tj"):
                        continue
                    text = operands[0]
                    if isinstance(text, str) and text.startswith(prefixes):
                        operands[0] = TextStringObject('')

                page[NameObject('/Contents')] = content

            output.addPage(page)

        # Kept inside the input ``with`` block, as in the original: the
        # writer may still need to read page data from the source stream.
        with open(outputFile, "wb") as outputStream:
            output.write(outputStream)

def watermark_text(inputFile, waterMarkTextStarting):
    """Derive the watermark text fragments from the first page of a PDF.

    The text of page 0 is extracted and searched for
    ``waterMarkTextStarting``.

    Args:
        inputFile: Path of the PDF to inspect.
        waterMarkTextStarting: The text the watermark begins with.

    Returns:
        A list of non-empty strings: the matched starting text, followed by
        whatever page text comes after it (omitted when nothing follows).
        The list is empty when ``waterMarkTextStarting`` is empty or does
        not occur in the extracted text of the first page.
    """
    # The context manager closes the file even if extraction raises.
    with open(inputFile, 'rb') as pdfFileObj:
        watermark = PdfFileReader(pdfFileObj).getPage(0).extractText()

    if not waterMarkTextStarting:
        return []

    x = watermark.find(waterMarkTextStarting)
    if x == -1:
        # Slicing with -1 would return unrelated page text as "watermark".
        return []

    lengthWmText = len(waterMarkTextStarting)
    wmText = [watermark[x:x + lengthWmText]]

    # Skip an empty tail: an empty fragment is a prefix of every string and
    # would make a prefix-based consumer treat all text as watermark.
    remainder = watermark[x + lengthWmText:]
    if remainder:
        wmText.append(remainder)
    return wmText

def main():
    """Driver: find the watermark text in the input PDF and strip it.

    Reads the hard-coded input path, extracts the watermark fragments that
    begin with ``waterMarkTextStarting``, and writes a cleaned copy to the
    hard-coded output path. If no usable fragment is found, nothing is
    written and a message says so.
    """
    input_pdf_file = r"D:\Test\PythonCode\IR21AirtelIndia_INDAT_23 circles 10 March 2023IR.21_Document_in_RAEX_format.pdf"
    output_pdf_file = r"D:\Test\PythonCode\testout.pdf"
    waterMarkTextStarting = "Non-binding draft document"

    # No file handle is opened here: both helpers open the input themselves.
    wm_text = watermark_text(input_pdf_file, waterMarkTextStarting)

    # Do not report success when there is no non-empty fragment to match.
    if not any(wm_text):
        print("Watermark text not found; no output written.")
        return

    remove_watermark(wm_text, input_pdf_file, output_pdf_file)
    print("Watermark removed successfully.")

# Run the driver only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Regards,
Vinit Mhatre