Code for How to Replace Text in Docx Files in Python Tutorial

file_text_replacer.py

# Import re for regex functions
import re

# Import sys for getting the command line arguments
import sys

# Import docx to work with .docx files.
# Must be installed: pip install python-docx
from docx import Document

# Command-line usage:
#     python file_text_replacer.py <file.docx> <pattern=replacement> [...]
#
# Each <pattern> is treated as a regular expression and applied, in the order
# given, to the text of every run of every paragraph in the document. The
# result is written next to the input as "<name>_new.docx".


def _parse_replacement(replace_arg):
    """Split one ``pattern=replacement`` argument into its two halves.

    Args:
        replace_arg: A single command-line replacer argument.

    Returns:
        A ``(compiled_pattern, replacement)`` tuple, or ``None`` when the
        argument does not contain exactly one ``=`` or when the pattern is
        not a valid regular expression.
    """
    parts = replace_arg.split('=')
    if len(parts) != 2:
        return None
    pattern, replacement = parts
    try:
        compiled = re.compile(pattern)
    except re.error:
        # Reject a broken pattern up front instead of crashing with a
        # traceback halfway through the document.
        return None
    return compiled, replacement


def _replace_in_document(doc, compiled, replacement):
    """Apply one substitution to every run of every paragraph in ``doc``.

    Matching is done run by run, so a match that straddles two runs
    (style spans) is not found.

    Args:
        doc: The opened document; only ``doc.paragraphs`` is scanned.
        compiled: Compiled regular expression to search for.
        replacement: Replacement template handed to ``subn``.

    Returns:
        The total number of substitutions performed.
    """
    total = 0
    for para in doc.paragraphs:
        for run in para.runs:
            original_text = run.text
            # Runs without text have nothing to replace.
            if not original_text:
                continue
            # subn replaces every match (no arbitrary cap) and reports how
            # many substitutions it made, so the count reflects real matches
            # rather than the number of runs that changed.
            replaced_text, hits = compiled.subn(replacement, original_text)
            total += hits
            # Only touch the run when its text actually changed.
            if replaced_text != original_text:
                run.text = replaced_text
    return total


def _build_output_path(file_path):
    """Return ``file_path`` with ``_new`` inserted before the ``.docx`` suffix.

    Only the trailing extension is rewritten; any other ``.docx`` occurring
    earlier in the path (for example in a directory name) is left alone.
    The caller must already have checked that the path ends with ``.docx``.
    """
    suffix = '.docx'
    return file_path[:-len(suffix)] + '_new' + suffix


def main(argv):
    """Run the replacer with the given command-line argument list.

    Args:
        argv: Full argument vector: program name, the .docx path, then one
            or more ``pattern=replacement`` arguments.

    Returns:
        The process exit status: 0 on success, 1 when the arguments are
        missing or malformed or the file is not a ``.docx``.
    """
    # Check if Command Line Arguments are passed.
    if len(argv) < 3:
        print('Not Enough arguments where supplied')
        return 1

    # Check if replacers are in a valid schema before opening the file.
    replacements = []
    for replace_arg in argv[2:]:
        parsed = _parse_replacement(replace_arg)
        if parsed is None:
            print('Faulty replace argument given')
            print('-> ', replace_arg)
            return 1
        replacements.append(parsed)

    # Store file path from CL Arguments.
    file_path = argv[1]
    if not file_path.endswith('.docx'):
        print('The file type is invalid, only .docx are supported')
        return 1

    doc = Document(file_path)

    # Apply the replacers one after another, so a later replacer sees the
    # text produced by the earlier ones.
    occurences = {}
    for compiled, replacement in replacements:
        occurences[compiled.pattern] = _replace_in_document(
            doc, compiled, replacement
        )

    # print the number of occurences of each word
    for word, count in occurences.items():
        print(f"The word {word} was found and replaced {count} times.")

    # save the result under a new name so the original file is kept intact
    doc.save(_build_output_path(file_path))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))

Ethical Hacking with Python EBook - Author - Top

New Tutorials

How to Track Your AI Visibility with Python

How to Automate Excel Reports in Python using Openpyxl

How to Build a Website Blocker in Python

How to Analyze the Most In-Demand Tech Skills from Job Postings in Python

How to Generate and Visualize Text Embeddings in Python

Code for How to Replace Text in Docx Files in Python Tutorial

Tags

New Tutorials

Popular Tutorials