Batch File Formatting with Java

Below is an example of how paragraphs of text in Microsoft Word documents can be formatted in a batch, based on specified paragraph text. For this to work, the Apache POI and Apache Commons IO libraries must be available as dependencies.

Firstly, the file path is set, together with a list of paragraphs containing particular text to format. A check is then made to see if the file path exists and whether there are any files to format. Each file is then processed one by one. All files without a ‘.docx’ extension are ignored. For each paragraph of text within a document, a check is made to see if its text matches any item in the paragraphs to format list. If a match is found the text is formatted.

Note that in a Word document, as exposed by Apache POI, a paragraph of text is made up of one or more ‘runs’. A run is a section of text where the formatting is the same. For example, if a paragraph has a section of bold text in the middle, the paragraph is made up of three runs: one up to the bold text, one for the bold text itself and one for the text after it. In the example below, an assumption is made that the paragraphs being formatted contain only one run. The first run in a paragraph has an index value of zero, not one.

Feedback is given as to the number of paragraphs that have been formatted in each file, along with a total count of files formatted at the end.

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.commons.io.FilenameUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;

/**
 * Batch-formats paragraphs in the Word (.docx) files found directly inside a folder.
 *
 * <p>Every paragraph whose complete text equals one of the configured strings has its
 * first run set to Arial, bold, 14pt. A file is rewritten in place only when at least
 * one of its paragraphs was formatted. Progress is reported on standard output.
 */
public class BatchFileFormatting {

    /**
     * Program entry point: formats the matching paragraphs of every .docx file in the
     * configured folder and prints a per-file and overall summary.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {

        // File path.
        File path = new File("c:/demo");

        // List of paragraphs to format.
        List<String> parasToFormat = new ArrayList<String>();
        Collections.addAll(parasToFormat,
                "Example Heading 1",
                "Example Heading 2",
                "Example Heading 3");

        // Check to see if the file path exists.
        if (!path.isDirectory()) {
            System.out.println("File path does not exist.");
            return;
        }

        // listFiles() returns null when the folder cannot be read, so treat that the
        // same as an empty folder instead of failing with a NullPointerException.
        File[] files = path.listFiles();
        if (files == null || files.length == 0) {
            System.out.println("There are no files to format.");
            return;
        }

        // Count of files that were formatted and saved successfully.
        int filesFormatted = 0;

        // Process the files at the path.
        for (File file : files) {

            // Skip sub-folders and anything that is not a Word file.
            if (!file.isFile() || !isWordFile(file)) {
                continue;
            }

            try {

                // The count is local to each file, so a failure in one file can never
                // leak a stale count into the next one.
                int paragraphsFormatted = formatFile(file, parasToFormat);

                if (paragraphsFormatted > 0) {

                    // Only counted once the document has been saved without error.
                    filesFormatted += 1;

                    // Message displaying file formatting information.
                    if (paragraphsFormatted == 1) {
                        System.out.printf("%s paragraph formatted in the file \"%s\".%n",
                                paragraphsFormatted, file.getName());
                    } else {
                        System.out.printf("%s paragraphs formatted in the file \"%s\".%n",
                                paragraphsFormatted, file.getName());
                    }

                }

            } catch (Exception e) {

                // Broad catch on purpose: this is the per-file boundary, and one bad
                // file (I/O error or unreadable document) must not stop the batch.
                System.out.printf("The file \"%s\" could not be formatted.%n",
                        file.getName());

            }

        }

        // Message displaying the number of files formatted.
        if (filesFormatted == 0) {
            System.out.println("No files have been formatted.");
        } else if (filesFormatted == 1) {
            System.out.printf("%s file has been formatted.%n", filesFormatted);
        } else {
            System.out.printf("%s files have been formatted.%n", filesFormatted);
        }

    }

    /**
     * Checks whether the file name has a "docx" extension, ignoring case.
     *
     * @param file the file to check
     * @return {@code true} if the extension is exactly "docx" in any letter case
     */
    private static boolean isWordFile(File file) {
        // An exact match (rather than contains) so that extensions which merely
        // include "docx" are not picked up.
        return "docx".equalsIgnoreCase(FilenameUtils.getExtension(file.getName()));
    }

    /**
     * Opens a Word file, formats its matching paragraphs and, if anything was
     * formatted, saves the document back over the original file.
     *
     * @param file the Word file to process
     * @param parasToFormat the paragraph texts that should be formatted
     * @return the number of paragraphs formatted in the file
     * @throws Exception if the file cannot be opened, read or written
     */
    private static int formatFile(File file, List<String> parasToFormat) throws Exception {

        // try-with-resources closes the document and the input stream even when
        // loading, formatting or saving fails.
        try (FileInputStream fis = new FileInputStream(file);
                XWPFDocument currentDoc = new XWPFDocument(fis)) {

            int paragraphsFormatted = formatMatchingParagraphs(currentDoc, parasToFormat);

            // Save the document only if something changed.
            if (paragraphsFormatted > 0) {
                // NOTE(review): opening the output stream truncates the original file
                // before the new content is written; consider writing to a temporary
                // file and renaming if a failed save must not damage the original.
                try (FileOutputStream fos = new FileOutputStream(file)) {
                    currentDoc.write(fos);
                }
            }

            return paragraphsFormatted;

        }

    }

    /**
     * Applies the font, weight and size to the first run of every paragraph whose
     * text is contained in the given list.
     *
     * @param doc the open document whose paragraphs are examined
     * @param parasToFormat the paragraph texts that should be formatted
     * @return the number of paragraphs formatted
     */
    private static int formatMatchingParagraphs(XWPFDocument doc, List<String> parasToFormat) {

        int paragraphsFormatted = 0;

        for (XWPFParagraph para : doc.getParagraphs()) {

            // Skip paragraphs that do not match, and guard against a paragraph that
            // has no runs so that get(0) cannot throw.
            if (!parasToFormat.contains(para.getText()) || para.getRuns().isEmpty()) {
                continue;
            }

            // Format the paragraph font, weight and size (first run only).
            para.getRuns().get(0).setFontFamily("Arial");
            para.getRuns().get(0).setBold(true);
            para.getRuns().get(0).setFontSize(14);

            // Indicate a paragraph has been formatted.
            paragraphsFormatted += 1;

        }

        return paragraphsFormatted;

    }

}