Merging Files with C#

Below is an example of how multiple PDF documents, each containing a different number of pages, can be merged into a single file in which the pages are interleaved: the first page of every document comes first, followed by the second page of every document, and so on. The example relies on the iTextSharp library, which must be referenced by the project.

Firstly, the file path is set and a check is made to see if it exists. This is followed by another check to verify that there are files to merge. The files are then processed one by one to find the number of pages that each PDF contains and this information is stored in a sorted dictionary along with the corresponding file name. All files without a ‘.pdf’ extension are ignored. Whilst doing this, a record is made of the maximum number of pages in an individual file. The sorted dictionary containing file name and page information, along with the maximum number of pages figure, is then used to access pages in each file and check that the desired page actually exists in a particular file, which allows for PDFs of varying sizes to be merged. Finally, a confirmation message is displayed stating how many files have been merged.

using System;
using System.Collections.Generic;
using System.IO;
using iTextSharp.text;
using iTextSharp.text.pdf;

namespace MergingFiles
{
    /// <summary>
    /// Console tool that merges every PDF found in a folder into a single
    /// "combined.pdf", interleaving the pages: page 1 of each file, then
    /// page 2 of each file, and so on. Files shorter than the longest one
    /// simply stop contributing once their pages run out.
    /// </summary>
    class Program
    {
        // Folder that is scanned for PDFs and that receives the combined file.
        private const string SourceFolder = @"C:\demo\";

        // Name of the merged file written into the source folder.
        private const string OutputFileName = "combined.pdf";

        /// <summary>
        /// Entry point: merges the PDFs in <see cref="SourceFolder"/> and then
        /// waits for a key press so the console output stays visible.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            MergeFolder(SourceFolder);

            // Force console window to stay open until a key is pressed.
            Console.ReadKey();
        }

        /// <summary>
        /// Merges the PDFs located in <paramref name="filePath"/> and reports
        /// the outcome on the console.
        /// </summary>
        /// <param name="filePath">Folder containing the PDFs to merge.</param>
        private static void MergeFolder(string filePath)
        {
            // Check to see if the file path exists.
            if (!Directory.Exists(filePath))
            {
                Console.WriteLine("File path does not exist.");
                return;
            }

            // Return the names of the files at the specified path.
            string[] dirFiles = Directory.GetFiles(filePath);
            if (dirFiles.Length == 0)
            {
                Console.WriteLine("There are no files to merge.");
                return;
            }

            // Path.Combine copes with the folder already ending in a separator.
            string outputPath = Path.Combine(filePath, OutputFileName);

            // One open reader per source file, ordered by file name. Each file
            // is parsed exactly once and the reader is reused for every page.
            SortedDictionary<string, PdfReader> readers =
                new SortedDictionary<string, PdfReader>();

            try
            {
                int maxPages = OpenReaders(dirFiles, readers);

                // At least two readable PDFs are required for a merge.
                if (maxPages > 0 && readers.Count > 1)
                {
                    try
                    {
                        byte[] combined = BuildCombinedPdf(readers, maxPages);
                        File.WriteAllBytes(outputPath, combined);

                        // Feedback that file merge has been successful.
                        Console.WriteLine("{0} files merged successfully.",
                            readers.Count.ToString());
                    }
                    catch (Exception e)
                    {
                        // Report the failure together with its cause.
                        Console.WriteLine("The file merge was unsuccessful.");
                        Console.WriteLine("Reason: {0}", e.Message);
                    }
                }
                else
                {
                    Console.WriteLine("There are no files to merge.");
                }
            }
            finally
            {
                // Release every reader, whether or not the merge succeeded.
                foreach (PdfReader reader in readers.Values)
                {
                    reader.Close();
                }
            }
        }

        /// <summary>
        /// Opens a reader for every mergeable PDF in <paramref name="dirFiles"/>
        /// and stores it in <paramref name="readers"/>, keyed by file path.
        /// Files that cannot be opened are reported and skipped.
        /// </summary>
        /// <param name="dirFiles">Paths of all files found in the folder.</param>
        /// <param name="readers">Receives the successfully opened readers.</param>
        /// <returns>The largest page count among the opened PDFs (0 if none).</returns>
        private static int OpenReaders(string[] dirFiles,
            SortedDictionary<string, PdfReader> readers)
        {
            int maxPages = 0;

            foreach (string dirFile in dirFiles)
            {
                if (!IsMergeCandidate(dirFile))
                {
                    continue;
                }

                PdfReader reader = null;

                try
                {
                    reader = new PdfReader(dirFile);
                    int pageCount = reader.NumberOfPages;

                    readers.Add(dirFile, reader);

                    // The dictionary owns the reader from here on, so the
                    // finally block below must not close it.
                    reader = null;

                    if (pageCount > maxPages)
                    {
                        maxPages = pageCount;
                    }
                }
                catch (Exception e)
                {
                    // Best effort: one unreadable file must not stop the run.
                    Console.WriteLine("The file \"{0}\" cannot be merged.",
                        dirFile.ToString());
                    Console.WriteLine("Reason: {0}", e.Message);
                }
                finally
                {
                    // Only non-null when the reader was opened but not stored.
                    if (reader != null)
                    {
                        reader.Close();
                    }
                }
            }

            return maxPages;
        }

        /// <summary>
        /// Decides whether a file should take part in the merge.
        /// </summary>
        /// <param name="dirFile">Full path of the file to test.</param>
        /// <returns>
        /// True for files with a ".pdf" extension (any casing) whose name does
        /// not contain "~" and is not the merge output file itself.
        /// </returns>
        private static bool IsMergeCandidate(string dirFile)
        {
            string fileName = Path.GetFileName(dirFile);

            // Extension comparison ignores case so ".PDF" files are included.
            if (!string.Equals(Path.GetExtension(dirFile), ".pdf",
                    StringComparison.OrdinalIgnoreCase))
            {
                return false;
            }

            // Test the file name only, so a "~" in a folder name does not
            // exclude every file.
            if (fileName.Contains("~"))
            {
                return false;
            }

            // Skip the output of a previous run so it is not merged into itself.
            return !string.Equals(fileName, OutputFileName,
                StringComparison.OrdinalIgnoreCase);
        }

        /// <summary>
        /// Builds the combined PDF in memory by taking page 1 of every reader,
        /// then page 2 of every reader, and so on up to
        /// <paramref name="maxPages"/>.
        /// </summary>
        /// <param name="readers">Open readers keyed by file path.</param>
        /// <param name="maxPages">Highest page number to visit.</param>
        /// <returns>The bytes of the combined PDF.</returns>
        private static byte[] BuildCombinedPdf(
            SortedDictionary<string, PdfReader> readers, int maxPages)
        {
            using (MemoryStream output = new MemoryStream())
            {
                // Create and open new document.
                Document document = new Document();
                PdfSmartCopy writer = new PdfSmartCopy(document, output);
                document.Open();

                // Process PDF files up to the maximum number of pages.
                for (int pageIndex = 1; pageIndex <= maxPages; pageIndex++)
                {
                    foreach (KeyValuePair<string, PdfReader> file in readers)
                    {
                        PdfReader reader = file.Value;

                        // Shorter files have no page at this index; skip them.
                        if (pageIndex <= reader.NumberOfPages)
                        {
                            PdfImportedPage page =
                                writer.GetImportedPage(reader, pageIndex);
                            writer.AddPage(page);
                        }
                    }
                }

                // Closing the document finalizes the PDF content in the stream.
                document.Close();

                return output.ToArray();
            }
        }
    }
}

Further Resources