Most users of iTextSharp’s PdfReader are used to using the constructor that takes a single string representing a file path. For small files, or only a couple of files, this is fine, but if you have a document with a large number of pages, or just a large number of documents, then you might run into some performance problems.
Luckily, there’s already a built-in, albeit non-obvious, solution to the problem: iTextSharp.text.pdf.RandomAccessFileOrArray
. When you create a PdfReader
using the PdfReader(string)
constructor you are actually creating one of these behind the scenes, just not an optimal one. The default one basically sets up a standard FileStream
object that reads your file, nothing too special. But there’s an overload called RandomAccessFileOrArray(string fileName, bool forceRead)
that will (generally) give you a giant performance boost if you pass true to the second parameter. When forceRead
is true
the entire file that you are reading will be read into memory as a byte array. You can understand why the default is false
, hopefully. But if you’ve got a fairly modern machine you should have enough memory to be able to take advantage of this overload. Obviously, test this and stress-test this in a production environment. One person loading a 500MB file into memory isn’t a big deal, but 100 people doing it is.
Below is a proof-of-concept WinForms app targeting iTextSharp 5.1.1.0. Just create a blank C# WinForms app (VS2010) and paste this into the source. Modify the variables at the top to your liking for testing. On my machine, the regular PdfReader
constructor takes about 22 seconds for 4,000 files and between 1 and 2 seconds using a RandomAccessFileOrArray
.
using System;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Threading;
using System.Windows.Forms;
using iTextSharp.text;
using iTextSharp.text.pdf;

namespace WindowsFormsApplication1
{
    /// <summary>
    /// Proof-of-concept form that compares the time needed to count the pages of many PDFs
    /// using <c>PdfReader(string)</c> versus <c>PdfReader(RandomAccessFileOrArray, byte[])</c>
    /// with <c>forceRead</c> set to true.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        //Location to create temporary files. NOTE: This folder will get DELETED when cleaned up!
        private readonly string workingFolder = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "Many Files");

        //Number of test files to create
        private readonly int fileCount = 4000;

        //Maximum number of pages in each test file
        private readonly int maxNumberOfPages = 20;

        //Name used to register (and later look up) the progress bar on the form
        private const string ProgressBarName = "pbFileCreated";

        //Will hold our threads
        private Thread tw;
        private Thread tm;

        /// <summary>
        /// Builds the UI in code: four buttons and one progress bar.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            //Resize the main form
            this.Width = 350;
            this.Height = 150;

            //Create various buttons
            AddButton("Create sample files", new Point(0, 0), (a, b) => BtnClick_CreateSampleFiles());
            AddButton("Count pages old way", new Point(0, 25), (a, b) => BtnClick_CountPages_Slow());
            AddButton("Count pages new way", new Point(150, 25), (a, b) => BtnClick_CountPages_Fast());
            AddButton("Clean up", new Point(0, 50), (a, b) => CleanUp(true));

            var pbFileCreated = new ProgressBar();
            pbFileCreated.Name = ProgressBarName;
            pbFileCreated.Location = new Point(0, 75);
            pbFileCreated.Width = 300;
            this.Controls.Add(pbFileCreated);
        }

        /// <summary>
        /// Creates a 150px-wide button and adds it to the form.
        /// </summary>
        /// <param name="text">Caption of the button</param>
        /// <param name="location">Top-left corner of the button on the form</param>
        /// <param name="onClick">Handler attached to the button's Click event</param>
        private void AddButton(string text, Point location, EventHandler onClick)
        {
            var btn = new Button();
            btn.Text = text;
            btn.Click += onClick;
            btn.Location = location;
            btn.Width = 150;
            this.Controls.Add(btn);
        }

        /// <summary>
        /// Enable/Disable buttons on the main form
        /// </summary>
        /// <param name="enabled">True to enable every button, false to disable them</param>
        private void SetFormState(bool enabled)
        {
            //If we are called outside of the main UI thread then we need to invoke into it
            if (this.InvokeRequired)
            {
                this.Invoke(new MethodInvoker(delegate() { SetFormState(enabled); }));
                return;
            }

            foreach (Control c in this.Controls)
            {
                if (c is Button)
                {
                    c.Enabled = enabled;
                }
            }
        }

        #region Button Click Events

        private void BtnClick_CreateSampleFiles()
        {
            StartWorker(this.CreateSampleFiles);
        }

        private void BtnClick_CountPages_Slow()
        {
            StartWorker(this.CountPages_Slow);
        }

        private void BtnClick_CountPages_Fast()
        {
            StartWorker(this.CountPages_Fast);
        }

        /// <summary>
        /// Disables the UI, runs <paramref name="work"/> on a worker thread and starts a
        /// monitor thread that re-enables the UI once the worker has finished.
        /// </summary>
        /// <param name="work">The long-running operation to execute off the UI thread</param>
        private void StartWorker(ThreadStart work)
        {
            //Disable the UI
            SetFormState(false);

            //Create and start a thread to do our work.
            //Background threads do not keep the process alive if the form is closed mid-run.
            tw = new Thread(work);
            tw.IsBackground = true;
            tw.Start();

            //Create and start a thread to monitor our progress
            tm = new Thread(new ThreadStart(this.Monitor));
            tm.IsBackground = true;
            tm.Start();
        }

        #endregion

        #region Monitor And ProgressBar

        /// <summary>
        /// Used to monitor the progress of the worker thread so that we know when to re-enable the form's UI
        /// </summary>
        private void Monitor()
        {
            //Wait for the worker to actually terminate. Comparing ThreadState against Running is
            //not reliable here: the worker enters a wait state every time it blocks inside
            //Control.Invoke, which would end the monitor loop (and re-enable the UI) too early.
            Thread worker = tw;
            if (worker != null)
            {
                worker.Join();
            }

            SetFormState(true);
        }

        /// <summary>
        /// Called from various methods on various threads to update the main progress bar
        /// </summary>
        /// <param name="value">Current progress</param>
        /// <param name="max">Value that represents 100% progress</param>
        private void updatePB(int value, int max)
        {
            //Get the progress bar, there should be only one
            var pb = (ProgressBar)this.Controls.Find(ProgressBarName, false)[0];

            //See if we are on another thread
            if (pb.InvokeRequired)
            {
                //If so, have the main thread invoke our method with the same parameters for us
                pb.Invoke(new MethodInvoker(delegate() { updatePB(value, max); }));
                return;
            }

            //Otherwise update the progress bar's values.
            //Honour the caller-supplied maximum and keep Value inside [Minimum, Maximum].
            pb.Maximum = Math.Max(max, pb.Minimum);
            pb.Value = Math.Max(pb.Minimum, Math.Min(value, pb.Maximum));
        }

        #endregion

        /// <summary>
        /// Re-creates the working folder and fills it with <c>fileCount</c> PDFs, each having a
        /// random number of pages between 1 and <c>maxNumberOfPages</c> (inclusive).
        /// </summary>
        private void CreateSampleFiles()
        {
            //Just in case, erase current files
            CleanUp(false);

            //Create our output directory
            Directory.CreateDirectory(workingFolder);

            //Random number generator
            Random r = new Random();

            //Loop through each file that we need to create
            for (int i = 1; i <= fileCount; i++)
            {
                //Every 100 files update the main progress bar
                if (i % 100 == 0)
                {
                    updatePB(i, fileCount);
                }

                //Zero-padded name (00000001.pdf, ...) so files sort in creation order
                string path = Path.Combine(workingFolder, String.Format("{0}.pdf", i.ToString().PadLeft(8, '0')));

                //Create our temporary PDF
                using (FileStream fs = new FileStream(path, FileMode.Create, FileAccess.Write, FileShare.None))
                using (Document doc = new Document(PageSize.LETTER))
                using (PdfWriter w = PdfWriter.GetInstance(doc, fs))
                {
                    doc.Open();

                    //Get a random number of pages to create (upper bound of Next is exclusive)
                    int pageCount = r.Next(1, maxNumberOfPages + 1);
                    for (int j = 1; j <= pageCount; j++)
                    {
                        //Add a page
                        doc.NewPage();
                        //Add some content on the page, just to give the page a little "weight"
                        doc.Add(new Paragraph(String.Format("File {0}, Page {1}", i, j)));
                    }

                    doc.Close();
                }
            }

            //Give an alert to let people know we're done
            MessageBox.Show(String.Format("Created {0} Files", fileCount));
        }

        /// <summary>
        /// Clean up the files we created by erasing the entire directory
        /// </summary>
        /// <param name="msg">Whether to show a message alerting when done</param>
        private void CleanUp(bool msg)
        {
            if (Directory.Exists(workingFolder))
            {
                Directory.Delete(workingFolder, true);
            }

            if (msg)
            {
                MessageBox.Show("Test files deleted");
            }
        }

        /// <summary>
        /// Make sure we have a working folder and the correct number of files in it
        /// </summary>
        /// <returns>True when the folder exists and holds exactly <c>fileCount</c> PDFs</returns>
        private bool SanityCheck()
        {
            if (!Directory.Exists(workingFolder))
            {
                MessageBox.Show("Folder not found, please create first");
                return false;
            }

            if (Directory.EnumerateFiles(workingFolder, "*.pdf").Count() != fileCount)
            {
                MessageBox.Show("Not enough files exist in source folder, please create files before using.");
                return false;
            }

            return true;
        }

        /// <summary>
        /// Counts pages using the plain <c>PdfReader(string)</c> constructor.
        /// </summary>
        private void CountPages_Slow()
        {
            CountPages(f => new PdfReader(f));
        }

        /// <summary>
        /// Counts pages using a <c>RandomAccessFileOrArray</c> created with forceRead = true,
        /// i.e. each file is read fully into memory before being parsed.
        /// </summary>
        private void CountPages_Fast()
        {
            CountPages(f => new PdfReader(new RandomAccessFileOrArray(f, true), null));
        }

        /// <summary>
        /// Shared timing loop: opens every PDF in the working folder with
        /// <paramref name="openReader"/>, sums the page counts and reports the elapsed time.
        /// </summary>
        /// <param name="openReader">Factory that turns a file path into an open PdfReader</param>
        private void CountPages(Func<string, PdfReader> openReader)
        {
            //Make sure we've got files to work with
            if (!SanityCheck())
            {
                return;
            }

            //Create and start a timer
            var st = Stopwatch.StartNew();

            //Get our files. Materialise once so the directory is not enumerated twice
            //(once for the count and once for the loop).
            var files = Directory.EnumerateFiles(workingFolder, "*.pdf").ToList();

            //Total number of pages found
            int totalPageCount = 0;

            //Used to update the progress bar
            int i = 0;

            //Loop through each file
            foreach (string f in files)
            {
                //This is a total perf hit but the differences between the two methods is so great it doesn't really matter
                //Every 100 files update the progress bar
                i++;
                if (i % 100 == 0)
                {
                    updatePB(i, files.Count);
                }

                //Add the page count to the total, always releasing the reader afterwards
                PdfReader reader = openReader(f);
                try
                {
                    totalPageCount += reader.NumberOfPages;
                }
                finally
                {
                    reader.Close();
                }
            }

            //Stop our timer
            st.Stop();

            //TotalSeconds is the full duration; Elapsed.Seconds is only the 0-59 seconds component
            MessageBox.Show(String.Format("Found {0:N0} pages in {1:N1} seconds", totalPageCount, st.Elapsed.TotalSeconds));
        }
    }
}
Hi,
Thanks for the solution, but I am facing another issue with the PdfStamper obj.close() method.
It is taking too much time to close the PdfStamper object.
If you have any ideas regarding this, please help me.
// Reader created through RandomAccessFileOrArray with forceRead = true; it is not passed to the stamper below.
var pdfReaderTest = new PdfReader(new RandomAccessFileOrArray(FileName,true),null);
// Second reader, constructed from pdfTemplate; this is the one the stamper is given.
var pdfReader = new PdfReader(pdfTemplate);
// Stamper writes to a FileStream created at fileName.
// NOTE(review): "FileName" above and "fileName" here differ only in case - confirm whether they are the same variable.
var pdfStamper = new PdfStamper(pdfReader,
new FileStream(fileName, FileMode.Create,
FileAccess.ReadWrite));
pdfStamper .close() // It is taking too much time.
pdfReaderTest .close();
Your sample shows two PdfReaders although you are only using one, is there a reason for that? How long is “too long”? How big is your PDF?