Most users of iTextSharp’s PdfReader are used to the constructor that takes a single string representing a file path. For small files or only a couple of files this is fine, but if you have a document with a large number of pages or just a large number of documents then you might run into some performance problems.
Luckily there’s already a built-in, albeit non-obvious, solution to the problem: iTextSharp.text.pdf.RandomAccessFileOrArray. When you create a PdfReader using the PdfReader(string) constructor you are actually creating one of these behind the scenes, just not an optimal one. The default one basically sets up a standard FileStream object that reads your file, nothing too special. But there’s an overload called RandomAccessFileOrArray(string fileName, bool forceRead) that will (generally) give you a giant performance boost if you pass true to the second parameter. When forceRead is true the entire file that you are reading will be read into memory as a byte array. You can understand why the default is false, hopefully. But if you’ve got a fairly modern machine you should hopefully have enough memory to be able to take advantage of this overload. Obviously test this and stress test this in a production environment. One person loading a 500MB file into memory isn’t a big deal but 100 people doing it is.
Below is a proof-of-concept WinForms app targeting iTextSharp 5.1.1.0. Just create a blank C# WinForms app (VS2010) and paste this into the source. Modify the variables at the top to your liking for testing. On my machine, the regular PdfReader constructor takes about 22 seconds for 4,000 files and between 1 and 2 seconds using a RandomAccessFileOrArray.
using System;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Threading;
using System.Windows.Forms;
using iTextSharp.text;
using iTextSharp.text.pdf;
namespace WindowsFormsApplication1
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form. Only calls InitializeComponent(), which is defined outside this listing
/// (presumably in the designer half of this partial class - confirm); the buttons and progress
/// bar themselves are built later, in Form1_Load.
/// </summary>
public Form1()
{
InitializeComponent();
}
//Location to create temporary files. NOTE: This folder will get DELETED when cleaned up!
//Resolves to a folder named "Many Files" on the current user's desktop.
private readonly string workingFolder = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.Desktop), "Many Files");
//Number of test files to create. SanityCheck() also requires exactly this many PDFs before a page count will run.
private readonly int fileCount = 4000;
//Maximum number of pages in each test file (each file gets a random page count from 1 up to and including this value)
private readonly int maxNumberOfPages = 20;
//Will hold our threads: tw runs the actual job, tm runs Monitor() to re-enable the UI once tw is done
private Thread tw;
private Thread tm;
/// <summary>
/// Builds the whole test UI in code: four 150px-wide buttons laid out on a 25px row grid and one
/// progress bar underneath them.
/// </summary>
private void Form1_Load(object sender, EventArgs e)
{
    //Size the window so two 150px columns fit side by side
    this.Width = 350;
    this.Height = 150;

    //Row 0: generate the sample PDFs
    var createButton = new Button { Text = "Create sample files", Location = new Point(0, 0), Width = 150 };
    createButton.Click += delegate { BtnClick_CreateSampleFiles(); };
    this.Controls.Add(createButton);

    //Row 1, left: count pages through the plain PdfReader(string) constructor
    var slowCountButton = new Button { Text = "Count pages old way", Location = new Point(0, 25), Width = 150 };
    slowCountButton.Click += delegate { BtnClick_CountPages_Slow(); };
    this.Controls.Add(slowCountButton);

    //Row 1, right: count pages through RandomAccessFileOrArray
    var fastCountButton = new Button { Text = "Count pages new way", Location = new Point(150, 25), Width = 150 };
    fastCountButton.Click += delegate { BtnClick_CountPages_Fast(); };
    this.Controls.Add(fastCountButton);

    //Row 2: delete the working folder and tell the user about it
    var cleanUpButton = new Button { Text = "Clean up", Location = new Point(0, 50), Width = 150 };
    cleanUpButton.Click += delegate { CleanUp(true); };
    this.Controls.Add(cleanUpButton);

    //Row 3: progress bar; updatePB() finds it again later by this exact name
    var progressBar = new ProgressBar { Name = "pbFileCreated", Location = new Point(0, 75), Width = 300 };
    this.Controls.Add(progressBar);
}
/// <summary>
/// Switches every button that sits directly on the form on or off in one go.
/// </summary>
/// <param name="enabled">True to enable the buttons, false to disable them</param>
private void SetFormState(bool enabled)
{
    //Called from a non-UI thread? Then re-run ourselves on the UI thread and stop here
    if (this.InvokeRequired)
    {
        this.Invoke((MethodInvoker)(() => SetFormState(enabled)));
        return;
    }

    //Only buttons are toggled; other controls (the progress bar) are left untouched
    foreach (Button button in this.Controls.OfType<Button>())
    {
        button.Enabled = enabled;
    }
}
#region Button Click Events
/// <summary>
/// Click handler for "Create sample files": locks the buttons, then starts the file-creation
/// worker thread and a monitor thread.
/// </summary>
private void BtnClick_CreateSampleFiles()
{
    //Lock the buttons for the duration of the job
    SetFormState(false);

    //Worker: generates the PDFs off the UI thread
    tw = new Thread(this.CreateSampleFiles);
    tw.Start();

    //Monitor: watches the worker and turns the buttons back on afterwards
    tm = new Thread(this.Monitor);
    tm.Start();
}
/// <summary>
/// Click handler for "Count pages old way": locks the buttons, then starts the
/// PdfReader(string)-based counting worker and a monitor thread.
/// </summary>
private void BtnClick_CountPages_Slow()
{
    //Lock the buttons for the duration of the job
    SetFormState(false);

    //Worker: counts pages off the UI thread
    tw = new Thread(this.CountPages_Slow);
    tw.Start();

    //Monitor: watches the worker and turns the buttons back on afterwards
    tm = new Thread(this.Monitor);
    tm.Start();
}
/// <summary>
/// Click handler for "Count pages new way": locks the buttons, then starts the
/// RandomAccessFileOrArray-based counting worker and a monitor thread.
/// </summary>
private void BtnClick_CountPages_Fast()
{
    //Lock the buttons for the duration of the job
    SetFormState(false);

    //Worker: counts pages off the UI thread
    tw = new Thread(this.CountPages_Fast);
    tw.Start();

    //Monitor: watches the worker and turns the buttons back on afterwards
    tm = new Thread(this.Monitor);
    tm.Start();
}
#endregion
#region Monitor And ProgressBar
/// <summary>
/// Used to monitor the progress of the worker thread so that we know when to re-enable the form's UI.
/// Polls every 250ms until the worker has finished, then re-enables the buttons.
/// </summary>
private void Monitor()
{
    //Snapshot the field so we keep watching the same thread for the whole wait
    Thread worker = tw;

    //IsAlive, not "ThreadState == Running": the worker drops into WaitSleepJoin every time it blocks
    //(for example while a progress update is being marshalled to the UI thread), and an equality
    //check against Running would treat that as "finished" and re-enable the UI too early
    while (worker != null && worker.IsAlive)
    {
        Thread.Sleep(250);
    }

    SetFormState(true);
}
/// <summary>
/// Called from various methods on various threads to update the main progress bar
/// </summary>
/// <param name="value">Current position; clamped into the bar's valid range before it is applied</param>
/// <param name="max">Value that represents 100% on the bar</param>
private void updatePB(int value, int max){
    //Get the progress bar, there should be only one
    var pb = (ProgressBar)this.Controls.Find("pbFileCreated", false)[0];
    //See if we are on another thread
    if (pb.InvokeRequired){
        //If so, have the main thread invoke our method with the same parameters for us
        pb.Invoke(new MethodInvoker(delegate() { updatePB(value, max); }));
        return;
    }
    //Honor the caller's maximum (previously the fileCount field was used and max was ignored)
    pb.Maximum = max;
    //ProgressBar.Value throws when set outside Minimum..Maximum, so clamp instead of crashing the caller
    pb.Value = Math.Max(pb.Minimum, Math.Min(value, max));
}
#endregion
/// <summary>
/// Worker-thread body: empties the working folder, then writes fileCount PDFs into it, each with a
/// random number of pages between 1 and maxNumberOfPages. Shows a message box when finished.
/// </summary>
private void CreateSampleFiles()
{
    //Start from a clean slate, then (re)create the output folder
    CleanUp(false);
    Directory.CreateDirectory(workingFolder);

    var rng = new Random();
    for (int fileNumber = 1; fileNumber <= fileCount; fileNumber++)
    {
        //Report progress once per 100 files
        if (fileNumber % 100 == 0)
        {
            updatePB(fileNumber, fileCount);
        }

        //File names are the file number left-padded with zeros to 8 characters
        string paddedNumber = fileNumber.ToString().PadLeft(8, '0');
        string pdfPath = Path.Combine(workingFolder, String.Format("{0}.pdf", paddedNumber));

        using (FileStream output = new FileStream(pdfPath, FileMode.Create, FileAccess.Write, FileShare.None))
        using (Document pdf = new Document(PageSize.LETTER))
        using (PdfWriter writer = PdfWriter.GetInstance(pdf, output))
        {
            pdf.Open();

            //Upper bound of Next() is exclusive, hence the + 1
            int pagesToWrite = rng.Next(1, maxNumberOfPages + 1);
            for (int pageNumber = 1; pageNumber <= pagesToWrite; pageNumber++)
            {
                pdf.NewPage();
                //A line of text per page, just to give the page a little "weight"
                pdf.Add(new Paragraph(String.Format("File {0}, Page {1}", fileNumber, pageNumber)));
            }

            pdf.Close();
        }
    }

    //Let the user know we're done
    MessageBox.Show(String.Format("Created {0} Files", fileCount));
}
/// <summary>
/// Removes the working folder and everything inside it, if the folder exists.
/// </summary>
/// <param name="msg">True to pop up a confirmation message once finished</param>
private void CleanUp(bool msg)
{
    bool folderExists = Directory.Exists(workingFolder);
    if (folderExists)
    {
        //Recursive delete: takes every generated file with it
        Directory.Delete(workingFolder, true);
    }

    if (!msg)
    {
        return;
    }
    MessageBox.Show("Test files deleted");
}
/// <summary>
/// Verifies that the working folder exists and holds exactly fileCount PDF files.
/// Shows a message box describing the problem when it does not.
/// </summary>
/// <returns>True when the test files are in place, otherwise false</returns>
private bool SanityCheck()
{
    //Work out what (if anything) is wrong first, then report it in one place
    string problem = null;
    if (!Directory.Exists(workingFolder))
    {
        problem = "Folder not found, please create first";
    }
    else if (Directory.EnumerateFiles(workingFolder, "*.pdf").Count() != fileCount)
    {
        problem = "Not enough files exist in source folder, please create files before using.";
    }

    if (problem == null)
    {
        return true;
    }
    MessageBox.Show(problem);
    return false;
}
/// <summary>
/// Worker-thread body: counts the pages of every PDF in the working folder using the plain
/// PdfReader(string) constructor and reports the total together with the elapsed time.
/// </summary>
private void CountPages_Slow(){
    //Make sure we've got files to work with
    if (!SanityCheck()) return;
    //Create a timer
    var st = new Stopwatch();
    //Start it
    st.Start();
    //Get our files. Materialized once: EnumerateFiles is lazy and would hit the disk again for the count
    string[] files = Directory.GetFiles(workingFolder, "*.pdf");
    //Total number of pages found
    int totalPageCount = 0;
    //Used to update the progress bar
    int i = 0;
    //Loop through each file
    foreach (string f in files){
        //This is a total perf hit but the differences between the two methods is so great it doesn't really matter
        //Every 100 files update the progress bar
        i++;
        if (i % 100 == 0){
            updatePB(i, files.Length);
        }
        //Add the page count to the total, and always release the reader afterwards
        var reader = new PdfReader(f);
        try{
            totalPageCount += reader.NumberOfPages;
        }finally{
            reader.Close();
        }
    }
    //Stop our timer
    st.Stop();
    //TotalSeconds, not Seconds: Seconds is only the 0-59 component and wraps around after a minute.
    //The cast keeps the whole-second truncation the message has always shown.
    MessageBox.Show(String.Format("Found {0:N0} pages in {1:N0} seconds", totalPageCount, (long)st.Elapsed.TotalSeconds));
}
/// <summary>
/// Worker-thread body: counts the pages of every PDF in the working folder using a PdfReader built
/// on RandomAccessFileOrArray(fileName, forceRead: true) and reports the total together with the
/// elapsed time.
/// </summary>
private void CountPages_Fast(){
    //Make sure we've got files to work with
    if (!SanityCheck()) return;
    //Create a timer
    var st = new Stopwatch();
    //Start it
    st.Start();
    //Get our files. Materialized once: EnumerateFiles is lazy and would hit the disk again for the count
    string[] files = Directory.GetFiles(workingFolder, "*.pdf");
    //Total number of pages found
    int totalPageCount = 0;
    //Used to update the progress bar
    int i = 0;
    //Loop through each file
    foreach (string f in files){
        //This is a total perf hit but the differences between the two methods is so great it doesn't really matter
        //Every 100 files update the progress bar
        i++;
        if (i % 100 == 0){
            updatePB(i, files.Length);
        }
        //Add the page count to the total, and always release the reader (and its source) afterwards
        var reader = new PdfReader(new RandomAccessFileOrArray(f, true), null);
        try{
            totalPageCount += reader.NumberOfPages;
        }finally{
            reader.Close();
        }
    }
    //Stop our timer
    st.Stop();
    //TotalSeconds, not Seconds: Seconds is only the 0-59 component and wraps around after a minute.
    //The cast keeps the whole-second truncation the message has always shown.
    MessageBox.Show(String.Format("Found {0:N0} pages in {1:N0} seconds", totalPageCount, (long)st.Elapsed.TotalSeconds));
}
}
}
Hi,
Thanks for the solution, but I am facing another issue with the PdfStamper object’s close() method.
It is taking too much time to close the PdfStamper object.
If you have any idea regarding this, please help me.
// Reader's snippet, as posted (a fragment, not compilable on its own).
// NOTE(review): pdfReaderTest is the only reader built with forceRead = true, but it is never handed to the stamper.
var pdfReaderTest = new PdfReader(new RandomAccessFileOrArray(FileName,true),null);
// NOTE(review): this is the reader the stamper actually uses; it goes through the plain PdfReader(string) path.
var pdfReader = new PdfReader(pdfTemplate);
// NOTE(review): "fileName" here and "FileName" above differ in case - presumably two different variables; confirm.
var pdfStamper = new PdfStamper(pdfReader,
new FileStream(fileName, FileMode.Create,
FileAccess.ReadWrite));
pdfStamper .close() // It is taking too much time.
pdfReaderTest .close();
Your sample shows two PdfReaders although you are only using one, is there a reason for that? How long is “too long”? How big is your PDF?