由于项目需要将pdf转化为jpg,所以花了一点时间研究了一下,主要采用以下两种方式实现:
1.ImageMagick
ImageMagick 是一个用来创建、编辑、合成图片的软件。它可以读取、转换、写入多种格式的图片,支持图片切割、颜色替换、各种特效的应用,图片的旋转与组合,在图片上绘制文本、直线、多边形、椭圆、曲线,以及对图片进行拉伸和旋转。ImageMagick是免费软件:全部源码开放,可以自由使用、复制、修改、发布。最主要的是,它提供了.net平台的一个支持库:Magick.NET,这个库相对比较稳定,使用也很简单,转化的主要代码如下:
/// <summary>
/// Reads a PDF with Magick.NET and writes each page as a separate JPEG file.
/// </summary>
/// <param name="filePath">Path of the PDF file to convert.</param>
/// <param name="isThumbnails">
/// When true, pages are read with a density of 15 and written as "_page-NNNN.jpg";
/// otherwise they are read with a density of 300 and written as "page-NNNN.jpg".
/// </param>
/// <remarks>
/// Output files are written to the directory that contains <paramref name="filePath"/>.
/// The previous version computed that directory but never used it, so files landed in
/// the process working directory instead.
/// </remarks>
public void TransformToJPG(string filePath,bool isThumbnails) {
    var fileInfo = new FileInfo(filePath);
    // DirectoryName is null only when the path is a root; fall back to a relative write.
    var outputDir = fileInfo.DirectoryName ?? String.Empty;

    // Density is the DPI used when reading the PDF (the "-density" option).
    int dpi = isThumbnails ? 15 : 300;
    var settings = new MagickReadSettings();
    settings.Density = new MagickGeometry(dpi, dpi);

    // Thumbnails get a leading underscore so both sets can share one directory.
    var prefix = isThumbnails ? "_page-" : "page-";

    using (MagickImageCollection images = new MagickImageCollection()) {
        images.Read(filePath, settings);
        int page = 1;
        foreach (MagickImage image in images) {
            // Zero-pad the page number to four digits so names sort in page order.
            var fileName = String.Format("{0}{1}.jpg", prefix, page.ToString().PadLeft(4, '0'));
            image.Write(Path.Combine(outputDir, fileName));
            ++page;
        }
    }
}
其中,ImageMagick 命令行常用的几个参数说明如下(上面代码中的 settings.Density 对应 -density 参数):
-trim:裁剪图像四周空白区域;
-transparent color:去除图像中指定的颜色;
-density geometry:设定图像的 DPI 值;
-antialias:让图像具有抗锯齿的效果;
-quality:图像压缩等级。
2.Acrobat的一个接口
这个方法需要安装Adobe Acrobat,从其安装目录中copy出Acrobat.dll并在项目中引用即可。就我看来,无论从转化的效率还是从转化的图片效果来看,这种方式都是最好的,本人最终采用的也是这种方式,其主要实现代码如下:
/// <summary>
/// Renders a range of pages from a PDF to image files through the Adobe Acrobat
/// COM interface, using the clipboard as the transfer medium.
/// </summary>
/// <param name="pdfInputPath">Path of the PDF document to open.</param>
/// <param name="imageOutputPath">Directory for the images; created when it does not exist.</param>
/// <param name="imageName">Base file name (without extension) of the generated images.</param>
/// <param name="startPageNum">First page to convert, 1-based. Values &lt;= 0 are treated as 1.</param>
/// <param name="endPageNum">
/// Last page to convert, 1-based. Values &lt;= 0 or beyond the page count are treated as the last page.
/// </param>
/// <param name="imageFormat">Encoder passed to <c>Bitmap.Save</c>; null means JPEG.</param>
/// <param name="resolution">Scale factor applied to the page size; values &lt;= 0 are treated as 1.</param>
/// <exception cref="FileNotFoundException">Acrobat could not open <paramref name="pdfInputPath"/>.</exception>
/// <exception cref="OverflowException">
/// The scaled page size or zoom percentage does not fit the 16-bit values used by the Acrobat API.
/// </exception>
/// <remarks>
/// The current clipboard content is overwritten by every rendered page.
/// When a single page is converted the file is named "{imageName}.jpg"; when several pages
/// are converted each file is named "{imageName}_{page}.jpg" so pages no longer overwrite
/// one another. The ".jpg" extension is used regardless of <paramref name="imageFormat"/>.
/// </remarks>
public static void ConvertPDF2Image(string pdfInputPath, string imageOutputPath,
    string imageName, int startPageNum, int endPageNum, ImageFormat imageFormat, double resolution)
{
    Acrobat.CAcroPDDoc pdfDoc = null;
    bool docOpened = false;
    try
    {
        // Create the document (Can only create the AcroExch.PDDoc object using late-binding)
        // Note using VisualBasic helper functions, have to add reference to DLL
        pdfDoc = (Acrobat.CAcroPDDoc)Microsoft.VisualBasic.Interaction.CreateObject("AcroExch.PDDoc", "");

        // validate parameters
        if (!pdfDoc.Open(pdfInputPath))
        {
            throw new FileNotFoundException("Acrobat could not open the PDF document.", pdfInputPath);
        }
        docOpened = true;

        if (!Directory.Exists(imageOutputPath)) { Directory.CreateDirectory(imageOutputPath); }

        int pageCount = pdfDoc.GetNumPages();
        if (pageCount <= 0) { return; } // nothing to render; avoids AcquirePage(-1)

        if (startPageNum <= 0) { startPageNum = 1; }
        if (endPageNum > pageCount || endPageNum <= 0) { endPageNum = pageCount; }
        if (startPageNum > endPageNum)
        {
            // Proper swap: the old code assigned endPageNum to both variables.
            int tempPageNum = startPageNum;
            startPageNum = endPageNum;
            endPageNum = tempPageNum;
        }
        // After a swap the former start value may lie beyond the last page.
        if (endPageNum > pageCount) { endPageNum = pageCount; }

        if (imageFormat == null) { imageFormat = ImageFormat.Jpeg; }
        if (resolution <= 0) { resolution = 1; }

        bool singlePage = startPageNum == endPageNum;

        // start to convert each page
        for (int i = startPageNum; i <= endPageNum; i++)
        {
            Acrobat.CAcroPDPage pdfPage = null;
            Acrobat.CAcroPoint pdfPoint = null;
            Acrobat.CAcroRect pdfRect = null;
            try
            {
                // Acrobat page indexes are 0-based, the public parameters are 1-based.
                pdfPage = (Acrobat.CAcroPDPage)pdfDoc.AcquirePage(i - 1);
                pdfPoint = (Acrobat.CAcroPoint)pdfPage.GetSize();
                pdfRect = (Acrobat.CAcroRect)Microsoft.VisualBasic.Interaction.CreateObject("AcroExch.Rect", "");

                int imgWidth = (int)((double)pdfPoint.x * resolution);
                int imgHeight = (int)((double)pdfPoint.y * resolution);

                // The rect members are 16-bit; fail loudly instead of wrapping around silently.
                pdfRect.Left = 0;
                pdfRect.right = checked((short)imgWidth);
                pdfRect.Top = 0;
                pdfRect.bottom = checked((short)imgHeight);

                // Render to the clipboard, zoomed to (100 * resolution) percent so the
                // rendered page fills the rectangle computed above.
                pdfPage.CopyToClipboard(pdfRect, 0, 0, checked((short)(100 * resolution)));

                IDataObject clipboardData = Clipboard.GetDataObject();
                if (clipboardData != null && clipboardData.GetDataPresent(DataFormats.Bitmap))
                {
                    using (Bitmap pdfBitmap = (Bitmap)clipboardData.GetData(DataFormats.Bitmap))
                    {
                        if (pdfBitmap != null)
                        {
                            // Keep the historical name for a single page; add the page
                            // number when several pages would otherwise share one file.
                            string fileName = singlePage
                                ? imageName + ".jpg"
                                : imageName + "_" + i + ".jpg";
                            pdfBitmap.Save(Path.Combine(imageOutputPath, fileName), imageFormat);
                        }
                    }
                }
            }
            finally
            {
                // Release the per-page COM objects on every iteration, not once after the loop.
                ReleaseComObjectIfNotNull(pdfRect);
                ReleaseComObjectIfNotNull(pdfPoint);
                ReleaseComObjectIfNotNull(pdfPage);
            }
        }
    }
    finally
    {
        if (pdfDoc != null)
        {
            if (docOpened) { pdfDoc.Close(); }
            Marshal.ReleaseComObject(pdfDoc);
        }
    }
}

// Releases a COM wrapper when one was actually obtained; Marshal.ReleaseComObject throws on null.
private static void ReleaseComObjectIfNotNull(object comObject)
{
    if (comObject != null) { Marshal.ReleaseComObject(comObject); }
}
说明:由于上述代码通过剪贴板(Clipboard)获取渲染后的图片,如果是在新开的Thread中运行此代码,需要在线程启动前将Thread的ApartmentState设置为ApartmentState.STA。