c# PdfSharp 判断PDF某页是否存在图片和文本内容

使用 PDFsharp 判断 PDF 文件的某一页是否包含图片、是否包含文本内容。我使用的是 C# 与 PDFsharp,版本:1.50.5147。注意:下面代码中的 ExtractText 方法未在本文中给出,需要自行实现(遍历 ContentReader 返回的 CObject 并提取其中的文本)。

/// <summary>
/// Click handler: opens the PDF whose local path is entered in <c>tb_html</c> and,
/// for every page, writes to the debug output whether the page contains text
/// and whether it contains at least one image XObject.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Button_Click(object sender, RoutedEventArgs e)
        {
            // tb_html holds the local path of the PDF file to inspect.
            string pdfPath = this.tb_html.Text.Trim();
            if (string.IsNullOrEmpty(pdfPath))
            {
                // Nothing to open; avoid letting PdfReader.Open throw out of the UI handler.
                Debug.WriteLine("No PDF path was entered.");
                return;
            }

            using (PdfDocument document = PdfReader.Open(pdfPath, PdfDocumentOpenMode.Modify))
            {
                foreach (PdfPage page in document.Pages)
                {
                    try
                    {
                        bool isImg = PageContainsImage(page);

                        // Parse the page's content stream and look for any extracted text.
                        // ExtractText is defined outside this snippet; it may return null.
                        CObject content = ContentReader.ReadContent(page);
                        bool hasText = ExtractText(content)?.Any() == true;

                        Debug.WriteLine($"当前页是否存在文本内容{hasText}#是否存在图片{isImg}");
                    }
                    catch (Exception ex)
                    {
                        // Best effort per page: report the failure and continue with the next page
                        // instead of silently swallowing it.
                        Debug.WriteLine(ex);
                    }
                }
            }
        }

        /// <summary>
        /// Checks whether the page's /Resources -> /XObject dictionary references
        /// at least one external object whose /Subtype is /Image.
        /// </summary>
        /// <param name="page">The page to inspect.</param>
        /// <returns><c>true</c> if an image XObject is found; otherwise <c>false</c>.</returns>
        private static bool PageContainsImage(PdfPage page)
        {
            // Either dictionary may be absent on pages without external objects.
            PdfDictionary resources = page.Elements.GetDictionary("/Resources");
            PdfDictionary xObjects = resources?.Elements.GetDictionary("/XObject");
            if (xObjects == null)
            {
                return false;
            }

            // Only entries stored as indirect references are examined, as in the original code.
            foreach (PdfItem item in xObjects.Elements.Values)
            {
                PdfDictionary xObject = (item as PdfReference)?.Value as PdfDictionary;
                if (xObject != null && xObject.Elements.GetString("/Subtype") == "/Image")
                {
                    // The first image is enough to answer the question.
                    // To export or process images, handle xObject here instead of returning.
                    return true;
                }
            }

            return false;
        }

你可能感兴趣的:(c#,pdf,javascript)