Qt 数字报阅读器

Qt 数字报阅读器,使用 MuPDF 库渲染 Python 下载的数字报 PDF 文件,闲的时候看下报纸,既能培养阅读的习惯,也能了解国内外的新闻及国家大事,还能有效利用时间,不把时间浪费在无用的事上。

Qt 数字报阅读器_第1张图片

Qt 数字报阅读器_第2张图片

epaper.py

#!/usr/bin/python3 
# coding: utf-8

from urllib import request
from urllib import error
from urllib.parse import quote
import re
import threading
import os
import sys
import string
import time
import datetime
import schedule


# Root directory under which every paper gets its own "<paper>/PDF/<year>/<yyyymmdd>/" tree.
pdf_dir = "/home/pi/数字报/"
# "<local file> <url>" records of downloads that failed and should be retried.
error_pdf_lst = []
# Maps an issue date ("YYYY-MM-DD") to the list of paper names fully downloaded for that date.
epapers_done_dict = {}


def download_pdf(pdf_urls, file_dir, paper_name, date):
    """Download every page PDF of one issue and mark the issue done on success.

    Each URL is stored in its own numbered sub-directory (``01/``, ``02/``,
    ...) below ``file_dir``. A page is skipped when a non-empty file already
    exists and the page is not listed in ``error_pdf_lst``.

    Args:
        pdf_urls: Page PDF URLs in page order.
        file_dir: Issue directory, expected to end with ``/``.
        paper_name: Name recorded in ``epapers_done_dict`` on success.
        date: Issue date used as the key in ``epapers_done_dict``.
    """
    records = []
    for page_no, pdf_url in enumerate(pdf_urls, start=1):
        page_dir = "%s%02d/" % (file_dir, page_no)
        page_file = "%s%s" % (page_dir, pdf_url.split("/")[-1])
        record = "%s %s" % (page_file, pdf_url)
        records.append(record)
        if not os.path.isdir(page_dir):
            os.makedirs(page_dir)

        already_there = (
            os.path.isfile(page_file)
            and os.path.getsize(page_file)
            and record not in error_pdf_lst
        )
        if already_there:
            print("download %s OK, File Exists" % (record))
            continue

        print("start download %s" % (record))
        # The worker is joined right away, so pages are fetched one at a time.
        worker = threading.Thread(target=my_thread, args=(page_file, pdf_url))
        worker.start()
        worker.join()

    # Any page still listed as failed keeps the issue open for a later retry.
    if any(record in error_pdf_lst for record in records):
        return

    if paper_name not in epapers_done_dict.get(date, []):
        epapers_done_dict.setdefault(date, []).append(paper_name)


def my_thread(pdf_file_name, pdf_url):
    """Download ``pdf_url`` into ``pdf_file_name`` and track the outcome.

    On success the matching "<file> <url>" record is removed from
    ``error_pdf_lst``; on failure it is added so that a later run retries
    the page. An HTTP 404 is only logged and not recorded as a failure
    (presumably because the page does not exist for that issue -- confirm).

    Args:
        pdf_file_name: Destination path; its directory must already exist.
        pdf_url: Source URL; non-ASCII characters are percent-encoded.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
    req = request.Request(url=quote(pdf_url, safe=string.printable), headers=headers)
    pdf = "%s %s" % (pdf_file_name, pdf_url)
    chunk_size = 1024 * 100
    try:
        total = 0
        with request.urlopen(req) as f:
            with open(pdf_file_name, "wb") as f2:
                while True:
                    buff = f.read(chunk_size)
                    if not buff:
                        break

                    f2.write(buff)
                    # Count the bytes actually received; the last chunk is
                    # usually shorter than chunk_size.
                    total += len(buff)
                    sys.stdout.write("download %d KB\r" % (total / 1024))
                    sys.stdout.flush()

        if pdf in error_pdf_lst:
            error_pdf_lst.remove(pdf)
        print("download %s OK [%d KB]" % (pdf, total / 1024))
    except error.HTTPError as e:
        if e.code == 404:
            print("download %s ERROR [404]" % (pdf))
        else:
            # Other HTTP failures (5xx, 403, ...) may be transient: record
            # them so the page is retried instead of counting as done.
            if pdf not in error_pdf_lst:
                error_pdf_lst.append(pdf)
            print("download %s ERROR [%d]" % (pdf, e.code))
    except Exception:
        if pdf not in error_pdf_lst:
            error_pdf_lst.append(pdf)
        print("download %s ERROR" % (pdf))


# NOTE(review): this section was recovered from a copy in which the code had
# been collapsed onto a few physical lines and HTML-like text inside string
# literals had been stripped. The statement nesting below is reconstructed
# from statement order, and every literal marked "lost in extraction" must be
# restored from the target page's markup before the scrapers can work.

_BROWSER_HEADERS = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}


def _save_issue(paper_name, date, index_url, pdf_urls):
    """Download ``pdf_urls`` into the issue directory, or log that none were found."""
    parts = date.split("-")
    file_dir = "%s%s/PDF/%s/%s%s%s/" % (pdf_dir, paper_name, parts[0], parts[0], parts[1], parts[2])
    if pdf_urls:
        print("get %s %s OK" % (paper_name, index_url))
        download_pdf(pdf_urls, file_dir, paper_name, date)
    else:
        print("get %s %s ERROR" % (paper_name, index_url))


def _fetch_pending(date):
    """Run the scraper of every paper not yet recorded as done for ``date``."""
    for key, value in epapers.items():
        if key not in epapers_done_dict.get(date, []):
            # Joined immediately, so papers are processed one after another.
            t = threading.Thread(target=value, args=(key, date))
            t.start()
            t.join()


def _report_and_log():
    """Print the outstanding failures (or success) and append the state to the log."""
    if error_pdf_lst:
        for failed in error_pdf_lst:
            print("---ERROR: %s" % (failed))
    else:
        print("---All Download OK")
    check_status()


def get_rmrb(paper_name, date):
    """Collect the People's Daily page PDF links for ``date`` and download them.

    The issue index page is scanned line by line between ``start_tag`` and
    ``end_tag``; every ``/page....pdf`` link found is made absolute.

    Args:
        paper_name: Paper name used in log output and in the storage path.
        date: Issue date formatted as ``YYYY-MM-DD``.
    """
    parts = date.split("-")
    index_url = "http://paper.people.com.cn/rmrb/html/%s-%s/%s/nbs.D110000renmrb_01.htm" % (parts[0], parts[1], parts[2])
    print("start get %s %s" % (paper_name, index_url))

    # NOTE(review): both markers were lost in extraction. With an empty
    # end_tag the scan stops on the first line after start_tag matches.
    start_tag = "\n"
    end_tag = ""
    pdf_urls = []
    start = False
    req = request.Request(url=index_url, headers=_BROWSER_HEADERS)
    try:
        with request.urlopen(req) as f:
            data = f.read().decode("utf-8", "ignore").split("\r\n")
            for line in data:
                if line.find(start_tag) != -1:
                    start = True
                if start:
                    if line.find(end_tag) != -1:
                        break
                    for match in re.findall(r"/page(.+?)\.pdf>", line):
                        pdf_urls.append("http://paper.people.com.cn/rmrb/page%s.pdf" % (match))
    except Exception:
        # Best effort: a failed fetch is reported below as "ERROR".
        pass

    _save_issue(paper_name, date, index_url, pdf_urls)


def get_zqcn(paper_name, date):
    """Collect the China Enterprise News page PDF links for ``date`` and download them.

    Only the first line containing ``start_tag`` is inspected for
    ``/attachement....pdf`` links.

    Args:
        paper_name: Paper name used in log output and in the storage path.
        date: Issue date formatted as ``YYYY-MM-DD``.
    """
    parts = date.split("-")
    index_url = "http://epaper.zqcn.com.cn/content/%s-%s/%s/node_2.htm" % (parts[0], parts[1], parts[2])
    print("start get %s %s" % (paper_name, index_url))

    # NOTE(review): marker lost in extraction; an empty string matches every line.
    start_tag = ""
    pdf_urls = []
    req = request.Request(url=index_url, headers=_BROWSER_HEADERS)
    try:
        with request.urlopen(req) as f:
            data = f.read().decode("utf-8", "ignore").split("\r\n")
            for line in data:
                if line.find(start_tag) != -1:
                    for match in re.findall(r"/attachement(.+?)\.pdf>", line):
                        pdf_urls.append("http://epaper.zqcn.com.cn/attachement%s.pdf" % (match))
                    break
    except Exception:
        # Best effort: a failed fetch is reported below as "ERROR".
        pass

    _save_issue(paper_name, date, index_url, pdf_urls)


def get_cdrb(paper_name, date):
    """Collect the Chengdu Daily page PDF links for ``date`` and download them.

    Works in two stages: the index page yields one HTML page per newspaper
    page, and each of those pages is then scanned for its PDF link.

    Args:
        paper_name: Paper name used in log output and in the storage path.
        date: Issue date formatted as ``YYYY-MM-DD``.
    """
    parts = date.split("-")
    index_url = "http://www.cdrb.com.cn/epaper/cdrbpc/%s%s/%s/l01.html" % (parts[0], parts[1], parts[2])
    print("start get %s %s" % (paper_name, index_url))

    # NOTE(review): markers and the link pattern were lost in extraction.
    start_tag = "\n"
    end_tag = ""
    page_pattern = r""
    urls = []
    start = False
    req = request.Request(url=index_url, headers=_BROWSER_HEADERS)
    try:
        with request.urlopen(req) as f:
            data = f.read().decode("utf-8", "ignore").split("\r\n")
            for line in data:
                if line.find(start_tag) != -1:
                    start = True
                if start:
                    if line.find(end_tag) != -1:
                        break
                    for match in re.findall(page_pattern, line):
                        urls.append("http://www.cdrb.com.cn/epaper/cdrbpc/%s%s/%s/%s.html" % (parts[0], parts[1], parts[2], match))
    except Exception:
        # Best effort: a failed fetch is reported below as "ERROR".
        pass

    # NOTE(review): marker and the HTML around the PDF link pattern were lost
    # in extraction; only "(.+?)\.pdf" survived.
    start_tag = "\n\n"
    pdf_pattern = r"\n\n(.+?)\.pdf\n\n"
    pdf_urls = []
    for url in urls:
        req = request.Request(url=url, headers=_BROWSER_HEADERS)
        try:
            with request.urlopen(req) as f:
                data = f.read().decode("utf-8", "ignore").split("\r\n")
                for line in data:
                    if line.find(start_tag) != -1:
                        for match in re.findall(pdf_pattern, line):
                            pdf_urls.append("%s.pdf" % (match))
                        break
        except Exception:
            # Best effort: skip pages that cannot be fetched.
            pass

    _save_issue(paper_name, date, index_url, pdf_urls)


def check_status():
    """Append the completed papers and the failed downloads to today's log file."""
    date = time.strftime("%Y-%m-%d", time.localtime())
    with open("/home/pi/python_svn/logs_%s.txt" % (date), "a", encoding="utf-8") as f:
        date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        f.write("[%s] epapers_done_dict\n" % (date_time))
        for key, value in epapers_done_dict.items():
            f.write("%s %s\n" % (key, ", ".join(value)))
        f.write("[%s] error_pdf_lst\n" % (date_time))
        for failed in error_pdf_lst:
            f.write("%s\n" % (failed))


def get_days_in_month(year, month):
    """Return the number of days in ``month`` of ``year`` (Gregorian leap rule).

    Any month outside the 31- and 30-day sets is treated as February.
    """
    if month in (1, 3, 5, 7, 8, 10, 12):
        return 31
    elif month in (4, 6, 9, 11):
        return 30
    elif (((year % 4 == 0) and (year % 100 != 0)) or (year % 400 == 0)):
        return 29
    else:
        return 28


def get_epapers_month(year, month):
    """Fetch every configured paper for each day of the given month."""
    for day in range(1, get_days_in_month(year, month) + 1):
        date = "%s-%02d-%02d" % (year, month, day)
        _fetch_pending(date)

    _report_and_log()


def get_epapers_today():
    """Fetch every configured paper for the current local date."""
    today = time.strftime("%Y-%m-%d", time.localtime())
    _fetch_pending(today)
    _report_and_log()


def get_epapers_date(date):
    """Fetch every configured paper for ``date`` (``YYYY-MM-DD``)."""
    _fetch_pending(date)
    _report_and_log()


# Paper name -> scraper called as scraper(paper_name, date).
epapers = {
    "人民日报": get_rmrb,
    "中国企业报": get_zqcn,
    "成都日报": get_cdrb,
}


if __name__ == "__main__":
    background = True
    #background = False

    if background:
        time_lst = ["06:00", "10:00", "14:00", "18:00"]
        for get_time in time_lst:
            # Log the state five minutes before each scheduled download run.
            delta = datetime.timedelta(minutes=-5)
            dt = datetime.datetime.strptime(get_time, "%H:%M") + delta
            check_time = dt.strftime("%H:%M")
            schedule.every().day.at(check_time).do(check_status)
            schedule.every().day.at(get_time).do(get_epapers_today)

        while True:
            schedule.run_pending()
            time.sleep(1)
    else:
        #get_epapers_month(2019, 4)
        get_epapers_date("2019-04-30")

MuPDF 库需要自己编译 lib 文件,官网下载源码。

Index of Downloads
https://www.mupdf.com/downloads/index.html

最初下载的是 1.14 版本,编译出来了三个库文件,但在使用的时候出现“无法解析的外部符号”,猜测和 resources\fonts\noto 的字体文件有关,docs\examples 下的 example.c 存在语法错误,在 1.13 版本是可以的。

Qt 数字报阅读器_第3张图片

Qt 数字报阅读器_第4张图片 

Qt 数字报阅读器_第5张图片

下载 1.13 版本,VS 2013 打开 platform\win32 下的 mupdf.sln,会提示进行迁移,完成迁移后,按顺序生成三个库,1.13 版本不需要 bin2coff ,只需要把三个项目的代码生成改成“多线程 DLL (/MD)”,依次生成即可,不需要其他的操作。

Qt 数字报阅读器_第6张图片

Qt 数字报阅读器_第7张图片

在生成 libresources.lib 的时候,可以看到在 1.14 版本中报 LNK2001 错误的 obj 在 1.13 版本中是成功生成的。

Qt 数字报阅读器_第8张图片

生成三个库文件后,Qt 项目根目录新建 mupdf 文件夹,放入 lib 和 include 文件,pro 文件添加

INCLUDEPATH += $$PWD/mupdf

LIBS += -L$$PWD/mupdf -llibmupdf -llibresources -llibthirdparty

三个库文件必须同时加进来,然后执行 qmake 并重新构建,此时会出现如下告警,属于正常现象。

Qt 数字报阅读器_第9张图片

使用 include 文件时,只需要包含 pdf.h 即可,也不用加 extern "C" 。

Qt 数字报阅读器_第10张图片

Compiling MuPDF DLL to Render and Edit PDF Documents - CodeProject
https://www.codeproject.com/Articles/1190061/Compiling-MuPDF-DLL-to-Render-and-Edit-PDF-Documen

基于MuPDF和Qt的PDF阅读器的开发 - 简书
https://www.jianshu.com/p/d91b26a9ae05

基于mupdf的PDF阅读器 - 吸喵高手的小窝 - CSDN博客
https://blog.csdn.net/k_wang_/article/details/82870401

qt5编写pdf阅读器,怎么编译poppler啊? 环境是win7x64+Qt5.5.1+VS2013 - 知乎
https://www.zhihu.com/question/38594052

VS 2013 x64 的 lib 和 include 可到链接:https://pan.baidu.com/s/1b76f-n0-yFelDz8HQ-sVng 提取码:4wuk 下载。

mainwindow.cpp

#include "mainwindow.h"
#include "ui_mainwindow.h"
#include "pdfshowwidget.h"
#include 
#include 
#include 
#include 
#include 
#include 

MainWindow::MainWindow(QWidget *parent) :
    QMainWindow(parent),
    ui(new Ui::MainWindow)
{
    ui->setupUi(this);

    // Window-wide font and title.
    QFont windowFont;
    windowFont.setPixelSize(14);
    setFont(windowFont);
    setWindowTitle(QStringLiteral("数字报阅读器"));

    // Root directory scanned by readEPapers() for one sub-directory per paper.
    mDirPath = QStringLiteral("Z:\\");

    // The calendar starts disabled; it is configured here and only styled later.
    QCalendarWidget *calendar = ui->calendarWidget;
    calendar->setVerticalHeaderFormat(QCalendarWidget::NoVerticalHeader);
    calendar->setEnabled(false);
    calendar->setGridVisible(true);
    calendar->setDateEditEnabled(false);
    calendar->setFirstDayOfWeek(Qt::Sunday);

    // Pin the paper list to a fixed 200 px width.
    ui->listWidget->setMinimumWidth(200);
    ui->listWidget->setMaximumWidth(200);

    // Populate the paper list once the event loop is running.
    QTimer::singleShot(0, this, SLOT(readEPapers()));
}

// Releases the generated UI object allocated in the constructor.
MainWindow::~MainWindow()
{
    delete ui;
}

void MainWindow::on_calendarWidget_clicked(const QDate &date)
{
    QTextCharFormat format = ui->calendarWidget->dateTextFormat(date);
    if (format.background().color() == Qt::green)
    {
        QString dateStr = date.toString("yyyyMMdd");
        QStringList detailPdfLst = getDetailPdf(mDetailMap.value(dateStr));
        if (!detailPdfLst.isEmpty())
        {
            openPdf(detailPdfLst);
        }
    }
}

void MainWindow::readEPapers()
{
    QDir dir(mDirPath);
    if (!dir.exists())
    {
        return;
    }

    dir.setFilter(QDir::Dirs | QDir::NoSymLinks | QDir::NoDotAndDotDot);
    dir.setSorting(QDir::Name);
    QFileInfoList fileInfoLst = dir.entryInfoList();
    if (fileInfoLst.isEmpty())
    {
        return;
    }

    foreach (QFileInfo fileInfo, fileInfoLst)
    {
        QListWidgetItem *item = new QListWidgetItem;
        item->setText(fileInfo.fileName());
        item->setData(Qt::UserRole, fileInfo.absoluteFilePath());
        ui->listWidget->addItem(item);
    }
}

void MainWindow::on_listWidget_clicked(const QModelIndex &index)
{
    Q_UNUSED(index);

    QString dirStr = ui->listWidget->currentItem()->data(Qt::UserRole).toString();
    QDir dir(QStringLiteral("%1%2PDF").arg(dirStr).arg(QDir::separator()));
    if (!dir.exists())
    {
        dir.setPath(QStringLiteral("%1%2JPG").arg(dirStr).arg(QDir::separator()));
        if (!dir.exists())
        {
            return;
        }
    }

    dir.setFilter(QDir::Dirs | QDir::NoSymLinks | QDir::NoDotAndDotDot);
    dir.setSorting(QDir::Name);
    QFileInfoList fileInfoLst = dir.entryInfoList();
    if (fileInfoLst.isEmpty())
    {
        return;
    }

    QStringList detailLst;
    foreach (QFileInfo fileInfo, fileInfoLst)
    {
        QStringList lst = getDetailDate(fileInfo.absoluteFilePath());
        if (!lst.isEmpty())
        {
            detailLst.append(lst);
        }
    }

    if (detailLst.isEmpty())
    {
        return;
    }
    else
    {
        QString minDateStr = QDir::toNativeSeparators(detailLst.first()).split(QDir::separator()).last();
        QString maxDateStr = QDir::toNativeSeparators(detailLst.last()).split(QDir::separator()).last();
        QDate minDate = QDate::fromString(minDateStr, "yyyyMMdd");
        QDate maxDate = QDate::fromString(maxDateStr, "yyyyMMdd");

        ui->calendarWidget->setEnabled(true);
        ui->calendarWidget->setDateRange(minDate, maxDate);
        ui->calendarWidget->setSelectedDate(maxDate.addDays(-1));

        for (QDate date = minDate; date <= maxDate;)
        {
            QTextCharFormat format;
            format.setBackground(Qt::gray);
            ui->calendarWidget->setDateTextFormat(date, format);
            date = date.addDays(1);
        }

        mDetailMap.clear();
        foreach (QString detail, detailLst)
        {
            QString dateStr = QDir::toNativeSeparators(detail).split(QDir::separator()).last();
            QDate date = QDate::fromString(dateStr, "yyyyMMdd");
            QTextCharFormat format;
            format.setBackground(Qt::green);
            ui->calendarWidget->setDateTextFormat(date, format);
            mDetailMap.insert(dateStr, detail);
        }
    }
}

// Returns the absolute paths of the direct sub-directories of dirStr, sorted
// by name; the result is empty when dirStr does not exist or has none.
QStringList MainWindow::getDetailDate(const QString &dirStr)
{
    QStringList subDirs;

    QDir parentDir(dirStr);
    if (parentDir.exists())
    {
        parentDir.setFilter(QDir::Dirs | QDir::NoSymLinks | QDir::NoDotAndDotDot);
        parentDir.setSorting(QDir::Name);

        const QFileInfoList entries = parentDir.entryInfoList();
        for (QFileInfoList::const_iterator it = entries.constBegin(); it != entries.constEnd(); ++it)
        {
            subDirs.append(it->absoluteFilePath());
        }
    }

    return subDirs;
}

// Lists the sub-directories of one issue directory; this is the same listing
// that getDetailDate() performs, applied one level deeper.
QStringList MainWindow::getDetailPdf(const QString &dirStr)
{
    const QStringList pageDirs = getDetailDate(dirStr);
    return pageDirs;
}

// Opens a viewer window for one issue.
//
// pdfLst holds the page directories of the issue; every file inside them is
// collected in name order. The paper name and issue date for the window
// title are taken from the path of the first file, relative to mDirPath.
// Nothing is opened when no file is found or the path is shallower than
// expected.
void MainWindow::openPdf(QStringList pdfLst)
{
    QStringList fileLst;
    foreach (QString pdf, pdfLst)
    {
        QDir dir(pdf);
        if (!dir.exists())
        {
            continue;
        }

        dir.setFilter(QDir::Files);
        dir.setSorting(QDir::Name);
        foreach (QFileInfo fileInfo, dir.entryInfoList())
        {
            fileLst.append(fileInfo.absoluteFilePath());
        }
    }

    // Without any file there is no path to derive the title from.
    if (fileLst.isEmpty())
    {
        return;
    }

    // Number of non-empty components in the root path, i.e. the offset of the
    // paper directory inside a split absolute file path.
    QStringList tmpLst = mDirPath.split("\\");
    int count = 0;
    foreach (QString tmp, tmpLst)
    {
        if (!tmp.isEmpty())
        {
            count++;
        }
    }

    // Expected layout: <root>/<paper>/<PDF|JPG>/<year>/<yyyyMMdd>/<page>/<file>
    QStringList lst = QDir::toNativeSeparators(fileLst.first()).split(QDir::separator());
    if (lst.size() <= 3 + count)
    {
        return;
    }

    QString paperName = lst[0 + count];
    QString paperDate = lst[3 + count];
    const QDate issueDate = QDate::fromString(paperDate, "yyyyMMdd");

    QString week;
    switch (issueDate.dayOfWeek())
    {
    case Qt::Monday:
        week = QStringLiteral("星期一");
        break;
    case Qt::Tuesday:
        week = QStringLiteral("星期二");
        break;
    case Qt::Wednesday:
        week = QStringLiteral("星期三");
        break;
    case Qt::Thursday:
        week = QStringLiteral("星期四");
        break;
    case Qt::Friday:
        week = QStringLiteral("星期五");
        break;
    case Qt::Saturday:
        week = QStringLiteral("星期六");
        break;
    case Qt::Sunday:
        week = QStringLiteral("星期日");
        break;
    default:
        // dayOfWeek() is 0 for an invalid date; leave the weekday empty.
        break;
    }

    QString title = QStringLiteral("%1 %2 %3 今日%4版").arg(paperName).arg(issueDate.toString(QStringLiteral("yyyy年M月d日"))).arg(week).arg(fileLst.size());

    // The viewer deletes itself on close (WA_DeleteOnClose is set in its constructor).
    PdfShowWidget *widget = new PdfShowWidget(title, fileLst);
    widget->showMaximized();
}

pdfshowwidget.cpp

#include "pdfshowwidget.h"
#include 
#include 
#include 
#include 
#include 
#include 
#include 

// Builds a viewer window titled `title` that shows the pages in `fileLst`
// inside a scroll area; rendering is deferred to showpdf().
PdfShowWidget::PdfShowWidget(const QString &title, const QStringList &fileLst)
{
    mPdfFileLst = fileLst;

    // Window appearance and lifetime: the widget frees itself when closed.
    QFont widgetFont;
    widgetFont.setPixelSize(14);
    setFont(widgetFont);
    setWindowTitle(title);
    setAttribute(Qt::WA_DeleteOnClose);

    // Frameless scroll area that resizes its content widget.
    mScrollArea = new QScrollArea(this);
    mScrollArea->setFrameShape(QFrame::NoFrame);
    mScrollArea->setWidgetResizable(true);

    QGridLayout *outerLayout = new QGridLayout(this);
    outerLayout->addWidget(mScrollArea);
    setLayout(outerLayout);

    // Render once the event loop is running.
    QTimer::singleShot(0, this, SLOT(showpdf()));
}

// Nothing to release explicitly: the widgets created by this class are given
// a parent when they are constructed.
PdfShowWidget::~PdfShowWidget()
{

}

// Renders every file in mPdfFileLst into a QLabel and stacks the labels
// vertically inside the scroll area.
//
// Files ending in "jpg"/"png" are loaded as images; files ending in ".pdf"
// are rasterised page by page with MuPDF. Any other file is skipped without
// advancing the progress value. A modal progress dialog is shown meanwhile.
void PdfShowWidget::showpdf()
{
    QProgressDialog progress(this);
    progress.setFont(this->font());
    progress.setWindowTitle(QStringLiteral("数字报阅读器"));
    // Strip the help, min/max and close buttons from the dialog's title bar.
    progress.setWindowFlags(windowFlags() & (~Qt::WindowContextHelpButtonHint) & (~Qt::WindowMinMaxButtonsHint) & (~Qt::WindowCloseButtonHint));
    progress.setLabelText(QStringLiteral("正在处理,共%1版").arg(mPdfFileLst.size()));
    progress.setRange(0, mPdfFileLst.size() - 1);
    progress.setModal(true);
    progress.setCancelButtonText(QStringLiteral("取消"));
    progress.setMinimumDuration(0);
    // NOTE(review): the connected slot is an empty stub and the loop below
    // never checks for cancellation, so "cancel" does not stop the rendering.
    connect(&progress, SIGNAL(canceled()), this, SLOT(progressCanceled()));
    int count = 0;

    // Container widget that will become the scroll area's content.
    QWidget *widget = new QWidget(this);
    QVBoxLayout *layout = new QVBoxLayout(widget);

    foreach (QString pdf, mPdfFileLst)
    {
        // Image branch; note the suffix test has no leading dot and is case-sensitive.
        if ((!pdf.endsWith(".pdf")) && (pdf.endsWith("jpg") || pdf.endsWith("png")))
        {
            progress.setValue(count++);
            // Let the dialog repaint while user input events are excluded.
            qApp->processEvents(QEventLoop::ExcludeUserInputEvents);

            QImage image(pdf);
            QLabel *label = new QLabel(this);
            QPixmap pixmap = QPixmap::fromImage(image);
            // Scale to the window size; the 67 px deduction presumably leaves
            // room for margins and the scroll bar -- TODO confirm.
            QPixmap fitPixmap = pixmap.scaled(this->width() - 67, this->height(), Qt::KeepAspectRatioByExpanding, Qt::SmoothTransformation);
            label->setPixmap(fitPixmap);
            layout->addWidget(label);
        }
        else if (pdf.endsWith(".pdf"))
        {
            progress.setValue(count++);
            qApp->processEvents(QEventLoop::ExcludeUserInputEvents);

            int page_count;
            fz_context *ctx;
            fz_document *doc;
            fz_pixmap *pix;
            fz_matrix ctm;

            // A fresh MuPDF context is created for each file and dropped at
            // the end of this branch (or on any failure below).
            ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
            if (!ctx)
            {
                continue;
            }

            // Register the built-in document handlers on this context.
            fz_try(ctx)
                    fz_register_document_handlers(ctx);
            fz_catch(ctx)
            {
                fz_drop_context(ctx);
                continue;
            }

            // The path is passed as the bytes of QString::toStdString().
            fz_try(ctx)
                    doc = fz_open_document(ctx, pdf.toStdString().c_str());
            fz_catch(ctx)
            {
                fz_drop_context(ctx);
                continue;
            }

            fz_try(ctx)
                    page_count = fz_count_pages(ctx, doc);
            fz_catch(ctx)
            {
                fz_drop_document(ctx, doc);
                fz_drop_context(ctx);
                continue;
            }

            // Transform set up with scale factors 1,1 and a rotation of 0.
            fz_scale(&ctm, 1, 1);
            fz_pre_rotate(&ctm, 0);

            for (int i = 0; i < page_count; ++i)
            {
                // Rasterise page i to RGB; a page that fails is skipped.
                fz_try(ctx)
                        pix = fz_new_pixmap_from_page_number(ctx, doc, i, &ctm, fz_device_rgb(ctx), 0);
                fz_catch(ctx)
                {
                    continue;
                }

                unsigned char *samples = pix->samples;
                int width = fz_pixmap_width(ctx, pix);
                int height = fz_pixmap_height(ctx, pix);
                // The QImage is built directly on the MuPDF sample buffer, so
                // the pixmap conversion below has to finish before the
                // fz_pixmap is dropped.
                QImage image(samples, width, height, pix->stride, QImage::Format_RGB888);
                QLabel *label = new QLabel(this);
                QPixmap pixmap = QPixmap::fromImage(image);
                QPixmap fitPixmap = pixmap.scaled(this->width() - 67, this->height(), Qt::KeepAspectRatioByExpanding, Qt::SmoothTransformation);
                label->setPixmap(fitPixmap);
                layout->addWidget(label);

                fz_drop_pixmap(ctx, pix);
            }

            fz_drop_document(ctx, doc);
            fz_drop_context(ctx);
        }
    }

    // Install the stacked pages as the scroll area's content.
    widget->setLayout(layout);
    mScrollArea->setWidget(widget);
}

// Slot connected to the progress dialog's canceled() signal in showpdf().
// Intentionally empty: cancelling currently has no effect on rendering.
void PdfShowWidget::progressCanceled()
{
    // dummy
}

 

你可能感兴趣的:(Qt,Python)