Qt 数字报阅读器,使用 MuPDF 库渲染 Python 下载的数字报 PDF 文件,闲的时候看下报纸,既能培养阅读的习惯,也能了解国内外的新闻及国家大事,还能有效利用时间,不把时间浪费在无用的事上。
epaper.py
#!/usr/bin/python3
# coding: utf-8
from urllib import request
from urllib import error
from urllib.parse import quote
import re
import threading
import os
import sys
import string
import time
import datetime
import schedule
# Root directory under which every newspaper's downloads are stored.
pdf_dir = "/home/pi/数字报/"
# "<local file> <url>" entries of downloads that failed; download_pdf() retries them.
error_pdf_lst = list()
# Maps a date string to the list of paper names fully downloaded for that date.
epapers_done_dict = dict()
def download_pdf(pdf_urls, file_dir, paper_name, date):
    """Download every page PDF of one newspaper issue and record the outcome.

    Page ``n`` (1-based, in the order of ``pdf_urls``) is stored as
    ``<file_dir><nn>/<last URL segment>``.  A page is skipped when a non-empty
    file already exists and the page is not listed in ``error_pdf_lst``.
    When no page of this issue is left in ``error_pdf_lst`` the paper is
    appended to ``epapers_done_dict[date]``.

    Args:
        pdf_urls: Page PDF URLs, in page order.
        file_dir: Issue directory; it is concatenated directly with the page
            number, so it is expected to end with "/".
        paper_name: Newspaper name stored as the "done" marker.
        date: Issue date string, used as the key of ``epapers_done_dict``.
    """
    pdf_lst = []
    for count, pdf_url in enumerate(pdf_urls, start=1):
        pdf_file_dir = "%s%02d/" %(file_dir, count)
        pdf_file_name = "%s%s" %(pdf_file_dir, pdf_url.split("/")[-1])
        # Same "<file> <url>" key that my_thread() puts into error_pdf_lst.
        pdf = "%s %s" %(pdf_file_name, pdf_url)
        pdf_lst.append(pdf)
        # exist_ok avoids the check-then-create race of isdir() + makedirs().
        os.makedirs(pdf_file_dir, exist_ok=True)
        already_downloaded = (
            os.path.isfile(pdf_file_name)
            and os.path.getsize(pdf_file_name) > 0
            and pdf not in error_pdf_lst
        )
        if already_downloaded:
            print("download %s OK, File Exists" %(pdf))
        else:
            print("start download %s" %(pdf))
            # The worker is joined right away, so pages are fetched one at a time.
            worker = threading.Thread(target=my_thread, args=(pdf_file_name, pdf_url))
            worker.start()
            worker.join()
    # The issue only counts as done when none of its pages is a known failure.
    if any(pdf in error_pdf_lst for pdf in pdf_lst):
        return
    done_papers = epapers_done_dict.setdefault(date, [])
    if paper_name not in done_papers:
        done_papers.append(paper_name)
def my_thread(pdf_file_name, pdf_url):
    """Download ``pdf_url`` into ``pdf_file_name`` and keep ``error_pdf_lst`` current.

    On success the "<file> <url>" entry is removed from ``error_pdf_lst``.
    A 404 response is only reported.  Any other failure (other HTTP status,
    network or file error) adds the entry to ``error_pdf_lst`` so that a later
    download_pdf() call retries the page.

    Args:
        pdf_file_name: Destination path of the PDF file.
        pdf_url: URL of the PDF; non-ASCII characters are percent-encoded.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
    req = request.Request(url=quote(pdf_url, safe=string.printable), headers=headers)
    pdf = "%s %s" %(pdf_file_name, pdf_url)
    try:
        total = 0
        with request.urlopen(req) as f:
            with open(pdf_file_name, "wb") as f2:
                while True:
                    buff = f.read(1024 * 100)
                    if not buff:
                        break
                    f2.write(buff)
                    # Count the bytes actually received; the last chunk is
                    # usually shorter than the requested 100 KB.
                    total += len(buff)
                    sys.stdout.write("download %d KB\r" %(total / 1024))
                    sys.stdout.flush()
        if pdf in error_pdf_lst:
            error_pdf_lst.remove(pdf)
        print("download %s OK [%d KB]" %(pdf, total / 1024))
    except error.HTTPError as e:
        if e.code == 404:
            # Reported only; a 404 page is not queued for retry.
            print("download %s ERROR [404]" %(pdf))
        else:
            # Other HTTP failures used to vanish silently, which let the
            # issue be marked as done although a page was missing.
            if pdf not in error_pdf_lst:
                error_pdf_lst.append(pdf)
            print("download %s ERROR [%d]" %(pdf, e.code))
    except Exception:
        # Thread boundary: record the failure instead of letting it escape.
        if pdf not in error_pdf_lst:
            error_pdf_lst.append(pdf)
        print("download %s ERROR" %(pdf))
def get_rmrb(paper_name, date):
    """Collect the page PDF links of one People's Daily issue and download them.

    The issue index page is fetched, lines between ``start_tag`` and
    ``end_tag`` are scanned for ``/page....pdf>`` links, and the resulting
    URLs are handed to download_pdf().

    Args:
        paper_name: Newspaper name, used in the storage path and log output.
        date: Issue date formatted as "YYYY-MM-DD".
    """
    year, month, day = date.split("-")[:3]
    index_url = "http://paper.people.com.cn/rmrb/html/%s-%s/%s/nbs.D110000renmrb_01.htm" %(year, month, day)
    print("start get %s %s" %(paper_name, index_url))
    # NOTE(review): both markers are empty strings here; they look like HTML
    # snippets that were lost when the script was published -- restore them if
    # the scan has to be limited to one section of the page.
    start_tag = ""
    end_tag = ""
    pdf_urls = []
    start = False
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
    req = request.Request(url=index_url, headers=headers)
    try:
        with request.urlopen(req) as f:
            data = f.read().decode("utf-8", "ignore").split("\r\n")
        for line in data:
            # An empty start marker matches every line, i.e. scan from the top.
            if start_tag in line:
                start = True
            if not start:
                continue
            # An empty end marker means "no end marker".  Testing it with
            # find() matched every line and stopped the scan immediately.
            if end_tag and end_tag in line:
                break
            for match in re.findall(r"/page(.+?)\.pdf>", line):
                pdf_urls.append("http://paper.people.com.cn/rmrb/page%s.pdf" %(match))
    except Exception as exc:
        # Best effort: report the failure and fall through to the ERROR branch.
        print("get %s %s EXCEPTION %s" %(paper_name, index_url, exc))
    file_dir = "%s%s/PDF/%s/%s%s%s/" %(pdf_dir, paper_name, year, year, month, day)
    if pdf_urls:
        print("get %s %s OK" %(paper_name, index_url))
        download_pdf(pdf_urls, file_dir, paper_name, date)
    else:
        print("get %s %s ERROR" %(paper_name, index_url))
def get_zqcn(paper_name, date):
    """Collect the PDF links of one China Enterprise News issue and download them.

    The issue index page is fetched and scanned for ``/attachement....pdf>``
    links; the resulting URLs are handed to download_pdf().

    Args:
        paper_name: Newspaper name, used in the storage path and log output.
        date: Issue date formatted as "YYYY-MM-DD".
    """
    year, month, day = date.split("-")[:3]
    index_url = "http://epaper.zqcn.com.cn/content/%s-%s/%s/node_2.htm" %(year, month, day)
    print("start get %s %s" %(paper_name, index_url))
    # NOTE(review): the marker is an empty string here; it looks like an HTML
    # snippet that was lost when the script was published -- restore it if
    # only the marked line should be scanned.
    start_tag = ""
    pdf_urls = []
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
    req = request.Request(url=index_url, headers=headers)
    try:
        with request.urlopen(req) as f:
            data = f.read().decode("utf-8", "ignore").split("\r\n")
        for line in data:
            if start_tag and start_tag not in line:
                continue
            for match in re.findall(r"/attachement(.+?)\.pdf>", line):
                pdf_urls.append("http://epaper.zqcn.com.cn/attachement%s.pdf" %(match))
            # With a marker, stop after the first marked line (assumed original
            # intent -- TODO confirm).  Without one, keep scanning; the empty
            # marker used to match the first line and end the scan there.
            if start_tag:
                break
    except Exception as exc:
        # Best effort: report the failure and fall through to the ERROR branch.
        print("get %s %s EXCEPTION %s" %(paper_name, index_url, exc))
    file_dir = "%s%s/PDF/%s/%s%s%s/" %(pdf_dir, paper_name, year, year, month, day)
    if pdf_urls:
        print("get %s %s OK" %(paper_name, index_url))
        download_pdf(pdf_urls, file_dir, paper_name, date)
    else:
        print("get %s %s ERROR" %(paper_name, index_url))
def get_cdrb(paper_name, date):
index_url = "http://www.cdrb.com.cn/epaper/cdrbpc/%s%s/%s/l01.html" %(date.split("-")[0], date.split("-")[1], date.split("-")[2])
print("start get %s %s" %(paper_name, index_url))
start_tag = ""
end_tag = ""
urls = []
start = False
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
req = request.Request(url=index_url, headers=headers)
try:
with request.urlopen(req) as f:
data = f.read().decode("utf-8", "ignore").split("\r\n")
for line in data:
if line.find(start_tag) != -1:
start = True
if start:
if line.find(end_tag) != -1:
break
m = re.findall(r"", line)
if m:
for match in m:
url = "http://www.cdrb.com.cn/epaper/cdrbpc/%s%s/%s/%s.html" %(date.split("-")[0], date.split("-")[1], date.split("-")[2], match)
urls.append(url)
except:
pass
start_tag = "MuPDF 库需要自己编译 lib 文件,官网下载源码。
Index of Downloads
https://www.mupdf.com/downloads/index.html
最初下载的是 1.14 版本,编译出了三个库文件,但在使用时出现“无法解析的外部符号”错误,猜测与 resources\fonts\noto 下的字体文件有关;此外,该版本 docs\examples 下的 example.c 存在语法错误,而这些在 1.13 版本中都是正常的。
下载 1.13 版本,VS 2013 打开 platform\win32 下的 mupdf.sln,会提示进行迁移,完成迁移后,按顺序生成三个库,1.13 版本不需要 bin2coff ,只需要把三个项目的代码生成改成“多线程 DLL (/MD)”,依次生成即可,不需要其他的操作。
在生成 libresources.lib 的时候,可以看到在 1.14 版本中报 LNK2001 错误的 obj 文件是成功生成的。
生成三个库文件后,Qt 项目根目录新建 mupdf 文件夹,放入 lib 和 include 文件,pro 文件添加
INCLUDEPATH += $$PWD/mupdf
LIBS += -L$$PWD/mupdf -llibmupdf -llibresources -llibthirdparty
三个库文件必须同时加进来,执行qmake、重新构建,会出现如下告警,是正常的。
使用 include 文件时,只需要包含 pdf.h 即可,也不用加 extern "C" 。
Compiling MuPDF DLL to Render and Edit PDF Documents - CodeProject
https://www.codeproject.com/Articles/1190061/Compiling-MuPDF-DLL-to-Render-and-Edit-PDF-Documen
基于MuPDF和Qt的PDF阅读器的开发 - 简书
https://www.jianshu.com/p/d91b26a9ae05
基于mupdf的PDF阅读器 - 吸喵高手的小窝 - CSDN博客
https://blog.csdn.net/k_wang_/article/details/82870401
qt5编写pdf阅读器,怎么编译poppler啊? 环境是win7x64+Qt5.5.1+VS2013 - 知乎
https://www.zhihu.com/question/38594052
VS 2013 x64 的 lib 和 include 可到链接:https://pan.baidu.com/s/1b76f-n0-yFelDz8HQ-sVng 提取码:4wuk 下载。
mainwindow.cpp
#include "mainwindow.h"
#include "ui_mainwindow.h"
#include "pdfshowwidget.h"
#include
#include
#include
#include
#include
#include
// Builds the main window: fixed 14px font, a calendar that stays disabled
// until a newspaper is chosen, a 200px wide newspaper list, and a deferred
// scan of the newspaper root directory.
MainWindow::MainWindow(QWidget *parent)
    : QMainWindow(parent)
    , ui(new Ui::MainWindow)
{
    ui->setupUi(this);

    QFont windowFont;
    windowFont.setPixelSize(14);
    setFont(windowFont);
    setWindowTitle(QStringLiteral("数字报阅读器"));

    // Root folder whose sub-directories are listed as newspapers.
    mDirPath = QStringLiteral("Z:\\");

    // The calendar is enabled by on_listWidget_clicked() once dates are known.
    ui->calendarWidget->setEnabled(false);
    ui->calendarWidget->setFirstDayOfWeek(Qt::Sunday);
    ui->calendarWidget->setGridVisible(true);
    ui->calendarWidget->setDateEditEnabled(false);
    ui->calendarWidget->setVerticalHeaderFormat(QCalendarWidget::NoVerticalHeader);

    // Equal minimum and maximum pin the list width to 200 pixels.
    ui->listWidget->setMinimumWidth(200);
    ui->listWidget->setMaximumWidth(200);

    // Run the directory scan from the event loop, after construction.
    QTimer::singleShot(0, this, SLOT(readEPapers()));
}
// Releases the generated UI object allocated in the constructor.
MainWindow::~MainWindow()
{
    delete ui;
}
void MainWindow::on_calendarWidget_clicked(const QDate &date)
{
QTextCharFormat format = ui->calendarWidget->dateTextFormat(date);
if (format.background().color() == Qt::green)
{
QString dateStr = date.toString("yyyyMMdd");
QStringList detailPdfLst = getDetailPdf(mDetailMap.value(dateStr));
if (!detailPdfLst.isEmpty())
{
openPdf(detailPdfLst);
}
}
}
void MainWindow::readEPapers()
{
QDir dir(mDirPath);
if (!dir.exists())
{
return;
}
dir.setFilter(QDir::Dirs | QDir::NoSymLinks | QDir::NoDotAndDotDot);
dir.setSorting(QDir::Name);
QFileInfoList fileInfoLst = dir.entryInfoList();
if (fileInfoLst.isEmpty())
{
return;
}
foreach (QFileInfo fileInfo, fileInfoLst)
{
QListWidgetItem *item = new QListWidgetItem;
item->setText(fileInfo.fileName());
item->setData(Qt::UserRole, fileInfo.absoluteFilePath());
ui->listWidget->addItem(item);
}
}
void MainWindow::on_listWidget_clicked(const QModelIndex &index)
{
Q_UNUSED(index);
QString dirStr = ui->listWidget->currentItem()->data(Qt::UserRole).toString();
QDir dir(QStringLiteral("%1%2PDF").arg(dirStr).arg(QDir::separator()));
if (!dir.exists())
{
dir.setPath(QStringLiteral("%1%2JPG").arg(dirStr).arg(QDir::separator()));
if (!dir.exists())
{
return;
}
}
dir.setFilter(QDir::Dirs | QDir::NoSymLinks | QDir::NoDotAndDotDot);
dir.setSorting(QDir::Name);
QFileInfoList fileInfoLst = dir.entryInfoList();
if (fileInfoLst.isEmpty())
{
return;
}
QStringList detailLst;
foreach (QFileInfo fileInfo, fileInfoLst)
{
QStringList lst = getDetailDate(fileInfo.absoluteFilePath());
if (!lst.isEmpty())
{
detailLst.append(lst);
}
}
if (detailLst.isEmpty())
{
return;
}
else
{
QString minDateStr = QDir::toNativeSeparators(detailLst.first()).split(QDir::separator()).last();
QString maxDateStr = QDir::toNativeSeparators(detailLst.last()).split(QDir::separator()).last();
QDate minDate = QDate::fromString(minDateStr, "yyyyMMdd");
QDate maxDate = QDate::fromString(maxDateStr, "yyyyMMdd");
ui->calendarWidget->setEnabled(true);
ui->calendarWidget->setDateRange(minDate, maxDate);
ui->calendarWidget->setSelectedDate(maxDate.addDays(-1));
for (QDate date = minDate; date <= maxDate;)
{
QTextCharFormat format;
format.setBackground(Qt::gray);
ui->calendarWidget->setDateTextFormat(date, format);
date = date.addDays(1);
}
mDetailMap.clear();
foreach (QString detail, detailLst)
{
QString dateStr = QDir::toNativeSeparators(detail).split(QDir::separator()).last();
QDate date = QDate::fromString(dateStr, "yyyyMMdd");
QTextCharFormat format;
format.setBackground(Qt::green);
ui->calendarWidget->setDateTextFormat(date, format);
mDetailMap.insert(dateStr, detail);
}
}
}
// Returns the absolute paths of the sub-directories of dirStr, sorted by
// name. Symlinks and "."/".." are excluded. The result is empty when dirStr
// does not exist or has no sub-directories.
QStringList MainWindow::getDetailDate(const QString &dirStr)
{
    QStringList subDirPaths;

    QDir parentDir(dirStr);
    if (parentDir.exists())
    {
        parentDir.setSorting(QDir::Name);
        parentDir.setFilter(QDir::Dirs | QDir::NoSymLinks | QDir::NoDotAndDotDot);

        const QFileInfoList entries = parentDir.entryInfoList();
        for (const QFileInfo &entry : entries)
        {
            subDirPaths.append(entry.absoluteFilePath());
        }
    }
    return subDirPaths;
}
// Lists the sub-directories of an issue directory; this is the same
// directory listing that getDetailDate() performs.
QStringList MainWindow::getDetailPdf(const QString &dirStr)
{
    const QStringList pageDirs = getDetailDate(dirStr);
    return pageDirs;
}
// Opens a maximized PdfShowWidget with every file found in the given page
// directories.
//
// The window title is built from the path of the first file: relative to
// mDirPath, component 0 is taken as the newspaper name and component 3 as
// the issue date (yyyyMMdd). Nothing is opened when no file is found or the
// path is too short to contain those components.
void MainWindow::openPdf(QStringList pdfLst)
{
    // Gather the files of all page directories, each directory sorted by name.
    QStringList fileLst;
    foreach (const QString &pdf, pdfLst)
    {
        QDir dir(pdf);
        if (!dir.exists())
        {
            continue;
        }
        dir.setFilter(QDir::Files);
        dir.setSorting(QDir::Name);
        const QFileInfoList fileInfoLst = dir.entryInfoList();
        foreach (const QFileInfo &fileInfo, fileInfoLst)
        {
            fileLst.append(fileInfo.absoluteFilePath());
        }
    }
    if (fileLst.isEmpty())
    {
        // first() on an empty list is invalid, and there is nothing to show.
        return;
    }

    // Number of non-empty path components in mDirPath; the file path is
    // indexed relative to this offset.
    int count = 0;
    foreach (const QString &tmp, mDirPath.split("\\"))
    {
        if (!tmp.isEmpty())
        {
            count++;
        }
    }

    const QStringList lst = QDir::toNativeSeparators(fileLst.first()).split(QDir::separator());
    if (lst.size() <= 3 + count)
    {
        // Unexpected directory layout: avoid indexing past the end.
        return;
    }
    const QString paperName = lst[0 + count];
    const QString paperDate = lst[3 + count];
    const QDate issueDate = QDate::fromString(paperDate, "yyyyMMdd");

    QString week;
    switch (issueDate.dayOfWeek())
    {
    case Qt::Monday:
        week = QStringLiteral("星期一");
        break;
    case Qt::Tuesday:
        week = QStringLiteral("星期二");
        break;
    case Qt::Wednesday:
        week = QStringLiteral("星期三");
        break;
    case Qt::Thursday:
        week = QStringLiteral("星期四");
        break;
    case Qt::Friday:
        week = QStringLiteral("星期五");
        break;
    case Qt::Saturday:
        week = QStringLiteral("星期六");
        break;
    case Qt::Sunday:
        week = QStringLiteral("星期日");
        break;
    default:
        // Any other value leaves the weekday text empty.
        break;
    }

    const QString title = QStringLiteral("%1 %2 %3 今日%4版").arg(paperName).arg(issueDate.toString(QStringLiteral("yyyy年M月d日"))).arg(week).arg(fileLst.size());
    // The widget sets Qt::WA_DeleteOnClose itself, so it is not leaked.
    PdfShowWidget *widget = new PdfShowWidget(title, fileLst);
    widget->showMaximized();
}
pdfshowwidget.cpp
#include "pdfshowwidget.h"
#include
#include
#include
#include
#include
#include
#include
// Creates a self-deleting viewer window for the given page files. The pages
// are rendered later by showpdf(), which is queued on the event loop.
PdfShowWidget::PdfShowWidget(const QString &title, const QStringList &fileLst)
{
    QFont viewerFont;
    viewerFont.setPixelSize(14);
    setFont(viewerFont);
    setWindowTitle(title);
    // Nobody keeps a pointer to this window, so it deletes itself on close.
    setAttribute(Qt::WA_DeleteOnClose);

    mPdfFileLst = fileLst;

    // The scroll area is the only child and receives the rendered pages.
    mScrollArea = new QScrollArea(this);
    mScrollArea->setWidgetResizable(true);
    mScrollArea->setFrameShape(QFrame::NoFrame);

    QGridLayout *outerLayout = new QGridLayout(this);
    outerLayout->addWidget(mScrollArea);
    setLayout(outerLayout);

    QTimer::singleShot(0, this, SLOT(showpdf()));
}
// Nothing to release by hand: every child created by this class is
// constructed with a Qt parent.
PdfShowWidget::~PdfShowWidget()
{
}
// Renders every file in mPdfFileLst into a vertical column of QLabel pages
// and installs that column in the scroll area. A modal progress dialog is
// shown while the files are processed. Files ending in jpg/png are loaded as
// images, files ending in .pdf are rasterized page by page with MuPDF, and
// any other file is skipped without advancing the progress value.
void PdfShowWidget::showpdf()
{
QProgressDialog progress(this);
progress.setFont(this->font());
progress.setWindowTitle(QStringLiteral("数字报阅读器"));
progress.setWindowFlags(windowFlags() & (~Qt::WindowContextHelpButtonHint) & (~Qt::WindowMinMaxButtonsHint) & (~Qt::WindowCloseButtonHint));
progress.setLabelText(QStringLiteral("正在处理,共%1版").arg(mPdfFileLst.size()));
// setValue() below receives 0 .. size-1, hence the maximum of size-1.
progress.setRange(0, mPdfFileLst.size() - 1);
progress.setModal(true);
progress.setCancelButtonText(QStringLiteral("取消"));
progress.setMinimumDuration(0);
// NOTE: progressCanceled() is an empty slot, so cancelling does not stop the loop below.
connect(&progress, SIGNAL(canceled()), this, SLOT(progressCanceled()));
int count = 0;
// Container that ends up inside the scroll area; one label per rendered page.
QWidget *widget = new QWidget(this);
QVBoxLayout *layout = new QVBoxLayout(widget);
foreach (QString pdf, mPdfFileLst)
{
// Image file: load it directly and scale it like a PDF page.
if ((!pdf.endsWith(".pdf")) && (pdf.endsWith("jpg") || pdf.endsWith("png")))
{
progress.setValue(count++);
// Let the dialog repaint; user input is excluded from this event pass.
qApp->processEvents(QEventLoop::ExcludeUserInputEvents);
QImage image(pdf);
QLabel *label = new QLabel(this);
QPixmap pixmap = QPixmap::fromImage(image);
QPixmap fitPixmap = pixmap.scaled(this->width() - 67, this->height(), Qt::KeepAspectRatioByExpanding, Qt::SmoothTransformation);
label->setPixmap(fitPixmap);
layout->addWidget(label);
}
// PDF file: rasterize every page with MuPDF.
else if (pdf.endsWith(".pdf"))
{
progress.setValue(count++);
qApp->processEvents(QEventLoop::ExcludeUserInputEvents);
int page_count;
fz_context *ctx;
fz_document *doc;
fz_pixmap *pix;
fz_matrix ctm;
// A fresh MuPDF context is created for each file and dropped on every exit path below.
ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED);
if (!ctx)
{
continue;
}
// Each step below runs in its own fz_try; on failure whatever was acquired
// so far is dropped and the file is skipped.
fz_try(ctx)
fz_register_document_handlers(ctx);
fz_catch(ctx)
{
fz_drop_context(ctx);
continue;
}
fz_try(ctx)
doc = fz_open_document(ctx, pdf.toStdString().c_str());
fz_catch(ctx)
{
fz_drop_context(ctx);
continue;
}
fz_try(ctx)
page_count = fz_count_pages(ctx, doc);
fz_catch(ctx)
{
fz_drop_document(ctx, doc);
fz_drop_context(ctx);
continue;
}
// Scale factor 1 and rotation 0: pages are rendered with an untransformed matrix.
fz_scale(&ctm, 1, 1);
fz_pre_rotate(&ctm, 0);
for (int i = 0; i < page_count; ++i)
{
// A page that fails to render is skipped; the remaining pages are still tried.
// NOTE(review): the trailing 0 is presumably MuPDF's "alpha" flag (no alpha
// channel), which is what Format_RGB888 below relies on -- confirm.
fz_try(ctx)
pix = fz_new_pixmap_from_page_number(ctx, doc, i, &ctm, fz_device_rgb(ctx), 0);
fz_catch(ctx)
{
continue;
}
unsigned char *samples = pix->samples;
int width = fz_pixmap_width(ctx, pix);
int height = fz_pixmap_height(ctx, pix);
// The QImage is built on top of the MuPDF sample buffer; the QPixmap is
// created from it before that buffer is released by fz_drop_pixmap() below.
QImage image(samples, width, height, pix->stride, QImage::Format_RGB888);
QLabel *label = new QLabel(this);
QPixmap pixmap = QPixmap::fromImage(image);
QPixmap fitPixmap = pixmap.scaled(this->width() - 67, this->height(), Qt::KeepAspectRatioByExpanding, Qt::SmoothTransformation);
label->setPixmap(fitPixmap);
layout->addWidget(label);
fz_drop_pixmap(ctx, pix);
}
fz_drop_document(ctx, doc);
fz_drop_context(ctx);
}
}
widget->setLayout(layout);
mScrollArea->setWidget(widget);
}
// Slot connected to the progress dialog's canceled() signal in showpdf().
// Intentionally empty: it performs no action.
void PdfShowWidget::progressCanceled()
{
}
", line)
if m:
for match in m:
pdf_url = "%s.pdf" %(match)
pdf_urls.append(pdf_url)
break
except:
pass
file_dir = "%s%s/PDF/%s/%s%s%s/" %(pdf_dir, paper_name, date.split("-")[0], date.split("-")[0], date.split("-")[1], date.split("-")[2])
if pdf_urls:
print("get %s %s OK" %(paper_name, index_url))
download_pdf(pdf_urls, file_dir, paper_name, date)
else:
print("get %s %s ERROR" %(paper_name, index_url))
def check_status():
    """Append the current download state to today's log file.

    Writes a time-stamped dump of ``epapers_done_dict`` (one line per date)
    followed by a time-stamped dump of ``error_pdf_lst`` (one line per failed
    download) to ``/home/pi/python_svn/logs_<YYYY-MM-DD>.txt``.
    """
    date = time.strftime("%Y-%m-%d", time.localtime())
    with open("/home/pi/python_svn/logs_%s.txt" %(date), "a", encoding="utf-8") as f:
        date_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        f.write("[%s] epapers_done_dict\n" %(date_time))
        for done_date, paper_names in epapers_done_dict.items():
            f.write("%s %s\n" %(done_date, ", ".join(paper_names)))
        f.write("[%s] error_pdf_lst\n" %(date_time))
        # Named "failed" so the imported urllib "error" module is not shadowed.
        for failed in error_pdf_lst:
            f.write("%s\n" %(failed))
def get_days_in_month(year, month):
    """Return the number of days of ``month`` in ``year`` (Gregorian calendar).

    Args:
        year: Calendar year, e.g. 2019.
        month: Month number, 1 (January) to 12 (December).

    Returns:
        28, 29, 30 or 31.

    Raises:
        ValueError: If ``month`` is outside 1..12.  Such values used to be
            treated silently as February.
    """
    # Imported locally because the file's import block does not provide it.
    import calendar

    # monthrange() returns (weekday of the first day, number of days).
    return calendar.monthrange(year, month)[1]
def get_epapers_month(year, month):
    """Fetch every registered newspaper for each day of the given month.

    Papers already recorded in ``epapers_done_dict`` for a day are skipped.
    After the whole month has been processed the outstanding failures (or a
    success message) are printed and check_status() writes the log.

    Args:
        year: Calendar year, e.g. 2019.
        month: Month number, 1 to 12.
    """
    for day in range(1, get_days_in_month(year, month) + 1):
        date = "%s-%02d-%02d" %(year, month, day)
        for paper_name, fetch in epapers.items():
            if paper_name in epapers_done_dict.get(date, []):
                continue
            # Joined right away, so the papers are fetched one after another.
            worker = threading.Thread(target=fetch, args=(paper_name, date))
            worker.start()
            worker.join()
    if error_pdf_lst:
        # Named "failed" so the imported urllib "error" module is not shadowed.
        for failed in error_pdf_lst:
            print("---ERROR: %s" %(failed))
    else:
        print("---All Download OK")
    check_status()
def get_epapers_today():
    """Fetch every registered newspaper for today's local date.

    This is get_epapers_date() applied to today's "YYYY-MM-DD" string; the
    body used to be a verbatim copy of that function.
    """
    get_epapers_date(time.strftime("%Y-%m-%d", time.localtime()))
def get_epapers_date(date):
    """Fetch every registered newspaper for one date.

    Papers already recorded in ``epapers_done_dict[date]`` are skipped.
    Afterwards the outstanding failures (or a success message) are printed
    and check_status() writes the log.

    Args:
        date: Issue date formatted as "YYYY-MM-DD".
    """
    for paper_name, fetch in epapers.items():
        if paper_name in epapers_done_dict.get(date, []):
            continue
        # Joined right away, so the papers are fetched one after another.
        worker = threading.Thread(target=fetch, args=(paper_name, date))
        worker.start()
        worker.join()
    if error_pdf_lst:
        # Named "failed" so the imported urllib "error" module is not shadowed.
        for failed in error_pdf_lst:
            print("---ERROR: %s" %(failed))
    else:
        print("---All Download OK")
    check_status()
# Registry of supported newspapers: display name -> fetch function.
# Each fetch function is called as fetch(paper_name, date).
epapers = {
    "人民日报": get_rmrb,
    "中国企业报": get_zqcn,
    "成都日报": get_cdrb,
}
if __name__ == "__main__":
    # True: run forever and fetch on a daily schedule.
    # False: fetch one fixed date (or month) and exit.
    background = True
    #background = False
    if background:
        time_lst = ["06:00", "10:00", "14:00", "18:00"]
        # A status log is written five minutes before every fetch.
        delta = datetime.timedelta(minutes=-5)
        for get_time in time_lst:
            dt = datetime.datetime.strptime(get_time, "%H:%M") + delta
            check_time = dt.strftime("%H:%M")
            schedule.every().day.at(check_time).do(check_status)
            schedule.every().day.at(get_time).do(get_epapers_today)
        # Poll the scheduler once per second.
        while True:
            schedule.run_pending()
            time.sleep(1)
    else:
        #get_epapers_month(2019, 4)
        get_epapers_date("2019-04-30")
你可能感兴趣的:(Qt,Python)