QWebKit-QWebElement

实现读取页面功能,用到QWebElement

头文件:

#ifndef ACCESSWEATHER_H
#define ACCESSWEATHER_H

#include <QObject>

class QNetworkReply;
class QNetworkRequest;
class QNetworkAccessManager;

/**
 * Fetches the weather page for a city over HTTP and feeds the returned HTML
 * to the QtWebKit based parser.
 *
 * getWeather() is synchronous from the caller's point of view: it spins a
 * local event loop until sgnFinished() is emitted.
 */
class AccessWeather : public QObject
{
	Q_OBJECT

signals:
	/// Emitted by slotFinished() when a request has ended, either with the
	/// final response body stored or with a network error.
	void sgnFinished();
public:
	/// @param parent optional QObject parent.
	/// Marked explicit so a QObject* is never silently converted into an
	/// AccessWeather.
	explicit AccessWeather(QObject *parent =0);
	~AccessWeather();

	/// Requests the weather page for @p city, blocks until the request has
	/// finished and returns whatever parseWeather() yields for the body.
	QString getWeather(const QString& city);

private slots:
	/// Connected to QNetworkAccessManager::finished(QNetworkReply*).
	void slotFinished ( QNetworkReply * );
private:
	/// Fills @p req with the search URL for @p city and the HTTP headers.
	void populateRequest(const QString& city, QNetworkRequest* req);
	/// Parses the downloaded HTML document @p htmlData.
	QString parseWeather(const QString& htmlData);
	/// Writes @p strData to the file named @p filename.
	void saveToFile(const QString& strData, const QString& filename);
private:
	/// Network manager used for the weather page requests.
	QNetworkAccessManager *m_accessWeather;
	/// Body of the most recent successful response; cleared by getWeather().
	QString m_curWeather;
};

#endif // ACCESSWEATHER_H


源文件:

//made by davidsu33
//2014-10-12 22:54

#include "accessweather.h"

#include <QDebug>
#include <QEventLoop>
#include <QNetworkReply>
#include <QNetworkRequest>
#include <QNetworkAccessManager>
#include <QWebElement>
#include <QWebFrame>
#include <QWebPage>
#include <QFile>
#include <QPixmap>

#include <boost/config.hpp>
#include <boost/property_tree/ptree.hpp>
#include <boost/property_tree/xml_parser.hpp>
#include <boost/typeof/typeof.hpp>
#include <boost/foreach.hpp>

#include <sstream>

namespace
{
	// Activity index: one entry of the daily "index" list
	// (used below for morning exercise, car washing and tourism).
	struct ActiveIndex
	{
		// Degree / rating text of the index
		QString m_degree;

		// Advice text that accompanies the rating
		QString m_advices;
	};

	// Weather tips shown alongside today's forecast
	struct WeatherTooltip
	{
		// PM2.5 text
		QString m_pm25;

		// Morning exercise index
		ActiveIndex m_MorningExerciseIndex;

		// Car washing index
		ActiveIndex m_WashingCarIndex;

		// Tourism index
		ActiveIndex m_tourismIndex;

	};

	// Condensed forecast for one upcoming day; filled by appendNextWeather().
	struct SimpleWeatherInfo
	{
		// Text of the first <h3> of the day's block
		QString m_desc;
		// Text of the 2nd <li> of the day's block
		QString m_temperature;
		// Text of the 4th <li> of the day's block
		QString m_windForce;
		// Text of the 3rd <li> of the day's block
		QString m_weather;
		// Image downloaded from the first <img> of the day's block
		QPixmap m_temperatureImg;
	};

	//天气信息
	struct WeatherInfo
	{
		~WeatherInfo()
		{
			qDeleteAll(m_weekInfo);
		}

		QString m_title;
		QString m_time;
		QPixmap m_tqImg;
		QString m_temperature;
		QString m_cdRed;
		QString m_windForce;
		QString m_city;
		WeatherTooltip m_tooltips;
		QVector<SimpleWeatherInfo*> m_weekInfo;
	};

	//下载图片(同步)
	void downloadImg(const QString& imgURL, QPixmap &pix)
	{
		QScopedPointer<QNetworkAccessManager> accessNetIcon(new QNetworkAccessManager);
		QNetworkReply * reply = accessNetIcon->get(QNetworkRequest(QUrl(imgURL)));
		QEventLoop loop;
		QObject::connect(reply, SIGNAL(finished ()), &loop, SLOT(quit()));
		loop.exec();

		pix.loadFromData(reply->readAll());
	}

	//提取今日天气信息
	void pickTodayWeather(const QWebElement &e, WeatherInfo& wi)
	{
		wi.m_title = e.findFirst("h3").toPlainText();
		QWebElement elemTime = e.findFirst("li.time");
		wi.m_time = elemTime.toPlainText();
		QWebElement img = e.findFirst("img");
		QString imgURL = img.attribute("src");

		//下载图片
		downloadImg(imgURL, wi.m_tqImg);

		wi.m_temperature = e.findFirst("span").toPlainText();
		wi.m_cdRed = e.findFirst("li.cDRed").toPlainText();
		
		BOOST_AUTO(liCollect, e.findAll("li"));
		Q_FOREACH(const QWebElement&c, liCollect)
		{
			if(c.hasAttribute("style") && c.attribute("style")=="height:18px;overflow:hidden")
			{
				wi.m_windForce = c.toPlainText();
				break;
			}
		}

		wi.m_city = e.findFirst("input").attribute("value");
	}

	void pickTodayTooltips(const QWebElement &e, WeatherTooltip& wt)
	{
		BOOST_AUTO(liCollect, e.findAll("li"));
		Q_FOREACH(const QWebElement&c, liCollect)
		{
			if(!c.hasAttribute("class"))
			{
				wt.m_pm25 = c.toPlainText();
			}

			if (c.toPlainText().contains("晨练指数"))
			{
				wt.m_MorningExerciseIndex.m_advices= c.findFirst("div").toPlainText();
				wt.m_MorningExerciseIndex.m_degree = c.findFirst("span").toPlainText();
			}

			if (c.toPlainText().contains("洗车指数"))
			{
				wt.m_WashingCarIndex.m_advices= c.findFirst("div").toPlainText();
				wt.m_WashingCarIndex.m_degree = c.findFirst("span").toPlainText();
			}

			if (c.toPlainText().contains("旅游指数"))
			{
				wt.m_tourismIndex.m_advices= c.findFirst("div").toPlainText();
				wt.m_tourismIndex.m_degree = c.findFirst("span").toPlainText();
			}
		}
	}

	void appendNextWeather(const QWebElement &e, QVector<SimpleWeatherInfo*>& wi)
	{
		SimpleWeatherInfo *swi = NULL;
		BOOST_AUTO(liCollect, e.findAll("div"));
		Q_FOREACH(const QWebElement&c, liCollect)
		{
			swi = new SimpleWeatherInfo;
			swi->m_desc = c.findFirst("h3").toPlainText();

			QStringList strList;
			BOOST_AUTO(divCollect, c.findAll("li"));
			Q_FOREACH(const QWebElement&cc, divCollect)
			{
				strList.append(cc.toPlainText());
			}
			
			downloadImg(c.findFirst("img").attribute("src"), swi->m_temperatureImg);

			if(strList.size() == 4)
			{
				swi->m_temperature = strList[1];
				swi->m_weather = strList[2];
				swi->m_windForce = strList[3];
			}
			
			wi.append(swi);
		}
	}
}

// Creates the network manager as a child of this object and routes every
// finished reply to slotFinished().
AccessWeather::AccessWeather(QObject *parent)
	: QObject(parent)
	, m_accessWeather(new QNetworkAccessManager(this))
{
	connect(m_accessWeather, SIGNAL(finished ( QNetworkReply * )),
		this, SLOT(slotFinished ( QNetworkReply * )));
}

AccessWeather::~AccessWeather()
{
	// Intentionally empty: m_accessWeather is created with this object as its QObject parent.
}

// Requests the weather page for the given city, blocks until slotFinished()
// emits sgnFinished(), then parses whatever body was stored in m_curWeather.
QString AccessWeather::getWeather( const QString& city )
{
	// Forget the body of any previous request.
	m_curWeather.clear();

	// Prepare the wait loop first, then send the request.
	QEventLoop waitLoop;
	connect(this, SIGNAL(sgnFinished ()), &waitLoop, SLOT(quit()));

	QNetworkRequest request;
	populateRequest(city, &request);
	m_accessWeather->get(request);

	// Block here until the request (including redirects) has ended.
	waitLoop.exec();

	return parseWeather(m_curWeather);
}

// Handles a finished reply of m_accessWeather.
//  - network error: emit sgnFinished() and leave m_curWeather untouched
//  - HTTP redirect: issue a new GET for the redirect target and keep waiting
//  - otherwise:     store the body in m_curWeather and emit sgnFinished()
void AccessWeather::slotFinished( QNetworkReply * reply)
{
	// The caller of QNetworkAccessManager::get() is responsible for releasing
	// the reply. It must not be deleted directly inside this slot, so the
	// deletion is deferred until control returns to the event loop.
	reply->deleteLater();

	if (reply->error() != QNetworkReply::NoError)
	{
		emit sgnFinished();
		return;
	}

	const QUrl redirect = reply->attribute(QNetworkRequest::RedirectionTargetAttribute).toUrl();
	if (!redirect.isEmpty())
	{
		// Follow the redirect. The target may be relative, so resolve it
		// against the URL of the reply that announced it.
		m_accessWeather->get(QNetworkRequest(reply->url().resolved(redirect)));
		return;
	}

	const QByteArray body = reply->readAll();
	m_curWeather = body;
	qDebug()<<body;
	emit sgnFinished();
}

void AccessWeather::populateRequest( const QString& city, QNetworkRequest* req )
{
	QString percentEncode = city.toLocal8Bit().toPercentEncoding();
	QString strReqestLine = QString("http://www.tianqi.com/index.php?c=tianqi&a=search&city=%1").arg(percentEncode);

	//如果不使用这个,则会导致url.encodedQuery()重新的错误解析而导致错误(编码格式问题)
	QUrl url = QUrl::fromEncoded(strReqestLine.toAscii());

	QString rurl = url.encodedPath();
	QString rurl2 = url.encodedQuery();

	req->setUrl(url);
	
	//req->setRawHeader("Host", "http://www.tianqi.com/");//不需要设置Host,默认会有,否则会导致错误的信息提交而获取不到正确信息

	//下列信息可有可无,如果没有则会有默认的值
	req->setRawHeader("Connection", "keep-alive");
	req->setRawHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
	req->setRawHeader("Referer", "http://www.tianqi.com/");
	req->setRawHeader("Accept-Encoding", "gzip,deflate,sdch");
	req->setRawHeader("Accept-Language", "zh-CN,zh;q=0.8");
	req->setRawHeader("Cookie", "cityPy=baoding; AJSTAT_ok_pages=1; AJSTAT_ok_times=1; bdshare_firstime=1412864742830");
	req->setRawHeader("Content-Type", "text/html; charset=GBK");

	//GET /index.php?c=tianqi&a=search&city=%E7%9F%B3%E5%AE%B6%E5%BA%84 HTTP/1.1\r\n //直接使用中文'石家庄'而导致的错误
	//不使用QUrl::fromEncoded而导致的错误,会将%重新解析为%25
	//GET /index.php?c=tianqi&a=search&city=%25%CA%25%AF%25%BC%25%D2%25%D7%25%AF HTTP/1.1\r\n
	//正确的提交结果
	//GET /index.php?c=tianqi&a=search&city=%CA%AF%BC%D2%D7%AF HTTP/1.1\r\n
}

// Loads htmlData into an off-screen QWebPage and walks its <div> elements to
// collect today's weather, the tooltip indexes and the upcoming days into a
// local WeatherInfo.
//
// NOTE(review): the collected WeatherInfo is not yet turned into a result;
// the function always returns a null QString, exactly as before.
QString AccessWeather::parseWeather( const QString& htmlData )
{
	// Nothing was downloaded (e.g. the request failed): there is nothing to
	// load or parse.
	if (htmlData.isEmpty())
	{
		return QString();
	}

	QWebPage page;
	QWebFrame *frame = page.mainFrame();

	// Connect BEFORE calling setHtml() and use a queued connection.
	// setHtml() may finish loading synchronously; a quit() delivered before
	// loop.exec() starts is lost and exec() would then block forever. With a
	// queued connection the quit() is posted and handled inside exec().
	QEventLoop loop;
	connect(frame, SIGNAL(loadFinished(bool)), &loop, SLOT(quit()), Qt::QueuedConnection);
	frame->setHtml(htmlData);
	loop.exec();

	// findAll() searches recursively using a CSS selector.
	const QWebElementCollection divs = frame->documentElement().findAll("div");

	WeatherInfo wi;
	Q_FOREACH(const QWebElement &e, divs)
	{
		if(e.hasClass("tqshow"))
		{
			pickTodayWeather(e, wi);
		}

		if(e.hasClass("today_data_r01"))
		{
			pickTodayTooltips(e, wi.m_tooltips);
		}

		if(e.hasClass("everytqshow"))
		{
			appendNextWeather(e, wi.m_weekInfo);
		}
	}

	// The page is GBK encoded (the system default on the author's machine);
	// setCodecForTr can be used to adjust this if it does not fit.
	//
	// The response is HTML, not well-formed XML, so it cannot be parsed with
	// boost::property_tree::read_xml; QtWebKit is used for parsing instead.

	return QString();
}

void AccessWeather::saveToFile( const QString& strData, const QString& filename )
{
	QFile file(filename);
	file.open(QFile::WriteOnly);
	file.write(strData.toLocal8Bit());
	file.close();
}


你可能感兴趣的:(QWebElement)