1
0
mirror of https://gitee.com/drabel/LibQQt.git synced 2025-01-04 10:18:44 +08:00
LibQQt/demo/QtSdkManager/qqtqtiowebpageparser.cpp

212 lines
6.3 KiB
C++
Raw Normal View History

2017-11-23 19:12:48 +08:00
#include "qqtqtiowebpageparser.h"
#include <GumboQueryDocument.h>
#include <GumboQuerySelection.h>
#include <GumboQueryNode.h>
#include <QStringList>
#include <QTextCodec>
/**
 * @brief QQtQtIOWebPageParser::QQtQtIOWebPageParser
 * Sets the base URL of the Qt release listing, routes the replyFinished
 * signal to this object's replyFinished slot and prepares (but does not
 * start) a repeating 2000 ms timer that drives detecteTimeout().
 * @param parent Forwarded to the QQtWebAccessManager base constructor.
 */
QQtQtIOWebPageParser::QQtQtIOWebPageParser ( QObject* parent ) : QQtWebAccessManager ( parent )
{
    m_baseUrl = "http://download.qt.io/official_releases/qt/";

    /* Every finished request is handled by replyFinished(). */
    connect ( this, SIGNAL ( replyFinished ( QQtWebAccessSession* ) ),
              this, SLOT ( replyFinished ( QQtWebAccessSession* ) ) );

    /* Repeating poll timer; it is started by startNewParse() for a root request. */
    QTimer* pollTimer = new QTimer ( this );
    pollTimer->setInterval ( 2000 );
    pollTimer->setSingleShot ( false );
    m_timer = pollTimer;
    connect ( m_timer, SIGNAL ( timeout() ), this, SLOT ( detecteTimeout() ) );

    /* Reference time used by detecteTimeout() to measure how long a fetch took. */
    m_time = QTime::currentTime();
}
2017-11-24 13:27:45 +08:00
/**
 * @brief Requests the directory listing at m_baseUrl + url1 + url2.
 * When both parts are empty this is treated as the start of a new crawl:
 * the collected sdkGroup list is cleared, the reference time is reset and
 * the polling timer is started.
 * @param url1 First-level directory part (may be empty).
 * @param url2 Second-level directory part (may be empty).
 */
void QQtQtIOWebPageParser::startNewParse ( QString url1, QString url2 )
{
    const bool isRootRequest = url1.isEmpty() && url2.isEmpty();
    if ( isRootRequest )
    {
        sdkGroup.clear();
        m_time = QTime::currentTime();
        m_timer->start();
    }

    const QString requestUrl = QString ( "%1%2%3" ).arg ( m_baseUrl ).arg ( url1 ).arg ( url2 );

    /* The session remembers which directory level it was issued for, so that
     * replyFinished() can tell how to interpret the listing. */
    QQtQtIOWebUrlSession* urlSession = new QQtQtIOWebUrlSession ( this );
    urlSession->url1 = url1;
    urlSession->url2 = url2;
    urlSession->setWebAccessUrl ( requestUrl );

    const QString sessionName = QUuid::createUuid().toString();
    urlSession->setWebAccessSessionName ( sessionName );

    getWebAccessSessionManager()->addWebAccessSession ( urlSession );
    sendGetRequest ( urlSession );
}
/**
 * @brief Handles one finished directory-listing request.
 *
 * The reply body is decoded, the text of the first <table> is split into
 * cells (one per line) and every fourth cell is treated as the entry name:
 *  - session url1 empty: each name ending in '/' is fetched as url1;
 *  - session url2 empty: each name ending in '/' is fetched as url1 + name;
 *  - otherwise: each name containing '.' or '-' is recorded as a TSdkNode
 *    (name, time, size, detail) and the resulting group is appended to
 *    sdkGroup.
 *
 * Replies with an HTTP status other than 200 are logged and ignored.
 *
 * @param s0 The finished session; expected to be the QQtQtIOWebUrlSession
 *           created by startNewParse().
 */
void QQtQtIOWebPageParser::replyFinished ( QQtWebAccessSession* s0 )
{
    /* startNewParse() only ever queues QQtQtIOWebUrlSession objects. */
    QQtQtIOWebUrlSession* session = static_cast<QQtQtIOWebUrlSession*> ( s0 );
    if ( !session )
        return;

    QNetworkReply* reply = session->getWebAccessReply();
    if ( !reply )
        return;

    /* Check the HTTP status code: only 200 is processed. */
    pline() << reply->url();
    const int nHttpCode = reply->attribute ( QNetworkRequest::HttpStatusCodeAttribute ).toInt();
    if ( nHttpCode != 200 )
    {
        pline() << "fail" << nHttpCode;
        return;
    }
    pline() << "success";

    /* Read the content. */
    const QByteArray resultContent = reply->readAll();
    QTextCodec* detectedCodec = QTextCodec::codecForHtml ( resultContent );
    if ( detectedCodec )
        pline() << detectedCodec->name();

    /* Decode the page.
     * NOTE(review): the codec is hard-coded to GBK although the detected
     * codec is available above - confirm which one is intended.
     * codecForName() returns null when the codec is not built in, so fall
     * back to the detected codec, then to plain UTF-8. */
    QTextCodec* pCodec = QTextCodec::codecForName ( "GBK" );
    if ( !pCodec )
        pCodec = detectedCodec;
    QString html = pCodec
                   ? pCodec->toUnicode ( resultContent )
                   : QString::fromUtf8 ( resultContent.constData(), resultContent.size() );
    pline() << QTextCodec::codecForLocale()->name();

    /* The page has no line breaks between cells; insert one before each
     * closing cell/anchor tag so the table text can be split per cell. */
    html.replace ( "</td>", "\n</td>" );
    html.replace ( "</a>", "\n</a>" );
    html.replace ( "</th>", "\n</th>" );

    /* The Gumbo parser works on UTF-8 and the node text is read back as UTF-8
     * via fromStdString(), so feed it UTF-8 rather than the local 8-bit
     * encoding. */
    GumboQueryDocument doc;
    doc.parse ( html.toUtf8().constData() );
    GumboQuerySelection s = doc.find ( "table" );
    pline() << "node num:" << s.nodeNum();

    /* Without a <table> there is nothing to read; keep the cell list empty
     * instead of dereferencing a non-existent node. */
    QString items;
    if ( s.nodeNum() > 0 )
    {
        GumboQueryNode pNode = s.nodeAt ( 0 );
        items = QString::fromStdString ( pNode.text() );
    }
    const QStringList itemList = items.split ( "\n", QString::SkipEmptyParts );
    const int itemCount = itemList.count();

    const QString url1 = session->url1;
    const QString url2 = session->url2;

    TSdkGroup group;
    /* Rows are assumed to be four cells wide; the first cell is the name. */
    for ( int i = 0; i < itemCount; i += 4 )
    {
        const QString item = itemList.at ( i );

        if ( url1 == "" )
        {
            /* Root listing: descend into every sub-directory (e.g. "5.9/"). */
            if ( item.endsWith ( '/' ) )
                startNewParse ( item );
            continue;
        }

        if ( url2 == "" )
        {
            /* Version listing: descend into every sub-directory (e.g. "5.9.1/"). */
            if ( item.endsWith ( '/' ) )
                startNewParse ( url1, item );
            continue;
        }

        /* Last level: only names containing '.' or '-' are taken as SDK files. */
        if ( !item.contains ( '.' ) && !item.contains ( '-' ) )
            continue;

        /* The last row may be shorter than four cells; missing cells stay empty. */
        QString nameText = item;
        QString timeText = ( i + 1 < itemCount ) ? itemList.at ( i + 1 ) : QString();
        QString sizeText = ( i + 2 < itemCount ) ? itemList.at ( i + 2 ) : QString();
        QString detailText = ( i + 3 < itemCount ) ? itemList.at ( i + 3 ) : QString();

        TSdkNode node;
        node.name = nameText;
        node.time = timeText;
        node.size = sizeText;
        node.detail = detailText;
        pline() << node.name << node.time << node.size << node.detail;
        group.list.push_back ( node );

        /* QQtDict code.
         * NOTE(review): keys 0..3 are rewritten for every file, so only the
         * last file of the directory remains in m_sdkGroup - confirm intent. */
        m_sdkGroup[url1][url2][0] = nameText;
        m_sdkGroup[url1][url2][1] = timeText;
        m_sdkGroup[url1][url2][2] = sizeText;
        m_sdkGroup[url1][url2][3] = detailText;
    }

    group.url1 = url1;
    group.url2 = url2;
    /* Only last-level listings produce a group. */
    if ( url1 != "" && url2 != "" )
    {
        sdkGroup.push_back ( group );
        pline() << sdkGroup.size();
    }
}
void QQtQtIOWebPageParser::detecteTimeout()
{
if ( this->getWebAccessSessionManager()->getSessionCount() == 0 )
{
QTime curTime = QTime::currentTime();
if ( qAbs<int> ( curTime.secsTo ( m_time ) ) > 10 )
emit fetchTimeout();
else
emit fetchFinish();
m_timer->stop();
pline() << qAbs ( curTime.secsTo ( m_time ) );
pline() << curTime.secsTo ( m_time );
pline() << QString ( __FILE__ ).split ( "/" ).last();
}
}