2017-11-23 19:12:48 +08:00
|
|
|
|
#include "qqtqtiowebpageparser.h"
|
|
|
|
|
#include <GumboQueryDocument.h>
|
|
|
|
|
#include <GumboQuerySelection.h>
|
|
|
|
|
#include <GumboQueryNode.h>
|
|
|
|
|
#include <QStringList>
|
|
|
|
|
#include <QTextCodec>
|
|
|
|
|
/**
|
|
|
|
|
* @brief QQtQtIOWebPageParser::QQtQtIOWebPageParser
|
|
|
|
|
* bad xml
|
|
|
|
|
* @param parent
|
|
|
|
|
*/
|
|
|
|
|
QQtQtIOWebPageParser::QQtQtIOWebPageParser ( QObject* parent ) : QQtWebAccessManager ( parent )
|
|
|
|
|
{
|
|
|
|
|
m_baseUrl = "http://download.qt.io/official_releases/qt/";
|
|
|
|
|
connect ( this, SIGNAL ( replyFinished ( QQtWebAccessSession* ) ),
|
|
|
|
|
this, SLOT ( replyFinished ( QQtWebAccessSession* ) ) );
|
2017-11-25 14:37:54 +08:00
|
|
|
|
m_timer = new QTimer ( this );
|
|
|
|
|
m_timer->setInterval ( 2000 );
|
|
|
|
|
m_timer->setSingleShot ( false );
|
|
|
|
|
connect ( m_timer, SIGNAL ( timeout() ), this, SLOT ( detecteTimeout() ) );
|
|
|
|
|
m_time = QTime::currentTime();
|
2017-11-23 19:12:48 +08:00
|
|
|
|
}
|
|
|
|
|
|
2017-11-24 13:27:45 +08:00
|
|
|
|
/**
 * @brief Issues a GET request for the listing at m_baseUrl + url1 + url2.
 *
 * When both path parts are empty, the call first clears the collected
 * sdkGroup results, resets the reference time and starts the watchdog timer.
 *
 * @param url1 First path component below the base URL (may be empty).
 * @param url2 Second path component below url1 (may be empty).
 */
void QQtQtIOWebPageParser::startNewParse ( QString url1, QString url2 )
{
    const bool isRootRequest = url1.isEmpty() && url2.isEmpty();

    if ( isRootRequest )
    {
        /* Fresh run: drop old results and restart the timing/watchdog state. */
        sdkGroup.clear();
        m_time = QTime::currentTime();
        m_timer->start();
    }

    QString requestUrl = QString ( "%1%2%3" ).arg ( m_baseUrl ).arg ( url1 ).arg ( url2 );

    /* The session remembers which path parts it was created for, so that
     * replyFinished() can tell which level the answer belongs to. */
    QQtQtIOWebUrlSession* urlSession = new QQtQtIOWebUrlSession ( this );
    urlSession->url1 = url1;
    urlSession->url2 = url2;
    urlSession->setWebAccessUrl ( requestUrl );
    urlSession->setWebAccessSessionName ( QUuid::createUuid().toString() );

    getWebAccessSessionManager()->addWebAccessSession ( urlSession );
    sendGetRequest ( urlSession );
}
|
|
|
|
|
|
|
|
|
|
/**
 * @brief Handles one finished web-access session: parses the returned listing
 *        page and either descends into sub-directories or records file entries.
 *
 * The text of the first <table> on the page is split into lines and consumed
 * in groups of four (name, time, size, detail). Depending on which path parts
 * the session carries:
 *  - url1 empty:  entries ending in '/' trigger startNewParse ( item );
 *  - url2 empty:  entries ending in '/' trigger startNewParse ( url1, item );
 *  - otherwise:   entries whose name contains '.' or '-' are stored as
 *                 TSdkNode items and mirrored into m_sdkGroup.
 *
 * Nothing is parsed when the HTTP status is not 200. A page without any
 * <table> yields an empty item list instead of indexing an empty selection.
 *
 * @param s0 The finished session. It is downcast to QQtQtIOWebUrlSession, so
 *           it must have been created by startNewParse().
 */
void QQtQtIOWebPageParser::replyFinished ( QQtWebAccessSession* s0 )
{
    QQtQtIOWebUrlSession* session = static_cast<QQtQtIOWebUrlSession*> ( s0 );

    if ( !session )
    {
        return;
    }

    QNetworkReply* reply = session->getWebAccessReply();

    if ( !reply )
    {
        return;
    }

    pline() << reply->url();

    /* Check the HTTP status code; only 200 is treated as success. */
    int nHttpCode = reply->attribute ( QNetworkRequest::HttpStatusCodeAttribute ).toInt();

    if ( nHttpCode != 200 )
    {
        pline() << "fail" << nHttpCode;
        return;
    }

    pline() << "success";

    /* Read the page content. */
    QByteArray resultContent = reply->readAll();
    pline() << QTextCodec::codecForHtml ( resultContent )->name();

    /* Decode with the page's source encoding (GB2312 or UTF-8 per the original
     * note; the GBK codec is what is actually requested here). */
    QTextCodec* pCodec = QTextCodec::codecForName ( "GBK" );
    QString strResult = pCodec->toUnicode ( resultContent );

    /* Round-trip the decoded text through UTF-8. */
    QTextCodec* pCodec2 = QTextCodec::codecForName ( "UTF-8" );
    QByteArray resultContent2 = pCodec2->fromUnicode ( strResult );
    QString result2 = resultContent2;

    pline() << QTextCodec::codecForLocale()->name();

    /* The page has no line breaks between cells, but the splitting below needs
     * them: insert '\n' before each closing cell/anchor/header tag. */
    result2.replace ( "</td>", "\n</td>" );
    result2.replace ( "</a>", "\n</a>" );
    result2.replace ( "</th>", "\n</th>" );

    /* Gumbo parses UTF-8 input, and the text is read back below with
     * QString::fromStdString, so hand it UTF-8 rather than the locale's
     * 8-bit encoding. */
    GumboQueryDocument doc;
    doc.parse ( result2.toUtf8().constData() );
    GumboQuerySelection s = doc.find ( "table" );
    pline() << "node num:" << s.nodeNum();

    /* Only touch the first node when the selection actually has one. */
    QString items;

    if ( s.nodeNum() > 0 )
    {
        GumboQueryNode pNode = s.nodeAt ( 0 );
        items = QString::fromStdString ( pNode.text() );
    }

    QStringList itemList = items.split ( "\n", QString::SkipEmptyParts );

    QString url1 = session->url1;
    QString url2 = session->url2;

    TSdkGroup group;

    /* Every fourth line is the first column (the entry name) of a row. */
    for ( int i = 0; i < itemList.count(); i += 4 )
    {
        QString item = itemList.at ( i );

        if ( url1 == "" )
        {
            /* Root listing (qt/): a trailing '/' marks a directory such as
             * 5.9/, which becomes url1 of the next request. */
            if ( item.endsWith ( '/' ) )
            {
                startNewParse ( item );
            }
        }
        else if ( url2 == "" )
        {
            /* Inside a first-level directory (e.g. 5.9): a trailing '/' marks
             * a directory such as 5.9.1/, which becomes url2. */
            if ( item.endsWith ( '/' ) )
            {
                startNewParse ( url1, item );
            }
        }
        else if ( item.contains ( '.' ) || item.contains ( '-' ) )
        {
            /* Deepest level: a name containing '.' or '-' is recorded as a new
             * sdk node. value() yields an empty string for a truncated last
             * row instead of indexing past the end of the list. */
            QString time = itemList.value ( i + 1 );
            QString size = itemList.value ( i + 2 );
            QString detail = itemList.value ( i + 3 );

            TSdkNode node;
            node.name = item;
            node.time = time;
            node.size = size;
            node.detail = detail;

            pline() << node.name << node.time << node.size << node.detail;
            group.list.push_back ( node );

            /* Mirror the same four fields into the dictionary under url1/url2. */
            m_sdkGroup[url1][url2][0] = item;
            m_sdkGroup[url1][url2][1] = time;
            m_sdkGroup[url1][url2][2] = size;
            m_sdkGroup[url1][url2][3] = detail;
        }
    }

    group.url1 = url1;
    group.url2 = url2;

    /* Only listings fetched with both path parts set contribute a group. */
    if ( url1 != "" && url2 != "" )
    {
        sdkGroup.push_back ( group );
        pline() << sdkGroup.size();
    }
}
|
2017-11-25 14:37:54 +08:00
|
|
|
|
|
|
|
|
|
void QQtQtIOWebPageParser::detecteTimeout()
|
|
|
|
|
{
|
|
|
|
|
if ( this->getWebAccessSessionManager()->getSessionCount() == 0 )
|
|
|
|
|
{
|
|
|
|
|
QTime curTime = QTime::currentTime();
|
|
|
|
|
|
|
|
|
|
if ( qAbs<int> ( curTime.secsTo ( m_time ) ) > 10 )
|
|
|
|
|
emit fetchTimeout();
|
|
|
|
|
else
|
|
|
|
|
emit fetchFinish();
|
|
|
|
|
|
|
|
|
|
m_timer->stop();
|
|
|
|
|
pline() << qAbs ( curTime.secsTo ( m_time ) );
|
|
|
|
|
pline() << curTime.secsTo ( m_time );
|
|
|
|
|
pline() << QString ( __FILE__ ).split ( "/" ).last();
|
|
|
|
|
}
|
|
|
|
|
}
|