LibQQt/demo/QtSdkManager/qqtqtiowebpageparser.cpp

#include "qqtqtiowebpageparser.h"
#include <GumboQueryDocument.h>
#include <GumboQuerySelection.h>
#include <GumboQueryNode.h>
#include <QStringList>
#include <QTextCodec>
/**
 * @brief QQtQtIOWebPageParser::QQtQtIOWebPageParser
 * bad xml
 * @param parent
 */
QQtQtIOWebPageParser::QQtQtIOWebPageParser ( QObject* parent ) : QQtWebAccessManager ( parent )
{
    m_baseUrl = "http://download.qt.io/official_releases/qt/";
    connect ( this, SIGNAL ( replyFinished (  QQtWebAccessSession* ) ),
              this, SLOT ( replyFinished ( QQtWebAccessSession* ) ) );
    m_timer = new QTimer ( this );
    m_timer->setInterval ( 2000 );
    m_timer->setSingleShot ( false );
    connect ( m_timer, SIGNAL ( timeout() ), this, SLOT ( detecteTimeout() ) );
    m_time = QTime::currentTime();
}

void QQtQtIOWebPageParser::startNewParse ( QString url1, QString url2 )
{
    if ( url1 == "" && url2 == "" )
    {
        sdkGroup.clear();
        m_time = QTime::currentTime();
        m_timer->start();
    }

    QString strUrl = QString ( "%1%2%3" ).arg ( m_baseUrl ).arg ( url1 ).arg ( url2 );

    QQtQtIOWebUrlSession* session = new QQtQtIOWebUrlSession ( this );
    session->url1 = url1;
    session->url2 = url2;
    session->setWebAccessUrl ( strUrl );
    session->setWebAccessSessionName ( QUuid::createUuid().toString() );
    getWebAccessSessionManager()->addWebAccessSession ( session );
    sendGetRequest ( session );
}

void QQtQtIOWebPageParser::replyFinished ( QQtWebAccessSession* s0 )
{
    QQtQtIOWebUrlSession* session = ( QQtQtIOWebUrlSession* ) s0;
    /*判断返回码 200*/
    QNetworkReply* reply = session->getWebAccessReply();
    //pline() << reply->readAll();
    pline() << reply->url();
    int nHttpCode = reply->attribute ( QNetworkRequest::HttpStatusCodeAttribute ).toInt(); //http返回码

    if ( nHttpCode == 200 ) //成功
    {
        pline() << "success";
    }
    else
    {
        pline() << "fail" << nHttpCode;
        return;
    }

    /*读取content*/
    QByteArray resultContent = reply->readAll();
    //pline() << QString ( resultContent );
    QString result1 = resultContent;
    //pline() << result1.toLatin1().constData();
    pline() << QTextCodec::codecForHtml ( resultContent )->name();

    /*用页面源文件的编码来进行解码 GB2312 or UTF-8*/
    QTextCodec* pCodec = QTextCodec::codecForName ( "GBK" );
    QString strResult = pCodec->toUnicode ( resultContent );
    //pline() << strResult;

    QTextCodec* pCodec2 = QTextCodec::codecForName ( "UTF-8" );
    QByteArray resultContent2 = pCodec2->fromUnicode ( strResult );
    QString result2 = resultContent2;
    //pline() << result2;

    pline() << QTextCodec::codecForLocale()->name();

    /*处理一下页面 原页面没有\n，而我需要\n*/
    //before </td> + /n
    result2.replace ( "</td>", "\n</td>" );
    result2.replace ( "</a>", "\n</a>" );
    result2.replace ( "</th>", "\n</th>" );

    GumboQueryDocument doc;
    doc.parse ( result2.toLocal8Bit().constData() );
    GumboQuerySelection s = doc.find ( "table" );
    pline() << "node num:" << s.nodeNum();

    GumboQueryNode pNode = s.nodeAt ( 0 );
    QString items = QString::fromStdString ( pNode.text() );
    //qDebug() << items;

    QStringList itemList = items.split ( "\n", QString::SkipEmptyParts );

    for ( int i = 0; i < itemList.count(); i++ )
    {
        QString txt = itemList.at ( i );
        //qDebug() << txt;
        /*
        txt = txt.trimmed();

        if ( txt.isEmpty() )
        {
            itemList.removeAt ( i );
            i--;
            continue;
        }

        itemList[i] = txt;
        */
    }

    QString url1 = session->url1;
    QString url2 = session->url2;

    TSdkGroup group;

    for ( int i = 0; i < itemList.count(); i++ )
    {
        /*获取第一列*/
        if ( i % 4 == 0 )
        {
            /*在根目录的时候 第一层目录 qt/*/
            if ( url1 == "" )
            {
                QString item = itemList.at ( i );

                /*是第二层目录 5.9*/
                if ( item.endsWith ( '/' ) )
                {
                    /*保存下来url1*/
                    startNewParse ( item );
                }
            }
            /*进入第二层目录 e.g. 5.9*/
            else if ( url2 == "" )
            {
                QString item = itemList.at ( i );

                /*是第二层目录 5.9.1*/
                if ( item.endsWith ( '/' ) )
                {
                    /*保存下来url1，url2*/
                    startNewParse ( url1, item );
                }
            }
            /*最后一层目录*/
            else
            {
                QString item = itemList[i];

                /*new sdk node*/
                if ( item.contains ( '.' ) || item.contains ( '-' ) )
                {
                    TSdkNode node;
                    node.name = item;

                    if ( i + 1 < itemList.count() )
                        node.time = itemList[i + 1];

                    if ( i + 2 < itemList.count() )
                        node.size = itemList[i + 2];

                    if ( i + 3 < itemList.count() )
                        node.detail = itemList[i + 3];

                    pline() << node.name << node.time << node.size << node.detail;
                    group.list.push_back ( node );

                    //QQtDict Code
                    m_sdkGroup[url1][url2][0] = itemList[i + 0];
                    m_sdkGroup[url1][url2][1] = itemList[i + 1];
                    m_sdkGroup[url1][url2][2] = itemList[i + 2];
                    m_sdkGroup[url1][url2][3] = itemList[i + 3];
                    //OK Success
                }
            }
        }
    }

    group.url1 = url1;
    group.url2 = url2;

    if ( url1 != "" && url2 != "" )
    {
        sdkGroup.push_back ( group );
        pline() << sdkGroup.size();
    }
}

void QQtQtIOWebPageParser::detecteTimeout()
{
    if ( this->getWebAccessSessionManager()->getSessionCount() == 0 )
    {
        QTime curTime = QTime::currentTime();

        if ( qAbs<int> ( curTime.secsTo ( m_time ) ) > 10 )
            emit fetchTimeout();
        else
            emit fetchFinish();

        m_timer->stop();
        pline() << qAbs ( curTime.secsTo ( m_time ) );
        pline() << curTime.secsTo ( m_time );
        pline() << QString ( __FILE__ ).split ( "/" ).last();
    }
}