Logo Search packages:      
Sourcecode: cb2bib version File versions

networkQuery.cpp

/***************************************************************************
 *   Copyright (C) 2004-2009 by Pere Constans
 *   constans@molspaces.com
 *   cb2Bib version 1.3.0. Licensed under the GNU GPL version 3.
 *   See the LICENSE file that comes with this distribution.
 ***************************************************************************/
#include "networkQuery.h"

#include "bibParser.h"
#include "cb2bib_utilities.h"
#include "network.h"
#include "settings.h"

#include <QTimer>


/**
    Builds a query object that creates its own network helper.

    \param bp       parser used to expand journal names and build excerpts;
                    asserted to be non-null.
    \param parento  QObject parent forwarded to the QObject base.
*/
networkQuery::networkQuery(bibParser* bp, QObject* parento)
        : QObject(parento),
        _bpP(bp)
{
    Q_ASSERT_X(_bpP, "networkQuery", "bibParser was not instantiated");
    // The network helper is created with this object as its parent.
    _networkP = new network(this);
    init();
}

/**
    Builds a query object that uses a network helper supplied by the caller.

    \param bp       parser used to expand journal names and build excerpts;
                    asserted to be non-null.
    \param net      network helper used for the downloads; asserted to be
                    non-null.
    \param parento  QObject parent forwarded to the QObject base.
*/
networkQuery::networkQuery(bibParser* bp, network* net, QObject* parento)
        : QObject(parento),
        _bpP(bp),
        _networkP(net)
{
    Q_ASSERT_X(_bpP, "networkQuery", "bibParser was not instantiated");
    Q_ASSERT_X(_networkP, "networkQuery", "network was not instantiated");
    init();
}

/**
    Destructor. Performs no explicit cleanup.
*/
networkQuery::~networkQuery()
{
}


void networkQuery::init()
{
    _settingsP = settings::instance();
    _settingsP->setValue("networkQuery/isSupervised", true);
    _networkquery_tmp_fn1 = _settingsP->tempPath() + "/cb2bib_query_tmp_html1_" + _settingsP->applicationPid();
    _networkquery_tmp_fn2 = _settingsP->tempPath() + "/cb2bib_query_tmp_html2_" + _settingsP->applicationPid();
}

/**
    Starts a new network query for a reference.

    Resets the error string, the list of already performed queries and the
    end-of-file flag, extracts the query data (title, full journal name,
    volume, first page, DOI, author words and an excerpt of the raw
    reference), and then launches the first query step.

    \param reference      reference whose fields feed the query placeholders.
    \param raw_reference  raw text from which the excerpt is built.
*/
void networkQuery::submitQuery(const bibReference& reference, const QString& raw_reference)
{
    // Submission Initialization
    _error_string.clear();
    _query_parameter_count.clear();
    _is_end_of_file = false;

    // Query data
    _Qtitle = reference.value("title");
    _Qjournal = _bpP->fullJournal(reference.value("journal"));
    _Qvolume = reference.value("volume");
    _Qpage = c2bUtils::firstPage(reference.value("pages"));
    _Qdoi = reference.value("doi").trimmed();
    // The dot after the '10' prefix is escaped so that only a literal '10.' is accepted.
    if (!_Qdoi.isEmpty() && !_Qdoi.contains(QRegExp("^10\\.[\\d\\.]+/\\S+$")))
    {
        // Report the offending value BEFORE clearing it, otherwise the message is always empty.
        // The text goes through "%s": it holds reference data and must not be read as a format string.
        qDebug("%s", qPrintable(tr("[cb2bib] Warning: DOI skipped: '%1' is not a valid DOI.").arg(_Qdoi)));
        _Qdoi.clear();
    }
    _Qauthor = reference.value("author");
    // Blank out single-character words, the word 'and', and non-word characters
    _Qauthor.replace(QRegExp("(?:\\b\\w\\b|\\band\\b|\\W)"), " ");
    _Qauthor = _Qauthor.simplified();
    _Qexcerpt = _bpP->excerpt(raw_reference, QStringList() << _Qdoi << _Qauthor << _Qtitle);

    submitQuery1();
}

void networkQuery::submitQuery1()
{
    // Submission, first step, setting journal codes
    if (!setQueryParameters())
    {
        _error_string = tr("No data for query.");
        emit queryEnded(false, _targetQ, _networkquery_tmp_fn1);
        return;
    }
    if (_is_end_of_file)
    {
        _error_string = tr("Performed %1 queries: No reference found.").arg(_query_parameter_count.count());
        emit queryEnded(false, _targetQ, _networkquery_tmp_fn1);
        return;
    }

    c2bUtils::debug(tr("Query Number = %1").arg(_query_parameter_count.count()));
    c2bUtils::debug(tr("targetQ[%1]").arg(_targetQ));
    c2bUtils::debug(tr("captionQ[%1]").arg(_captionQ));
    c2bUtils::debug(tr("referenceurl_prefix[%1]").arg(_referenceurl_prefix));
    c2bUtils::debug(tr("referenceurl_sufix[%1]").arg(_referenceurl_sufix));
    c2bUtils::debug(tr("pdfurl_prefix[%1]").arg(_pdfurl_prefix));
    c2bUtils::debug(tr("pdfurl_sufix[%1]").arg(_pdfurl_sufix));
    c2bUtils::debug(tr("action[%1]").arg(_action));
    c2bUtils::debug(tr("POST1[%1]").arg(_targetQ));

    if (_action == "browse_query")
    {
        if (openFile(encodeUrl(_targetQ)))
        {
            _error_string = tr("Browsing query.");
            emit queryEnded(true, "", "");
        }
        else
        {
            _error_string = tr("Could not open Url '%1'.").arg(encodeUrl(_targetQ));
            emit queryEnded(false, "", "");
        }
        return;
    }
    if (_action == "htm2txt_query")
        emit statusMessage(tr("Importing: %1.").arg(_targetQ));
    else
        emit statusMessage(tr("Query: %1.").arg(_targetQ));
    _networkP->getFile(_targetQ, _networkquery_tmp_fn1, "copy",
                       this, SLOT(submitQuery2(bool)), !_settingsP->value("cb2Bib/KeepTmpNQFiles").toBool());
}

void networkQuery::submitQuery2(bool stat)
{
    // Submission, second part: extracting reference location
    if (!stat)
    {
        _error_string = _networkP->errorString();
        emit queryEnded(false, _targetQ, _networkquery_tmp_fn1);
        return;
    }

    QString lines = c2bUtils::fileToString(_networkquery_tmp_fn1, !_settingsP->value("cb2Bib/KeepTmpNQFiles").toBool());
    if (_action == "htm2txt_query")
    {
        _error_string = tr("Importing query Url.");
        emit queryEnded(true, "", fromHtmlString(lines, true));
        return;
    }

    QRegExp rx(_captionQ);
    rx.setMinimal(true);
    if (!rx.isValid())
        qDebug(qPrintable(tr("[cb2bib] Warning: RegExp '%1' is not valid.").arg(_captionQ)));
    int ncap = rx.indexIn(lines);
    if (ncap > -1)
        lines = fromHtmlString(rx.cap(1));
    else
    {
        QTimer::singleShot(10, this, SLOT(submitQuery1()));
        return;
    }
    _targetBib = _referenceurl_prefix + lines + _referenceurl_sufix;
    if (_pdfurl_prefix.isEmpty() && _pdfurl_sufix.isEmpty())
        _targetPDF.clear();
    else if (_settingsP->value("cb2Bib/AutomaticPdfDownload").toBool())
        _targetPDF = _pdfurl_prefix + lines + _pdfurl_sufix;

    c2bUtils::debug(tr("CAPTURED[%1]").arg(lines));
    c2bUtils::debug(tr("POST2[%1]").arg(_targetBib));
    c2bUtils::debug(tr("POST3[%1]").arg(_targetPDF));

    if (_action == "browse_referenceurl")
    {
        if (openFile(encodeUrl(_targetBib)))
        {
            _error_string = tr("Browsing reference.");
            emit queryEnded(true, "", "");
        }
        else
        {
            _error_string = tr("Could not open Url '%1'.").arg(encodeUrl(_targetBib));
            emit queryEnded(false, "", "");
        }
        return;
    }
    if (_action == "htm2txt_referenceurl")
        emit statusMessage(tr("Importing: %1.").arg(_targetBib));
    else
        emit statusMessage(tr("Retrieving: %1.").arg(_targetBib));
    _networkP->getFile(_targetBib, _networkquery_tmp_fn2, "copy",
                       this, SLOT(queryDone(bool)), !_settingsP->value("cb2Bib/KeepTmpNQFiles").toBool());
}

void networkQuery::queryDone(bool stat)
{
    // Submission Done
    if (!stat)
    {
        _error_string = _networkP->errorString();
        emit queryEnded(false, _targetBib, _networkquery_tmp_fn2);
        return;
    }

    QString lines = c2bUtils::fileToString(_networkquery_tmp_fn2, !_settingsP->value("cb2Bib/KeepTmpNQFiles").toBool());
    if (_action == "htm2txt_referenceurl")
    {
        _error_string = tr("Importing reference Url.");
        emit queryEnded(true, _targetPDF, fromHtmlString(lines, true));
        return;
    }

    if (_targetBib.contains("MEDLINE"))
    {
        QRegExp med_rx;
        med_rx.setMinimal(true);
        med_rx.setPattern("^.+\"pre_replace\">(.+)</table><.+$");
        lines.replace(med_rx, "\\1");
        lines.replace("</tr>", "_NEW_LINE_");
        med_rx.setPattern("<.+>");
        lines.replace(med_rx, " ");
        lines = fromHtmlString(lines); // Convert to unicode the text-related HTML tags in Pubmed, keep lines
        lines.replace("_NEW_LINE_", "\n");
    }
    _targetBib = lines.trimmed();
    emit queryEnded(true, _targetPDF, _targetBib);
}

bool networkQuery::setQueryParameters()
{
    if (!checkQueryFile(_settingsP->fileName("cb2Bib/NetworkFile")))
        return false;

    QFile file(_settingsP->fileName("cb2Bib/NetworkFile"));
    file.open(QIODevice::ReadOnly | QIODevice::Text);
    QTextStream stream(&file);
    stream.setCodec("UTF-8");
    stream.setAutoDetectUnicode(true);
    QRegExp Journal("journal=" + _Qjournal + "\\|");
    QRegExp AnyJournal("journal=\\s*$");
    uint readQueryParams = 0;
    _is_end_of_file = false;
    QString line;
    while (!stream.atEnd())
    {
        line = stream.readLine();
        // Skip comments and blanks
        if (!(line.isEmpty() || line.contains(QRegExp("^#"))))
        {
            if (line.contains(Journal))
            {
                const QStringList lc = line.split("|");
                if (lc.count() > 1)
                    _QjournalCode = lc.at(1);
                else
                    _QjournalCode.clear();
            }
            else if (line.contains(AnyJournal))
                _QjournalCode = _Qjournal;
            // Get appropiate parameters for Journal or AnyJournal
            if (line.contains(Journal) || line.contains(AnyJournal))
            {
                // Skip if already performed
                if (!_query_parameter_count.contains(++readQueryParams))
                {
                    while (line.contains(QRegExp("^journal=")))
                        line = stream.readLine();
                    _targetQ = line.remove(QRegExp("^query="));
                    line = stream.readLine();
                    _captionQ = line.remove(QRegExp("^capture_from_query="));
                    line = stream.readLine();
                    _referenceurl_prefix = line.remove(QRegExp("^referenceurl_prefix="));
                    line = stream.readLine();
                    _referenceurl_sufix = line.remove(QRegExp("^referenceurl_sufix="));
                    line = stream.readLine();
                    _pdfurl_prefix = line.remove(QRegExp("^pdfurl_prefix="));
                    line = stream.readLine();
                    _pdfurl_sufix = line.remove(QRegExp("^pdfurl_sufix="));
                    line = stream.readLine();
                    _action = line.remove(QRegExp("^action="));
                    // Setting Query Parameters
                    updateQueryPlaceholders();
                    // Finally, check for unresolved cb2Bib tags
                    if (areQueryParametersValid())
                    {
                        _query_parameter_count.append(readQueryParams);
                        return true;
                    }
                }
            }
        }
    }
    file.close();
    _is_end_of_file = true;
    return (!_query_parameter_count.isEmpty());
}

void networkQuery::updateQueryPlaceholders()
{
    if (!_Qtitle.isEmpty())
    {
        _targetQ.replace("<<title>>", _Qtitle);
        _captionQ.replace("<<title>>", _Qtitle);
        _referenceurl_prefix.replace("<<title>>", _Qtitle);
        _referenceurl_sufix.replace("<<title>>", _Qtitle);
        _pdfurl_prefix.replace("<<title>>", _Qtitle);
        _pdfurl_sufix.replace("<<title>>", _Qtitle);
    }
    if (!_QjournalCode.isEmpty())
    {
        _QjournalCode.replace(" & ", " and "); // Avoid sending '&' to confuse the Url
        _targetQ.replace("<<journal>>", _QjournalCode);
        _captionQ.replace("<<journal>>", _QjournalCode);
        _referenceurl_prefix.replace("<<journal>>", _QjournalCode);
        _referenceurl_sufix.replace("<<journal>>", _QjournalCode);
        _pdfurl_prefix.replace("<<journal>>", _QjournalCode);
        _pdfurl_sufix.replace("<<journal>>", _QjournalCode);
    }
    if (!_Qpage.isEmpty())
    {
        _targetQ.replace("<<pages>>", _Qpage);
        _captionQ.replace("<<pages>>", _Qpage);
        _referenceurl_prefix.replace("<<pages>>", _Qpage);
        _referenceurl_sufix.replace("<<pages>>", _Qpage);
        _pdfurl_prefix.replace("<<pages>>", _Qpage);
        _pdfurl_sufix.replace("<<pages>>", _Qpage);
    }
    if (!_Qvolume.isEmpty())
    {
        _targetQ.replace("<<volume>>", _Qvolume);
        _captionQ.replace("<<volume>>", _Qvolume);
        _referenceurl_prefix.replace("<<volume>>", _Qvolume);
        _referenceurl_sufix.replace("<<volume>>", _Qvolume);
        _pdfurl_prefix.replace("<<volume>>", _Qvolume);
        _pdfurl_sufix.replace("<<volume>>", _Qvolume);
    }
    if (!_Qdoi.isEmpty())
    {
        _targetQ.replace("<<doi>>", _Qdoi);
        _captionQ.replace("<<doi>>", _Qdoi);
        _referenceurl_prefix.replace("<<doi>>", _Qdoi);
        _referenceurl_sufix.replace("<<doi>>", _Qdoi);
        _pdfurl_prefix.replace("<<doi>>", _Qdoi);
        _pdfurl_sufix.replace("<<doi>>", _Qdoi);
    }
    if (!_Qexcerpt.isEmpty())
    {
        _targetQ.replace("<<excerpt>>", _Qexcerpt);
        _captionQ.replace("<<excerpt>>", _Qexcerpt);
        _referenceurl_prefix.replace("<<excerpt>>", _Qexcerpt);
        _referenceurl_sufix.replace("<<excerpt>>", _Qexcerpt);
        _pdfurl_prefix.replace("<<excerpt>>", _Qexcerpt);
        _pdfurl_sufix.replace("<<excerpt>>", _Qexcerpt);
    }
}

bool networkQuery::areQueryParametersValid()
{
    if (!_action.isEmpty())
    {
        if (_action == "browse_query" || _action == "browse_referenceurl")
        {
            if (!_settingsP->value("networkQuery/isSupervised").toBool())
                return false;
        }
        else if (!(_action == "htm2txt_query" || _action == "htm2txt_referenceurl"))
            return false;
    }
    const QString allParams = _targetQ + _captionQ + _referenceurl_prefix + _referenceurl_sufix + _pdfurl_prefix + _pdfurl_sufix;
    return !(allParams.contains(QRegExp("(?:<<title>>|<<journal>>|<<pages>>|<<volume>>|<<doi>>|<<excerpt>>)")));
}

/**
    Removes a leading <<post>> tag, if present, and percent-encodes the Url,
    leaving the characters + : / ? = & \ unencoded.

    \param url  Url to encode.
    \return the encoded Url.
*/
const QString networkQuery::encodeUrl(const QString& url) const
{
    QString stripped_url(url);
    stripped_url.remove(QRegExp("^<<post>>"));
    return QString(QUrl::toPercentEncoding(stripped_url, "+:/?=&\\"));
}

bool networkQuery::checkQueryFile(const QString& fn) const
{
    if (fn.isEmpty())
    {
        qDebug(QObject::tr("[cb2Bib] No network query file especified.").toLatin1());
        return false;
    }
    QFileInfo fi(fn);
    if (!fi.exists() || !fi.isReadable())
    {
        qDebug(QObject::tr("[cb2Bib] Could not open network query file %1 for reading.").arg(fn).toLatin1());
        return false;
    }
    return true;
}

Generated by  Doxygen 1.6.0   Back to index