Logo Search packages:      
Sourcecode: cb2bib version File versions

c2bPdfImport.cpp

/***************************************************************************
 *   Copyright (C) 2004-2009 by Pere Constans
 *   constans@molspaces.com
 *   cb2Bib version 1.3.0. Licensed under the GNU GPL version 3.
 *   See the LICENSE file that comes with this distribution.
 ***************************************************************************/
#include "c2bPdfImport.h"

#include "c2b.h"
#include "c2bFileDialog.h"
#include "c2bSettings.h"
#include "c2bUtils.h"

#include <document.h>

#include <QDropEvent>
#include <QPushButton>
#include <QTimer>
#include <QUrl>


/** \page c2bpdfimport PDF Reference Import

    Articles in PDF or other formats that can be converted to plain text can be
    processed and indexed by the cb2Bib. Files can be selected using the Select
    Files button, or dragging them from the desktop or the file manager to the
    PDFImport dialog panel. Files are converted to plain text by using any
    external translation tool or script. This tool, and optionally its
    parameters, are set in the cb2Bib configure dialog. See the \ref
    c2bconf_utilities section for details.

    Once the file is converted, the text, and optionally, the preparsed
    metadata, is sent to the cb2Bib for reference recognition. This is the
    usual, two-step process. First, text is optionally preprocessed, using a
    simple set of rules and/or any external script or tool. See \ref
    c2bconf_clipboard. Second, text is processed for reference extraction. The
    cb2Bib so far uses two methods. One considers the text as a full pattern,
    which is checked against the user's set of regular expressions. The better
    designed these rules are, the better and more reliable the extraction will
    be. The second method, used when no regular expression matches the
    text, considers instead a set of predefined subpatterns. See \ref
    heuristic_guess.

    At this point users can interact and supervise their references, right
    before saving them. Allowing user intervention is and has been a design
    goal in the cb2Bib. Thus, at this point, the cb2Bib invites users to check
    their references. Poorly translated characters, accented letters,
    'forgotten' words, or some minor formatting in the titles might be worth
    considering. In addition, if too few fields were extracted, one might
    perform a network query. Say only the DOI was caught; chances are then
    that such a query will fill in the remaining fields.

    The references are saved from the cb2Bib main panel. Once Save is pressed,
    and depending on the configuration, see \ref c2bconf_documents, the
    document file will be either renamed, copied, moved or simply linked onto
    the <tt>file</tt> field of the reference. If <b>Insert BibTeX metadata to
    document files</b> is checked, the current reference will also be inserted
    into the document itself.


    When several files are going to be indexed, the sequence can be as follows:

    - <b>Process next after saving</b>\n Once files are loaded and Process is
    pressed, the PDFImport dialog can be minimized (but not closed). All
    required operations at this point are accessible from the main panel. The
    link in the <tt>file</tt> field will be permanent, without regard to which
    operations (e.g. clipboard copying) are needed, until the reference is
    saved. Then, the next file will be automatically processed. The source file
    can be opened at any time by right-clicking the <tt>file</tt> line edit.


    - <b>Unsupervised processing</b>\n In this operation mode, all files will
    be sequentially processed, following the chosen steps and rules. If the
    processing is successful, the reference is automatically saved, and the next
    file is processed. If it is not, the file is skipped and no reference is
    saved. While processing, the clipboard is disabled for safety. Once
    finished, this box is unchecked, to avoid a possible accidental saving of a
    void reference. Network queries that require intervention, i.e., whose
    result is launching a given page, are disabled. The process continues until
    all files are processed. However, it will stop to avoid a file being
    overwritten, as a result of a repeated key. In this case, it will resume
    after manual renaming and saving. See also \ref commandline, commands
    '--txt2bib' and '--doc2bib'.


    <p>&nbsp;</p>
*/
/**
    Builds the PDFImport dialog: sets up the designer form, loads the stored
    dialog options, adds the Select Files and Process buttons to the button
    box, wires the signal/slot connections, and writes the configured
    converter and its arguments to the log.

    \param parentw Parent widget passed on to QDialog.
*/
c2bPdfImport::c2bPdfImport(QWidget* parentw) : QDialog(parentw)
{
    ui.setupUi(this);
    settings = c2bSettingsP;
    loadSettings();

    QPushButton* const helpButton = ui.buttonBox->button(QDialogButtonBox::Help);
    QPushButton* const abortButton = ui.buttonBox->button(QDialogButtonBox::Abort);

    // Action buttons created here rather than taken from the button box
    buttonSelectFiles = new QPushButton(tr("&Select Files"));
    buttonSelectFiles->setStatusTip(tr("Select PDF files. Hint: Files can also be dragged and dropped to this window"));
    buttonSelectFiles->setMouseTracking(true);
    ui.buttonBox->addButton(buttonSelectFiles, QDialogButtonBox::ActionRole);
    buttonProcess = new QPushButton(tr("&Process"));
    ui.buttonBox->addButton(buttonProcess, QDialogButtonBox::ActionRole);

    // Process starts disabled; Select Files is the last button made default
    // and the one that receives the initial focus
    helpButton->setAutoDefault(false);
    buttonProcess->setAutoDefault(true);
    buttonProcess->setDefault(true);
    buttonProcess->setEnabled(false);
    buttonSelectFiles->setAutoDefault(true);
    buttonSelectFiles->setDefault(true);
    buttonSelectFiles->setFocus();
    abortButton->setAutoDefault(false);
    abortButton->setEnabled(false);
    m_aborted = false;

    connect(abortButton, SIGNAL(clicked()), this, SLOT(abort()));
    connect(ui.buttonBox, SIGNAL(helpRequested()), this, SLOT(help()));
    connect(buttonSelectFiles, SIGNAL(clicked()), this, SLOT(selectFiles()));
    connect(buttonProcess, SIGNAL(clicked()), this, SLOT(processOneFile()));
    // Toggling DoAll is re-emitted as setClipboardDisabled(bool)
    connect(ui.DoAll, SIGNAL(toggled(bool)), this, SIGNAL(setClipboardDisabled(bool)));
    connect(c2b::instance(), SIGNAL(statusMessage(const QString&)), this, SLOT(showMessage(const QString&)));

    // First log entry: the converter and the arguments currently configured
    QString banner = tr("PDF to Text converter: %1\nArguments: %2\n");
    banner = banner.arg(settings->fileName("c2bPdfImport/Pdf2TextBin"));
    banner = banner.arg(settings->value("c2bPdfImport/Pdf2TextArg").toString());
    ui.Log->appendPlainText(banner);
    showMessage(tr("See cb2Bib install directory for demo c2bPdfImport files."));
}

/**
    Emits setClipboardDisabled(false) and then stores the dialog options
    through saveSettings().
*/
c2bPdfImport::~c2bPdfImport()
{
    // Same signal that toggling DoAll emits; sent here with 'false'
    emit setClipboardDisabled(false);
    saveSettings();
}


void c2bPdfImport::processOneFile()
{
    // Converting PDF to Text
    buttonProcess->setEnabled(false);
    m_aborted = false;
    ui.buttonBox->button(QDialogButtonBox::Abort)->setEnabled(ui.DoAll->isChecked());
    settings->setValue("networkQuery/isSupervised", !ui.DoAll->isChecked());
    settings->setValue("cb2Bib/AutomaticQuery", ui.AutomaticQuery->isChecked());

    if (ui.PDFlist->currentItem() == 0)
        return;
    processedFile = ui.PDFlist->currentItem()->text();
    if (ui.OpenFiles->isChecked())
        c2bUtils::openFile(processedFile, this);

    QCoreApplication::processEvents();
    processDocument();
}

/**
    Continues with the next queued file, if appropriate.

    Resets processedFile. A pending abort is consumed and stops the sequence.
    Otherwise, when the list still has a current item and either
    DoNextAfterSaving or DoAll is checked, processOneFile() is called.
*/
void c2bPdfImport::processNext()
{
    processedFile = "";

    // Consume the abort flag: one abort request stops exactly one continuation
    const bool wasAborted = m_aborted;
    m_aborted = false;
    if (wasAborted)
        return;

    const bool autoAdvance = ui.DoNextAfterSaving->isChecked() || ui.DoAll->isChecked();
    if (ui.PDFlist->currentItem() != 0 && autoAdvance)
        processOneFile();
}

void c2bPdfImport::processDocument()
{
    document doc(processedFile, document::FirstPage);
    QString text = doc.toString();
    const QString log = doc.logString();
    if (!log.isEmpty())
        ui.Log->appendPlainText(log);
    const QString error = doc.errorString();
    if (!error.isEmpty())
        ui.Log->appendPlainText(tr("[cb2bib] %1.").arg(error));

    QListWidgetItem* item = ui.PDFlist->currentItem();
    delete item;
    if (ui.PDFlist->currentItem() == 0)
    {
        buttonProcess->setEnabled(false);
        ui.buttonBox->button(QDialogButtonBox::Close)->setFocus();
    }
    else
    {
        buttonProcess->setEnabled(true);
        buttonProcess->setFocus();
    }

    QString metadata;
    if (settings->value("cb2Bib/AddMetadata").toBool())
        metadata = c2b::documentMetadata(processedFile);
    if (text.isEmpty() && metadata.isEmpty())
    {
        if (ui.DoAll->isChecked())
            QTimer::singleShot(500, this, SLOT(processNext()));
    }
    else
    {
        if (settings->value("cb2Bib/PreAppendMetadata").toString() == "prepend")
            text = metadata + text;
        else
            text = text + "\n" + metadata;
        ui.Log->appendPlainText(tr("[cb2bib] Conversion completed for file %1.").arg(processedFile));
        emit textProcessed(text);
        emit fileProcessed(processedFile);
    }
}

/**
    Reacts to the result of a reference extraction while DoAll is checked.

    When the file list has no current item left, DoAll is unchecked and Abort
    is disabled. Then, after a 500 ms delay, either saveReferenceRequest() is
    emitted (status is true) or processNext() is called (status is false).
    Does nothing when DoAll is not checked on entry.

    \param status Whether the extraction succeeded.
*/
void c2bPdfImport::referenceExtacted(bool status)
{
    if (ui.DoAll->isChecked())
    {
        if (ui.PDFlist->currentItem() == 0)
        {
            // Last file of the batch: leave unsupervised mode
            ui.DoAll->setChecked(false);
            ui.buttonBox->button(QDialogButtonBox::Abort)->setEnabled(false);
        }

        // Delay request to make sure fileProcessed has finished
        const char* const nextStep = status ? SIGNAL(saveReferenceRequest()) : SLOT(processNext());
        QTimer::singleShot(500, this, nextStep);
    }
}

/**
    Adds the local files dropped onto the dialog to the file list.

    Only URLs with the "file" scheme that yield a non-empty local path are
    added. The first item added becomes the current one when the list has no
    current item. The Process button is enabled and focused only if the list
    has a current item afterwards, so a drop that carried no usable local
    file does not enable Process on an empty list.

    \param qevent Drop event carrying the URLs.
*/
void c2bPdfImport::dropEvent(QDropEvent* qevent)
{
    const QList<QUrl> urls = qevent->mimeData()->urls();
    for (int i = 0; i < urls.count(); ++i)
    {
        const QUrl& url = urls.at(i);
        if (url.scheme() != "file")
            continue;
        const QString fn = url.toLocalFile();
        if (fn.isEmpty())
            continue;
        QListWidgetItem* item = new QListWidgetItem(fn, ui.PDFlist);
        if (ui.PDFlist->currentItem() == 0)
            ui.PDFlist->setCurrentItem(item);
    }
    qevent->acceptProposedAction();
    c2bUtils::setWidgetOnTop(this);

    // processOneFile() needs a current item; do not offer Process without one
    if (ui.PDFlist->currentItem() != 0)
    {
        buttonProcess->setEnabled(true);
        buttonProcess->setFocus();
    }
    showMessage(tr("%1 files selected.").arg(ui.PDFlist->count()));
}

/**
    Accepts the proposed drag action when the dragged data carries URLs;
    otherwise the event is left untouched.

    \param qevent Drag-enter event to inspect.
*/
void c2bPdfImport::dragEnterEvent(QDragEnterEvent* qevent)
{
    const bool carriesUrls = qevent->mimeData()->hasUrls();
    if (!carriesUrls)
        return;
    qevent->acceptProposedAction();
}

/**
    Shows status tips in the dialog's own status bar.

    StatusTip events are handled here and reported as processed. Every other
    event is forwarded to the immediate base class, QDialog, so that any
    event handling QDialog provides is not bypassed.

    \param qevent Event to handle.
    \return true for StatusTip events, otherwise the base class result.
*/
bool c2bPdfImport::event(QEvent* qevent)
{
    if (qevent->type() != QEvent::StatusTip)
        return QDialog::event(qevent);

    ui.statusBar->showMessage(static_cast<QStatusTipEvent*>(qevent)->tip());
    return true;
}

void c2bPdfImport::selectFiles()
{
    QStringList fns = c2bFileDialog::getOpenFilenames(this, "",
                      settings->fileName("c2bPdfImport/LastFile"),
                      tr("Portable Document Format (*.pdf);;All (*)"));
    if (fns.isEmpty())
        return;
    settings->setFilename("c2bPdfImport/LastFile", fns.last());
    QStringList::Iterator it = fns.begin();
    while (it != fns.end())
    {
        QListWidgetItem* item = new QListWidgetItem(*it, ui.PDFlist);
        if (ui.PDFlist->currentItem() == 0)
            ui.PDFlist->setCurrentItem(item);
        ++it;
    }
    buttonProcess->setEnabled(true);
    buttonProcess->setFocus();
    showMessage(tr("%1 files selected.").arg(ui.PDFlist->count()));
}

void c2bPdfImport::show()
{
    c2bUtils::setWidgetOnTop(this);
    if (buttonProcess->isEnabled())
        buttonProcess->setFocus();
    else
        buttonSelectFiles->setFocus();
    QDialog::show();
}

/**
    Displays a message in the status bar for C2B_MESSAGE_TIME and, when the
    message starts with the translated "Processed as" or "Unable", also
    appends it to the Log view with a "[cb2bib] " prefix.

    \param ms Message text.
*/
void c2bPdfImport::showMessage(const QString& ms)
{
    ui.statusBar->showMessage(ms, C2B_MESSAGE_TIME);

    const bool worthLogging = ms.startsWith(tr("Processed as")) || ms.startsWith(tr("Unable"));
    if (!worthLogging)
        return;
    ui.Log->appendPlainText("[cb2bib] " + ms);
}

/**
    Loads the dialog options from the settings.

    The cb2Bib/AutomaticQuery value is kept in c2bAutomaticQuery, which
    saveSettings() writes back under the same key. The dialog's own check
    boxes are initialized from the c2bPdfImport/ keys.
*/
void c2bPdfImport::loadSettings()
{
    c2bAutomaticQuery = settings->value("cb2Bib/AutomaticQuery").toBool();

    const bool automaticQuery = settings->value("c2bPdfImport/AutomaticQuery").toBool();
    ui.AutomaticQuery->setChecked(automaticQuery);

    const bool doNextAfterSaving = settings->value("c2bPdfImport/DoNextAfterSaving", true).toBool();
    ui.DoNextAfterSaving->setChecked(doNextAfterSaving);

    const bool openFiles = settings->value("c2bPdfImport/OpenFiles", false).toBool();
    ui.OpenFiles->setChecked(openFiles);
}

void c2bPdfImport::saveSettings()
{
    settings->setValue("c2bPdfImport/AutomaticQuery", ui.AutomaticQuery->isChecked());
    settings->setValue("c2bPdfImport/DoNextAfterSaving", ui.DoNextAfterSaving->isChecked());
    settings->setValue("c2bPdfImport/OpenFiles", ui.OpenFiles->isChecked());
    settings->setValue("cb2Bib/AutomaticQuery", c2bAutomaticQuery);
    settings->setValue("networkQuery/isSupervised", true);
}

/**
    Requests that the processing sequence stops: raises the abort flag that
    processNext() checks, unchecks DoAll, and disables the Abort button.
*/
void c2bPdfImport::abort()
{
    m_aborted = true;
    // Unchecking DoAll emits toggled(false), re-emitted as setClipboardDisabled(false)
    ui.DoAll->setChecked(false);
    ui.buttonBox->button(QDialogButtonBox::Abort)->setEnabled(false);
}

/**
    Passes the URL of the PDFImport help page to c2bUtils::displayHelp().
*/
void c2bPdfImport::help()
{
    c2bUtils::displayHelp("http://www.molspaces.com/d_cb2bib-c2bpdfimport.php");
}

Generated by  Doxygen 1.6.0   Back to index