Logo Search packages:      
Sourcecode: cb2bib version File versions

coreBibParser.cpp

/***************************************************************************
 *   Copyright (C) 2004-2009 by Pere Constans
 *   constans@molspaces.com
 *   cb2Bib version 1.3.0. Licensed under the GNU GPL version 3.
 *   See the LICENSE file that comes with this distribution.
 ***************************************************************************/
#include "coreBibParser.h"


coreBibParser::coreBibParser(QObject* parento) : QObject(parento)
{
    // Settings object, read later by the BibTeX writing and page formatting routines
    _settingsP = settings::instance();

    // Fill the field and type name tables
    setFields();
    setTypes();

    // Must come after setFields(): the regular expressions are
    // built from the list of field names
    setRegularExpressions();
}

// The destructor body is empty; no explicit cleanup is performed here
coreBibParser::~coreBibParser()
{}


QString coreBibParser::referenceToFomattedBibTeX(const bibReference& ref) const
{
    // Builds the BibTeX entry for ref, applying the user formatting settings:
    //   - cb2Bib/ConvertReferenceToLaTeX: pass each value through c2bUtils::c2bToBib
    //   - cb2Bib/UseDoubleBraces: add a second pair of braces to title and booktitle,
    //     unless the value already starts with '{' and ends with '}'
    //   - cb2Bib/PostprocessMonth: write month without enclosing braces
    // Only non-empty fields are written, in the order of _bibliographic_fields
    const bool to_latex = _settingsP->value("cb2Bib/ConvertReferenceToLaTeX").toBool();
    const bool bare_month = _settingsP->value("cb2Bib/PostprocessMonth").toBool();
    const bool double_braces = _settingsP->value("cb2Bib/UseDoubleBraces").toBool();
    const QRegExp already_braced("^\\{.+\\}$");

    QString bibtex("@" + ref.typeName + "{" + ref.citeidName);
    for (QStringList::const_iterator fi = _bibliographic_fields.begin(); fi != _bibliographic_fields.end(); ++fi)
    {
        const QString name = *fi;
        QString text = ref.value(name);
        if (text.isEmpty())
            continue;
        if (to_latex)
            c2bUtils::c2bToBib(text);

        const bool is_title = (name == "title" || name == "booktitle");
        const bool is_month = (name == "month");

        // Inner pair of braces, for titles only
        if (is_title && double_braces && !text.contains(already_braced))
            text = '{' + text + '}';
        // Outer pair of braces, for everything but a postprocessed month
        if (!(is_month && bare_month))
            text = '{' + text + '}';

        // Field names are padded with blanks up to 12 characters
        bibtex += ",\n" + name + QString().fill(' ', 12 - name.length()) + " = " + text;
    }
    bibtex += "\n}\n";
    return bibtex;
}

QString coreBibParser::referenceToBibTeX(const bibReference& ref) const
{
    // Plain BibTeX dump of ref: each non-empty field is written as
    // 'name = {value}', with no LaTeX conversion nor per-field formatting
    // NOTE(review): unlike referenceToFomattedBibTeX(), ref.citeidName is not
    // written after the opening brace - confirm this is intended
    QString bibtex("@" + ref.typeName + "{");
    for (QStringList::const_iterator fi = _bibliographic_fields.begin(); fi != _bibliographic_fields.end(); ++fi)
    {
        const QString text(ref.value(*fi));
        if (text.isEmpty())
            continue;
        const QString name(*fi);
        // Field names are padded with blanks up to 12 characters
        bibtex += ",\n" + name + QString().fill(' ', 12 - name.length()) + " = {" + text + "}";
    }
    bibtex += "\n}\n";
    return bibtex;
}

QString coreBibParser::adjacentNumbers(const QString& numbers) const
{
    // Normalizes a number range. Written for pages, but also used for
    // multiple volume, number and year values
    // Returns the range with its parts joined by the separator set in
    // cb2Bib/PageNumberSeparator, or by " - " if that setting is empty

    // Reduce the input to dash-separated tokens
    QString range(numbers);
    range.replace(c2bUtils::nonLetter, " ");
    range = range.simplified();
    range.replace(' ', '-');

    QRegExp count_form("^(\\d+)-(\\d+)-*pp$");
    QRegExp short_form("^(\\d+)-(\\d+)$");
    if (count_form.indexIn(range) != -1)
    {
        // First page plus a page count, eg, 123-7pp  ->  123 - 129
        const QString first(count_form.cap(1));
        const int extra = count_form.cap(2).toInt() - 1;
        range = first;
        if (extra > 0)
            range += '-' + QString::number(first.toInt() + extra);
    }
    else if (short_form.indexIn(range) != -1)
    {
        // Abbreviated last page, eg, 123-7  ->  123 - 127
        const QString first(short_form.cap(1));
        QString last(short_form.cap(2));
        const int missing = first.length() - last.length();
        if (missing > 0)
            last = first.left(missing) + last; // Borrow the leading digits of the first page
        range = first + '-' + last;
    }

    const QString separator(_settingsP->value("cb2Bib/PageNumberSeparator").toString());
    range.replace('-', separator.isEmpty() ? QString(" - ") : separator);
    return range;
}

void coreBibParser::setFields()
{
    // Known BibTeX field names. The order given here is the order in which
    // referenceToBibTeX() and referenceToFomattedBibTeX() write the fields
    static const char* const field_names[] =
    {
        "title", "author", "journal", "booktitle", "series", "chapter", "pages",
        "volume", "number", "edition", "institution", "organization", "school",
        "address", "month", "year", "editor", "publisher", "abstract", "keywords",
        "isbn", "issn", "doi", "eprint", "file", "url", "note", "annote"
    };
    const int field_count = static_cast<int>(sizeof(field_names) / sizeof(field_names[0]));
    for (int i = 0; i < field_count; ++i)
        _bibliographic_fields << field_names[i];

    // Keep also a sorted copy of the list
    _sorted_bibliographic_fields = _bibliographic_fields;
    qSort(_sorted_bibliographic_fields);
}

void coreBibParser::setTypes()
{
    // Known BibTeX entry types. The list starts with an empty name
    static const char* const type_names[] =
    {
        "", "article", "book", "booklet", "conference", "inbook", "incollection",
        "inproceedings", "manual", "mastersthesis", "misc", "periodical",
        "phdthesis", "proceedings", "techreport", "unpublished"
    };
    const int type_count = static_cast<int>(sizeof(type_names) / sizeof(type_names[0]));
    for (int i = 0; i < type_count; ++i)
        _bibliographic_types << type_names[i];
}

void coreBibParser::setRegularExpressions()
{
    _field_re = QRegExp("\\b(" + _bibliographic_fields.join("|") + ")\\b");
    _bib_begin_re = QRegExp("@\\w+\\s*\\{");
    _bib_begin0_re = QRegExp("^\\s*@\\w+\\s*\\{");
    _bib_begin1_re = QRegExp("[\\r\\n]\\s*@\\w+\\s*\\{");
    _bib_key_re = QRegExp("^@\\w+\\s*\\{\\s*([\\w:\\.-]+),");
    _bib_type_re = QRegExp("^@(\\w+)\\s*\\{");

    // List of regular expressions for extracting bib fields
    for (int i = 0; i < _bibliographic_fields.count(); ++i)
    {
        QRegExp bf("\\b" + _bibliographic_fields.at(i) + "\\s*=\\s*[\\{\"]", Qt::CaseInsensitive);
        // Consider non-braces case (eg. 'year = 2000,')
        QRegExp bfNB("\\b" + _bibliographic_fields.at(i) + "\\s*=\\s*(\\w*)\\s*,", Qt::CaseInsensitive);
        // Exception: Process macros for month, to be able to read cb2Bib writing
        if (_bibliographic_fields.at(i) == "month")
        {
            bf.setPattern("\\b" + _bibliographic_fields.at(i) + "\\s*=\\s*[\\{]");
            bfNB.setPattern("\\b" + _bibliographic_fields.at(i) + "\\s*=\\s*([\\w\\s~#\"]*)\\s*,");
        }
        bf.setMinimal(true);
        _bib_fields_re.append(bf);
        bfNB.setMinimal(true);
        _bib_fields_nb_re.append(bfNB);
    }
}

void coreBibParser::initReferenceParsing(const QString& dir, const QStringList& fields, bibReference* ref)
{
    setReferenceParsingDir(dir);
    // Init file parsing for given fields
    ref->clearFields();
    ref->clearReference();
    ref->bib_fieldList = fields;
    for (int i = 0; i < fields.count(); ++i)
    {
        QRegExp bf("\\b" + fields.at(i) + "\\s*=\\s*[\\{\"]", Qt::CaseInsensitive);
        // Consider non-braces case (eg. 'year = 2000,')
        QRegExp bfNB("\\b" + fields.at(i) + "\\s*=\\s*(\\w*)\\s*,", Qt::CaseInsensitive);
        // Exception: Process macros for month, to be able to read cb2Bib writing
        if (fields.at(i) == "month")
        {
            bf.setPattern("\\b" + fields.at(i) + "\\s*=\\s*[\\{]");
            bfNB.setPattern("\\b" + fields.at(i) + "\\s*=\\s*([\\w\\s~#\"]*)\\s*,");
        }
        bf.setMinimal(true);
        ref->_bib_fields_re.append(bf);
        bfNB.setMinimal(true);
        ref->_bib_fields_nb_re.append(bfNB);
    }
}

bibReference coreBibParser::wholeReference(const QString& str)
{
    // Parses the reference located by referenceStarts() in str, extracting
    // all known fields. Returns an empty bibReference if none is found
    // citeidName and positionValue are skipped, they are not needed here
    // Values are not trimmed, this is done later in bibParser::parse()
    bibReference ref;
    int start = referenceStarts(str);
    if (start < 0)
        return ref;

    QString entry(referenceAt(str, &start));
    c2bUtils::fullBibToC2b(entry);
    entry = entry.simplified();

    _bib_type_re.indexIn(entry);
    ref.typeName = _bib_type_re.cap(1).toLower();

    // End the entry with ',}' so that a bare value in the last field
    // is also followed by the comma the bare-value expressions expect
    entry.replace(entry.length() - 1, 1, ",}");

    QString value;
    const int field_count = _bib_fields_re.count();
    for (int i = 0; i < field_count; ++i)
    {
        // Delimited value first
        QRegExp& delimited = _bib_fields_re[i];
        const int at = delimited.indexIn(entry);
        if (at > 0)
        {
            if (c2bUtils::inBraces(at + delimited.matchedLength(), entry, &value))
                ref[_bibliographic_fields.at(i)] = value;
            continue;
        }
        // Otherwise try the bare value (eg. 'year = 2000,')
        QRegExp& bare = _bib_fields_nb_re[i];
        if (bare.indexIn(entry) > -1)
            ref[_bibliographic_fields.at(i)] = bare.cap(1);
    }
    return ref;
}

Generated by  Doxygen 1.6.0   Back to index