Logo Search packages:      
Sourcecode: cb2bib version File versions

cb2bib_utilities.cpp

/***************************************************************************
 *   Copyright (C) 2004-2009 by Pere Constans
 *   constans@molspaces.com
 *   cb2Bib version 1.3.0. Licensed under the GNU GPL version 3.
 *   See the LICENSE file that comes with this distribution.
 ***************************************************************************/
#include "cb2bib_utilities.h"

namespace c2bUtils
{

/**
 * Returns a copy of str capitalized sentence-wise.
 *
 * If isUpperCaseString(str) reports true, the text is lowercased first.
 * Afterwards the first letter of the text, and the first letter that follows
 * each '.' or ':', is converted to uppercase. All other characters are left
 * as they are.
 */
QString setCapitalization(const QString& str)
{
    QString result = isUpperCaseString(str) ? str.toLower() : str;
    bool at_sentence_start = true;
    const int length = result.length();
    for (int pos = 0; pos < length; ++pos)
    {
        const QChar ch = result.at(pos);
        if (ch == '.' || ch == ':')
        {
            // Sentence delimiter: the next letter found gets capitalized
            at_sentence_start = true;
            continue;
        }
        if (!ch.isLetter())
            continue; // Digits, blanks, etc. do not alter the pending state
        if (at_sentence_start)
            result[pos] = ch.toUpper();
        at_sentence_start = false;
    }
    return result;
}

/**
 * Returns a copy of str reduced towards plain ASCII.
 *
 * Steps, in order:
 *  - for type == KeepWords, cleanEquations() is applied to the working copy;
 *  - letters having a Unicode decomposition are replaced by the first
 *    character of that decomposition (applied at most twice per letter);
 *  - a few letters without a suitable decomposition (AE, O-slash, sharp s,
 *    L-stroke, OE and their lowercase forms) are transliterated explicitly;
 *  - for type == Collation, dash punctuation becomes a blank and the
 *    case-folded text is returned;
 *  - for type == KeepWords, matches of nonAsciiWords become blanks and the
 *    text is simplified; for any other type, matches of nonAsciiLetter are
 *    removed.
 */
QString toAscii(const QString& str, const AsciiConversion type)
{
    QString ascii = str;
    if (type == KeepWords)
        cleanEquations(ascii);

    // Escape diacritics: keep only the leading character of the decomposition
    const int length = ascii.length();
    for (int pos = 0; pos < length; ++pos)
    {
        QChar ch = ascii.at(pos);
        if (ch.isLetter() && ch.decompositionTag() != QChar::NoDecomposition)
        {
            ch = ch.decomposition().at(0);
            // The leading character may itself be decomposable; resolve once more
            if (ch.decompositionTag() != QChar::NoDecomposition)
                ch = ch.decomposition().at(0);
            ascii[pos] = ch;
        }
    }

    // Letters that are not handled by the decomposition step above
    ascii.replace(QChar(198), "AE", Qt::CaseSensitive);
    ascii.replace(QChar(216), 'O', Qt::CaseSensitive);
    ascii.replace(QChar(223), "ss", Qt::CaseSensitive);
    ascii.replace(QChar(230), "ae", Qt::CaseSensitive);
    ascii.replace(QChar(248), 'o', Qt::CaseSensitive);
    ascii.replace(QChar(321), 'L', Qt::CaseSensitive);
    ascii.replace(QChar(322), 'l', Qt::CaseSensitive);
    ascii.replace(QChar(338), "OE", Qt::CaseSensitive);
    ascii.replace(QChar(339), "oe", Qt::CaseSensitive);

    if (type == Collation)
    {
        // Dashes separate words when collating
        for (int pos = 0; pos < ascii.length(); ++pos)
            if (ascii.at(pos).category() == QChar::Punctuation_Dash)
                ascii[pos] = ' ';
        return ascii.toCaseFolded();
    }

    if (type == KeepWords)
    {
        // Blank out possibly remaining non-ASCII characters, keeping word boundaries
        ascii.replace(nonAsciiWords, " ");
        return ascii.simplified();
    }

    // Cleanup: delete possibly remaining non-ASCII characters
    ascii.remove(nonAsciiLetter);
    return ascii;
}

/**
 * Escapes, in place, common Extended Latin, Greek and math characters of str
 * to their LaTeX/BibTeX notation, and returns a reference to str.
 *
 * In addition, " &" is escaped to " \&" and the Unicode minus sign (8722) is
 * replaced by an ASCII hyphen.
 *
 * Fix with respect to the previous revision: the escape for QChar(376)
 * (Y with diaeresis) lacked its closing brace, producing unbalanced BibTeX
 * output that bibToC2b() could not convert back.
 */
QString& c2bToBib(QString& str)
{
    struct UnicodeToTex
    {
        unsigned short code; // Unicode code point of the character to escape
        const char* tex;     // LaTeX replacement text
    };
    static const UnicodeToTex escapes[] =
    {
        // Common Extended Latin characters
        { 183, "$\\cdot$" },
        { 192, "{\\`A}" },
        { 193, "{\\'A}" },
        { 194, "{\\^A}" },
        { 195, "{\\~A}" },
        { 196, "{\\\"A}" },
        { 197, "{\\AA{}}" },
        { 198, "{\\AE{}}" },
        { 199, "{\\c{C}}" },
        { 200, "{\\`E}" },
        { 201, "{\\'E}" },
        { 202, "{\\^E}" },
        { 203, "{\\\"E}" },
        { 204, "{\\`I}" },
        { 205, "{\\'I}" },
        { 206, "{\\^I}" },
        { 207, "{\\\"I}" },
        { 209, "{\\~N}" },
        { 210, "{\\`O}" },
        { 211, "{\\'O}" },
        { 212, "{\\^O}" },
        { 213, "{\\~O}" },
        { 214, "{\\\"O}" },
        { 216, "{\\O}" },
        { 217, "{\\`U}" },
        { 218, "{\\'U}" },
        { 219, "{\\^U}" },
        { 220, "{\\\"U}" },
        { 221, "{\\'Y}" },
        { 223, "{\\ss}" },
        { 224, "{\\`a}" },
        { 225, "{\\'a}" },
        { 226, "{\\^a}" },
        { 227, "{\\~a}" },
        { 228, "{\\\"a}" },
        { 229, "{\\aa{}}" },
        { 230, "{\\ae{}}" },
        { 231, "{\\c{c}}" },
        { 232, "{\\`e}" },
        { 233, "{\\'e}" },
        { 234, "{\\^e}" },
        { 235, "{\\\"e}" },
        { 236, "{\\`i}" },
        { 237, "{\\'i}" },
        { 238, "{\\^i}" },
        { 239, "{\\\"i}" },
        { 241, "{\\~n}" },
        { 242, "{\\`o}" },
        { 243, "{\\'o}" },
        { 244, "{\\^o}" },
        { 245, "{\\~o}" },
        { 246, "{\\\"o}" },
        { 248, "{\\o}" },
        { 249, "{\\`u}" },
        { 250, "{\\'u}" },
        { 251, "{\\^u}" },
        { 252, "{\\\"u}" },
        { 253, "{\\'y}" },
        { 255, "{\\\"y}" },
        { 263, "{\\'c}" },
        { 268, "{\\v{C}}" },
        { 269, "{\\v{c}}" },
        { 321, "{\\L}" },
        { 322, "{\\l}" },
        { 323, "{\\'N}" },
        { 324, "{\\'n}" },
        { 338, "{\\OE}" },
        { 339, "{\\oe}" },
        { 352, "{\\v{S}}" },
        { 353, "{\\v{s}}" },
        { 376, "{\\\"Y}" },   // Closing brace was missing here
        { 381, "{\\v{Z}}" },
        { 382, "{\\v{z}}" },
        // Common Greek and math
        { 913, "$\\Alpha$" }, // Some uppercases might require engrec package
        { 914, "$\\Beta$" },
        { 915, "$\\Gamma$" },
        { 916, "$\\Delta$" },
        { 917, "$\\Epsilon$" },
        { 918, "$\\Zeta$" },
        { 919, "$\\Eta$" },
        { 920, "$\\Theta$" },
        { 921, "$\\Iota$" },
        { 922, "$\\Kappa$" },
        { 923, "$\\Lambda$" },
        { 924, "$\\Mu$" },
        { 925, "$\\Nu$" },
        { 926, "$\\Xi$" },
        { 927, "$\\Omicron$" },
        { 928, "$\\Pi$" },
        { 929, "$\\Rho$" },
        { 931, "$\\Sigma$" },
        { 932, "$\\Tau$" },
        { 933, "$\\Upsilon$" },
        { 934, "$\\Phi$" },
        { 935, "$\\Chi$" },
        { 936, "$\\Psi$" },
        { 937, "$\\Omega$" },
        { 945, "$\\alpha$" },
        { 946, "$\\beta$" },
        { 947, "$\\gamma$" },
        { 948, "$\\delta$" },
        { 949, "$\\varepsilon$" },
        { 950, "$\\zeta$" },
        { 951, "$\\eta$" },
        { 952, "$\\theta$" },
        { 953, "$\\iota$" },
        { 954, "$\\kappa$" },
        { 955, "$\\lambda$" },
        { 956, "$\\mu$" },
        { 957, "$\\nu$" },
        { 958, "$\\xi$" },
        { 959, "$\\omicron$" },
        { 960, "$\\pi$" },
        { 961, "$\\rho$" },
        { 962, "$\\varsigma$" },
        { 963, "$\\sigma$" },
        { 964, "$\\tau$" },
        { 965, "$\\upsilon$" },
        { 966, "$\\phi$" },
        { 967, "$\\chi$" },
        { 968, "$\\psi$" },
        { 969, "$\\omega$" },
        { 977, "$\\vartheta$" },
        { 981, "$\\varphi$" },
        { 982, "$\\varpi$" },
        { 989, "$\\digamma$" },
        { 1008, "$\\varkappa$" },
        { 1009, "$\\varrho$" },
        { 1013, "$\\epsilon$" },
        { 8706, "$\\partial$" },
        { 8722, "-" },        // Unicode minus sign to ASCII hyphen
        { 8734, "$\\infty$" }
    };
    static const int escape_count = static_cast<int>(sizeof(escapes) / sizeof(escapes[0]));

    // Ampersands preceded by a blank are escaped before any character substitution
    str.replace(" &", " \\&");
    // Replacements are applied in table order; none of the inserted LaTeX
    // sequences contains a character that a later entry would escape again
    for (int i = 0; i < escape_count; ++i)
        str.replace(QChar(escapes[i].code), escapes[i].tex);
    return str;
}

/**
 * Converts, in place, the LaTeX escape sequences of str that belong to the
 * implemented subset back to Unicode characters, and returns a reference
 * to str.
 *
 * "\&" is unescaped first. Then commands written as \cmd{arg} (a one or two
 * character command followed by a braced argument of up to two characters)
 * are rewritten to the {\cmdarg} form used by the tables below. The diacritic
 * and symbol tables are only applied when hasLatexDiacritic, respectively
 * hasLatexSymbol, find a match in str.
 */
QString& bibToC2b(QString& str)
{
    struct TexToUnicode
    {
        const char* tex;     // LaTeX sequence, after normalization to {\...} form
        unsigned short code; // Unicode code point that replaces it
    };
    // Double-braced forms precede their single-braced counterparts, so that
    // input such as {\c{C}}, normalized to {{\cC}}, is consumed as a whole
    static const TexToUnicode diacritics[] =
    {
        { "{\\`A}", 192 },
        { "{\\'A}", 193 },
        { "{\\^A}", 194 },
        { "{\\~A}", 195 },
        { "{\\\"A}", 196 },
        { "{{\\AA}}", 197 },
        { "{{\\AE}}", 198 },
        { "{\\AA}", 197 },    // From {\AA{}}
        { "{\\AE}", 198 },    // From {\AE{}}
        { "{{\\cC}}", 199 },  // From {\c{C}}
        { "{\\cC}", 199 },    // From \c{C}
        { "{\\`E}", 200 },
        { "{\\'E}", 201 },
        { "{\\^E}", 202 },
        { "{\\\"E}", 203 },
        { "{\\`I}", 204 },
        { "{\\'I}", 205 },
        { "{\\^I}", 206 },
        { "{\\\"I}", 207 },
        { "{\\~N}", 209 },
        { "{\\`O}", 210 },
        { "{\\'O}", 211 },
        { "{\\^O}", 212 },
        { "{\\~O}", 213 },
        { "{\\\"O}", 214 },
        { "{\\O}", 216 },
        { "{\\`U}", 217 },
        { "{\\'U}", 218 },
        { "{\\^U}", 219 },
        { "{\\\"U}", 220 },
        { "{\\'Y}", 221 },
        { "{\\ss}", 223 },
        { "{\\`a}", 224 },
        { "{\\'a}", 225 },
        { "{\\^a}", 226 },
        { "{\\~a}", 227 },
        { "{\\\"a}", 228 },
        { "{{\\aa}}", 229 },
        { "{{\\ae}}", 230 },
        { "{\\aa}", 229 },    // From {\aa{}}
        { "{\\ae}", 230 },    // From {\ae{}}
        { "{{\\cc}}", 231 },  // From {\c{c}}
        { "{\\cc}", 231 },    // From \c{c}
        { "{\\`e}", 232 },
        { "{\\'e}", 233 },
        { "{\\^e}", 234 },
        { "{\\\"e}", 235 },
        { "{\\`i}", 236 },
        { "{\\'i}", 237 },
        { "{\\^i}", 238 },
        { "{\\\"i}", 239 },
        { "{\\`\\i}", 236 },  // Accents over the dotless i
        { "{\\'\\i}", 237 },
        { "{\\^\\i}", 238 },
        { "{\\\"\\i}", 239 },
        { "{\\~n}", 241 },
        { "{\\`o}", 242 },
        { "{\\'o}", 243 },
        { "{\\^o}", 244 },
        { "{\\~o}", 245 },
        { "{\\\"o}", 246 },
        { "{\\o}", 248 },
        { "{\\`u}", 249 },
        { "{\\'u}", 250 },
        { "{\\^u}", 251 },
        { "{\\\"u}", 252 },
        { "{\\'y}", 253 },
        { "{\\\"y}", 255 },
        { "{\\'c}", 263 },
        { "{{\\vC}}", 268 },  // From {\v{C}}
        { "{\\vC}", 268 },
        { "{{\\vc}}", 269 },  // From {\v{c}}
        { "{\\vc}", 269 },
        { "{\\L}", 321 },
        { "{\\l}", 322 },
        { "{\\'N}", 323 },
        { "{\\'n}", 324 },
        { "{\\OE}", 338 },
        { "{\\oe}", 339 },
        { "{{\\vS}}", 352 },  // From {\v{S}}
        { "{\\vS}", 352 },
        { "{{\\vs}}", 353 },  // From {\v{s}}
        { "{\\vs}", 353 },
        { "{\\\"Y}", 376 },
        { "{{\\vZ}}", 381 },  // From {\v{Z}}
        { "{\\vZ}", 381 },
        { "{{\\vz}}", 382 },  // From {\v{z}}
        { "{\\vz}", 382 }
    };
    static const TexToUnicode symbols[] =
    {
        { "$\\cdot$", 183 },
        { "$\\Alpha$", 913 },
        { "$\\Beta$", 914 },
        { "$\\Gamma$", 915 },
        { "$\\Delta$", 916 },
        { "$\\Epsilon$", 917 },
        { "$\\Zeta$", 918 },
        { "$\\Eta$", 919 },
        { "$\\Theta$", 920 },
        { "$\\Iota$", 921 },
        { "$\\Kappa$", 922 },
        { "$\\Lambda$", 923 },
        { "$\\Mu$", 924 },
        { "$\\Nu$", 925 },
        { "$\\Xi$", 926 },
        { "$\\Omicron$", 927 },
        { "$\\Pi$", 928 },
        { "$\\Rho$", 929 },
        { "$\\Sigma$", 931 },
        { "$\\Tau$", 932 },
        { "$\\Upsilon$", 933 },
        { "$\\Phi$", 934 },
        { "$\\Chi$", 935 },
        { "$\\Psi$", 936 },
        { "$\\Omega$", 937 },
        { "$\\alpha$", 945 },
        { "$\\beta$", 946 },
        { "$\\gamma$", 947 },
        { "$\\delta$", 948 },
        { "$\\varepsilon$", 949 },
        { "$\\zeta$", 950 },
        { "$\\eta$", 951 },
        { "$\\theta$", 952 },
        { "$\\iota$", 953 },
        { "$\\kappa$", 954 },
        { "$\\lambda$", 955 },
        { "$\\mu$", 956 },
        { "$\\nu$", 957 },
        { "$\\xi$", 958 },
        { "$\\omicron$", 959 },
        { "$\\pi$", 960 },
        { "$\\rho$", 961 },
        { "$\\sigmaf$", 962 },
        { "$\\varsigma$", 962 },  // Equal to \sigmaf
        { "$\\sigma$", 963 },
        { "$\\tau$", 964 },
        { "$\\upsilon$", 965 },
        { "$\\phi$", 966 },
        { "$\\chi$", 967 },
        { "$\\psi$", 968 },
        { "$\\omega$", 969 },
        { "$\\vartheta$", 977 },
        { "$\\varphi$", 981 },
        { "$\\varpi$", 982 },
        { "$\\digamma$", 989 },
        { "$\\varkappa$", 1008 },
        { "$\\varrho$", 1009 },
        { "$\\epsilon$", 1013 },
        { "$\\partial$", 8706 },
        { "$\\infty$", 8734 }
    };
    static const int diacritic_count = static_cast<int>(sizeof(diacritics) / sizeof(diacritics[0]));
    static const int symbol_count = static_cast<int>(sizeof(symbols) / sizeof(symbols[0]));

    // Unescape TeX special characters
    str.replace("\\&", "&");
    // From \LaTeX{} syntax to {\LaTeX} for the implemented subset
    str.replace(QRegExp("\\\\(.{1,2})\\{(.{0,2})\\}"), "{\\\\1\\2}");

    // Skip each table entirely when its guard expression finds nothing to convert
    if (hasLatexDiacritic.indexIn(str) >= 0)
    {
        for (int i = 0; i < diacritic_count; ++i)
            str.replace(diacritics[i].tex, QChar(diacritics[i].code));
    }
    if (hasLatexSymbol.indexIn(str) >= 0)
    {
        for (int i = 0; i < symbol_count; ++i)
            str.replace(symbols[i].tex, QChar(symbols[i].code));
    }
    return str;
}

} // namespace c2bUtils

Generated by  Doxygen 1.6.0   Back to index