QJson home page
/builddir/build/BUILD/qjson-0.8.1/src/json_scanner.cpp
00001 /* This file is part of QJson
00002  *
00003  * Copyright (C) 2008 Flavio Castelli <flavio.castelli@gmail.com>
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Lesser General Public
00007  * License version 2.1, as published by the Free Software Foundation.
00008  * 
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Lesser General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Lesser General Public License
00016  * along with this library; see the file COPYING.LIB.  If not, write to
00017  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  */
00020 
00021 #include "qjson_debug.h"
00022 #include "json_scanner.h"
00023 #include "json_parser.hh"
00024 
00025 #include <ctype.h>
00026 
00027 #include <QtCore/QDebug>
00028 #include <QtCore/QRegExp>
00029 
00030 #include <cassert>
00031 
00032 bool ishexnstring(const QString& string) {
00033   for (int i = 0; i < string.length(); i++) {
00034     if (isxdigit(string[i] == 0))
00035       return false;
00036   }
00037   return true;
00038 }
00039 
00040 JSonScanner::JSonScanner(QIODevice* io)
00041   : m_allowSpecialNumbers(false),
00042     m_io (io)
00043 {
00044   m_quotmarkClosed = true;
00045   m_quotmarkCount = 0;
00046 }
00047 
00048 void JSonScanner::allowSpecialNumbers(bool allow) {
00049   m_allowSpecialNumbers = allow;
00050 }
00051 
00052 static QString unescape( const QByteArray& ba, bool* ok ) {
00053   assert( ok );
00054   *ok = false;
00055   QString res;
00056   QByteArray seg;
00057   bool bs = false;
00058   for ( int i = 0, size = ba.size(); i < size; ++i ) {
00059     const char ch = ba[i];
00060     if ( !bs ) {
00061       if ( ch == '\\' )
00062         bs = true;
00063       else
00064         seg += ch;
00065     } else {
00066       bs = false;
00067       switch ( ch ) {
00068         case 'b':
00069           seg += '\b';
00070           break;
00071         case 'f':
00072           seg += '\f';
00073           break;
00074         case 'n':
00075           seg += '\n';
00076           break;
00077         case 'r':
00078           seg += '\r';
00079           break;
00080         case 't':
00081           seg += '\t';
00082           break;
00083         case 'u':
00084         {
00085           res += QString::fromUtf8( seg );
00086           seg.clear();
00087 
00088           if ( i > size - 5 ) {
00089             //error
00090             return QString();
00091           }
00092 
00093           const QString hex_digit1 = QString::fromUtf8( ba.mid( i + 1, 2 ) );
00094           const QString hex_digit2 = QString::fromUtf8( ba.mid( i + 3, 2 ) );
00095           i += 4;
00096 
00097           if ( !ishexnstring( hex_digit1 ) || !ishexnstring( hex_digit2 ) ) {
00098             qCritical() << "Not an hex string:" << hex_digit1 << hex_digit2;
00099             return QString();
00100           }
00101           bool hexOk;
00102           const ushort hex_code1 = hex_digit1.toShort( &hexOk, 16 );
00103           if (!hexOk) {
00104             qCritical() << "error converting hex value to short:" << hex_digit1;
00105             return QString();
00106           }
00107           const ushort hex_code2 = hex_digit2.toShort( &hexOk, 16 );
00108           if (!hexOk) {
00109             qCritical() << "error converting hex value to short:" << hex_digit2;
00110             return QString();
00111           }
00112 
00113           res += QChar(hex_code2, hex_code1);
00114           break;
00115         }
00116         case '\\':
00117           seg  += '\\';
00118           break;
00119         default:
00120           seg += ch;
00121           break;
00122       }
00123     }
00124   }
00125   res += QString::fromUtf8( seg );
00126   *ok = true;
00127   return res;
00128 }
00129 
00130 int JSonScanner::yylex(YYSTYPE* yylval, yy::location *yylloc)
00131 {
00132   char ch;
00133 
00134   if (!m_io->isOpen()) {
00135     qCritical() << "JSonScanner::yylex - io device is not open";
00136     return -1;
00137   }
00138 
00139   yylloc->step();
00140 
00141   do {
00142     bool ret;
00143     if (m_io->atEnd()) {
00144       qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::END";
00145       return yy::json_parser::token::END;
00146     }
00147     else
00148       ret = m_io->getChar(&ch);
00149 
00150     if (!ret) {
00151       qCritical() << "JSonScanner::yylex - error reading from io device";
00152       return -1;
00153     }
00154 
00155     qjsonDebug() << "JSonScanner::yylex - got |" << ch << "|";
00156     yylloc->columns();
00157 
00158     if (ch == '\n' || ch == '\r')
00159       yylloc->lines();
00160   } while (m_quotmarkClosed && (isspace(ch) != 0));
00161 
00162   if (m_quotmarkClosed && ((ch == 't') || (ch == 'T'))) {
00163     const QByteArray buf = m_io->peek(3).toLower();
00164     if (buf == "rue") {
00165       m_io->read (3);
00166       yylloc->columns(3);
00167       qjsonDebug() << "JSonScanner::yylex - TRUE_VAL";
00168       return yy::json_parser::token::TRUE_VAL;
00169     }
00170   }
00171   else if (m_quotmarkClosed && ((ch == 'n') || (ch == 'N'))) {
00172     const QByteArray buf = m_io->peek(3).toLower();
00173     if (buf == "ull") {
00174       m_io->read (3);
00175       yylloc->columns(3);
00176       qjsonDebug() << "JSonScanner::yylex - NULL_VAL";
00177       return yy::json_parser::token::NULL_VAL;
00178     } else if (buf.startsWith("an") && m_allowSpecialNumbers) {
00179       m_io->read(2);
00180       yylloc->columns(2);
00181       qjsonDebug() << "JSonScanner::yylex - NAN_VAL";
00182       return yy::json_parser::token::NAN_VAL;
00183 
00184     }
00185   }
00186   else if (m_quotmarkClosed && ((ch == 'f') || (ch == 'F'))) {
00187     // check false value
00188     const QByteArray buf = m_io->peek(4).toLower();
00189     if (buf.length() == 4) {
00190       if (buf == "alse") {
00191         m_io->read (4);
00192         yylloc->columns(4);
00193         qjsonDebug() << "JSonScanner::yylex - FALSE_VAL";
00194         return yy::json_parser::token::FALSE_VAL;
00195       }
00196     }
00197   }
00198   else if (m_quotmarkClosed && ((ch == 'e') || (ch == 'E'))) {
00199     QByteArray ret(1, ch);
00200     const QByteArray buf = m_io->peek(1);
00201     if (!buf.isEmpty()) {
00202       if ((buf[0] == '+' ) || (buf[0] == '-' )) {
00203         ret += m_io->read (1);
00204         yylloc->columns();
00205       }
00206     }
00207     *yylval = QVariant(QString::fromUtf8(ret));
00208     return yy::json_parser::token::E;
00209   }
00210   else if (m_allowSpecialNumbers && m_quotmarkClosed && ((ch == 'I') || (ch == 'i'))) {
00211     QByteArray ret(1, ch);
00212     const QByteArray buf = m_io->peek(7);
00213     if (buf == "nfinity") {
00214       m_io->read(7);
00215       yylloc->columns(7);
00216       qjsonDebug() << "JSonScanner::yylex - INFINITY_VAL";
00217       return yy::json_parser::token::INFINITY_VAL;
00218     }
00219   }
00220 
00221   if (ch != '"' && !m_quotmarkClosed) {
00222     // we're inside a " " block
00223     QByteArray raw;
00224     raw += ch;
00225     char prevCh = ch;
00226     bool escape_on = (ch == '\\') ? true : false;
00227 
00228     while ( true ) {
00229       char nextCh;
00230       qint64 ret = m_io->peek(&nextCh, 1);
00231       if (ret != 1) {
00232         if (m_io->atEnd())
00233           return yy::json_parser::token::END;
00234         else
00235           return -1;
00236       } else if ( !escape_on && nextCh == '\"' ) {
00237         bool ok;
00238         const QString str = unescape( raw, &ok );
00239         *yylval = ok ? str : QString();
00240         return ok ? yy::json_parser::token::STRING : -1;
00241       }
00242 #if 0
00243       if ( prevCh == '\\' && nextCh != '"' && nextCh != '\\' && nextCh != '/' &&
00244            nextCh != 'b' && nextCh != 'f' && nextCh != 'n' &&
00245            nextCh != 'r' && nextCh != 't' && nextCh != 'u') {
00246         qjsonDebug() << "Just read" << nextCh;
00247         qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence";
00248         return -1;
00249        }
00250 #endif
00251       m_io->read(1); // consume
00252       raw += nextCh;
00253       prevCh = nextCh;
00254       if (escape_on)
00255         escape_on = false;
00256       else
00257         escape_on = (prevCh == '\\') ? true : false;
00258 #if 0
00259       if (nextCh == '\\') {
00260         char buf;
00261         if (m_io->getChar (&buf)) {
00262           yylloc->columns();
00263           if (((buf != '"') && (buf != '\\') && (buf != '/') &&
00264               (buf != 'b') && (buf != 'f') && (buf != 'n') &&
00265               (buf != 'r') && (buf != 't') && (buf != 'u'))) {
00266                 qjsonDebug() << "Just read" << buf;
00267                 qjsonDebug() << "JSonScanner::yylex - error decoding escaped sequence";
00268                 return -1;
00269           }
00270         } else {
00271           qCritical() << "JSonScanner::yylex - error decoding escaped sequence : io error";
00272           return -1;
00273         }
00274       }
00275 #endif
00276     }
00277   }
00278   else if (isdigit(ch) != 0 && m_quotmarkClosed) {
00279     bool ok;
00280     QByteArray numArray = QByteArray::fromRawData( &ch, 1 * sizeof(char) );
00281     qulonglong number = numArray.toULongLong(&ok);
00282     if (!ok) {
00283       //This shouldn't happen
00284       qCritical() << "JSonScanner::yylex - error while converting char to ulonglong, returning -1";
00285       return -1;
00286     }
00287     if (number == 0) {
00288       // we have to return immediately otherwise numbers like
00289       // 2.04 will be converted to 2.4
00290       *yylval = QVariant(number);
00291       qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DIGIT";
00292       return yy::json_parser::token::DIGIT;
00293     }
00294 
00295     char nextCh;
00296     qint64 ret = m_io->peek(&nextCh, 1);
00297     while (ret == 1 && isdigit(nextCh)) {
00298       m_io->read(1); //consume
00299       yylloc->columns(1);
00300       numArray = QByteArray::fromRawData( &nextCh, 1 * sizeof(char) );
00301       number = number * 10 + numArray.toULongLong(&ok);
00302       if (!ok) {
00303         //This shouldn't happen
00304         qCritical() << "JSonScanner::yylex - error while converting char to ulonglong, returning -1";
00305         return -1;
00306       }
00307       ret = m_io->peek(&nextCh, 1);
00308     }
00309 
00310     *yylval = QVariant(number);
00311     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DIGIT";
00312     return yy::json_parser::token::DIGIT;
00313   }
00314   else if (isalnum(ch) != 0) {
00315     *yylval = QVariant(QString(QChar::fromLatin1(ch)));
00316     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::WORD ("
00317              << ch << ")";
00318     return yy::json_parser::token::STRING;
00319   }
00320   else if (ch == ':') {
00321     // set yylval
00322     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COLON";
00323     return yy::json_parser::token::COLON;
00324   }
00325   else if (ch == '"') {
00326     // yy::json_parser::token::QUOTMARK (")
00327 
00328     // set yylval
00329     m_quotmarkCount++;
00330     if (m_quotmarkCount %2 == 0) {
00331       m_quotmarkClosed = true;
00332       m_quotmarkCount = 0;
00333       qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKCLOSE";
00334       return yy::json_parser::token::QUOTMARKCLOSE;
00335     }
00336     else {
00337       m_quotmarkClosed = false;
00338       qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::QUOTMARKOPEN";
00339       return yy::json_parser::token::QUOTMARKOPEN;
00340     }
00341   }
00342   else if (ch == ',') {
00343     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::COMMA";
00344     return yy::json_parser::token::COMMA;
00345   }
00346   else if (ch == '.') {
00347     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::DOT";
00348     return yy::json_parser::token::DOT;
00349   }
00350   else if (ch == '-') {
00351     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::MINUS";
00352     return yy::json_parser::token::MINUS;
00353   }
00354   else if (ch == '[') {
00355     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_OPEN";
00356     return yy::json_parser::token::SQUARE_BRACKET_OPEN;
00357   }
00358   else if (ch == ']') {
00359     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::SQUARE_BRACKET_CLOSE";
00360     return yy::json_parser::token::SQUARE_BRACKET_CLOSE;
00361   }
00362   else if (ch == '{') {
00363     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_OPEN";
00364     return yy::json_parser::token::CURLY_BRACKET_OPEN;
00365   }
00366   else if (ch == '}') {
00367     qjsonDebug() << "JSonScanner::yylex - yy::json_parser::token::CURLY_BRACKET_CLOSE";
00368     return yy::json_parser::token::CURLY_BRACKET_CLOSE;
00369   }
00370 
00371   //unknown char!
00372   //TODO yyerror?
00373   qCritical() << "JSonScanner::yylex - unknown char, returning -1";
00374   return -1;
00375 }
00376 
00377 

SourceForge Logo hosts this site. Send comments to:
QJson Developers