@@ -69,15 +69,13 @@ SOFTWARE.
6969#include " Poco/JSON/ParseHandler.h"
7070#include " Poco/Dynamic/Var.h"
7171#include " Poco/StreamTokenizer.h"
72+ #include < string>
7273
7374
7475namespace Poco {
7576namespace JSON {
7677
7778
78- class Source ;
79-
80-
8179class JSON_API Parser
8280 // / An RFC 4627 compatible class for parsing JSON strings or streams.
8381 // /
@@ -97,6 +95,8 @@ class JSON_API Parser
9795 // /
9896{
9997public:
98+ typedef std::char_traits<char > CharTraits;
99+ typedef CharTraits::int_type CharIntType;
100100
101101 enum Classes
102102 {
@@ -287,26 +287,332 @@ class JSON_API Parser
287287
288288 void parseBufferPopBackChar ();
289289
290- void addCharToParseBuffer (int nextChar, int nextClass);
290+ void addCharToParseBuffer (CharIntType nextChar, int nextClass);
291291
292- void addEscapedCharToParseBuffer (int nextChar);
292+ void addEscapedCharToParseBuffer (CharIntType nextChar);
293293
294- int decodeUnicodeChar ();
294+ CharIntType decodeUnicodeChar ();
295295
296296 void assertNotStringNullBool ();
297297
298298 void assertNonContainer ();
299299
300300 void parseBuffer ();
301301
302- bool parseChar (int nextChar, Source& feeder);
302+ template <typename IT>
303+ class Source
304+ {
305+ public:
306+ Source (const IT& it, const IT& end) : _it(it), _end(end)
307+ {
308+ }
309+
310+ ~Source ()
311+ {
312+ }
313+
314+ bool nextChar (CharIntType& c)
315+ {
316+ if (_it == _end) return false ;
317+ c = *_it;
318+ ++_it;
319+ return true ;
320+ }
321+
322+ private:
323+ IT _it;
324+ IT _end;
325+ };
326+
327+ template <typename S>
328+ bool parseChar (CharIntType nextChar, S& source)
303329 // / Called for each character (or partial character) in JSON string.
304330 // / It accepts UTF-8, UTF-16, or UTF-32. If the character is accepted,
305331 // / it returns true, otherwise false.
332+ {
333+ CharIntType nextClass, nextState;
334+ unsigned char ch = static_cast <unsigned char >(CharTraits::to_char_type (nextChar));
335+
336+ // Determine the character's class.
337+ if (ch < 0 || (!_allowNullByte && ch == 0 )) return false ;
338+ if (0x80 <= ch && ch <= 0xFF )
339+ {
340+ nextClass = C_ETC;
341+ CharIntType count = utf8CheckFirst (nextChar);
342+ if (!count)
343+ {
344+ throw JSONException (format (" Unable to decode byte 0x%x" , (unsigned int ) nextChar));
345+ }
346+
347+ char buffer[4 ];
348+ buffer[0 ] = nextChar;
349+ for (int i = 1 ; i < count; ++i)
350+ {
351+ int c = 0 ;
352+ if (!source.nextChar (c)) throw JSONException (" Invalid UTF8 sequence found" );
353+ buffer[i] = c;
354+ }
355+
356+ if (!UTF8Encoding::isLegal ((unsigned char *) buffer, count))
357+ {
358+ throw JSONException (" No legal UTF8 found" );
359+ }
360+
361+ for (int i = 0 ; i < count; ++i)
362+ {
363+ parseBufferPushBackChar (buffer[i]);
364+ }
365+ return true ;
366+ }
367+ else
368+ {
369+ nextClass = _asciiClass[nextChar];
370+ if (nextClass <= xx) return false ;
371+ }
372+
373+ addCharToParseBuffer (nextChar, nextClass);
374+
375+ // Get the next _state from the _state transition table.
376+ nextState = _stateTransitionTable[_state][nextClass];
377+ if (nextState >= 0 )
378+ {
379+ _state = nextState;
380+ }
381+ else
382+ {
383+ // Or perform one of the actions.
384+ switch (nextState)
385+ {
386+ // Unicode character
387+ case UC:
388+ if (!decodeUnicodeChar ()) return false ;
389+ // check if we need to read a second UTF-16 char
390+ if (_utf16HighSurrogate) _state = D1;
391+ else _state = ST;
392+ break ;
393+ // _escaped char
394+ case EX:
395+ _escaped = 1 ;
396+ _state = ES;
397+ break ;
398+ // integer detected by minus
399+ case MX:
400+ _type = JSON_T_INTEGER;
401+ _state = MI;
402+ break ;
403+ // integer detected by zero
404+ case ZX:
405+ _type = JSON_T_INTEGER;
406+ _state = ZE;
407+ break ;
408+ // integer detected by 1-9
409+ case IX:
410+ _type = JSON_T_INTEGER;
411+ _state = IT;
412+ break ;
413+ // floating point number detected by exponent
414+ case DE:
415+ assertNotStringNullBool ();
416+ _type = JSON_T_FLOAT;
417+ _state = E1 ;
418+ break ;
419+ // floating point number detected by fraction
420+ case DF:
421+ assertNotStringNullBool ();
422+ _type = JSON_T_FLOAT;
423+ _state = FX;
424+ break ;
425+ // string begin "
426+ case SB:
427+ clearBuffer ();
428+ poco_assert (_type == JSON_T_NONE);
429+ _type = JSON_T_STRING;
430+ _state = ST;
431+ break ;
432+
433+ // n
434+ case NU:
435+ poco_assert (_type == JSON_T_NONE);
436+ _type = JSON_T_NULL;
437+ _state = N1;
438+ break ;
439+ // f
440+ case FA:
441+ poco_assert (_type == JSON_T_NONE);
442+ _type = JSON_T_FALSE;
443+ _state = F1;
444+ break ;
445+ // t
446+ case TR:
447+ poco_assert (_type == JSON_T_NONE);
448+ _type = JSON_T_TRUE;
449+ _state = T1;
450+ break ;
451+
452+ // closing comment
453+ case CE:
454+ _comment = 0 ;
455+ poco_assert (_parseBufferCount == 0 );
456+ poco_assert (_type == JSON_T_NONE);
457+ _state = _beforeCommentState;
458+ break ;
459+
460+ // opening comment
461+ case CB:
462+ if (!_allowComments) return false ;
463+ parseBufferPopBackChar ();
464+ parseBuffer ();
465+ poco_assert (_parseBufferCount == 0 );
466+ poco_assert (_type != JSON_T_STRING);
467+ switch (_stack[_top])
468+ {
469+ case MODE_ARRAY:
470+ case MODE_OBJECT:
471+ switch (_state)
472+ {
473+ case VA:
474+ case AR:
475+ _beforeCommentState = _state;
476+ break ;
477+ default :
478+ _beforeCommentState = OK;
479+ break ;
480+ }
481+ break ;
482+ default :
483+ _beforeCommentState = _state;
484+ break ;
485+ }
486+ _type = JSON_T_NONE;
487+ _state = C1;
488+ _comment = 1 ;
489+ break ;
490+ // empty }
491+ case -9 :
492+ {
493+ clearBuffer ();
494+ if (_pHandler) _pHandler->endObject ();
495+
496+ if (!pop (MODE_KEY)) return false ;
497+ _state = OK;
498+ break ;
499+ }
500+ // }
501+ case -8 :
502+ {
503+ parseBufferPopBackChar ();
504+ parseBuffer ();
505+ if (_pHandler) _pHandler->endObject ();
506+ if (!pop (MODE_OBJECT)) return false ;
507+ _type = JSON_T_NONE;
508+ _state = OK;
509+ break ;
510+ }
511+ // ]
512+ case -7 :
513+ {
514+ parseBufferPopBackChar ();
515+ parseBuffer ();
516+ if (_pHandler) _pHandler->endArray ();
517+ if (!pop (MODE_ARRAY)) return false ;
518+ _type = JSON_T_NONE;
519+ _state = OK;
520+ break ;
521+ }
522+ // {
523+ case -6 :
524+ {
525+ parseBufferPopBackChar ();
526+ if (_pHandler) _pHandler->startObject ();
527+ if (!push (MODE_KEY)) return false ;
528+ poco_assert (_type == JSON_T_NONE);
529+ _state = OB;
530+ break ;
531+ }
532+ // [
533+ case -5 :
534+ {
535+ parseBufferPopBackChar ();
536+ if (_pHandler) _pHandler->startArray ();
537+ if (!push (MODE_ARRAY)) return false ;
538+ poco_assert (_type == JSON_T_NONE);
539+ _state = AR;
540+ break ;
541+ }
542+ // string end "
543+ case -4 :
544+ parseBufferPopBackChar ();
545+ switch (_stack[_top])
546+ {
547+ case MODE_KEY:
548+ {
549+ poco_assert (_type == JSON_T_STRING);
550+ _type = JSON_T_NONE;
551+ _state = CO;
552+
553+ if (_pHandler)
554+ {
555+ std::string value (_parseBuffer.begin (), _parseBufferCount);
556+ _pHandler->key (value);
557+ }
558+ clearBuffer ();
559+ break ;
560+ }
561+ case MODE_ARRAY:
562+ case MODE_OBJECT:
563+ poco_assert (_type == JSON_T_STRING);
564+ parseBuffer ();
565+ _type = JSON_T_NONE;
566+ _state = OK;
567+ break ;
568+ default :
569+ return false ;
570+ }
571+ break ;
572+
573+ // ,
574+ case -3 :
575+ {
576+ parseBufferPopBackChar ();
577+ parseBuffer ();
578+ switch (_stack[_top])
579+ {
580+ case MODE_OBJECT:
581+ // A comma causes a flip from object mode to key mode.
582+ if (!pop (MODE_OBJECT) || !push (MODE_KEY)) return false ;
583+ poco_assert (_type != JSON_T_STRING);
584+ _type = JSON_T_NONE;
585+ _state = KE;
586+ break ;
587+ case MODE_ARRAY:
588+ poco_assert (_type != JSON_T_STRING);
589+ _type = JSON_T_NONE;
590+ _state = VA;
591+ break ;
592+ default :
593+ return false ;
594+ }
595+ break ;
596+ }
597+ // :
598+ case -2 :
599+ // A colon causes a flip from key mode to object mode.
600+ parseBufferPopBackChar ();
601+ if (!pop (MODE_KEY) || !push (MODE_OBJECT)) return false ;
602+ poco_assert (_type == JSON_T_NONE);
603+ _state = VA;
604+ break ;
605+ // Bad action.
606+ default :
607+ return false ;
608+ }
609+ }
610+ return true ;
611+ }
306612
307613 bool done ();
308614
309- static int utf8_check_first (char byte);
615+ static CharIntType utf8CheckFirst (char byte);
310616
311617 static const int _asciiClass[128 ];
312618 // / This array maps the 128 ASCII characters into character classes.
@@ -390,7 +696,9 @@ inline Dynamic::Var Parser::result() const
390696
391697inline Dynamic::Var Parser::asVar () const
392698{
393- return _pHandler->asVar ();
699+ if (_pHandler) return _pHandler->asVar ();
700+
701+ return Dynamic::Var ();
394702}
395703
396704
0 commit comments