Context Navigation

← Previous Change
Next Change →

tinyxmlparser.cc

Timestamp:

Dec 31, 2007, 12:06:33 AM (17 years ago)

Author:

landauf

Message:

updated to the newest tinyXML library and changed one line in the LevelLoader

File:

: 1 edited

code/branches/FICN/src/tinyxml/tinyxmlparser.cc (modified) (56 diffs)

Legend:

: Unmodified
: Added
: Removed

code/branches/FICN/src/tinyxml/tinyxmlparser.cc

-                      r471
+                      r738
 Original code (2.0 and earlier )copyright (c) 2000-2002 Lee Thomason (www.grinninglizard.com)
 This software is provided 'as-is', without any express or implied
 warranty. In no event will the authors be held liable for any
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any
 damages arising from the use of this software.
 Permission is granted to anyone to use this software for any
 purpose, including commercial applications, and to alter it and
+Permission is granted to anyone to use this software for any
+purpose, including commercial applications, and to alter it and
 redistribute it freely, subject to the following restrictions:
 . The origin of this software must not be misrepresented; you must
+. The origin of this software must not be misrepresented; you must
 not claim that you wrote the original software. If you use this
 software in a product, an acknowledgment in the product documentation
 would be appreciated but is not required.
 . Altered source versions must be plainly marked as such, and
+. Altered source versions must be plainly marked as such, and
 must not be misrepresented as being the original software.
 . This notice may not be removed or altered from any source
+. This notice may not be removed or altered from any source
 distribution.
 */
-#include "tinyxml.h"
 #include <ctype.h>
 #include <stddef.h>
+#include "tinyxml.h"
 //#define DEBUG_PARSER
+#if defined( DEBUG_PARSER )
+#       if defined( DEBUG ) && defined( _MSC_VER )
+#               include <windows.h>
+#               define TIXML_LOG OutputDebugString
+#       else
+#               define TIXML_LOG printf
+#       endif
+#endif
 // Note that "PutString" hardcodes the same list. This
 // is less flexible than it appears. Changing the entries
 // or order will break putstring.
 TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
+// or order will break putstring.
+TiXmlBase::Entity TiXmlBase::entity[ NUM_ENTITY ] =
+{
         { "&amp;",  5, '&' },
 …
 // sequence from the lead byte. 1 placed for invalid sequences --
 // although the result will be junk, pass it through as much as possible.
 // Beware of the non-characters in UTF-8:
+// Beware of the non-characters in UTF-8:
 //                              ef bb bf (Microsoft "lead bytes")
 //                              ef bf be
 //                              ef bf bf
+//                              ef bf bf
 const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
 …
 const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
 const int TiXmlBase::utf8ByteTable[256] =
+const int TiXmlBase::utf8ByteTable[256] =
+{
         //      0       1       2       3       4       5       6       7       8       9       a       b       c       d       e       f
 …
 ,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x70 End of ASCII range
 ,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x80 0x80 to 0xc1 invalid
 ,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x90
 ,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xa0
 ,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xb0
+,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0x90
+,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xa0
+,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      1,      // 0xb0
 ,      1,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xc0 0xc2 to 0xdf 2 byte
 ,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      2,      // 0xd0
 …
         const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
         if (input < 0x80)
+        if (input < 0x80)
                 *length = 1;
         else if ( input < 0x800 )
 …
         // Scary scary fall throughs.
         switch (*length)
+        switch (*length)
+        {
                 case 4:
                         --output;
                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+                        --output;
+                        *output = (char)((input | BYTE_MARK) & BYTE_MASK);
                         input >>= 6;
                 case 3:
                         --output;
                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+                        --output;
+                        *output = (char)((input | BYTE_MARK) & BYTE_MASK);
                         input >>= 6;
                 case 2:
                         --output;
                         *output = (char)((input | BYTE_MARK) & BYTE_MASK);
+                        --output;
+                        *output = (char)((input | BYTE_MARK) & BYTE_MASK);
                         input >>= 6;
                 case 1:
                         --output;
+                        --output;
                         *output = (char)(input | FIRST_BYTE_MARK[*length]);
+        }
 …
         // This will only work for low-ascii, everything else is assumed to be a valid
         // letter. I'm not sure this is the best approach, but it is quite tricky trying
         // to figure out alphabetical vs. not across encoding. So take a very
+        // to figure out alphabetical vs. not across encoding. So take a very
         // conservative approach.
 …
         // This will only work for low-ascii, everything else is assumed to be a valid
         // letter. I'm not sure this is the best approach, but it is quite tricky trying
         // to figure out alphabetical vs. not across encoding. So take a very
+        // to figure out alphabetical vs. not across encoding. So take a very
         // conservative approach.
 …
                                 // bump down to the next line
                                 ++row;
                                 col = 0;
+                                col = 0;
                                 // Eat the character
                                 ++p;
 …
                                                 // 0-width spaces.
                                                 if ( *(pU+1)==TIXML_UTF_LEAD_1 && *(pU+2)==TIXML_UTF_LEAD_2 )
                                                         p += 3;
+                                                        p += 3;
                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbeU )
                                                         p += 3;
+                                                        p += 3;
                                                 else if ( *(pU+1)==0xbfU && *(pU+2)==0xbfU )
                                                         p += 3;
+                                                        p += 3;
                                                 else
                                                         { p +=3; ++col; }       // A normal character.
 …
+                                {
                                         // Eat the 1 to 4 byte utf8 character.
                                         int step = TiXmlBase::utf8ByteTable[*((unsigned char*)p)];
+                                        int step = TiXmlBase::utf8ByteTable[*((const unsigned char*)p)];
                                         if ( step == 0 )
                                                 step = 1;               // Error case from bad encoding, but handle gracefully.
 …
+                {
                         const unsigned char* pU = (const unsigned char*)p;
                         // Skip the stupid Microsoft UTF-8 Byte order marks
                         if (    *(pU+0)==TIXML_UTF_LEAD_0
                                  && *(pU+1)==TIXML_UTF_LEAD_1
+                                 && *(pU+1)==TIXML_UTF_LEAD_1
                                  && *(pU+2)==TIXML_UTF_LEAD_2 )
+                        {
 …
 #ifdef TIXML_USE_STL
 /*static*/ bool TiXmlBase::StreamWhiteSpace( TIXML_ISTREAM * in, TIXML_STRING * tag )
+/*static*/ bool TiXmlBase::StreamWhiteSpace( std::istream * in, TIXML_STRING * tag )
+{
         for( ;; )
 …
+}
 /*static*/ bool TiXmlBase::StreamTo( TIXML_ISTREAM * in, int character, TIXML_STRING * tag )
+/*static*/ bool TiXmlBase::StreamTo( std::istream * in, int character, TIXML_STRING * tag )
+{
         //assert( character > 0 && character < 128 );   // else it won't work in utf-8
 …
 #endif
+// One of TinyXML's more performance demanding functions. Try to keep the memory overhead down. The
+// "assign" optimization removes over 10% of the execution time.
+//
 const char* TiXmlBase::ReadName( const char* p, TIXML_STRING * name, TiXmlEncoding encoding )
+{
+        // Oddly, not supported on some compilers,
+        //name->clear();
+        // So use this:
         *name = "";
         assert( p );
 …
         // hyphens, or colons. (Colons are valid only for namespaces,
         // but tinyxml can't tell namespaces from names.)
         if (    p && *p
+        if (    p && *p
                  && ( IsAlpha( (unsigned char) *p, encoding ) || *p == '_' ) )
+        {
+                const char* start = p;
                 while(          p && *p
                                 &&      (               IsAlphaNum( (unsigned char ) *p, encoding )
+                                &&      (               IsAlphaNum( (unsigned char ) *p, encoding )
                                                  || *p == '_'
                                                  || *p == '-'
 …
                                                  || *p == ':' ) )
+                {
                         (*name) += *p;
+                        //(*name) += *p; // expensive
                         ++p;
+                }
+                if ( p-start > 0 ) {
+                        name->assign( start, p-start );
+                }
                 return p;
 …
                                 else if ( *q >= 'A' && *q <= 'F' )
                                         ucs += mult * (*q - 'A' + 10 );
                                 else
+                                else
                                         return 0;
                                 mult *= 16;
 …
                                 if ( *q >= '0' && *q <= '9' )
                                         ucs += mult * (*q - '0');
                                 else
+                                else
                                         return 0;
                                 mult *= 10;
 …
         // So it wasn't an entity, it's unrecognized, or something like that.
         *value = *p;    // Don't put back the last one, since we return it!
+        //*length = 1;  // Leave unrecognized entities - this doesn't really work.
+                                        // Just writes strange XML.
         return p+1;
+}
 …
+}
 const char* TiXmlBase::ReadText(        const char* p,
                                                                         TIXML_STRING * text,
                                                                         bool trimWhiteSpace,
                                                                         const char* endTag,
+const char* TiXmlBase::ReadText(        const char* p,
+                                                                        TIXML_STRING * text,
+                                                                        bool trimWhiteSpace,
+                                                                        const char* endTag,
                                                                         bool caseInsensitive,
                                                                         TiXmlEncoding encoding )
 …
+                }
+        }
+        return p + strlen( endTag );
+        if ( p )
+                p += strlen( endTag );
+        return p;
+}
 #ifdef TIXML_USE_STL
 void TiXmlDocument::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
+void TiXmlDocument::StreamIn( std::istream * in, TIXML_STRING * tag )
+{
         // The basic issue with a document is that we don't know what we're
 …
         // sub-tag can orient itself.
         if ( !StreamTo( in, '<', tag ) )
+        if ( !StreamTo( in, '<', tag ) )
+        {
                 SetError( TIXML_ERROR_PARSING_EMPTY, 0, 0, TIXML_ENCODING_UNKNOWN );
 …
                 if ( in->good() )
+                {
                         // We now have something we presume to be a node of
+                        // We now have something we presume to be a node of
                         // some sort. Identify it, and call the node to
                         // continue streaming.
 …
                         else if ( StringEqual( enc, "UTF8", true, TIXML_ENCODING_UNKNOWN ) )
                                 encoding = TIXML_ENCODING_UTF8; // incorrect, but be nice
                         else
+                        else
                                 encoding = TIXML_ENCODING_LEGACY;
+                }
 …
 void TiXmlDocument::SetError( int err, const char* pError, TiXmlParsingData* data, TiXmlEncoding encoding )
+{
+{
         // The first error in a chain is more accurate - don't set again!
         if ( error )
 …
+        }
         // What is this thing?
+        // What is this thing?
         // - Elements start with a letter or underscore, but xml is reserved.
         // - Comments: <!--
 …
 #ifdef TIXML_USE_STL
 void TiXmlElement::StreamIn (TIXML_ISTREAM * in, TIXML_STRING * tag)
+void TiXmlElement::StreamIn (std::istream * in, TIXML_STRING * tag)
+{
         // We're called with some amount of pre-parsing. That is, some of "this"
 …
+                }
                 (*tag) += (char) c ;
                 if ( c == '>' )
                         break;
 …
         // If not, identify and stream.
         if (    tag->at( tag->length() - 1 ) == '>'
+        if (    tag->at( tag->length() - 1 ) == '>'
                  && tag->at( tag->length() - 2 ) == '/' )
+        {
 …
                 // There is more. Could be:
                 //              text
+                //              cdata text (which looks like another node)
                 //              closing tag
                 //              another node.
 …
                         // Do we have text?
                         if ( in->good() && in->peek() != '<' )
+                        if ( in->good() && in->peek() != '<' )
+                        {
                                 // Yep, text.
 …
                                         return;
+                                }
                                 if ( c == '>' )
                                         break;
 …
                                 *tag += (char) c;
                                 in->get();
+                                // Early out if we find the CDATA id.
+                                if ( c == '[' && tag->size() >= 9 )
+                                {
+                                        size_t len = tag->size();
+                                        const char* start = tag->c_str() + len - 9;
+                                        if ( strcmp( start, "<![CDATA[" ) == 0 ) {
+                                                assert( !closingTag );
+                                                break;
+                                        }
+                                }
                                 if ( !firstCharFound && c != '<' && !IsWhiteSpace( c ) )
 …
                         if ( *p  != '>' )
+                        {
                                 if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
+                                if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY, p, data, encoding );
                                 return 0;
+                        }
 …
                         ++p;
                         p = ReadValue( p, data, encoding );             // Note this is an Element method, and will set the error if one happens.
+                        if ( !p || !*p )
+                        if ( !p || !*p ) {
+                                // We were looking for the end tag, but found nothing.
+                                // Fix for [ 1663758 ] Failure to report error on bad XML
+                                if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG, p, data, encoding );
                                 return 0;
+                        }
                         // We should find the end tag now
 …
                         attrib->SetDocument( document );
                         const char* pErr = p;
+                        pErr = p;
                         p = attrib->Parse( p, data, encoding );
 …
                         // Handle the strange case of double attributes:
+                        #ifdef TIXML_USE_STL
+                        TiXmlAttribute* node = attributeSet.Find( attrib->NameTStr() );
+                        #else
                         TiXmlAttribute* node = attributeSet.Find( attrib->Name() );
+                        #endif
                         if ( node )
+                        {
 …
                         else
                                 delete textNode;
+                }
                 else
+                }
+                else
+                {
                         // We hit a '<'
 …
                                         p = node->Parse( p, data, encoding );
                                         LinkEndChild( node );
+                                }
+                                }
                                 else
+                                {
 …
+        {
                 if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE, 0, 0, encoding );
+        }
+        }
         return p;
+}
 …
 #ifdef TIXML_USE_STL
 void TiXmlUnknown::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
+void TiXmlUnknown::StreamIn( std::istream * in, TIXML_STRING * tag )
+{
         while ( in->good() )
+        {
                 int c = in->get();
+                int c = in->get();
                 if ( c <= 0 )
+                {
 …
+                {
                         // All is well.
                         return;
+                        return;
+                }
+        }
 …
 #ifdef TIXML_USE_STL
 void TiXmlComment::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
+void TiXmlComment::StreamIn( std::istream * in, TIXML_STRING * tag )
+{
         while ( in->good() )
+        {
                 int c = in->get();
+                int c = in->get();
                 if ( c <= 0 )
+                {
 …
                 (*tag) += (char) c;
                 if ( c == '>'
+                if ( c == '>'
                          && tag->at( tag->length() - 2 ) == '-'
                          && tag->at( tag->length() - 3 ) == '-' )
+                {
                         // All is well.
                         return;
+                        return;
+                }
+        }
 …
+        }
         p += strlen( startTag );
+        p = ReadText( p, &value, false, endTag, false, encoding );
+        // [ 1475201 ] TinyXML parses entities in comments
+        // Oops - ReadText doesn't work, because we don't want to parse the entities.
+        // p = ReadText( p, &value, false, endTag, false, encoding );
+        //
+        // from the XML spec:
+        /*
+         [Definition: Comments may appear anywhere in a document outside other markup; in addition,
+                      they may appear within the document type declaration at places allowed by the grammar.
+                                  They are not part of the document's character data; an XML processor MAY, but need not,
+                                  make it possible for an application to retrieve the text of comments. For compatibility,
+                                  the string "--" (double-hyphen) MUST NOT occur within comments.] Parameter entity
+                                  references MUST NOT be recognized within comments.
+                                  An example of a comment:
+                                  <!-- declarations for <head> & <body> -->
+        */
+    value = "";
+        // Keep all the white space.
+        while ( p && *p && !StringEqual( p, endTag, false, encoding ) )
+        {
+                value.append( p, 1 );
+                ++p;
+        }
+        if ( p )
+                p += strlen( endTag );
         return p;
+}
 …
         if ( !p || !*p ) return 0;
         int tabsize = 4;
         if ( document )
                 tabsize = document->TabSize();
+//      int tabsize = 4;
+//      if ( document )
+//              tabsize = document->TabSize();
         if ( data )
 …
                 return 0;
+        }
         const char* end;
+        if ( *p == '\'' )
+        const char SINGLE_QUOTE = '\'';
+        const char DOUBLE_QUOTE = '\"';
+        if ( *p == SINGLE_QUOTE )
+        {
                 ++p;
                 end = "\'";
+                end = "\'";             // single quote in string
                 p = ReadText( p, &value, false, end, false, encoding );
+        }
         else if ( *p == '"' )
+        else if ( *p == DOUBLE_QUOTE )
+        {
                 ++p;
                 end = "\"";
+                end = "\"";             // double quote in string
                 p = ReadText( p, &value, false, end, false, encoding );
+        }
 …
                 // its best, even without them.
                 value = "";
                 while (    p && *p                                                                              // existence
+                while (    p && *p                                                                                      // existence
                                 && !IsWhiteSpace( *p ) && *p != '\n' && *p != '\r'      // whitespace
+                                && *p != '/' && *p != '>' )                                             // tag end
+                {
+                                && *p != '/' && *p != '>' )                                                     // tag end
+                {
+                        if ( *p == SINGLE_QUOTE || *p == DOUBLE_QUOTE ) {
+                                // [ 1451649 ] Attribute values with trailing quotes not handled correctly
+                                // We did not have an opening quote but seem to have a
+                                // closing one. Give up and throw an error.
+                                if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES, p, data, encoding );
+                                return 0;
+                        }
                         value += *p;
                         ++p;
 …
 #ifdef TIXML_USE_STL
+void TiXmlText::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
+{
+        if ( cdata )
+        {
+                int c = in->get();
+void TiXmlText::StreamIn( std::istream * in, TIXML_STRING * tag )
+{
+        while ( in->good() )
+        {
+                int c = in->peek();
+                if ( !cdata && (c == '<' ) )
+                {
+                        return;
+                }
                 if ( c <= 0 )
+                {
 …
                 (*tag) += (char) c;
+                if ( c == '>'
+                         && tag->at( tag->length() - 2 ) == ']'
+                         && tag->at( tag->length() - 3 ) == ']' )
+                {
+                        // All is well.
+                        return;
+                }
+        }
+        else
+        {
+                while ( in->good() )
+                {
+                        int c = in->peek();
+                        if ( c == '<' )
+                in->get();      // "commits" the peek made above
+                if ( cdata && c == '>' && tag->size() >= 3 ) {
+                        size_t len = tag->size();
+                        if ( (*tag)[len-2] == ']' && (*tag)[len-3] == ']' ) {
+                                // terminator of cdata.
                                 return;
+                        if ( c <= 0 )
+                        {
+                                TiXmlDocument* document = GetDocument();
+                                if ( document )
+                                        document->SetError( TIXML_ERROR_EMBEDDED_NULL, 0, 0, TIXML_ENCODING_UNKNOWN );
+                                return;
+                        }
+                        (*tag) += (char) c;
+                        in->get();
+                        }
+                }
+        }
 …
+                }
                 TIXML_STRING dummy;
+                TIXML_STRING dummy;
                 p = ReadText( p, &dummy, false, endTag, false, encoding );
                 return p;
 …
 #ifdef TIXML_USE_STL
 void TiXmlDeclaration::StreamIn( TIXML_ISTREAM * in, TIXML_STRING * tag )
+void TiXmlDeclaration::StreamIn( std::istream * in, TIXML_STRING * tag )
+{
         while ( in->good() )
 …
+                {
                         TiXmlAttribute attrib;
                         p = attrib.Parse( p, data, _encoding );
+                        p = attrib.Parse( p, data, _encoding );
                         version = attrib.Value();
+                }
 …
+                {
                         TiXmlAttribute attrib;
                         p = attrib.Parse( p, data, _encoding );
+                        p = attrib.Parse( p, data, _encoding );
                         encoding = attrib.Value();
+                }
 …
+                {
                         TiXmlAttribute attrib;
                         p = attrib.Parse( p, data, _encoding );
+                        p = attrib.Parse( p, data, _encoding );
                         standalone = attrib.Value();
+                }

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 738 for code/branches/FICN/src/tinyxml/tinyxmlparser.cc

Legend:

code/branches/FICN/src/tinyxml/tinyxmlparser.cc

Download in other formats: