1 // Created on: 2001-07-20
2 // Created by: Alexander GRIGORIEV
3 // Copyright (c) 2001-2014 OPEN CASCADE SAS
5 // This file is part of Open CASCADE Technology software library.
7 // This library is free software; you can redistribute it and/or modify it under
8 // the terms of the GNU Lesser General Public License version 2.1 as published
9 // by the Free Software Foundation, with special exception defined in the file
10 // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
11 // distribution for complete text of the license and disclaimer of any warranty.
13 // Alternatively, this file may be used under the terms of Open CASCADE
14 // commercial license or contractual agreement.
16 //AGV 060302: Input from std::istream
17 // AGV 130302: bug corr: was error if strlen(root_elem_name) < 7
19 #include <LDOM_XmlReader.hxx>
20 #include <Standard_Stream.hxx>
21 #include <LDOM_MemManager.hxx>
22 #include <LDOM_BasicAttribute.hxx>
23 #include <LDOM_CharReference.hxx>
24 #include <LDOM_OSStream.hxx>
// Threshold used by ReadRecord: when fewer than this many unread bytes remain
// in the buffer, more data is requested from the stream. The same value is
// used as the number of characters quoted in the "Unknown XML object" error.
36 const int XML_MIN_BUFFER = 10;
// Two members of the parser-state enumeration used by ReadRecord; the enum
// header and its remaining members are not visible in this excerpt.
46 STATE_ATTRIBUTE_EQUAL,
47 STATE_ATTRIBUTE_VALUE,
// Evaluates to true when the first (sizeof(aPattern) - 1) bytes at aPtr equal
// aPattern. aPattern has to be a string literal (or char array) so that sizeof
// yields its size including the terminating NUL, which is excluded from the
// comparison. The caller must make sure that many bytes are readable at aPtr.
53 #define TEXT_COMPARE(aPtr,aPattern) \
54 (memcmp ((aPtr), (aPattern), sizeof(aPattern) - 1) == 0)
// Forward declaration of the XML Name checker defined further below.
56 static Standard_Boolean isName (const char * aString,
57 const char * aStringEnd,
58 const char *& aNameEnd);
60 //=======================================================================
61 //function : LDOM_XmlReader()
62 //purpose : Constructor; the XML input is supplied later to ReadRecord() as a stream
63 //=======================================================================
// Creates a reader with an empty buffer: the end-of-data pointer is placed at
// the start of myBuffer and the end-of-file flag is cleared.
//   theDocument    - memory manager stored in myDocument; it is later passed
//                    to the element, attribute and string constructors
//   theErrorString - initializes myError, the member that ReadRecord assigns
//                    error messages to (presumably bound by reference so the
//                    caller sees them - confirm against the class declaration)
//   theTagPerStep  - stored in myTagPerStep; ReadRecord consults it when
//                    refilling the buffer (getline up to '>' instead of a
//                    full-buffer read, as far as this excerpt shows)
65 LDOM_XmlReader::LDOM_XmlReader (
66 const Handle(LDOM_MemManager)& theDocument,
67 TCollection_AsciiString& theErrorString,
68 const Standard_Boolean theTagPerStep)
69 : myEOF (Standard_False),
70 myError (theErrorString),
71 myDocument (theDocument),
75 myEndPtr (&myBuffer[0]),
76 myTagPerStep (theTagPerStep)
80 //=======================================================================
81 //function : ReadRecord
82 //purpose : Read the next record from the XML input stream
83 //=======================================================================
// Reads the next XML record from theIStream.
//
// The function runs a state machine (aState, starting in STATE_WAITING) over
// the internal buffer myBuffer, with myPtr marking the current position and
// myEndPtr the end of valid data. Whenever fewer than XML_MIN_BUFFER unread
// bytes remain, the unread tail is moved to the start of the buffer and more
// data is requested from the stream.
//
//   theIStream - input stream the buffer is refilled from
//   theData    - receives the text of the record being read (declaration,
//                DOCTYPE, comment, text, CDATA, tag name); data collected
//                before a buffer refill is flushed into it so that nothing
//                is lost when the buffer is compacted
//
// The visible return points yield XML_FULL_ELEMENT, XML_START_ELEMENT and
// XML_END_ELEMENT; the other return statements are outside this excerpt.
// Error conditions store a message in myError.
85 LDOM_XmlReader::RecordType LDOM_XmlReader::ReadRecord (Standard_IStream& theIStream,
86 LDOM_OSStream& theData)
90 ParserState aState = STATE_WAITING;
91 const char * aStartData = NULL, * aNameEnd = NULL, * aPtr;
92 LDOMBasicString anAttrName, anAttrValue;
93 char anAttDelimiter = '\0';
94 Standard_Boolean aHasRead = Standard_False;
97 // Check if the current file buffer is exhausted
98 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
99 // There should always be some bytes available in the buffer for analysis
100 Standard_Integer aBytesRest = (Standard_Integer)(myEndPtr - myPtr);
101 if (aBytesRest < XML_MIN_BUFFER)
103 if (myEOF == Standard_True)
106 break; // END of processing
108 else if (myTagPerStep && aHasRead)
110 // in myTagPerStep mode, we should parse the buffer to the end before
111 // getting more characters from the stream.
115 // If we are reading some data, save the beginning and preserve the state
// aStartData points into myBuffer, so the part already scanned has to be
// flushed to theData before the buffer contents are shifted.
116 if (aStartData /* && aState != STATE_WAITING */) {
117 if (myPtr > aStartData)
118 theData.rdbuf()->sputn(aStartData, myPtr - aStartData);
119 aStartData = &myBuffer[0];
121 // Copy the rest of file data to the beginning of buffer
// memmove, not memcpy: source and destination both lie in myBuffer and
// overlap whenever aBytesRest exceeds the offset of myPtr in the buffer.
123 memmove (&myBuffer[0], myPtr, aBytesRest);
125 // Read the full buffer and reset start and end buffer pointers
126 myPtr = &myBuffer[0];
127 Standard_Size aNBytes;
131 theIStream.getline (&myBuffer[aBytesRest], XML_BUFFER_SIZE - aBytesRest, '>');
132 aHasRead = Standard_True;
136 theIStream.read (&myBuffer[aBytesRest], XML_BUFFER_SIZE - aBytesRest);
138 aNBytes = (Standard_Size)theIStream.gcount();
142 myEOF = Standard_True; // END-OF-FILE
144 else if (myTagPerStep)
146 // replace \0 (being inserted by getline method) with >
// NOTE(review): this assumes getline stopped at the '>' delimiter; if it
// stopped at end of stream or on a full buffer, the last byte read would be
// overwritten instead - confirm.
147 myBuffer[aBytesRest + aNBytes - 1] = '>';
149 myEndPtr = &myBuffer[aBytesRest + aNBytes];
150 myBuffer[aBytesRest + aNBytes] = '\0';
154 // Check the character data
157 // Checking the characters in STATE_WAITING (blank, TEXT or markup)
158 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
168 // XML markup found: detect the record type
171 aState = STATE_HEADER;
176 aState = STATE_ELEMENT_END;
181 if (myPtr[2] == '-' && myPtr[3] == '-') {
182 aState = STATE_COMMENT;
184 } else if (TEXT_COMPARE (&myPtr[2], "DOCTYPE")) {
186 if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
188 aState = STATE_DOCTYPE;
190 } else if (TEXT_COMPARE (&myPtr[2], "[CDATA[")) {
191 aState = STATE_CDATA;
193 } else break; // ERROR
197 if (::isName (&myPtr[1], myEndPtr, aNameEnd)) {
198 aStartData = myPtr + 1;
200 if (myPtr < myEndPtr) {
201 myElement = & LDOM_BasicElement::Create (aStartData,
202 (Standard_Integer)(myPtr - aStartData),
205 aState = STATE_ATTRIBUTE_NAME;
208 aState = STATE_ELEMENT;
212 myError = "Unknown XML object: ";
213 myError += TCollection_AsciiString (myPtr, XML_MIN_BUFFER);
216 if (myEOF == Standard_True) continue;
219 // Limitation: we do not treat '&' as special character
220 aPtr = (const char *) memchr (myPtr, '<', myEndPtr - myPtr);
222 // The end of text field reached
223 theData.rdbuf()->sputn(myPtr, aPtr - myPtr);
230 aHasRead = Standard_False;
231 } // end of checking in STATE_WAITING
234 // Checking the characters in STATE_HEADER, seek for "?>" sequence
235 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// The search stops one byte before myEndPtr so that aPtr[1] can be inspected.
237 aPtr = (const char *) memchr (aStartData, '?', (myEndPtr-1) - aStartData);
239 // The end of XML declaration found
240 if (aPtr[1] != '>') { // ERROR
241 myError = "Character \'>\' is expected in the end of XML declaration";
244 // The XML declaration is retrieved
245 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
249 myPtr = myEndPtr - 1;
250 aHasRead = Standard_False;
253 // Checking the characters in STATE_DOCTYPE, seek for "]>" sequence
254 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
256 for (aPtr = aStartData; aPtr < myEndPtr-1; aPtr++) {
257 const int aChar = aPtr[0];
259 aState = STATE_DOCTYPE_MARKUP;
260 aStartData = &aPtr[1];
261 goto state_doctype_markup;
264 // The DOCTYPE declaration is retrieved
265 theData.rdbuf()->sputn(aStartData, aPtr - aStartData - 1);
270 myPtr = myEndPtr - 1;
271 aHasRead = Standard_False;
274 state_doctype_markup:
275 case STATE_DOCTYPE_MARKUP:
276 aPtr = (const char *) memchr (aStartData, ']', (myEndPtr-1) - aStartData);
278 // The end of DOCTYPE declaration found
279 if (aPtr[1] != '>') { // ERROR
281 "Character \'>\' is expected in the end of DOCTYPE declaration";
284 // The DOCTYPE declaration is retrieved
285 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
289 myPtr = myEndPtr - 1;
290 aHasRead = Standard_False;
293 // Checking the characters in STATE_COMMENT, seek for "-->" sequence
294 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// The search stops two bytes before myEndPtr so that aPtr[1] and aPtr[2]
// can be inspected.
298 aPtr = (const char *) memchr (aPtr, '-', (myEndPtr - 2) - aPtr);
299 if (aPtr == NULL) break;
300 if (aPtr[1] != '-') ++ aPtr;
302 if (aPtr[2] != '>') { // ERROR
303 myError = "Character \'>\' is expected in the end of comment";
306 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
311 myPtr = myEndPtr - 2;
312 aHasRead = Standard_False;
315 // Checking the characters in STATE_TEXT, seek for "<"
316 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
318 aPtr = (const char *) memchr (aStartData, '<', myEndPtr - aStartData);
320 // The end of text field reached
321 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
326 aHasRead = Standard_False;
329 // Checking the characters in STATE_CDATA, seek for "]]"
330 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// NOTE(review): the search starts at aPtr but its length is measured from
// aStartData; this is only correct while aPtr == aStartData - confirm.
334 aPtr = (const char *) memchr (aPtr, ']', (myEndPtr - 1) - aStartData);
335 if (aPtr == NULL) break;
336 if (aPtr[1] != ']') { // ERROR
337 myError = "Characters \']]\' are expected in the end of CDATA";
340 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
344 myPtr = myEndPtr - 1;
345 aHasRead = Standard_False;
348 // Checking the characters in STATE_ELEMENT, seek the end of TagName
349 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
351 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
352 if (theData.Length() == 0 || aNameEnd != myPtr) {
353 myError = "Invalid tag name";
357 theData.rdbuf()->sputn(aStartData, aNameEnd - aStartData);
// The buffer obtained from theData.str() is released with delete[] below.
358 char* aDataString = (char *)theData.str();
359 myElement = & LDOM_BasicElement::Create (aDataString, theData.Length(),
363 delete [] aDataString;
364 aState = STATE_ATTRIBUTE_NAME;
369 // Parsing a single attribute (STATE_ATTRIBUTE)
370 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
371 case STATE_ATTRIBUTE_NAME: // attribute name
377 if (aStartData) goto attr_name;
382 myError = "Inexpected end of attribute";
383 else if (myPtr[1] != '>')
384 myError = "Improper element tag termination";
389 theData << myElement->GetTagName();
391 return XML_FULL_ELEMENT;
396 myError = "Inexpected end of attribute";
402 theData << myElement->GetTagName();
404 return XML_START_ELEMENT;
406 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
407 if (theData.Length() == 0 || aNameEnd != myPtr) {
408 myError = "Invalid attribute name";
411 if (aNameEnd >= myEndPtr)
// If nothing was flushed to theData, the whole name is still in the buffer;
// otherwise the name is assembled from theData plus the current buffer part.
414 if (theData.Length() == 0)
415 anAttrName = LDOMBasicString(myPtr, (Standard_Integer)(aNameEnd - myPtr), myDocument);
417 theData.rdbuf()->sputn(myPtr, aNameEnd - myPtr);
419 char* aDataString = (char *)theData.str();
421 anAttrName = LDOMBasicString (aDataString, myDocument);
422 delete [] aDataString;
425 aState = STATE_ATTRIBUTE_EQUAL;
430 case STATE_ATTRIBUTE_EQUAL: // attribute 'equal' sign
433 aState = STATE_ATTRIBUTE_VALUE;
442 myError = "Equal sign expected in attribute definition";
446 case STATE_ATTRIBUTE_VALUE: // attribute value
452 if (aStartData == NULL) {
456 if (anAttDelimiter == '\0') {
457 myError = "Expected an attribute value";
461 if (aStartData == NULL) {
462 aStartData = &myPtr[1];
463 anAttDelimiter = myPtr[0];
467 // Limitation: we do not take into account that '<' and '&'
468 // are not allowed in attribute values
469 aPtr = (const char *) memchr (aStartData, anAttDelimiter,
470 myEndPtr - aStartData);
// The closing delimiter is overwritten in place so that the value becomes a
// NUL-terminated string inside the buffer.
472 (char&) aPtr[0] = '\0';
473 anAttDelimiter = '\0';
474 char * aDataString = (char *) aStartData;
475 const char * ePtr = aPtr;
477 // Append the end of the string to previously taken data
478 if (theData.Length() > 0) {
479 theData.rdbuf()->sputn(aStartData, aPtr-aStartData);
480 aDataString = (char *)theData.str();
481 ePtr = strchr (aDataString, '\0');
484 Standard_Integer aDataLen;
485 aDataString = LDOM_CharReference::Decode (aDataString, aDataLen);
// getInteger returns Standard_True when the text could NOT be stored as an
// integer, in which case the value is kept as a string.
486 if (IsDigit(aDataString[0])) {
487 if (getInteger (anAttrValue, aDataString, ePtr))
488 anAttrValue = LDOMBasicString (aDataString,aDataLen,myDocument);
490 anAttrValue = LDOMBasicString (aDataString, aDataLen, myDocument);
492 if (theData.Length() > 0) {
494 delete [] aDataString;
496 // Create an attribute
497 myLastChild = myElement -> AddAttribute (anAttrName, anAttrValue,
498 myDocument, myLastChild);
501 aState = STATE_ATTRIBUTE_NAME;
505 aHasRead = Standard_False;
509 // Checking the characters in STATE_ELEMENT_END, seek for ">"
510 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
511 case STATE_ELEMENT_END:
512 aPtr = (const char *) memchr (aStartData, '>', myEndPtr - aStartData);
514 // The end of the end-element markup
515 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
517 return XML_END_ELEMENT;
520 aHasRead = Standard_False;
// Leaving the loop in any state other than STATE_WAITING means the input
// ended in the middle of a record.
524 if (aState != STATE_WAITING) {
525 myError = "Unexpected end of file";
531 //=======================================================================
534 //purpose : Check if aString is a valid XML Name
535 //=======================================================================
// Checks whether the characters starting at aString form an XML Name.
//   aString    - first character to examine; it must be alphabetic, '_' or ':'
//                for the name to be accepted
//   aStringEnd - end of the range that may be scanned; the scan loop runs
//                while the current position is before this pointer
//   aNameEnd   - output parameter; callers read it after the call as the
//                position where the name scan stopped (the assignments are on
//                lines not visible in this excerpt - confirm)
// Returns Standard_True for a valid name and Standard_False otherwise; in
// particular, a character for which IsAlphanumeric() yields 0 leads to
// Standard_False.
537 static Standard_Boolean isName (const char * aString,
538 const char * aStringEnd,
539 const char *& aNameEnd)
541 Standard_Boolean aResult;
542 char aCh = aString[0];
// A name has to start with a letter, an underscore or a colon.
543 if (IsAlphabetic(aCh) || aCh == '_' || aCh == ':') {
544 const char * aPtr = &aString[1];
545 while (aPtr < aStringEnd) {
557 return Standard_True;
559 if (IsAlphanumeric(aCh) == 0) {
561 return Standard_False;
572 aResult = Standard_True;
575 aResult = Standard_False;
580 //=======================================================================
581 //function : CreateElement
583 //=======================================================================
// Creates a new element through LDOM_BasicElement::Create, using myDocument,
// and stores its address in myElement.
//   theName - name of the element to create
//   theLen  - length passed along with theName (presumably the number of
//             characters of theName to use - confirm against
//             LDOM_BasicElement::Create)
584 void LDOM_XmlReader::CreateElement( const char *theName, const Standard_Integer theLen )
586 myElement = &LDOM_BasicElement::Create (theName, theLen, myDocument);
589 //=======================================================================
590 //function : getInteger
591 //purpose : Try to initialize theValue as Integer; return False on success
592 //=======================================================================
// Tries to interpret the text in [theStart, theEnd) as a decimal integer and,
// if that works, stores it in theValue.
//   theValue - receives the integer on success; left untouched otherwise
//   theStart - first character of the text
//   theEnd   - one past the last character of the text
// Returns Standard_False when theValue was set and Standard_True when the
// text has to be kept as a string. The text is kept as a string when it has a
// leading '0' and more than one character, when strtol does not consume
// exactly the whole range or reports an error, or when the parsed value does
// not fit into Standard_Integer.
594 Standard_Boolean LDOM_XmlReader::getInteger (LDOMBasicString& theValue,
595 const char * theStart,
// A multi-character text starting with '0' is not converted.
600 if (theEnd - theStart == 1 || theStart[0] != '0')
602 long aResult = strtol (theStart, &ptr, 10);
// The round-trip comparison rejects values that would be silently truncated
// when long is wider than Standard_Integer.
603 if (ptr == theEnd && errno == 0 && aResult == Standard_Integer(aResult))
605 theValue = Standard_Integer(aResult);
606 return Standard_False;
609 return Standard_True;