1 // Created on: 2001-07-20
2 // Created by: Alexander GRIGORIEV
3 // Copyright (c) 2001-2014 OPEN CASCADE SAS
5 // This file is part of Open CASCADE Technology software library.
7 // This library is free software; you can redistribute it and/or modify it under
8 // the terms of the GNU Lesser General Public License version 2.1 as published
9 // by the Free Software Foundation, with special exception defined in the file
10 // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
11 // distribution for complete text of the license and disclaimer of any warranty.
13 // Alternatively, this file may be used under the terms of Open CASCADE
14 // commercial license or contractual agreement.
16 //AGV 060302: Input from istream
17 // AGV 130302: bug corr: was error if strlen(root_elem_name) < 7
19 #include <LDOM_XmlReader.hxx>
20 #include <Standard_Stream.hxx>
21 #include <LDOM_MemManager.hxx>
22 #include <LDOM_BasicAttribute.hxx>
23 #include <LDOM_CharReference.hxx>
24 #include <LDOM_OSStream.hxx>
// Refill threshold: ReadRecord reloads the buffer when fewer unread bytes than
// this remain between myPtr and myEndPtr.
36 const int XML_MIN_BUFFER = 10;
// Value of myFileDes meaning "no file descriptor"; the istream constructor
// stores it and ReadRecord then reads from myIStream instead of calling read().
37 const int FILE_NONVALUE = -1;
47 STATE_ATTRIBUTE_EQUAL,
48 STATE_ATTRIBUTE_VALUE,
// True when the (sizeof(aPattern) - 1) bytes starting at aPtr equal aPattern.
// aPattern has to be a string literal or a char array: sizeof must yield its
// size including the terminating NUL, which the "- 1" drops. With a plain
// pointer argument sizeof would give the pointer size instead.
// The caller is responsible for that many bytes being readable at aPtr.
54 #define TEXT_COMPARE(aPtr,aPattern) \
55 (memcmp ((aPtr), (aPattern), sizeof(aPattern) - 1) == 0)
// Forward declaration of the file-local XML name checker; it is defined
// further below and used by ReadRecord for tag and attribute names.
57 static Standard_Boolean isName (const char * aString,
58 const char * aStringEnd,
59 const char *& aNameEnd);
61 //=======================================================================
62 //function : LDOM_XmlReader()
63 //purpose : Constructor (file descriptor)
64 //=======================================================================
// Builds a reader that takes its input from a file descriptor.
//   theFileDes     - stored in myFileDes; ReadRecord passes it to read()
//   theDocument    - memory manager handle stored in myDocument
//   theErrorString - initializes myError, which ReadRecord assigns messages to
// myEOF starts as Standard_False and myEndPtr at the first byte of myBuffer.
66 LDOM_XmlReader::LDOM_XmlReader (const int theFileDes,
67 const Handle(LDOM_MemManager)& theDocument,
68 TCollection_AsciiString& theErrorString)
69 : myEOF (Standard_False),
70 myFileDes (theFileDes),
71 myIStream (cin), // just a placeholder, myIStream will never be used anyway
72 myError (theErrorString),
73 myDocument (theDocument),
77 myEndPtr (&myBuffer[0])
81 //=======================================================================
82 //function : LDOM_XmlReader()
83 //purpose : Constructor (istream)
84 //=======================================================================
// Builds a reader that takes its input from a C++ input stream.
//   theInput       - stream to read from (presumably bound to myIStream;
//                    that initializer is not among the lines shown - confirm)
//   theDocument    - memory manager handle stored in myDocument
//   theErrorString - initializes myError, which ReadRecord assigns messages to
// myFileDes is set to FILE_NONVALUE so that ReadRecord uses the stream branch.
// myEOF starts as Standard_False and myEndPtr at the first byte of myBuffer.
86 LDOM_XmlReader::LDOM_XmlReader (istream& theInput,
87 const Handle(LDOM_MemManager)& theDocument,
88 TCollection_AsciiString& theErrorString)
89 : myEOF (Standard_False),
90 myFileDes (FILE_NONVALUE),
92 myError (theErrorString),
93 myDocument (theDocument),
97 myEndPtr (&myBuffer[0])
101 //=======================================================================
102 //function : ReadRecord
103 //purpose : Read a record from XML file
104 //=======================================================================
// Reads the next XML record from the input and reports its type.
//   theData - output stream buffer that receives the text of the record
//             (declaration body, comment, text, tag name, ...).
// The function runs a state machine over myBuffer: aState starts at
// STATE_WAITING and moves through the STATE_* values assigned below. Whenever
// fewer than XML_MIN_BUFFER unread bytes remain, the unread tail is moved to
// the start of myBuffer and the buffer is refilled from the file descriptor or
// from myIStream. Data that spans a refill is flushed to theData first.
// Return values that can be read here include XML_FULL_ELEMENT,
// XML_START_ELEMENT and XML_END_ELEMENT; error paths store a message in myError.
106 LDOM_XmlReader::RecordType LDOM_XmlReader::ReadRecord
107 (LDOM_OSStream& theData)
111 ParserState aState = STATE_WAITING;
112 const char * aStartData = NULL, * aNameEnd = NULL, * aPtr;
113 LDOMBasicString anAttrName, anAttrValue;
114 char anAttDelimiter = '\0';
117 // Check if the current file buffer is exhausted
118 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
119 // There should always be some bytes available in the buffer for analysis
120 Standard_Integer aBytesRest = (Standard_Integer)(myEndPtr - myPtr);
121 if (aBytesRest < XML_MIN_BUFFER) {
122 if (myEOF == Standard_True) {
124 break; // END of processing
126 // If we are reading some data, save the beginning and preserve the state
127 if (aStartData /* && aState != STATE_WAITING */) {
128 if (myPtr > aStartData)
129 theData.rdbuf()->sputn(aStartData, myPtr - aStartData);
130 aStartData = &myBuffer[0];
132 // Copy the rest of file data to the beginning of buffer
// NOTE(review): memcpy requires non-overlapping ranges; presumably myPtr is at
// least aBytesRest bytes past the buffer start here - confirm, otherwise
// memmove would be the safe call.
134 memcpy (&myBuffer[0], myPtr, aBytesRest);
136 // Read the full buffer and reset start and end buffer pointers
137 myPtr = &myBuffer[0];
138 Standard_Size aNBytes;
// NOTE(review): read() returns -1 on failure; stored in aNBytes (presumably an
// unsigned type) that becomes a very large value - confirm it is checked
// before aNBytes is used as a buffer index below.
139 if (myFileDes != FILE_NONVALUE)
140 aNBytes = read (myFileDes, &myBuffer[aBytesRest],
141 XML_BUFFER_SIZE - aBytesRest);
143 myIStream.read (&myBuffer[aBytesRest],
144 XML_BUFFER_SIZE - aBytesRest);
145 aNBytes = (Standard_Size)myIStream.gcount();
148 myEOF = Standard_True; // END-OF-FILE
// The buffer is NUL-terminated right after the last valid byte.
149 myEndPtr = &myBuffer[aBytesRest + aNBytes];
150 myBuffer[aBytesRest + aNBytes] = '\0';
154 // Check the character data
157 // Checking the characters in STATE_WAITING (blank, TEXT or markup)
158 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
168 // XML markup found, then detect the record type
171 aState = STATE_HEADER;
176 aState = STATE_ELEMENT_END;
181 if (myPtr[2] == '-' && myPtr[3] == '-') {
182 aState = STATE_COMMENT;
184 } else if (TEXT_COMPARE (&myPtr[2], "DOCTYPE")) {
186 if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
188 aState = STATE_DOCTYPE;
190 } else if (TEXT_COMPARE (&myPtr[2], "[CDATA[")) {
191 aState = STATE_CDATA;
193 } else break; // ERROR
197 if (::isName (&myPtr[1], myEndPtr, aNameEnd)) {
198 aStartData = myPtr + 1;
200 if (myPtr < myEndPtr) {
201 myElement = & LDOM_BasicElement::Create (aStartData,
202 (Standard_Integer)(myPtr - aStartData),
205 aState = STATE_ATTRIBUTE_NAME;
208 aState = STATE_ELEMENT;
212 myError = "Unknown XML object: ";
213 myError += TCollection_AsciiString ((const Standard_CString)myPtr,
217 if (myEOF == Standard_True) continue;
219 // Limitation: we do not treat '&' as special character
220 aPtr = (const char *) memchr (myPtr, '<', myEndPtr - myPtr);
222 // The end of text field reached
223 theData.rdbuf()->sputn(myPtr, aPtr - myPtr);
230 } // end of checking in STATE_WAITING
233 // Checking the characters in STATE_HEADER, seek for "?>" sequence
234 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// The search stops one byte before myEndPtr so that aPtr[1] can be inspected.
236 aPtr = (const char *) memchr (aStartData, '?', (myEndPtr-1) - aStartData);
238 // The end of XML declaration found
239 if (aPtr[1] != '>') { // ERROR
240 myError = "Character \'>\' is expected in the end of XML declaration";
243 // The XML declaration is retrieved
244 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
248 myPtr = myEndPtr - 1;
251 // Checking the characters in STATE_DOCTYPE, seek for "]>" sequence
252 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
254 for (aPtr = aStartData; aPtr < myEndPtr-1; aPtr++) {
255 const int aChar = aPtr[0];
257 aState = STATE_DOCTYPE_MARKUP;
258 aStartData = &aPtr[1];
259 goto state_doctype_markup;
262 // The DOCTYPE declaration is retrieved
263 theData.rdbuf()->sputn(aStartData, aPtr - aStartData - 1);
268 myPtr = myEndPtr - 1;
271 state_doctype_markup:
272 case STATE_DOCTYPE_MARKUP:
273 aPtr = (const char *) memchr (aStartData, ']', (myEndPtr-1) - aStartData);
275 // The end of DOCTYPE declaration found
276 if (aPtr[1] != '>') { // ERROR
278 "Character \'>\' is expected in the end of DOCTYPE declaration";
281 // The DOCTYPE declaration is retrieved
282 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
286 myPtr = myEndPtr - 1;
289 // Checking the characters in STATE_COMMENT, seek for "-->" sequence
290 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// The search stops two bytes before myEndPtr so that aPtr[1] and aPtr[2]
// can be inspected.
294 aPtr = (const char *) memchr (aPtr, '-', (myEndPtr - 2) - aPtr);
295 if (aPtr == NULL) break;
296 if (aPtr[1] != '-') ++ aPtr;
298 if (aPtr[2] != '>') { // ERROR
299 myError = "Character \'>\' is expected in the end of comment";
302 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
307 myPtr = myEndPtr - 2;
310 // Checking the characters in STATE_TEXT, seek for "<"
311 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
313 aPtr = (const char *) memchr (aStartData, '<', myEndPtr - aStartData);
315 // The end of text field reached
316 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
323 // Checking the characters in STATE_CDATA, seek for "]]"
324 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// NOTE(review): the search starts at aPtr but the length is measured from
// aStartData, whereas the comment search above measures from aPtr. If aPtr has
// advanced past aStartData this scans beyond myEndPtr - 1 - confirm.
328 aPtr = (const char *) memchr (aPtr, ']', (myEndPtr - 1) - aStartData);
329 if (aPtr == NULL) break;
330 if (aPtr[1] != ']') { // ERROR
331 myError = "Characters \']]\' are expected in the end of CDATA";
334 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
338 myPtr = myEndPtr - 1;
341 // Checking the characters in STATE_ELEMENT, seek the end of TagName
342 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// A failed isName is only an error when nothing was collected in theData yet
// or when the name did not stop right at myPtr.
344 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
345 if (theData.Length() == 0 || aNameEnd != myPtr) {
346 myError = "Invalid tag name";
350 theData.rdbuf()->sputn(aStartData, aNameEnd - aStartData);
// The array returned by theData.str() is released with delete [] below.
351 char* aDataString = (char *)theData.str();
352 myElement = & LDOM_BasicElement::Create (aDataString, theData.Length(),
356 delete [] aDataString;
357 aState = STATE_ATTRIBUTE_NAME;
362 // Parsing a single attribute (STATE_ATTRIBUTE)
363 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
364 case STATE_ATTRIBUTE_NAME: // attribute name
370 if (aStartData) goto attr_name;
// NOTE(review): "Inexpected" in the messages below looks like a typo for
// "Unexpected"; it is runtime text and therefore left unchanged here.
375 myError = "Inexpected end of attribute";
376 else if (myPtr[1] != '>')
377 myError = "Improper element tag termination";
382 theData << myElement->GetTagName();
384 return XML_FULL_ELEMENT;
389 myError = "Inexpected end of attribute";
395 theData << myElement->GetTagName();
397 return XML_START_ELEMENT;
399 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
400 if (theData.Length() == 0 || aNameEnd != myPtr) {
401 myError = "Invalid attribute name";
404 if (aNameEnd >= myEndPtr)
// With nothing accumulated in theData the name is taken straight from the
// buffer; otherwise the new piece is appended and the whole text is used.
407 if (theData.Length() == 0)
408 anAttrName = LDOMBasicString(myPtr, (Standard_Integer)(aNameEnd - myPtr), myDocument);
410 theData.rdbuf()->sputn(myPtr, aNameEnd - myPtr);
412 char* aDataString = (char *)theData.str();
414 anAttrName = LDOMBasicString (aDataString, myDocument);
415 delete [] aDataString;
418 aState = STATE_ATTRIBUTE_EQUAL;
423 case STATE_ATTRIBUTE_EQUAL: // attribute 'equal' sign
426 aState = STATE_ATTRIBUTE_VALUE;
434 myError = "Equal sign expected in attribute definition";
438 case STATE_ATTRIBUTE_VALUE: // attribute value
444 if (aStartData == NULL) {
448 if (anAttDelimiter == '\0') {
449 myError = "Expected an attribute value";
// The character at myPtr is remembered as the delimiter and the value itself
// starts right after it.
453 if (aStartData == NULL) {
454 aStartData = &myPtr[1];
455 anAttDelimiter = myPtr[0];
459 // Limitation: we do not take into account that '<' and '&'
460 // are not allowed in attribute values
461 aPtr = (const char *) memchr (aStartData, anAttDelimiter,
462 myEndPtr - aStartData);
// The closing delimiter is overwritten in place with NUL, so the value in the
// buffer can be handled as a C string.
464 (char&) aPtr[0] = '\0';
465 anAttDelimiter = '\0';
466 char * aDataString = (char *) aStartData;
467 const char * ePtr = aPtr;
469 // Append the end of the string to previously taken data
470 if (theData.Length() > 0) {
471 theData.rdbuf()->sputn(aStartData, aPtr-aStartData);
472 aDataString = (char *)theData.str();
473 ePtr = strchr (aDataString, '\0');
476 Standard_Integer aDataLen;
477 aDataString = LDOM_CharReference::Decode (aDataString, aDataLen);
478 if (IsDigit(aDataString[0])) {
// getInteger returns True when it did NOT store an integer, so the value is
// then kept as a string.
479 if (getInteger (anAttrValue, aDataString, ePtr))
480 anAttrValue = LDOMBasicString (aDataString,aDataLen,myDocument);
482 anAttrValue = LDOMBasicString (aDataString, aDataLen, myDocument);
484 if (theData.Length() > 0) {
// NOTE(review): aDataString was reassigned from Decode's return value above;
// presumably Decode works in place and returns its argument, so this releases
// the array obtained from theData.str() - confirm.
486 delete [] aDataString;
488 // Create an attribute
489 myLastChild = myElement -> AddAttribute (anAttrName, anAttrValue,
490 myDocument, myLastChild);
493 aState = STATE_ATTRIBUTE_NAME;
498 // Checking the characters in STATE_ELEMENT_END, seek for ">"
499 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
500 case STATE_ELEMENT_END:
501 aPtr = (const char *) memchr (aStartData, '>', myEndPtr - aStartData);
503 // The end of the end-element markup
504 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
506 return XML_END_ELEMENT;
// Leaving the parsing loop in any state other than STATE_WAITING means the
// input ended in the middle of a record.
512 if (aState != STATE_WAITING) {
513 myError = "Unexpected end of file";
519 //=======================================================================
522 //purpose : Check if aString is a valid XML Name
523 //=======================================================================
// Tests whether the bytes starting at aString form an XML name.
//   aString    - first character to test
//   aStringEnd - scan limit; the loop only runs while aPtr < aStringEnd
//   aNameEnd   - output reference; presumably receives the position where the
//                scan stopped (callers compare it with myPtr and myEndPtr) -
//                confirm against the assignments in the body
// The first character must be alphabetic, '_' or ':'; following characters are
// checked with IsAlphanumeric among other tests.
// NOTE(review): aString[0] is read before any comparison with aStringEnd, so
// the caller has to guarantee at least one readable byte.
525 static Standard_Boolean isName (const char * aString,
526 const char * aStringEnd,
527 const char *& aNameEnd)
529 Standard_Boolean aResult;
530 char aCh = aString[0];
531 if (IsAlphabetic(aCh) || aCh == '_' || aCh == ':') {
532 const char * aPtr = &aString[1];
533 while (aPtr < aStringEnd) {
545 return Standard_True;
547 if (IsAlphanumeric(aCh) == 0) {
549 return Standard_False;
559 aResult = Standard_True;
562 aResult = Standard_False;
567 //=======================================================================
568 //function : getInteger
569 //purpose : Try to initialize theValue as Integer; return False on success
570 //=======================================================================
// Tries to store the decimal text [theStart, theEnd) in theValue as an integer.
//   theValue - receives Standard_Integer(aResult) when the conversion succeeds
//   theStart - first character of the text
//   theEnd   - position the conversion must stop at for the text to be accepted
// Returns Standard_False when theValue was assigned and Standard_True
// otherwise - note the inverted sense: "true" means "not an integer".
572 Standard_Boolean LDOM_XmlReader::getInteger (LDOMBasicString& theValue,
573 const char * theStart,
// Text with a leading '0' is only considered when it is a single character.
578 if (theEnd - theStart == 1 || theStart[0] != '0')
// NOTE(review): the errno test below relies on errno being 0 before strtol is
// called; presumably it is reset just above - confirm.
580 long aResult = strtol (theStart, &ptr, 10);
// Accepted only if strtol consumed the text exactly up to theEnd.
581 if (ptr == theEnd && errno == 0)
// NOTE(review): conversion from long to Standard_Integer; presumably values
// outside the Standard_Integer range are not checked - confirm.
583 theValue = Standard_Integer(aResult);
584 return Standard_False;
587 return Standard_True;