1 // Created on: 2001-07-20
2 // Created by: Alexander GRIGORIEV
3 // Copyright (c) 2001-2012 OPEN CASCADE SAS
5 // The content of this file is subject to the Open CASCADE Technology Public
6 // License Version 6.5 (the "License"). You may not use the content of this file
7 // except in compliance with the License. Please obtain a copy of the License
8 // at http://www.opencascade.org and read it completely before using this file.
10 // The Initial Developer of the Original Code is Open CASCADE S.A.S., having its
11 // main offices at: 1, place des Freres Montgolfier, 78280 Guyancourt, France.
13 // The Original Code and all software distributed under the License is
14 // distributed on an "AS IS" basis, without warranty of any kind, and the
15 // Initial Developer hereby disclaims all such warranties, including without
16 // limitation, any warranties of merchantability, fitness for a particular
17 // purpose or non-infringement. Please see the License for the specific terms
18 // and conditions governing the rights and limitations under the License.
20 //AGV 060302: Input from istream
21 // AGV 130302: bug corr: was error if strlen(root_elem_name) < 7
23 #include <LDOM_XmlReader.hxx>
24 #include <Standard_Stream.hxx>
25 #include <LDOM_MemManager.hxx>
26 #include <LDOM_BasicAttribute.hxx>
27 #include <LDOM_CharReference.hxx>
28 #include <LDOM_OSStream.hxx>
// Minimum number of unread bytes that ReadRecord wants to have in the buffer:
// when fewer than this many bytes remain, the buffer is refilled from the input.
40 const int XML_MIN_BUFFER = 10;
// NOTE(review): not referenced in the visible part of this file; presumably an
// upper bound on the number of attributes - confirm against the full source.
41 const int MAX_ATTRIBUTES = 512;
// Sentinel value of myFileDes meaning "no file descriptor": ReadRecord then
// reads from myIStream instead of calling read() on a descriptor.
42 const int FILE_NONVALUE = -1;
52 STATE_ATTRIBUTE_EQUAL,
53 STATE_ATTRIBUTE_VALUE,
// Evaluates to true when the bytes at aPtr are equal to aPattern.
// The compared length is sizeof(aPattern) - 1, so aPattern has to be a string
// literal (or a char array) and not a pointer; the terminating NUL is excluded.
// The caller must make sure that this many bytes are readable at aPtr.
59 #define TEXT_COMPARE(aPtr,aPattern) \
60 (memcmp ((aPtr), (aPattern), sizeof(aPattern) - 1) == 0)
// Forward declaration of the file-local XML Name checker defined further below.
62 static Standard_Boolean isName (const char * aString,
63 const char * aStringEnd,
64 const char *& aNameEnd);
66 //=======================================================================
67 //function : LDOM_XmlReader()
68 //purpose : Constructor (file descriptor)
69 //=======================================================================
// Creates a reader for input identified by the file descriptor aFileDes.
//  aFileDes      - descriptor of the input; presumably stored in myFileDes
//                  (that initializer is not visible in this view - confirm).
//  aDocument     - memory manager, stored in myDocument.
//  anErrorString - string bound to myError; ReadRecord writes error messages
//                  to it.
// The reader starts with the end-of-file flag cleared.
71 LDOM_XmlReader::LDOM_XmlReader (const int aFileDes,
72 const Handle(LDOM_MemManager)& aDocument,
73 TCollection_AsciiString& anErrorString)
74 : myEOF (Standard_False),
// Two alternative initializers of myIStream follow; presumably they are
// selected by preprocessor conditionals that are not visible in this view.
// Either way myIStream only receives a placeholder here: ReadRecord reads from
// myFileDes whenever it differs from FILE_NONVALUE.
77 myIStream (cin), // one quirk of MSVC6.0: can't initialise by 0
79 myIStream (* (istream *) UndefinedHandleAddress),
81 myError (anErrorString),
82 myDocument (aDocument),
// The end pointer is set to the start of myBuffer, i.e. no data is buffered yet.
84 myEndPtr (&myBuffer[0])
87 //=======================================================================
88 //function : LDOM_XmlReader()
89 //purpose : Constructor (istream)
90 //=======================================================================
// Creates a reader that takes its input from a stream.
//  anInput       - input stream; presumably bound to myIStream (that
//                  initializer is not visible in this view - confirm).
//  aDocument     - memory manager, stored in myDocument.
//  anErrorString - string bound to myError; ReadRecord writes error messages
//                  to it.
92 LDOM_XmlReader::LDOM_XmlReader (istream& anInput,
93 const Handle(LDOM_MemManager)& aDocument,
94 TCollection_AsciiString& anErrorString)
95 : myEOF (Standard_False),
// No file descriptor: makes ReadRecord read from myIStream.
96 myFileDes (FILE_NONVALUE),
98 myError (anErrorString),
99 myDocument (aDocument),
// Both pointers at the start of myBuffer: the buffer is initially empty, so the
// first call of ReadRecord has to fill it.
100 myPtr (&myBuffer[0]),
101 myEndPtr (&myBuffer[0])
104 //=======================================================================
105 //function : ReadRecord
106 //purpose : Read a record from XML file
107 //=======================================================================
// Scans the input for the next XML record and returns its type; the text of
// the record (or the tag name for elements) is appended to theData.
// Return values visible here are XML_FULL_ELEMENT, XML_START_ELEMENT and
// XML_END_ELEMENT; the remaining return statements are not visible in this
// view. When an error is detected a message is stored in myError.
// Input is consumed through myBuffer between myPtr (current position) and
// myEndPtr (end of valid data). For element records the element is created in
// myElement and its attributes are added to it while the tag is parsed.
109 LDOM_XmlReader::RecordType LDOM_XmlReader::ReadRecord
110 (LDOM_OSStream& theData)
// aStartData marks the beginning of the data of the record being read inside
// myBuffer (NULL when no record data has been started).
114 ParserState aState = STATE_WAITING;
115 const char * aStartData = NULL, * aNameEnd, * aPtr;
116 LDOMBasicString anAttrName, anAttrValue;
// Quote character that opened the current attribute value; '\0' = none.
117 char anAttDelimiter = '\0';
120 // Check if the current file buffer is exhausted
121 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
122 // There should always be some bytes available in the buffer for analysis
123 Standard_Integer aBytesRest = myEndPtr - myPtr;
124 if (aBytesRest < XML_MIN_BUFFER) {
125 if (myEOF == Standard_True) {
127 break; // END of processing
129 // If we are reading some data, save the beginning and preserve the state
130 if (aStartData /* && aState != STATE_WAITING */) {
131 if (myPtr > aStartData)
132 theData.rdbuf()->sputn(aStartData, myPtr - aStartData);
// The unread tail is moved to the start of the buffer below, so the record
// data continues from there.
133 aStartData = &myBuffer[0];
135 // Copy the rest of file data to the beginning of buffer
137 memcpy (&myBuffer[0], myPtr, aBytesRest);
139 // Read the full buffer and reset start and end buffer pointers
140 myPtr = &myBuffer[0];
141 Standard_Integer aNBytes;
// Input source: the file descriptor if one was given, otherwise the stream.
142 if (myFileDes != FILE_NONVALUE)
143 aNBytes = read (myFileDes, &myBuffer[aBytesRest],
144 XML_BUFFER_SIZE - aBytesRest);
146 myIStream.read (&myBuffer[aBytesRest],
147 XML_BUFFER_SIZE - aBytesRest);
148 aNBytes = myIStream.gcount();
151 myEOF = Standard_True; // END-OF-FILE
152 myEndPtr = &myBuffer[aBytesRest + aNBytes];
// Terminate the valid data with NUL.
// NOTE(review): this writes at index XML_BUFFER_SIZE when the buffer is filled
// completely - confirm that myBuffer is declared with one extra byte.
153 myBuffer[aBytesRest + aNBytes] = '\0';
157 // Check the character data
160 // Checking the characters in STATE_WAITING (blank, TEXT or markup)
161 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
171 // XML markup found; detect the record type
174 aState = STATE_HEADER;
179 aState = STATE_ELEMENT_END;
// "<!" markup: comment ("--"), DOCTYPE or CDATA section
184 if (myPtr[2] == '-' && myPtr[3] == '-') {
185 aState = STATE_COMMENT;
187 } else if (TEXT_COMPARE (&myPtr[2], "DOCTYPE")) {
189 if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
191 aState = STATE_DOCTYPE;
193 } else if (TEXT_COMPARE (&myPtr[2], "[CDATA[")) {
194 aState = STATE_CDATA;
196 } else break; // ERROR
// Otherwise the markup has to be an element: a valid name must follow
200 if (::isName (&myPtr[1], myEndPtr, aNameEnd)) {
201 aStartData = myPtr + 1;
203 if (myPtr < myEndPtr) {
204 myElement = & LDOM_BasicElement::Create (aStartData,
208 aState = STATE_ATTRIBUTE_NAME;
211 aState = STATE_ELEMENT;
215 myError = "Unknown XML object: ";
216 myError += TCollection_AsciiString ((const Standard_CString)myPtr,
220 if (myEOF == Standard_True) continue;
222 // Limitation: we do not treat '&' as special character
223 aPtr = (const char *) memchr (myPtr, '<', myEndPtr - myPtr);
225 // The end of text field reached
226 theData.rdbuf()->sputn(myPtr, aPtr - myPtr);
233 } // end of checking in STATE_WAITING
236 // Checking the characters in STATE_HEADER, seek for "?>" sequence
237 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// The search stops one byte before the end of data so that aPtr[1] below is
// always inside the valid data.
239 aPtr = (const char *) memchr (aStartData, '?', (myEndPtr-1) - aStartData);
241 // The end of XML declaration found
242 if (aPtr[1] != '>') { // ERROR
243 myError = "Character \'>\' is expected in the end of XML declaration";
246 // The XML declaration is retrieved
247 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
// Not found in the buffered data: leave only the last byte unread, presumably
// so that a terminator split between two buffer fills is still detected.
251 myPtr = myEndPtr - 1;
254 // Checking the characters in STATE_DOCTYPE, seek for "]>" sequence
255 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
257 for (aPtr = aStartData; aPtr < myEndPtr-1; aPtr++) {
258 const int aChar = aPtr[0];
260 aState = STATE_DOCTYPE_MARKUP;
261 aStartData = &aPtr[1];
262 goto state_doctype_markup;
265 // The DOCTYPE declaration is retrieved
266 theData.rdbuf()->sputn(aStartData, aPtr - aStartData - 1);
271 myPtr = myEndPtr - 1;
274 state_doctype_markup:
275 case STATE_DOCTYPE_MARKUP:
276 aPtr = (const char *) memchr (aStartData, ']', (myEndPtr-1) - aStartData);
278 // The end of DOCTYPE declaration found
279 if (aPtr[1] != '>') { // ERROR
281 "Character \'>\' is expected in the end of DOCTYPE declaration";
284 // The DOCTYPE declaration is retrieved
285 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
289 myPtr = myEndPtr - 1;
292 // Checking the characters in STATE_COMMENT, seek for "-->" sequence
293 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// The search stops two bytes before the end of data so that aPtr[1] and
// aPtr[2] below are inside the valid data.
297 aPtr = (const char *) memchr (aPtr, '-', (myEndPtr - 2) - aPtr);
298 if (aPtr == NULL) break;
// A single '-' is ordinary comment text: skip it and continue searching
299 if (aPtr[1] != '-') ++ aPtr;
301 if (aPtr[2] != '>') { // ERROR
302 myError = "Character \'>\' is expected in the end of comment";
305 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
310 myPtr = myEndPtr - 2;
313 // Checking the characters in STATE_TEXT, seek for "<"
314 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
316 aPtr = (const char *) memchr (aStartData, '<', myEndPtr - aStartData);
318 // The end of text field reached
319 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
326 // Checking the characters in STATE_CDATA, seek for "]]"
327 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// NOTE(review): the search starts at aPtr but the length is measured from
// aStartData; this is only correct while aPtr == aStartData (the assignment
// of aPtr is not visible in this view) - otherwise memchr could look beyond
// myEndPtr. Confirm against the full source.
331 aPtr = (const char *) memchr (aPtr, ']', (myEndPtr - 1) - aStartData);
332 if (aPtr == NULL) break;
333 if (aPtr[1] != ']') { // ERROR
334 myError = "Characters \']]\' are expected in the end of CDATA";
337 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
341 myPtr = myEndPtr - 1;
344 // Checking the characters in STATE_ELEMENT, seek the end of TagName
345 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// This state continues a tag name whose beginning may already be in theData.
// A failed name check is an error only when nothing was collected before or
// when some characters were consumed before the check failed.
347 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
348 if (theData.Length() == 0 || aNameEnd != myPtr) {
349 myError = "Invalid tag name";
353 theData.rdbuf()->sputn(aStartData, aNameEnd - aStartData);
// The string obtained from str() is released with delete[] after use
354 char* aDataString = (char *)theData.str();
355 myElement = & LDOM_BasicElement::Create (aDataString, theData.Length(),
359 delete [] aDataString;
360 aState = STATE_ATTRIBUTE_NAME;
365 // Parsing a single attribute (STATE_ATTRIBUTE)
366 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
367 case STATE_ATTRIBUTE_NAME: // attribute name
373 if (aStartData) goto attr_name;
378 myError = "Inexpected end of attribute";
379 else if (myPtr[1] != '>')
380 myError = "Improper element tag termination";
// Element without content: only the tag name is passed to the caller
385 theData << myElement->GetTagName();
387 return XML_FULL_ELEMENT;
392 myError = "Inexpected end of attribute";
398 theData << myElement->GetTagName();
400 return XML_START_ELEMENT;
402 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
403 if (theData.Length() == 0 || aNameEnd != myPtr) {
404 myError = "Invalid attribute name";
407 if (aNameEnd >= myEndPtr)
// Name entirely inside the buffer: build it directly from the buffer;
// otherwise complete the part that was previously saved in theData
410 if (theData.Length() == 0)
411 anAttrName = LDOMBasicString(myPtr, aNameEnd - myPtr, myDocument);
413 theData.rdbuf()->sputn(myPtr, aNameEnd - myPtr);
415 char* aDataString = (char *)theData.str();
417 anAttrName = LDOMBasicString (aDataString, myDocument);
418 delete [] aDataString;
421 aState = STATE_ATTRIBUTE_EQUAL;
426 case STATE_ATTRIBUTE_EQUAL: // attribute 'equal' sign
429 aState = STATE_ATTRIBUTE_VALUE;
437 myError = "Equal sign expected in attribute definition";
441 case STATE_ATTRIBUTE_VALUE: // attribute value
447 if (aStartData == NULL) {
451 if (anAttDelimiter == '\0') {
452 myError = "Expected an attribute value";
// Start of the value: remember the opening quote character and the position
// of the first character of the value
456 if (aStartData == NULL) {
457 aStartData = &myPtr[1];
458 anAttDelimiter = myPtr[0];
462 // Limitation: we do not take into account that '<' and '&'
463 // are not allowed in attribute values
464 aPtr = (const char *) memchr (aStartData, anAttDelimiter,
465 myEndPtr - aStartData);
// Closing delimiter found: overwrite it in the buffer with NUL so that the
// value can be handled as a C string
467 (char&) aPtr[0] = '\0';
468 anAttDelimiter = '\0';
469 char * aDataString = (char *) aStartData;
470 const char * ePtr = aPtr;
472 // Append the end of the string to previously taken data
473 if (theData.Length() > 0) {
474 theData.rdbuf()->sputn(aStartData, aPtr-aStartData);
475 aDataString = (char *)theData.str();
476 ePtr = strchr (aDataString, '\0');
479 Standard_Integer aDataLen;
480 aDataString = LDOM_CharReference::Decode (aDataString, aDataLen);
// A value that starts with a digit is first tried as an integer; getInteger
// returns true when it did NOT convert, and then the value is kept as a string
481 if (IsDigit(aDataString[0])) {
482 if (getInteger (anAttrValue, aDataString, ePtr))
483 anAttrValue = LDOMBasicString (aDataString,aDataLen,myDocument);
485 anAttrValue = LDOMBasicString (aDataString, aDataLen, myDocument);
487 if (theData.Length() > 0) {
489 delete [] aDataString;
491 // Create an attribute
492 myLastChild = myElement -> AddAttribute (anAttrName, anAttrValue,
493 myDocument, myLastChild);
// Continue with the next attribute (or the end of the tag)
496 aState = STATE_ATTRIBUTE_NAME;
501 // Checking the characters in STATE_ELEMENT_END, seek for ">"
502 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
503 case STATE_ELEMENT_END:
504 aPtr = (const char *) memchr (aStartData, '>', myEndPtr - aStartData);
506 // The end of the end-element markup
507 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
509 return XML_END_ELEMENT;
// Reached after the processing has stopped: stopping anywhere else than
// between records means that the input ended in the middle of a record
515 if (aState != STATE_WAITING) {
516 myError = "Unexpected end of file";
522 //=======================================================================
525 //purpose : Check if aString is a valid XML Name
526 //=======================================================================
// Checks whether the text that starts at aString is a valid XML Name.
//  aString    - first character to be examined; it is read without a
//               comparison with aStringEnd.
//  aStringEnd - end of the available data; the scan of the following
//               characters stops there.
//  aNameEnd   - output reference. NOTE(review): its assignments are not visible
//               in this view; callers use it as the position where the name
//               scan stopped - confirm against the full source.
// The first character has to be alphabetic, '_' or ':'.
528 static Standard_Boolean isName (const char * aString,
529 const char * aStringEnd,
530 const char *& aNameEnd)
532 Standard_Boolean aResult;
533 int aCh = aString[0];
534 if (IsAlphabetic(aCh) || aCh == '_' || aCh == ':') {
535 const char * aPtr = &aString[1];
// Scan the characters that follow the first one
536 while (aPtr < aStringEnd) {
548 return Standard_True;
// A character that is not alphanumeric (and presumably not one of the extra
// name characters tested in the lines not visible here) is not part of a name
550 if (IsAlphanumeric(aCh) == 0) {
552 return Standard_False;
562 aResult = Standard_True;
565 aResult = Standard_False;
570 //=======================================================================
571 //function : getInteger
572 //purpose : Try to initialize theValue as Integer; return False on success
573 //=======================================================================
// Tries to convert the text [theStart, theEnd) to an integer value.
//  theValue - receives the integer when the conversion succeeds; it is left
//             untouched otherwise.
//  theStart - beginning of the text, parsed by strtol in base 10.
//  theEnd   - end of the text; the conversion counts as successful only when
//             strtol stops exactly there.
// Returns Standard_False when theValue has been set and Standard_True when the
// text was NOT converted (note the inverted meaning of the result).
575 Standard_Boolean LDOM_XmlReader::getInteger (LDOMBasicString& theValue,
576 const char * theStart,
// Text with a leading '0' is converted only when it is a single character, so
// that values such as "007" are not turned into numbers.
581 if (theEnd - theStart == 1 || theStart[0] != '0')
// NOTE(review): the errno test below is meaningful only if errno is cleared
// before strtol is called; that reset is not visible in this view - confirm.
583 long aResult = strtol (theStart, &ptr, 10);
584 if (ptr == theEnd && errno == 0)
// NOTE(review): aResult is narrowed from long without a range check; verify
// the behaviour where Standard_Integer is narrower than long.
586 theValue = Standard_Integer(aResult);
587 return Standard_False;
590 return Standard_True;