0024129: Eliminate remaining compiler warnings in MSVC++ 2008 32 bit with warning...
[occt.git] / src / LDOM / LDOM_XmlReader.cxx
CommitLineData
b311480e 1// Created on: 2001-07-20
2// Created by: Alexander GRIGORIEV
3// Copyright (c) 2001-2012 OPEN CASCADE SAS
4//
5// The content of this file is subject to the Open CASCADE Technology Public
6// License Version 6.5 (the "License"). You may not use the content of this file
7// except in compliance with the License. Please obtain a copy of the License
8// at http://www.opencascade.org and read it completely before using this file.
9//
10// The Initial Developer of the Original Code is Open CASCADE S.A.S., having its
11// main offices at: 1, place des Freres Montgolfier, 78280 Guyancourt, France.
12//
13// The Original Code and all software distributed under the License is
14// distributed on an "AS IS" basis, without warranty of any kind, and the
15// Initial Developer hereby disclaims all such warranties, including without
16// limitation, any warranties of merchantability, fitness for a particular
17// purpose or non-infringement. Please see the License for the specific terms
18// and conditions governing the rights and limitations under the License.
19
20//AGV 060302: Input from istream
7fd59977 21// AGV 130302: bug corr: was error if strlen(root_elem_name) < 7
22
23#include <LDOM_XmlReader.hxx>
24#include <Standard_Stream.hxx>
25#include <LDOM_MemManager.hxx>
26#include <LDOM_BasicAttribute.hxx>
27#include <LDOM_CharReference.hxx>
28#include <LDOM_OSStream.hxx>
29
30#include <string.h>
31#include <errno.h>
32#ifdef WNT
33#include <io.h>
34#else
35#include <unistd.h>
36#endif
37
38//#include <ctype.h>
39
// Sizes and sentinels used by the reader
static const int XML_MIN_BUFFER = 10;   // the buffer is refilled when fewer bytes remain
static const int MAX_ATTRIBUTES = 512;
static const int FILE_NONVALUE  = -1;   // stored as file descriptor when input is an istream

// States of the automaton implemented in LDOM_XmlReader::ReadRecord()
enum ParserState {
  STATE_WAITING = 0,       // between records: blanks, text or markup may follow
  STATE_HEADER,            // inside "<?", up to "?>"
  STATE_DOCTYPE,           // inside "<!DOCTYPE", before '[' or '>'
  STATE_DOCTYPE_MARKUP,    // after '[' of DOCTYPE, up to "]>"
  STATE_ELEMENT,           // tag name that continues after a buffer refill
  STATE_ELEMENT_END,       // inside "</", up to '>'
  STATE_ATTRIBUTE_NAME,    // attribute name, '>' or "/>" expected
  STATE_ATTRIBUTE_EQUAL,   // '=' expected
  STATE_ATTRIBUTE_VALUE,   // quoted attribute value expected
  STATE_COMMENT,           // inside "<!--", up to "-->"
  STATE_CDATA,             // inside "<![CDATA[", up to "]]"
  STATE_TEXT               // text, up to the next '<'
};

// True if the bytes at aPtr start with the string literal aPattern
// (sizeof is applied to aPattern, so it must be a literal or a char array)
#define TEXT_COMPARE(aPtr,aPattern) \
  (memcmp ((aPtr), (aPattern), sizeof(aPattern) - 1) == 0)
61
62static Standard_Boolean isName (const char * aString,
63 const char * aStringEnd,
64 const char *& aNameEnd);
65
66//=======================================================================
67//function : LDOM_XmlReader()
68//purpose : Constructor (file descriptor)
69//=======================================================================
70
71LDOM_XmlReader::LDOM_XmlReader (const int aFileDes,
72 const Handle(LDOM_MemManager)& aDocument,
73 TCollection_AsciiString& anErrorString)
74 : myEOF (Standard_False),
75 myFileDes (aFileDes),
76#ifdef WNT
77 myIStream (cin), // one quirk of MSVC6.0: can't initialise by 0
78#else
79 myIStream (* (istream *) UndefinedHandleAddress),
80#endif
81 myError (anErrorString),
82 myDocument (aDocument),
83 myPtr (&myBuffer[0]),
84 myEndPtr (&myBuffer[0])
85{}
86
87//=======================================================================
88//function : LDOM_XmlReader()
89//purpose : Constructor (istream)
90//=======================================================================
91
92LDOM_XmlReader::LDOM_XmlReader (istream& anInput,
93 const Handle(LDOM_MemManager)& aDocument,
94 TCollection_AsciiString& anErrorString)
95 : myEOF (Standard_False),
96 myFileDes (FILE_NONVALUE),
97 myIStream (anInput),
98 myError (anErrorString),
99 myDocument (aDocument),
100 myPtr (&myBuffer[0]),
101 myEndPtr (&myBuffer[0])
102{}
103
104//=======================================================================
105//function : ReadRecord
106//purpose : Read a record from XML file
107//=======================================================================
108
109LDOM_XmlReader::RecordType LDOM_XmlReader::ReadRecord
110 (LDOM_OSStream& theData)
111{
112 theData.Clear();
113 myError.Clear();
114 ParserState aState = STATE_WAITING;
1d47d8d0 115 const char * aStartData = NULL, * aNameEnd = NULL, * aPtr;
7fd59977 116 LDOMBasicString anAttrName, anAttrValue;
117 char anAttDelimiter = '\0';
118
302f96fb 119 for(;;) {
7fd59977 120 // Check if the current file buffer is exhausted
121 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
122 // There should always be some bytes available in the buffer for analysis
123 Standard_Integer aBytesRest = myEndPtr - myPtr;
124 if (aBytesRest < XML_MIN_BUFFER) {
125 if (myEOF == Standard_True) {
126 if (aBytesRest <= 0)
127 break; // END of processing
128 } else {
129 // If we are reading some data, save the beginning and preserve the state
130 if (aStartData /* && aState != STATE_WAITING */) {
131 if (myPtr > aStartData)
132 theData.rdbuf()->sputn(aStartData, myPtr - aStartData);
133 aStartData = &myBuffer[0];
134 }
135 // Copy the rest of file data to the beginning of buffer
136 if (aBytesRest > 0)
137 memcpy (&myBuffer[0], myPtr, aBytesRest);
138
139 // Read the full buffer and reset start and end buffer pointers
140 myPtr = &myBuffer[0];
60be1f9b 141 Standard_Size aNBytes;
7fd59977 142 if (myFileDes != FILE_NONVALUE)
143 aNBytes = read (myFileDes, &myBuffer[aBytesRest],
144 XML_BUFFER_SIZE - aBytesRest);
145 else {
146 myIStream.read (&myBuffer[aBytesRest],
147 XML_BUFFER_SIZE - aBytesRest);
148 aNBytes = myIStream.gcount();
149 }
150 if (aNBytes == 0)
151 myEOF = Standard_True; // END-OF-FILE
152 myEndPtr = &myBuffer[aBytesRest + aNBytes];
153 myBuffer[aBytesRest + aNBytes] = '\0';
154 }
155 }
156
157 // Check the character data
158 switch (aState) {
159
160 // Checking the characters in STATE_WAITING (blank, TEXT or markup)
161 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
162 case STATE_WAITING:
163 switch (myPtr[0]) {
164 case ' ':
165 case '\t':
166 case '\n':
167 case '\r':
168 ++ myPtr;
169 continue;
170 case '<':
171 // XML markup found, then make detect the record type
172 switch (myPtr[1]) {
173 case '?':
174 aState = STATE_HEADER;
175 myPtr += 2;
176 aStartData = myPtr;
177 continue;
178 case '/':
179 aState = STATE_ELEMENT_END;
180 myPtr += 2;
181 aStartData = myPtr;
182 continue;
183 case '!':
184 if (myPtr[2] == '-' && myPtr[3] == '-') {
185 aState = STATE_COMMENT;
186 myPtr += 4;
187 } else if (TEXT_COMPARE (&myPtr[2], "DOCTYPE")) {
188 char ch = myPtr[9];
189 if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
190 break;
191 aState = STATE_DOCTYPE;
192 myPtr += 10;
193 } else if (TEXT_COMPARE (&myPtr[2], "[CDATA[")) {
194 aState = STATE_CDATA;
195 myPtr += 9;
196 } else break; // ERROR
197 aStartData = myPtr;
198 continue;
199 default:
200 if (::isName (&myPtr[1], myEndPtr, aNameEnd)) {
201 aStartData = myPtr + 1;
202 myPtr = aNameEnd;
203 if (myPtr < myEndPtr) {
204 myElement = & LDOM_BasicElement::Create (aStartData,
205 myPtr - aStartData,
206 myDocument);
207 myLastChild = NULL;
208 aState = STATE_ATTRIBUTE_NAME;
209 aStartData = NULL;
210 }else
211 aState = STATE_ELEMENT;
212 continue;
213 } // otherwise ERROR
214 } // end of switch
215 myError = "Unknown XML object: ";
216 myError += TCollection_AsciiString ((const Standard_CString)myPtr,
217 XML_MIN_BUFFER);
218 return XML_UNKNOWN;
219 case '\0':
220 if (myEOF == Standard_True) continue;
221 default:
222 // Limitation: we do not treat '&' as special character
223 aPtr = (const char *) memchr (myPtr, '<', myEndPtr - myPtr);
224 if (aPtr) {
225 // The end of text field reached
226 theData.rdbuf()->sputn(myPtr, aPtr - myPtr);
227 myPtr = aPtr;
228 return XML_TEXT;
229 }
230 aState = STATE_TEXT;
231 aStartData = myPtr;
232 myPtr = myEndPtr;
233 } // end of checking in STATE_WAITING
234 continue;
235
236 // Checking the characters in STATE_HEADER, seek for "?>" sequence
237 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
238 case STATE_HEADER:
239 aPtr = (const char *) memchr (aStartData, '?', (myEndPtr-1) - aStartData);
240 if (aPtr) {
241 // The end of XML declaration found
242 if (aPtr[1] != '>') { // ERROR
243 myError = "Character \'>\' is expected in the end of XML declaration";
244 return XML_UNKNOWN;
245 }
246 // The XML declaration is retrieved
247 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
248 myPtr = aPtr + 2;
249 return XML_HEADER;
250 }
251 myPtr = myEndPtr - 1;
252 continue;
253
254 // Checking the characters in STATE_DOCTYPE, seek for "]>" sequence
255 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
256 case STATE_DOCTYPE:
257 for (aPtr = aStartData; aPtr < myEndPtr-1; aPtr++) {
258 const int aChar = aPtr[0];
259 if (aChar == '[') {
260 aState = STATE_DOCTYPE_MARKUP;
261 aStartData = &aPtr[1];
262 goto state_doctype_markup;
263 }
264 if (aChar == '>') {
265 // The DOCTYPE declaration is retrieved
266 theData.rdbuf()->sputn(aStartData, aPtr - aStartData - 1);
267 myPtr = aPtr + 1;
268 return XML_DOCTYPE;
269 }
270 }
271 myPtr = myEndPtr - 1;
272 continue;
273
274 state_doctype_markup:
275 case STATE_DOCTYPE_MARKUP:
276 aPtr = (const char *) memchr (aStartData, ']', (myEndPtr-1) - aStartData);
277 if (aPtr) {
278 // The end of DOCTYPE declaration found
279 if (aPtr[1] != '>') { // ERROR
280 myError =
281 "Character \'>\' is expected in the end of DOCTYPE declaration";
282 return XML_UNKNOWN;
283 }
284 // The DOCTYPE declaration is retrieved
285 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
286 myPtr = aPtr + 2;
287 return XML_DOCTYPE;
288 }
289 myPtr = myEndPtr - 1;
290 continue;
291
292 // Checking the characters in STATE_COMMENT, seek for "-->" sequence
293 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
294 case STATE_COMMENT:
295 aPtr = aStartData;
302f96fb 296 for(;;) {
7fd59977 297 aPtr = (const char *) memchr (aPtr, '-', (myEndPtr - 2) - aPtr);
298 if (aPtr == NULL) break;
299 if (aPtr[1] != '-') ++ aPtr;
300 else {
301 if (aPtr[2] != '>') { // ERROR
302 myError = "Character \'>\' is expected in the end of comment";
303 return XML_UNKNOWN;
304 }
305 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
306 myPtr = aPtr + 3;
307 return XML_COMMENT;
308 }
309 }
310 myPtr = myEndPtr - 2;
311 continue;
312
313 // Checking the characters in STATE_TEXT, seek for "<"
314 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
315 case STATE_TEXT:
316 aPtr = (const char *) memchr (aStartData, '<', myEndPtr - aStartData);
317 if (aPtr) {
318 // The end of text field reached
319 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
320 myPtr = aPtr;
321 return XML_TEXT;
322 }
323 myPtr = myEndPtr;
324 continue;
325
326 // Checking the characters in STATE_CDATA, seek for "]]"
327 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
328 case STATE_CDATA:
329 aPtr = aStartData;
302f96fb 330 for(;;) {
7fd59977 331 aPtr = (const char *) memchr (aPtr, ']', (myEndPtr - 1) - aStartData);
332 if (aPtr == NULL) break;
333 if (aPtr[1] != ']') { // ERROR
334 myError = "Characters \']]\' are expected in the end of CDATA";
335 return XML_UNKNOWN;
336 }
337 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
338 myPtr = aPtr + 2;
339 return XML_CDATA;
340 }
341 myPtr = myEndPtr - 1;
342 continue;
343
344 // Checking the characters in STATE_ELEMENT, seek the end of TagName
345 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
346 case STATE_ELEMENT:
347 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
348 if (theData.Length() == 0 || aNameEnd != myPtr) {
349 myError = "Invalid tag name";
350 return XML_UNKNOWN;
351 }
352 {
353 theData.rdbuf()->sputn(aStartData, aNameEnd - aStartData);
354 char* aDataString = (char *)theData.str();
355 myElement = & LDOM_BasicElement::Create (aDataString, theData.Length(),
356 myDocument);
357 theData.Clear();
358 myLastChild = NULL;
359 delete [] aDataString;
360 aState = STATE_ATTRIBUTE_NAME;
361 aStartData = NULL;
362 myPtr = aNameEnd;
363 continue;
364 }
365 // Parsing a single attribute (STATE_ATTRIBUTE)
366 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
367 case STATE_ATTRIBUTE_NAME: // attribute name
368 switch (myPtr[0]) {
369 case ' ' :
370 case '\t':
371 case '\n':
372 case '\r':
373 if (aStartData) goto attr_name;
374 ++ myPtr;
375 continue;
376 case '/' :
377 if (aStartData)
378 myError = "Inexpected end of attribute";
379 else if (myPtr[1] != '>')
380 myError = "Improper element tag termination";
381 else {
382 myPtr += 2;
383#ifdef DEB
384 theData.Clear();
385 theData << myElement->GetTagName();
386#endif
387 return XML_FULL_ELEMENT;
388 }
389 return XML_UNKNOWN;
390 case '>' :
391 if (aStartData) {
392 myError = "Inexpected end of attribute";
393 return XML_UNKNOWN;
394 }
395 ++ myPtr;
396#ifdef DEB
397 theData.Clear();
398 theData << myElement->GetTagName();
399#endif
400 return XML_START_ELEMENT;
401 default :
402 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
403 if (theData.Length() == 0 || aNameEnd != myPtr) {
404 myError = "Invalid attribute name";
405 return XML_UNKNOWN;
406 }
407 if (aNameEnd >= myEndPtr)
408 aStartData = myPtr;
409 else {
410 if (theData.Length() == 0)
411 anAttrName = LDOMBasicString(myPtr, aNameEnd - myPtr, myDocument);
412 else {
413 theData.rdbuf()->sputn(myPtr, aNameEnd - myPtr);
414attr_name:
415 char* aDataString = (char *)theData.str();
416 theData.Clear();
417 anAttrName = LDOMBasicString (aDataString, myDocument);
418 delete [] aDataString;
419 }
420 aStartData = NULL;
421 aState = STATE_ATTRIBUTE_EQUAL;
422 }
423 myPtr = aNameEnd;
424 continue;
425 }
426 case STATE_ATTRIBUTE_EQUAL: // attribute 'equal' sign
427 switch (myPtr[0]) {
428 case '=' :
429 aState = STATE_ATTRIBUTE_VALUE;
430 case ' ' :
431 case '\t':
432 case '\n':
433 case '\r':
434 ++ myPtr;
435 continue;
436 default:
437 myError = "Equal sign expected in attribute definition";
438 return XML_UNKNOWN;
439 }
440
441 case STATE_ATTRIBUTE_VALUE: // attribute value
442 switch (myPtr[0]) {
443 case ' ' :
444 case '\t':
445 case '\n':
446 case '\r':
447 if (aStartData == NULL) {
448 ++ myPtr;
449 continue;
450 default:
451 if (anAttDelimiter == '\0') {
452 myError = "Expected an attribute value";
453 return XML_UNKNOWN;
454 case '\"':
455 case '\'':
456 if (aStartData == NULL) {
457 aStartData = &myPtr[1];
458 anAttDelimiter = myPtr[0];
459 }
460 }
461 }
462 // Limitation: we do not take into account that '<' and '&'
463 // are not allowed in attribute values
464 aPtr = (const char *) memchr (aStartData, anAttDelimiter,
465 myEndPtr - aStartData);
466 if (aPtr) {
467 (char&) aPtr[0] = '\0';
468 anAttDelimiter = '\0';
469 char * aDataString = (char *) aStartData;
470 const char * ePtr = aPtr;
471
472 // Append the end of the string to previously taken data
473 if (theData.Length() > 0) {
474 theData.rdbuf()->sputn(aStartData, aPtr-aStartData);
475 aDataString = (char *)theData.str();
476 ePtr = strchr (aDataString, '\0');
477 }
478
479 Standard_Integer aDataLen;
480 aDataString = LDOM_CharReference::Decode (aDataString, aDataLen);
481 if (IsDigit(aDataString[0])) {
482 if (getInteger (anAttrValue, aDataString, ePtr))
483 anAttrValue = LDOMBasicString (aDataString,aDataLen,myDocument);
484 } else
485 anAttrValue = LDOMBasicString (aDataString, aDataLen, myDocument);
486
487 if (theData.Length() > 0) {
488 theData.Clear();
489 delete [] aDataString;
490 }
491 // Create an attribute
492 myLastChild = myElement -> AddAttribute (anAttrName, anAttrValue,
493 myDocument, myLastChild);
494 myPtr = aPtr + 1;
495 aStartData = NULL;
496 aState = STATE_ATTRIBUTE_NAME;
497 } else
498 myPtr = myEndPtr;
499 continue;
500 }
501 // Checking the characters in STATE_ELEMENT_END, seek for ">"
502 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
503 case STATE_ELEMENT_END:
504 aPtr = (const char *) memchr (aStartData, '>', myEndPtr - aStartData);
505 if (aPtr) {
506 // The end of the end-element markup
507 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
508 myPtr = aPtr + 1;
509 return XML_END_ELEMENT;
510 }
511 myPtr = myEndPtr;
512 continue;
513 }
514 }
515 if (aState != STATE_WAITING) {
516 myError = "Unexpected end of file";
517 return XML_UNKNOWN;
518 }
519 return XML_EOF;
520}
521
522//=======================================================================
523//function : isName
524//type : static
525//purpose : Check if aString is a valid XML Name
526//=======================================================================
527
528static Standard_Boolean isName (const char * aString,
529 const char * aStringEnd,
530 const char *& aNameEnd)
531{
532 Standard_Boolean aResult;
302f96fb 533 char aCh = aString[0];
7fd59977 534 if (IsAlphabetic(aCh) || aCh == '_' || aCh == ':') {
535 const char * aPtr = &aString[1];
536 while (aPtr < aStringEnd) {
537 aCh = * aPtr;
538 switch (aCh) {
539 case ' ' :
540 case '\n':
541 case '\r':
542 case '\t':
543 case '=' :
544 case '\0':
545 case '/' :
546 case '>' :
547 aNameEnd = aPtr;
548 return Standard_True;
549 default:
550 if (IsAlphanumeric(aCh) == 0) {
551 aNameEnd = aPtr;
552 return Standard_False;
553 }
554 case '.' :
555 case '-' :
556 case '_' :
557 case ':' :
558 ++ aPtr;
559 }
560 }
561 aNameEnd = aPtr;
562 aResult = Standard_True;
563 } else {
564 aNameEnd = aString;
565 aResult = Standard_False;
566 }
567 return aResult;
568}
569
570//=======================================================================
571//function : getInteger
572//purpose : Try to initialize theValue as Integer; return False on success
573//=======================================================================
574
575Standard_Boolean LDOM_XmlReader::getInteger (LDOMBasicString& theValue,
576 const char * theStart,
577 const char * theEnd)
578{
579 char * ptr;
580 errno = 0;
581 if (theEnd - theStart == 1 || theStart[0] != '0')
582 {
583 long aResult = strtol (theStart, &ptr, 10);
584 if (ptr == theEnd && errno == 0)
585 {
586 theValue = Standard_Integer(aResult);
587 return Standard_False;
588 }
589 }
590 return Standard_True;
591}