0024284: Some trivial warnings produced by ICC 14
[occt.git] / src / LDOM / LDOM_XmlReader.cxx
CommitLineData
b311480e 1// Created on: 2001-07-20
2// Created by: Alexander GRIGORIEV
3// Copyright (c) 2001-2012 OPEN CASCADE SAS
4//
5// The content of this file is subject to the Open CASCADE Technology Public
6// License Version 6.5 (the "License"). You may not use the content of this file
7// except in compliance with the License. Please obtain a copy of the License
8// at http://www.opencascade.org and read it completely before using this file.
9//
10// The Initial Developer of the Original Code is Open CASCADE S.A.S., having its
11// main offices at: 1, place des Freres Montgolfier, 78280 Guyancourt, France.
12//
13// The Original Code and all software distributed under the License is
14// distributed on an "AS IS" basis, without warranty of any kind, and the
15// Initial Developer hereby disclaims all such warranties, including without
16// limitation, any warranties of merchantability, fitness for a particular
17// purpose or non-infringement. Please see the License for the specific terms
18// and conditions governing the rights and limitations under the License.
19
20//AGV 060302: Input from istream
7fd59977 21// AGV 130302: bug corr: was error if strlen(root_elem_name) < 7
22
23#include <LDOM_XmlReader.hxx>
24#include <Standard_Stream.hxx>
25#include <LDOM_MemManager.hxx>
26#include <LDOM_BasicAttribute.hxx>
27#include <LDOM_CharReference.hxx>
28#include <LDOM_OSStream.hxx>
29
30#include <string.h>
31#include <errno.h>
32#ifdef WNT
33#include <io.h>
34#else
35#include <unistd.h>
36#endif
37
38//#include <ctype.h>
39
// Threshold of unread bytes: when fewer remain in the buffer, ReadRecord()
// refills it. Also the number of characters quoted in the
// "Unknown XML object" error message.
const int XML_MIN_BUFFER = 10;

// Value of the file descriptor meaning "no file, read from the istream".
const int FILE_NONVALUE  = -1;

// States of the record parser implemented in LDOM_XmlReader::ReadRecord()
enum ParserState
{
  STATE_WAITING         = 0,  // between records: blanks, text or markup may follow
  STATE_HEADER          = 1,  // inside "<?", seeking "?>"
  STATE_DOCTYPE         = 2,  // inside "<!DOCTYPE", seeking '[' or '>'
  STATE_DOCTYPE_MARKUP  = 3,  // inside "[" of DOCTYPE, seeking "]>"
  STATE_ELEMENT         = 4,  // tag name continued from the previous buffer
  STATE_ELEMENT_END     = 5,  // inside "</", seeking '>'
  STATE_ATTRIBUTE_NAME  = 6,  // expecting attribute name or the end of the tag
  STATE_ATTRIBUTE_EQUAL = 7,  // expecting '=' after the attribute name
  STATE_ATTRIBUTE_VALUE = 8,  // expecting the quoted attribute value
  STATE_COMMENT         = 9,  // inside "<!--", seeking "-->"
  STATE_CDATA           = 10, // inside "<![CDATA[", seeking "]]"
  STATE_TEXT            = 11  // inside text, seeking '<'
};

// True if the memory at aPtr starts with the string literal aPattern
// (the terminating NUL of the literal is not compared).
#define TEXT_COMPARE(aPtr,aPattern) \
  (memcmp ((aPtr), (aPattern), sizeof(aPattern) - 1) == 0)
60
61static Standard_Boolean isName (const char * aString,
62 const char * aStringEnd,
63 const char *& aNameEnd);
64
65//=======================================================================
66//function : LDOM_XmlReader()
67//purpose : Constructor (file descriptor)
68//=======================================================================
69
70LDOM_XmlReader::LDOM_XmlReader (const int aFileDes,
71 const Handle(LDOM_MemManager)& aDocument,
72 TCollection_AsciiString& anErrorString)
73 : myEOF (Standard_False),
74 myFileDes (aFileDes),
75#ifdef WNT
76 myIStream (cin), // one quirk of MSVC6.0: can't initialise by 0
77#else
78 myIStream (* (istream *) UndefinedHandleAddress),
79#endif
80 myError (anErrorString),
81 myDocument (aDocument),
82 myPtr (&myBuffer[0]),
83 myEndPtr (&myBuffer[0])
84{}
85
86//=======================================================================
87//function : LDOM_XmlReader()
88//purpose : Constructor (istream)
89//=======================================================================
90
91LDOM_XmlReader::LDOM_XmlReader (istream& anInput,
92 const Handle(LDOM_MemManager)& aDocument,
93 TCollection_AsciiString& anErrorString)
94 : myEOF (Standard_False),
95 myFileDes (FILE_NONVALUE),
96 myIStream (anInput),
97 myError (anErrorString),
98 myDocument (aDocument),
99 myPtr (&myBuffer[0]),
100 myEndPtr (&myBuffer[0])
101{}
102
103//=======================================================================
104//function : ReadRecord
105//purpose : Read a record from XML file
106//=======================================================================
107
108LDOM_XmlReader::RecordType LDOM_XmlReader::ReadRecord
109 (LDOM_OSStream& theData)
110{
111 theData.Clear();
112 myError.Clear();
113 ParserState aState = STATE_WAITING;
1d47d8d0 114 const char * aStartData = NULL, * aNameEnd = NULL, * aPtr;
7fd59977 115 LDOMBasicString anAttrName, anAttrValue;
116 char anAttDelimiter = '\0';
117
302f96fb 118 for(;;) {
7fd59977 119 // Check if the current file buffer is exhausted
120 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
121 // There should always be some bytes available in the buffer for analysis
7dc9e047 122 Standard_Integer aBytesRest = (Standard_Integer)(myEndPtr - myPtr);
7fd59977 123 if (aBytesRest < XML_MIN_BUFFER) {
124 if (myEOF == Standard_True) {
125 if (aBytesRest <= 0)
126 break; // END of processing
127 } else {
128 // If we are reading some data, save the beginning and preserve the state
129 if (aStartData /* && aState != STATE_WAITING */) {
130 if (myPtr > aStartData)
131 theData.rdbuf()->sputn(aStartData, myPtr - aStartData);
132 aStartData = &myBuffer[0];
133 }
134 // Copy the rest of file data to the beginning of buffer
135 if (aBytesRest > 0)
136 memcpy (&myBuffer[0], myPtr, aBytesRest);
137
138 // Read the full buffer and reset start and end buffer pointers
139 myPtr = &myBuffer[0];
60be1f9b 140 Standard_Size aNBytes;
7fd59977 141 if (myFileDes != FILE_NONVALUE)
142 aNBytes = read (myFileDes, &myBuffer[aBytesRest],
143 XML_BUFFER_SIZE - aBytesRest);
144 else {
145 myIStream.read (&myBuffer[aBytesRest],
146 XML_BUFFER_SIZE - aBytesRest);
105aae76 147 aNBytes = (Standard_Size)myIStream.gcount();
7fd59977 148 }
149 if (aNBytes == 0)
150 myEOF = Standard_True; // END-OF-FILE
151 myEndPtr = &myBuffer[aBytesRest + aNBytes];
152 myBuffer[aBytesRest + aNBytes] = '\0';
153 }
154 }
155
156 // Check the character data
157 switch (aState) {
158
159 // Checking the characters in STATE_WAITING (blank, TEXT or markup)
160 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
161 case STATE_WAITING:
162 switch (myPtr[0]) {
163 case ' ':
164 case '\t':
165 case '\n':
166 case '\r':
167 ++ myPtr;
168 continue;
169 case '<':
170 // XML markup found, then make detect the record type
171 switch (myPtr[1]) {
172 case '?':
173 aState = STATE_HEADER;
174 myPtr += 2;
175 aStartData = myPtr;
176 continue;
177 case '/':
178 aState = STATE_ELEMENT_END;
179 myPtr += 2;
180 aStartData = myPtr;
181 continue;
182 case '!':
183 if (myPtr[2] == '-' && myPtr[3] == '-') {
184 aState = STATE_COMMENT;
185 myPtr += 4;
186 } else if (TEXT_COMPARE (&myPtr[2], "DOCTYPE")) {
187 char ch = myPtr[9];
188 if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
189 break;
190 aState = STATE_DOCTYPE;
191 myPtr += 10;
192 } else if (TEXT_COMPARE (&myPtr[2], "[CDATA[")) {
193 aState = STATE_CDATA;
194 myPtr += 9;
195 } else break; // ERROR
196 aStartData = myPtr;
197 continue;
198 default:
199 if (::isName (&myPtr[1], myEndPtr, aNameEnd)) {
200 aStartData = myPtr + 1;
201 myPtr = aNameEnd;
202 if (myPtr < myEndPtr) {
203 myElement = & LDOM_BasicElement::Create (aStartData,
7dc9e047 204 (Standard_Integer)(myPtr - aStartData),
7fd59977 205 myDocument);
206 myLastChild = NULL;
207 aState = STATE_ATTRIBUTE_NAME;
208 aStartData = NULL;
209 }else
210 aState = STATE_ELEMENT;
211 continue;
212 } // otherwise ERROR
213 } // end of switch
214 myError = "Unknown XML object: ";
215 myError += TCollection_AsciiString ((const Standard_CString)myPtr,
216 XML_MIN_BUFFER);
217 return XML_UNKNOWN;
218 case '\0':
219 if (myEOF == Standard_True) continue;
220 default:
221 // Limitation: we do not treat '&' as special character
222 aPtr = (const char *) memchr (myPtr, '<', myEndPtr - myPtr);
223 if (aPtr) {
224 // The end of text field reached
225 theData.rdbuf()->sputn(myPtr, aPtr - myPtr);
226 myPtr = aPtr;
227 return XML_TEXT;
228 }
229 aState = STATE_TEXT;
230 aStartData = myPtr;
231 myPtr = myEndPtr;
232 } // end of checking in STATE_WAITING
233 continue;
234
235 // Checking the characters in STATE_HEADER, seek for "?>" sequence
236 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
237 case STATE_HEADER:
238 aPtr = (const char *) memchr (aStartData, '?', (myEndPtr-1) - aStartData);
239 if (aPtr) {
240 // The end of XML declaration found
241 if (aPtr[1] != '>') { // ERROR
242 myError = "Character \'>\' is expected in the end of XML declaration";
243 return XML_UNKNOWN;
244 }
245 // The XML declaration is retrieved
246 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
247 myPtr = aPtr + 2;
248 return XML_HEADER;
249 }
250 myPtr = myEndPtr - 1;
251 continue;
252
253 // Checking the characters in STATE_DOCTYPE, seek for "]>" sequence
254 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
255 case STATE_DOCTYPE:
256 for (aPtr = aStartData; aPtr < myEndPtr-1; aPtr++) {
257 const int aChar = aPtr[0];
258 if (aChar == '[') {
259 aState = STATE_DOCTYPE_MARKUP;
260 aStartData = &aPtr[1];
261 goto state_doctype_markup;
262 }
263 if (aChar == '>') {
264 // The DOCTYPE declaration is retrieved
265 theData.rdbuf()->sputn(aStartData, aPtr - aStartData - 1);
266 myPtr = aPtr + 1;
267 return XML_DOCTYPE;
268 }
269 }
270 myPtr = myEndPtr - 1;
271 continue;
272
273 state_doctype_markup:
274 case STATE_DOCTYPE_MARKUP:
275 aPtr = (const char *) memchr (aStartData, ']', (myEndPtr-1) - aStartData);
276 if (aPtr) {
277 // The end of DOCTYPE declaration found
278 if (aPtr[1] != '>') { // ERROR
279 myError =
280 "Character \'>\' is expected in the end of DOCTYPE declaration";
281 return XML_UNKNOWN;
282 }
283 // The DOCTYPE declaration is retrieved
284 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
285 myPtr = aPtr + 2;
286 return XML_DOCTYPE;
287 }
288 myPtr = myEndPtr - 1;
289 continue;
290
291 // Checking the characters in STATE_COMMENT, seek for "-->" sequence
292 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
293 case STATE_COMMENT:
294 aPtr = aStartData;
302f96fb 295 for(;;) {
7fd59977 296 aPtr = (const char *) memchr (aPtr, '-', (myEndPtr - 2) - aPtr);
297 if (aPtr == NULL) break;
298 if (aPtr[1] != '-') ++ aPtr;
299 else {
300 if (aPtr[2] != '>') { // ERROR
301 myError = "Character \'>\' is expected in the end of comment";
302 return XML_UNKNOWN;
303 }
304 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
305 myPtr = aPtr + 3;
306 return XML_COMMENT;
307 }
308 }
309 myPtr = myEndPtr - 2;
310 continue;
311
312 // Checking the characters in STATE_TEXT, seek for "<"
313 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
314 case STATE_TEXT:
315 aPtr = (const char *) memchr (aStartData, '<', myEndPtr - aStartData);
316 if (aPtr) {
317 // The end of text field reached
318 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
319 myPtr = aPtr;
320 return XML_TEXT;
321 }
322 myPtr = myEndPtr;
323 continue;
324
325 // Checking the characters in STATE_CDATA, seek for "]]"
326 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
327 case STATE_CDATA:
328 aPtr = aStartData;
302f96fb 329 for(;;) {
7fd59977 330 aPtr = (const char *) memchr (aPtr, ']', (myEndPtr - 1) - aStartData);
331 if (aPtr == NULL) break;
332 if (aPtr[1] != ']') { // ERROR
333 myError = "Characters \']]\' are expected in the end of CDATA";
334 return XML_UNKNOWN;
335 }
336 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
337 myPtr = aPtr + 2;
338 return XML_CDATA;
339 }
340 myPtr = myEndPtr - 1;
341 continue;
342
343 // Checking the characters in STATE_ELEMENT, seek the end of TagName
344 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
345 case STATE_ELEMENT:
346 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
347 if (theData.Length() == 0 || aNameEnd != myPtr) {
348 myError = "Invalid tag name";
349 return XML_UNKNOWN;
350 }
351 {
352 theData.rdbuf()->sputn(aStartData, aNameEnd - aStartData);
353 char* aDataString = (char *)theData.str();
354 myElement = & LDOM_BasicElement::Create (aDataString, theData.Length(),
355 myDocument);
356 theData.Clear();
357 myLastChild = NULL;
358 delete [] aDataString;
359 aState = STATE_ATTRIBUTE_NAME;
360 aStartData = NULL;
361 myPtr = aNameEnd;
362 continue;
363 }
364 // Parsing a single attribute (STATE_ATTRIBUTE)
365 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
366 case STATE_ATTRIBUTE_NAME: // attribute name
367 switch (myPtr[0]) {
368 case ' ' :
369 case '\t':
370 case '\n':
371 case '\r':
372 if (aStartData) goto attr_name;
373 ++ myPtr;
374 continue;
375 case '/' :
376 if (aStartData)
377 myError = "Inexpected end of attribute";
378 else if (myPtr[1] != '>')
379 myError = "Improper element tag termination";
380 else {
381 myPtr += 2;
382#ifdef DEB
383 theData.Clear();
384 theData << myElement->GetTagName();
385#endif
386 return XML_FULL_ELEMENT;
387 }
388 return XML_UNKNOWN;
389 case '>' :
390 if (aStartData) {
391 myError = "Inexpected end of attribute";
392 return XML_UNKNOWN;
393 }
394 ++ myPtr;
395#ifdef DEB
396 theData.Clear();
397 theData << myElement->GetTagName();
398#endif
399 return XML_START_ELEMENT;
400 default :
401 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
402 if (theData.Length() == 0 || aNameEnd != myPtr) {
403 myError = "Invalid attribute name";
404 return XML_UNKNOWN;
405 }
406 if (aNameEnd >= myEndPtr)
407 aStartData = myPtr;
408 else {
409 if (theData.Length() == 0)
7dc9e047 410 anAttrName = LDOMBasicString(myPtr, (Standard_Integer)(aNameEnd - myPtr), myDocument);
7fd59977 411 else {
412 theData.rdbuf()->sputn(myPtr, aNameEnd - myPtr);
413attr_name:
414 char* aDataString = (char *)theData.str();
415 theData.Clear();
416 anAttrName = LDOMBasicString (aDataString, myDocument);
417 delete [] aDataString;
418 }
419 aStartData = NULL;
420 aState = STATE_ATTRIBUTE_EQUAL;
421 }
422 myPtr = aNameEnd;
423 continue;
424 }
425 case STATE_ATTRIBUTE_EQUAL: // attribute 'equal' sign
426 switch (myPtr[0]) {
427 case '=' :
428 aState = STATE_ATTRIBUTE_VALUE;
429 case ' ' :
430 case '\t':
431 case '\n':
432 case '\r':
433 ++ myPtr;
434 continue;
435 default:
436 myError = "Equal sign expected in attribute definition";
437 return XML_UNKNOWN;
438 }
439
440 case STATE_ATTRIBUTE_VALUE: // attribute value
441 switch (myPtr[0]) {
442 case ' ' :
443 case '\t':
444 case '\n':
445 case '\r':
446 if (aStartData == NULL) {
447 ++ myPtr;
448 continue;
449 default:
450 if (anAttDelimiter == '\0') {
451 myError = "Expected an attribute value";
452 return XML_UNKNOWN;
453 case '\"':
454 case '\'':
455 if (aStartData == NULL) {
456 aStartData = &myPtr[1];
457 anAttDelimiter = myPtr[0];
458 }
459 }
460 }
461 // Limitation: we do not take into account that '<' and '&'
462 // are not allowed in attribute values
463 aPtr = (const char *) memchr (aStartData, anAttDelimiter,
464 myEndPtr - aStartData);
465 if (aPtr) {
466 (char&) aPtr[0] = '\0';
467 anAttDelimiter = '\0';
468 char * aDataString = (char *) aStartData;
469 const char * ePtr = aPtr;
470
471 // Append the end of the string to previously taken data
472 if (theData.Length() > 0) {
473 theData.rdbuf()->sputn(aStartData, aPtr-aStartData);
474 aDataString = (char *)theData.str();
475 ePtr = strchr (aDataString, '\0');
476 }
477
478 Standard_Integer aDataLen;
479 aDataString = LDOM_CharReference::Decode (aDataString, aDataLen);
480 if (IsDigit(aDataString[0])) {
481 if (getInteger (anAttrValue, aDataString, ePtr))
482 anAttrValue = LDOMBasicString (aDataString,aDataLen,myDocument);
483 } else
484 anAttrValue = LDOMBasicString (aDataString, aDataLen, myDocument);
485
486 if (theData.Length() > 0) {
487 theData.Clear();
488 delete [] aDataString;
489 }
490 // Create an attribute
491 myLastChild = myElement -> AddAttribute (anAttrName, anAttrValue,
492 myDocument, myLastChild);
493 myPtr = aPtr + 1;
494 aStartData = NULL;
495 aState = STATE_ATTRIBUTE_NAME;
496 } else
497 myPtr = myEndPtr;
498 continue;
499 }
500 // Checking the characters in STATE_ELEMENT_END, seek for ">"
501 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
502 case STATE_ELEMENT_END:
503 aPtr = (const char *) memchr (aStartData, '>', myEndPtr - aStartData);
504 if (aPtr) {
505 // The end of the end-element markup
506 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
507 myPtr = aPtr + 1;
508 return XML_END_ELEMENT;
509 }
510 myPtr = myEndPtr;
511 continue;
512 }
513 }
514 if (aState != STATE_WAITING) {
515 myError = "Unexpected end of file";
516 return XML_UNKNOWN;
517 }
518 return XML_EOF;
519}
520
521//=======================================================================
522//function : isName
523//type : static
524//purpose : Check if aString is a valid XML Name
525//=======================================================================
526
527static Standard_Boolean isName (const char * aString,
528 const char * aStringEnd,
529 const char *& aNameEnd)
530{
531 Standard_Boolean aResult;
302f96fb 532 char aCh = aString[0];
7fd59977 533 if (IsAlphabetic(aCh) || aCh == '_' || aCh == ':') {
534 const char * aPtr = &aString[1];
535 while (aPtr < aStringEnd) {
536 aCh = * aPtr;
537 switch (aCh) {
538 case ' ' :
539 case '\n':
540 case '\r':
541 case '\t':
542 case '=' :
543 case '\0':
544 case '/' :
545 case '>' :
546 aNameEnd = aPtr;
547 return Standard_True;
548 default:
549 if (IsAlphanumeric(aCh) == 0) {
550 aNameEnd = aPtr;
551 return Standard_False;
552 }
553 case '.' :
554 case '-' :
555 case '_' :
556 case ':' :
557 ++ aPtr;
558 }
559 }
560 aNameEnd = aPtr;
561 aResult = Standard_True;
562 } else {
563 aNameEnd = aString;
564 aResult = Standard_False;
565 }
566 return aResult;
567}
568
569//=======================================================================
570//function : getInteger
571//purpose : Try to initialize theValue as Integer; return False on success
572//=======================================================================
573
574Standard_Boolean LDOM_XmlReader::getInteger (LDOMBasicString& theValue,
575 const char * theStart,
576 const char * theEnd)
577{
578 char * ptr;
579 errno = 0;
580 if (theEnd - theStart == 1 || theStart[0] != '0')
581 {
582 long aResult = strtol (theStart, &ptr, 10);
583 if (ptr == theEnd && errno == 0)
584 {
585 theValue = Standard_Integer(aResult);
586 return Standard_False;
587 }
588 }
589 return Standard_True;
590}