0022815: Missing delete operator for placement new
[occt.git] / src / LDOM / LDOM_XmlReader.cxx
CommitLineData
// File:      LDOM_XmlReader.cxx
// Created:   20.07.01 15:38:15
// Author:    Alexander GRIGORIEV
// Copyright: OpenCascade 2001
// History:   AGV 060302: Input from istream
//            AGV 130302: bug corr: was error if strlen(root_elem_name) < 7
7
8#include <LDOM_XmlReader.hxx>
9#include <Standard_Stream.hxx>
10#include <LDOM_MemManager.hxx>
11#include <LDOM_BasicAttribute.hxx>
12#include <LDOM_CharReference.hxx>
13#include <LDOM_OSStream.hxx>
14
15#include <string.h>
16#include <errno.h>
17#ifdef WNT
18#include <io.h>
19#else
20#include <unistd.h>
21#endif
22
23//#include <ctype.h>
24
// ReadRecord() refills the input buffer whenever fewer than this many
// unread bytes remain in it.
const int XML_MIN_BUFFER = 10;
// Not referenced by the code in this file section.
const int MAX_ATTRIBUTES = 512;
// Value of myFileDes meaning "no file descriptor, read from the istream".
const int FILE_NONVALUE  = -1;

// States of the record parser implemented in LDOM_XmlReader::ReadRecord().
enum ParserState
{
  STATE_WAITING = 0,      // between records: skipping blanks, detecting markup
  STATE_HEADER,           // after "<?",  seeking "?>"
  STATE_DOCTYPE,          // after "<!DOCTYPE", seeking '[' or '>'
  STATE_DOCTYPE_MARKUP,   // after '[' in DOCTYPE, seeking "]>"
  STATE_ELEMENT,          // tag name continues after a buffer refill
  STATE_ELEMENT_END,      // after "</", seeking '>'
  STATE_ATTRIBUTE_NAME,   // inside a start tag, expecting a name, "/>" or '>'
  STATE_ATTRIBUTE_EQUAL,  // attribute name read, expecting '='
  STATE_ATTRIBUTE_VALUE,  // '=' read, expecting a quoted value
  STATE_COMMENT,          // after "<!--", seeking "-->"
  STATE_CDATA,            // after "<![CDATA[", seeking "]]"
  STATE_TEXT              // character data, seeking '<'
};

// True if the bytes at aPtr start with the string literal aPattern
// (the literal's terminating NUL is excluded from the comparison).
#define TEXT_COMPARE(aPtr,aPattern) \
  (memcmp ((aPtr), (aPattern), sizeof(aPattern) - 1) == 0)
46
47static Standard_Boolean isName (const char * aString,
48 const char * aStringEnd,
49 const char *& aNameEnd);
50
51//=======================================================================
52//function : LDOM_XmlReader()
53//purpose : Constructor (file descriptor)
54//=======================================================================
55
56LDOM_XmlReader::LDOM_XmlReader (const int aFileDes,
57 const Handle(LDOM_MemManager)& aDocument,
58 TCollection_AsciiString& anErrorString)
59 : myEOF (Standard_False),
60 myFileDes (aFileDes),
61#ifdef WNT
62 myIStream (cin), // one quirk of MSVC6.0: can't initialise by 0
63#else
64 myIStream (* (istream *) UndefinedHandleAddress),
65#endif
66 myError (anErrorString),
67 myDocument (aDocument),
68 myPtr (&myBuffer[0]),
69 myEndPtr (&myBuffer[0])
70{}
71
72//=======================================================================
73//function : LDOM_XmlReader()
74//purpose : Constructor (istream)
75//=======================================================================
76
77LDOM_XmlReader::LDOM_XmlReader (istream& anInput,
78 const Handle(LDOM_MemManager)& aDocument,
79 TCollection_AsciiString& anErrorString)
80 : myEOF (Standard_False),
81 myFileDes (FILE_NONVALUE),
82 myIStream (anInput),
83 myError (anErrorString),
84 myDocument (aDocument),
85 myPtr (&myBuffer[0]),
86 myEndPtr (&myBuffer[0])
87{}
88
89//=======================================================================
90//function : ReadRecord
91//purpose : Read a record from XML file
92//=======================================================================
93
94LDOM_XmlReader::RecordType LDOM_XmlReader::ReadRecord
95 (LDOM_OSStream& theData)
96{
97 theData.Clear();
98 myError.Clear();
99 ParserState aState = STATE_WAITING;
100 const char * aStartData = NULL, * aNameEnd, * aPtr;
101 LDOMBasicString anAttrName, anAttrValue;
102 char anAttDelimiter = '\0';
103
104 while (1) {
105 // Check if the current file buffer is exhausted
106 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
107 // There should always be some bytes available in the buffer for analysis
108 Standard_Integer aBytesRest = myEndPtr - myPtr;
109 if (aBytesRest < XML_MIN_BUFFER) {
110 if (myEOF == Standard_True) {
111 if (aBytesRest <= 0)
112 break; // END of processing
113 } else {
114 // If we are reading some data, save the beginning and preserve the state
115 if (aStartData /* && aState != STATE_WAITING */) {
116 if (myPtr > aStartData)
117 theData.rdbuf()->sputn(aStartData, myPtr - aStartData);
118 aStartData = &myBuffer[0];
119 }
120 // Copy the rest of file data to the beginning of buffer
121 if (aBytesRest > 0)
122 memcpy (&myBuffer[0], myPtr, aBytesRest);
123
124 // Read the full buffer and reset start and end buffer pointers
125 myPtr = &myBuffer[0];
126 Standard_Integer aNBytes;
127 if (myFileDes != FILE_NONVALUE)
128 aNBytes = read (myFileDes, &myBuffer[aBytesRest],
129 XML_BUFFER_SIZE - aBytesRest);
130 else {
131 myIStream.read (&myBuffer[aBytesRest],
132 XML_BUFFER_SIZE - aBytesRest);
133 aNBytes = myIStream.gcount();
134 }
135 if (aNBytes == 0)
136 myEOF = Standard_True; // END-OF-FILE
137 myEndPtr = &myBuffer[aBytesRest + aNBytes];
138 myBuffer[aBytesRest + aNBytes] = '\0';
139 }
140 }
141
142 // Check the character data
143 switch (aState) {
144
145 // Checking the characters in STATE_WAITING (blank, TEXT or markup)
146 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
147 case STATE_WAITING:
148 switch (myPtr[0]) {
149 case ' ':
150 case '\t':
151 case '\n':
152 case '\r':
153 ++ myPtr;
154 continue;
155 case '<':
156 // XML markup found, then make detect the record type
157 switch (myPtr[1]) {
158 case '?':
159 aState = STATE_HEADER;
160 myPtr += 2;
161 aStartData = myPtr;
162 continue;
163 case '/':
164 aState = STATE_ELEMENT_END;
165 myPtr += 2;
166 aStartData = myPtr;
167 continue;
168 case '!':
169 if (myPtr[2] == '-' && myPtr[3] == '-') {
170 aState = STATE_COMMENT;
171 myPtr += 4;
172 } else if (TEXT_COMPARE (&myPtr[2], "DOCTYPE")) {
173 char ch = myPtr[9];
174 if (ch != ' ' && ch != '\t' && ch != '\n' && ch != '\r')
175 break;
176 aState = STATE_DOCTYPE;
177 myPtr += 10;
178 } else if (TEXT_COMPARE (&myPtr[2], "[CDATA[")) {
179 aState = STATE_CDATA;
180 myPtr += 9;
181 } else break; // ERROR
182 aStartData = myPtr;
183 continue;
184 default:
185 if (::isName (&myPtr[1], myEndPtr, aNameEnd)) {
186 aStartData = myPtr + 1;
187 myPtr = aNameEnd;
188 if (myPtr < myEndPtr) {
189 myElement = & LDOM_BasicElement::Create (aStartData,
190 myPtr - aStartData,
191 myDocument);
192 myLastChild = NULL;
193 aState = STATE_ATTRIBUTE_NAME;
194 aStartData = NULL;
195 }else
196 aState = STATE_ELEMENT;
197 continue;
198 } // otherwise ERROR
199 } // end of switch
200 myError = "Unknown XML object: ";
201 myError += TCollection_AsciiString ((const Standard_CString)myPtr,
202 XML_MIN_BUFFER);
203 return XML_UNKNOWN;
204 case '\0':
205 if (myEOF == Standard_True) continue;
206 default:
207 // Limitation: we do not treat '&' as special character
208 aPtr = (const char *) memchr (myPtr, '<', myEndPtr - myPtr);
209 if (aPtr) {
210 // The end of text field reached
211 theData.rdbuf()->sputn(myPtr, aPtr - myPtr);
212 myPtr = aPtr;
213 return XML_TEXT;
214 }
215 aState = STATE_TEXT;
216 aStartData = myPtr;
217 myPtr = myEndPtr;
218 } // end of checking in STATE_WAITING
219 continue;
220
221 // Checking the characters in STATE_HEADER, seek for "?>" sequence
222 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
223 case STATE_HEADER:
224 aPtr = (const char *) memchr (aStartData, '?', (myEndPtr-1) - aStartData);
225 if (aPtr) {
226 // The end of XML declaration found
227 if (aPtr[1] != '>') { // ERROR
228 myError = "Character \'>\' is expected in the end of XML declaration";
229 return XML_UNKNOWN;
230 }
231 // The XML declaration is retrieved
232 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
233 myPtr = aPtr + 2;
234 return XML_HEADER;
235 }
236 myPtr = myEndPtr - 1;
237 continue;
238
239 // Checking the characters in STATE_DOCTYPE, seek for "]>" sequence
240 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
241 case STATE_DOCTYPE:
242 for (aPtr = aStartData; aPtr < myEndPtr-1; aPtr++) {
243 const int aChar = aPtr[0];
244 if (aChar == '[') {
245 aState = STATE_DOCTYPE_MARKUP;
246 aStartData = &aPtr[1];
247 goto state_doctype_markup;
248 }
249 if (aChar == '>') {
250 // The DOCTYPE declaration is retrieved
251 theData.rdbuf()->sputn(aStartData, aPtr - aStartData - 1);
252 myPtr = aPtr + 1;
253 return XML_DOCTYPE;
254 }
255 }
256 myPtr = myEndPtr - 1;
257 continue;
258
259 state_doctype_markup:
260 case STATE_DOCTYPE_MARKUP:
261 aPtr = (const char *) memchr (aStartData, ']', (myEndPtr-1) - aStartData);
262 if (aPtr) {
263 // The end of DOCTYPE declaration found
264 if (aPtr[1] != '>') { // ERROR
265 myError =
266 "Character \'>\' is expected in the end of DOCTYPE declaration";
267 return XML_UNKNOWN;
268 }
269 // The DOCTYPE declaration is retrieved
270 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
271 myPtr = aPtr + 2;
272 return XML_DOCTYPE;
273 }
274 myPtr = myEndPtr - 1;
275 continue;
276
277 // Checking the characters in STATE_COMMENT, seek for "-->" sequence
278 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
279 case STATE_COMMENT:
280 aPtr = aStartData;
281 while (1) {
282 aPtr = (const char *) memchr (aPtr, '-', (myEndPtr - 2) - aPtr);
283 if (aPtr == NULL) break;
284 if (aPtr[1] != '-') ++ aPtr;
285 else {
286 if (aPtr[2] != '>') { // ERROR
287 myError = "Character \'>\' is expected in the end of comment";
288 return XML_UNKNOWN;
289 }
290 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
291 myPtr = aPtr + 3;
292 return XML_COMMENT;
293 }
294 }
295 myPtr = myEndPtr - 2;
296 continue;
297
298 // Checking the characters in STATE_TEXT, seek for "<"
299 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
300 case STATE_TEXT:
301 aPtr = (const char *) memchr (aStartData, '<', myEndPtr - aStartData);
302 if (aPtr) {
303 // The end of text field reached
304 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
305 myPtr = aPtr;
306 return XML_TEXT;
307 }
308 myPtr = myEndPtr;
309 continue;
310
311 // Checking the characters in STATE_CDATA, seek for "]]"
312 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
313 case STATE_CDATA:
314 aPtr = aStartData;
315 while (1) {
316 aPtr = (const char *) memchr (aPtr, ']', (myEndPtr - 1) - aStartData);
317 if (aPtr == NULL) break;
318 if (aPtr[1] != ']') { // ERROR
319 myError = "Characters \']]\' are expected in the end of CDATA";
320 return XML_UNKNOWN;
321 }
322 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
323 myPtr = aPtr + 2;
324 return XML_CDATA;
325 }
326 myPtr = myEndPtr - 1;
327 continue;
328
329 // Checking the characters in STATE_ELEMENT, seek the end of TagName
330 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
331 case STATE_ELEMENT:
332 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
333 if (theData.Length() == 0 || aNameEnd != myPtr) {
334 myError = "Invalid tag name";
335 return XML_UNKNOWN;
336 }
337 {
338 theData.rdbuf()->sputn(aStartData, aNameEnd - aStartData);
339 char* aDataString = (char *)theData.str();
340 myElement = & LDOM_BasicElement::Create (aDataString, theData.Length(),
341 myDocument);
342 theData.Clear();
343 myLastChild = NULL;
344 delete [] aDataString;
345 aState = STATE_ATTRIBUTE_NAME;
346 aStartData = NULL;
347 myPtr = aNameEnd;
348 continue;
349 }
350 // Parsing a single attribute (STATE_ATTRIBUTE)
351 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
352 case STATE_ATTRIBUTE_NAME: // attribute name
353 switch (myPtr[0]) {
354 case ' ' :
355 case '\t':
356 case '\n':
357 case '\r':
358 if (aStartData) goto attr_name;
359 ++ myPtr;
360 continue;
361 case '/' :
362 if (aStartData)
363 myError = "Inexpected end of attribute";
364 else if (myPtr[1] != '>')
365 myError = "Improper element tag termination";
366 else {
367 myPtr += 2;
368#ifdef DEB
369 theData.Clear();
370 theData << myElement->GetTagName();
371#endif
372 return XML_FULL_ELEMENT;
373 }
374 return XML_UNKNOWN;
375 case '>' :
376 if (aStartData) {
377 myError = "Inexpected end of attribute";
378 return XML_UNKNOWN;
379 }
380 ++ myPtr;
381#ifdef DEB
382 theData.Clear();
383 theData << myElement->GetTagName();
384#endif
385 return XML_START_ELEMENT;
386 default :
387 if (::isName (myPtr, myEndPtr, aNameEnd) == Standard_False)
388 if (theData.Length() == 0 || aNameEnd != myPtr) {
389 myError = "Invalid attribute name";
390 return XML_UNKNOWN;
391 }
392 if (aNameEnd >= myEndPtr)
393 aStartData = myPtr;
394 else {
395 if (theData.Length() == 0)
396 anAttrName = LDOMBasicString(myPtr, aNameEnd - myPtr, myDocument);
397 else {
398 theData.rdbuf()->sputn(myPtr, aNameEnd - myPtr);
399attr_name:
400 char* aDataString = (char *)theData.str();
401 theData.Clear();
402 anAttrName = LDOMBasicString (aDataString, myDocument);
403 delete [] aDataString;
404 }
405 aStartData = NULL;
406 aState = STATE_ATTRIBUTE_EQUAL;
407 }
408 myPtr = aNameEnd;
409 continue;
410 }
411 case STATE_ATTRIBUTE_EQUAL: // attribute 'equal' sign
412 switch (myPtr[0]) {
413 case '=' :
414 aState = STATE_ATTRIBUTE_VALUE;
415 case ' ' :
416 case '\t':
417 case '\n':
418 case '\r':
419 ++ myPtr;
420 continue;
421 default:
422 myError = "Equal sign expected in attribute definition";
423 return XML_UNKNOWN;
424 }
425
426 case STATE_ATTRIBUTE_VALUE: // attribute value
427 switch (myPtr[0]) {
428 case ' ' :
429 case '\t':
430 case '\n':
431 case '\r':
432 if (aStartData == NULL) {
433 ++ myPtr;
434 continue;
435 default:
436 if (anAttDelimiter == '\0') {
437 myError = "Expected an attribute value";
438 return XML_UNKNOWN;
439 case '\"':
440 case '\'':
441 if (aStartData == NULL) {
442 aStartData = &myPtr[1];
443 anAttDelimiter = myPtr[0];
444 }
445 }
446 }
447 // Limitation: we do not take into account that '<' and '&'
448 // are not allowed in attribute values
449 aPtr = (const char *) memchr (aStartData, anAttDelimiter,
450 myEndPtr - aStartData);
451 if (aPtr) {
452 (char&) aPtr[0] = '\0';
453 anAttDelimiter = '\0';
454 char * aDataString = (char *) aStartData;
455 const char * ePtr = aPtr;
456
457 // Append the end of the string to previously taken data
458 if (theData.Length() > 0) {
459 theData.rdbuf()->sputn(aStartData, aPtr-aStartData);
460 aDataString = (char *)theData.str();
461 ePtr = strchr (aDataString, '\0');
462 }
463
464 Standard_Integer aDataLen;
465 aDataString = LDOM_CharReference::Decode (aDataString, aDataLen);
466 if (IsDigit(aDataString[0])) {
467 if (getInteger (anAttrValue, aDataString, ePtr))
468 anAttrValue = LDOMBasicString (aDataString,aDataLen,myDocument);
469 } else
470 anAttrValue = LDOMBasicString (aDataString, aDataLen, myDocument);
471
472 if (theData.Length() > 0) {
473 theData.Clear();
474 delete [] aDataString;
475 }
476 // Create an attribute
477 myLastChild = myElement -> AddAttribute (anAttrName, anAttrValue,
478 myDocument, myLastChild);
479 myPtr = aPtr + 1;
480 aStartData = NULL;
481 aState = STATE_ATTRIBUTE_NAME;
482 } else
483 myPtr = myEndPtr;
484 continue;
485 }
486 // Checking the characters in STATE_ELEMENT_END, seek for ">"
487 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
488 case STATE_ELEMENT_END:
489 aPtr = (const char *) memchr (aStartData, '>', myEndPtr - aStartData);
490 if (aPtr) {
491 // The end of the end-element markup
492 theData.rdbuf()->sputn(aStartData, aPtr - aStartData);
493 myPtr = aPtr + 1;
494 return XML_END_ELEMENT;
495 }
496 myPtr = myEndPtr;
497 continue;
498 }
499 }
500 if (aState != STATE_WAITING) {
501 myError = "Unexpected end of file";
502 return XML_UNKNOWN;
503 }
504 return XML_EOF;
505}
506
507//=======================================================================
508//function : isName
509//type : static
510//purpose : Check if aString is a valid XML Name
511//=======================================================================
512
513static Standard_Boolean isName (const char * aString,
514 const char * aStringEnd,
515 const char *& aNameEnd)
516{
517 Standard_Boolean aResult;
518 int aCh = aString[0];
519 if (IsAlphabetic(aCh) || aCh == '_' || aCh == ':') {
520 const char * aPtr = &aString[1];
521 while (aPtr < aStringEnd) {
522 aCh = * aPtr;
523 switch (aCh) {
524 case ' ' :
525 case '\n':
526 case '\r':
527 case '\t':
528 case '=' :
529 case '\0':
530 case '/' :
531 case '>' :
532 aNameEnd = aPtr;
533 return Standard_True;
534 default:
535 if (IsAlphanumeric(aCh) == 0) {
536 aNameEnd = aPtr;
537 return Standard_False;
538 }
539 case '.' :
540 case '-' :
541 case '_' :
542 case ':' :
543 ++ aPtr;
544 }
545 }
546 aNameEnd = aPtr;
547 aResult = Standard_True;
548 } else {
549 aNameEnd = aString;
550 aResult = Standard_False;
551 }
552 return aResult;
553}
554
555//=======================================================================
556//function : getInteger
557//purpose : Try to initialize theValue as Integer; return False on success
558//=======================================================================
559
560Standard_Boolean LDOM_XmlReader::getInteger (LDOMBasicString& theValue,
561 const char * theStart,
562 const char * theEnd)
563{
564 char * ptr;
565 errno = 0;
566 if (theEnd - theStart == 1 || theStart[0] != '0')
567 {
568 long aResult = strtol (theStart, &ptr, 10);
569 if (ptr == theEnd && errno == 0)
570 {
571 theValue = Standard_Integer(aResult);
572 return Standard_False;
573 }
574 }
575 return Standard_True;
576}