0031340: LDOM fails to read XML file starting with BOM
[occt.git] / src / LDOM / LDOMParser.cxx
CommitLineData
b311480e 1// Created on: 2001-07-20
2// Created by: Alexander GRIGORIEV
973c2be1 3// Copyright (c) 2001-2014 OPEN CASCADE SAS
b311480e 4//
973c2be1 5// This file is part of Open CASCADE Technology software library.
b311480e 6//
d5f74e42 7// This library is free software; you can redistribute it and/or modify it under
8// the terms of the GNU Lesser General Public License version 2.1 as published
973c2be1 9// by the Free Software Foundation, with special exception defined in the file
10// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
11// distribution for complete text of the license and disclaimer of any warranty.
b311480e 12//
973c2be1 13// Alternatively, this file may be used under the terms of Open CASCADE
14// commercial license or contractual agreement.
b311480e 15
04232180 16//AGV 060302: Input from std::istream
7fd59977 17// AGV 130302: Return error if there are data after the root element
18
19//#define LDOM_PARSER_TRACE
20
21#include <LDOMParser.hxx>
22#include <LDOM_MemManager.hxx>
23#include <LDOM_XmlReader.hxx>
24#include <LDOM_BasicText.hxx>
25#include <LDOM_CharReference.hxx>
d9ff84e8 26#include <TCollection_ExtendedString.hxx>
4ff92abe 27#include <OSD_OpenFile.hxx>
7fd59977 28
29#include <fcntl.h>
57c28b61 30#ifdef _MSC_VER
7fd59977 31#include <io.h>
32#else
33#include <unistd.h>
34#endif
35
36//=======================================================================
37//function : ~LDOMParser
38//purpose :
39//=======================================================================
40
41LDOMParser::~LDOMParser()
42{
43 if (myReader) delete myReader;
44}
45
46//=======================================================================
47//function : ReadRecord
48//purpose : Take the next lexical element from XML stream
49//=======================================================================
50
51#ifdef LDOM_PARSER_TRACE
52static
53#else
54inline
55#endif
56 LDOM_XmlReader::RecordType ReadRecord (LDOM_XmlReader& aReader,
4ff92abe 57 Standard_IStream& theIStream,
7fd59977 58 LDOM_OSStream& aData)
59{
60#ifdef LDOM_PARSER_TRACE
61 static aCounter = 0;
62 ++ aCounter;
63#endif
4ff92abe 64 const LDOM_XmlReader::RecordType aType = aReader.ReadRecord (theIStream, aData);
7fd59977 65#ifdef LDOM_PARSER_TRACE
66 static FILE * ff = NULL;
67 TCollection_AsciiString aTraceFileName;
57c28b61 68#ifdef _WIN32
7fd59977 69 aTraceFileName = TCollection_AsciiString (getenv("TEMP")) + "\\ldom.trace";
70#else
71 aTraceFileName = "/tmp/ldom.trace";
72#endif
73 ff = fopen (aTraceFileName.ToCString(),ff ? "at": "wt");
74 const char * aDataType;
75 switch (aType) {
76 case LDOM_XmlReader::XML_UNKNOWN: aDataType= "XML_UNKNOWN "; break;
77 case LDOM_XmlReader::XML_HEADER: aDataType= "XML_HEADER "; break;
78 case LDOM_XmlReader::XML_DOCTYPE: aDataType= "XML_DOCTYPE "; break;
79 case LDOM_XmlReader::XML_COMMENT: aDataType= "XML_COMMENT "; break;
80 case LDOM_XmlReader::XML_START_ELEMENT: aDataType= "XML_START_ELEMENT"; break;
81 case LDOM_XmlReader::XML_END_ELEMENT: aDataType= "XML_END_ELEMENT "; break;
82 case LDOM_XmlReader::XML_FULL_ELEMENT: aDataType= "XML_FULL_ELEMENT "; break;
83 case LDOM_XmlReader::XML_TEXT: aDataType= "XML_TEXT "; break;
84 case LDOM_XmlReader::XML_CDATA: aDataType= "XML_CDATA "; break;
85 case LDOM_XmlReader::XML_EOF: aDataType= "XML_EOF ";
86 }
87 char * aStr = aData.str();
88 fprintf (ff, "%5d %s: %s\n", aCounter, aDataType, aStr);
89 delete [] aStr;
90 fclose (ff);
91#endif
92 return aType;
93}
94
95//=======================================================================
96//function : GetError
97//purpose : Return text describing a parsing error
98//=======================================================================
99
100const TCollection_AsciiString& LDOMParser::GetError
101 (TCollection_AsciiString& aData) const
102{
103 char * aStr =(char *)myCurrentData.str();
104 aData = aStr;
105 delete [] aStr;
106 return myError;
107}
108
109//=======================================================================
8f34d47e 110//function : GetBOM
111//purpose : Returns the byte order mark (BOM) defined at the start of a stream
112//=======================================================================
113
114LDOM_OSStream::BOMType LDOMParser::GetBOM() const
115{
116 if (myReader)
117 return myReader->GetBOM();
118 return LDOM_OSStream::BOM_UNDEFINED;
119}
120
121//=======================================================================
7fd59977 122//function : parse
123//purpose :
124//=======================================================================
125
04232180 126Standard_Boolean LDOMParser::parse (std::istream& anInput,
5fce1605 127 const Standard_Boolean theTagPerStep,
128 const Standard_Boolean theWithoutRoot)
7fd59977 129{
130 // Open the DOM Document
131 myDocument = new LDOM_MemManager (20000);
132 myError.Clear();
133
134 // Create the Reader instance
135 if (myReader) delete myReader;
5fce1605 136 myReader = new LDOM_XmlReader (myDocument, myError, theTagPerStep);
7fd59977 137
138 // Parse
5fce1605 139 return ParseDocument (anInput, theWithoutRoot);
7fd59977 140}
141
142//=======================================================================
143//function : parse
144//purpose :
145//=======================================================================
146
147Standard_Boolean LDOMParser::parse (const char * const aFileName)
148{
4ff92abe 149 std::ifstream aFileStream;
150 OSD_OpenStream (aFileStream, aFileName, std::ios::in);
7fd59977 151
4ff92abe 152 if (aFileStream.good())
153 {
154 return parse (aFileStream);
155 }
156 else
157 {
7fd59977 158 myError = "Fatal XML error: Cannot open XML file";
159 return Standard_True;
160 }
7fd59977 161}
162
163//=======================================================================
164//function : ParseDocument
165//purpose : parse the whole document (abstracted from the XML source)
166//=======================================================================
167
04232180 168Standard_Boolean LDOMParser::ParseDocument (std::istream& theIStream, const Standard_Boolean theWithoutRoot)
7fd59977 169{
170 Standard_Boolean isError = Standard_False;
171 Standard_Boolean isElement = Standard_False;
7fd59977 172 Standard_Boolean isDoctype = Standard_False;
173
5fce1605 174 Standard_Boolean isInsertFictRootElement = Standard_False;
175
302f96fb 176 for(;;) {
5fce1605 177 LDOM_XmlReader::RecordType aType = (theWithoutRoot && !isInsertFictRootElement ?
178 LDOM_XmlReader::XML_START_ELEMENT :
179 ReadRecord (*myReader, theIStream, myCurrentData));
7fd59977 180 switch (aType) {
181 case LDOM_XmlReader::XML_HEADER:
182 if (isDoctype || isElement) {
183 myError = "Unexpected XML declaration";
184 isError = Standard_True;
185 break;
186 }
7fd59977 187 continue;
188 case LDOM_XmlReader::XML_DOCTYPE:
189 if (isElement) {
190 myError = "Unexpected DOCTYPE declaration";
191 isError = Standard_True;
192 break;
193 }
194 isDoctype = Standard_True;
b1811c1d 195 continue;
7fd59977 196 case LDOM_XmlReader::XML_COMMENT:
197 continue;
198 case LDOM_XmlReader::XML_FULL_ELEMENT:
199 if (isElement == Standard_False) {
200 isElement = Standard_True;
201 myDocument -> myRootElement = &myReader -> GetElement ();
202 if (startElement()) {
203 isError = Standard_True;
204 myError = "User abort at startElement()";
205 break;
206 }
207 if (endElement()) {
208 isError = Standard_True;
209 myError = "User abort at endElement()";
210 break;
211 }
212 continue;
213 }
b1811c1d 214 isError = Standard_True;
215 myError = "Expected comment or end-of-file";
216 break;
7fd59977 217 case LDOM_XmlReader::XML_START_ELEMENT:
218 if (isElement == Standard_False) {
219 isElement = Standard_True;
5fce1605 220
221 if (theWithoutRoot && !isInsertFictRootElement)
222 {
223 isInsertFictRootElement = Standard_True;
224
225 // create fiction root element
226 TCollection_AsciiString aFicName ("document");
227 myReader->CreateElement (aFicName.ToCString(), aFicName.Length());
228 }
229
230 myDocument->myRootElement = &myReader->GetElement();
231
7fd59977 232 if (startElement()) {
233 isError = Standard_True;
234 myError = "User abort at startElement()";
235 break;
236 }
4ff92abe 237 isError = ParseElement (theIStream);
7fd59977 238 if (isError) break;
239 continue;
240 }
241 isError = Standard_True;
242 myError = "Expected comment or end-of-file";
b1811c1d 243 break;
7fd59977 244 case LDOM_XmlReader::XML_END_ELEMENT:
245 if (endElement()) {
246 isError = Standard_True;
247 myError = "User abort at endElement()";
248 }
b1811c1d 249 break;
7fd59977 250 case LDOM_XmlReader::XML_EOF:
251 break;
252 case LDOM_XmlReader::XML_UNKNOWN:
253 if (isElement) {
254 default:
255 myError = "Unexpected data beyond the Document Element";
256 }
257 isError = Standard_True;
258 }
259 break;
260 }
261 return isError;
262}
263
264//=======================================================================
265//function : ParseElement
266//purpose : parse one element, given the type of its XML presentation
267//=======================================================================
268
4ff92abe 269Standard_Boolean LDOMParser::ParseElement (Standard_IStream& theIStream)
7fd59977 270{
271 Standard_Boolean isError = Standard_False;
272 const LDOM_BasicElement * aParent = &myReader->GetElement();
273 const LDOM_BasicNode * aLastChild = NULL;
302f96fb 274 for(;;) {
7fd59977 275 LDOM_Node::NodeType aLocType;
276 LDOMBasicString aTextValue;
277 char *aTextStr;
4ff92abe 278 LDOM_XmlReader::RecordType aType = ReadRecord (* myReader, theIStream, myCurrentData);
7fd59977 279 switch (aType) {
280 case LDOM_XmlReader::XML_UNKNOWN:
281 isError = Standard_True;
282 break;
283 case LDOM_XmlReader::XML_FULL_ELEMENT:
284 aParent -> AppendChild (&myReader -> GetElement(), aLastChild);
285 if (startElement()) {
286 isError = Standard_True;
287 myError = "User abort at startElement()";
288 break;
289 }
290 if (endElement()) {
291 isError = Standard_True;
292 myError = "User abort at endElement()";
293 break;
294 }
295 break;
296 case LDOM_XmlReader::XML_START_ELEMENT:
297 aParent -> AppendChild (&myReader -> GetElement(), aLastChild);
298 if (startElement()) {
299 isError = Standard_True;
300 myError = "User abort at startElement()";
301 break;
302 }
4ff92abe 303 isError = ParseElement (theIStream);
7fd59977 304 break;
305 case LDOM_XmlReader::XML_END_ELEMENT:
306 {
307 Standard_CString aParentName = Standard_CString(aParent->GetTagName());
308 aTextStr = (char *)myCurrentData.str();
309 if (strcmp(aTextStr, aParentName) != 0) {
310 myError = "Expected end tag \'";
311 myError += aParentName;
312 myError += "\'";
313 isError = Standard_True;
314 }
315 else if (endElement()) {
316 isError = Standard_True;
317 myError = "User abort at endElement()";
318 }
319 delete [] aTextStr;
320 }
321 return isError;
322 case LDOM_XmlReader::XML_TEXT:
323 aLocType = LDOM_Node::TEXT_NODE;
324 {
325 Standard_Integer aTextLen;
326 aTextStr = LDOM_CharReference::Decode ((char *)myCurrentData.str(), aTextLen);
327 // try to convert to integer
328 if (IsDigit(aTextStr[0])) {
329 if (LDOM_XmlReader::getInteger (aTextValue, aTextStr,
330 aTextStr + aTextLen))
331 aTextValue = LDOMBasicString (aTextStr, aTextLen, myDocument);
332 } else
333 aTextValue = LDOMBasicString (aTextStr, aTextLen, myDocument);
334 }
335 goto create_text_node;
336 case LDOM_XmlReader::XML_COMMENT:
337 aLocType = LDOM_Node::COMMENT_NODE;
338 {
339 Standard_Integer aTextLen;
340 aTextStr = LDOM_CharReference::Decode ((char *)myCurrentData.str(), aTextLen);
341 aTextValue = LDOMBasicString (aTextStr, aTextLen, myDocument);
342 }
343 goto create_text_node;
344 case LDOM_XmlReader::XML_CDATA:
345 aLocType = LDOM_Node::CDATA_SECTION_NODE;
346 aTextStr = (char *)myCurrentData.str();
347 aTextValue = LDOMBasicString(aTextStr,myCurrentData.Length(),myDocument);
348 create_text_node:
349 {
350 LDOM_BasicNode& aTextNode =
351 LDOM_BasicText::Create (aLocType, aTextValue, myDocument);
352 aParent -> AppendChild (&aTextNode, aLastChild);
353 }
354 delete [] aTextStr;
355 break;
356 case LDOM_XmlReader::XML_EOF:
357 myError = "Inexpected end of file";
358 isError = Standard_True;
359 break;
360 default: ;
361 }
362 if (isError) break;
363 }
364 return isError;
365}
366
367//=======================================================================
368//function : startElement
369//purpose : virtual hook on 'StartElement' event for descendant classes
370//=======================================================================
371
372Standard_Boolean LDOMParser::startElement ()
373{
374 return Standard_False;
375}
376
377//=======================================================================
378//function : endElement
379//purpose : virtual hook on 'EndElement' event for descendant classes
380//=======================================================================
381
382Standard_Boolean LDOMParser::endElement ()
383{
384 return Standard_False;
385}
386
387//=======================================================================
388//function : getCurrentElement
389//purpose :
390//=======================================================================
391
392LDOM_Element LDOMParser::getCurrentElement () const
393{
394 return LDOM_Element (myReader -> GetElement(), myDocument);
395}
396
397//=======================================================================
398//function : getDocument
399//purpose :
400//=======================================================================
401
402LDOM_Document LDOMParser::getDocument ()
403{
404 return myDocument -> Self();
405}
406