b311480e |
1 | // Created on: 2001-07-20 |
2 | // Created by: Alexander GRIGORIEV |
973c2be1 |
3 | // Copyright (c) 2001-2014 OPEN CASCADE SAS |
b311480e |
4 | // |
973c2be1 |
5 | // This file is part of Open CASCADE Technology software library. |
b311480e |
6 | // |
d5f74e42 |
7 | // This library is free software; you can redistribute it and/or modify it under |
8 | // the terms of the GNU Lesser General Public License version 2.1 as published |
973c2be1 |
9 | // by the Free Software Foundation, with special exception defined in the file |
10 | // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT |
11 | // distribution for complete text of the license and disclaimer of any warranty. |
b311480e |
12 | // |
973c2be1 |
13 | // Alternatively, this file may be used under the terms of Open CASCADE |
14 | // commercial license or contractual agreement. |
b311480e |
15 | |
04232180 |
16 | //AGV 060302: Input from std::istream |
7fd59977 |
17 | // AGV 130302: Return error if there are data after the root element |
18 | |
19 | //#define LDOM_PARSER_TRACE |
20 | |
21 | #include <LDOMParser.hxx> |
22 | #include <LDOM_MemManager.hxx> |
23 | #include <LDOM_XmlReader.hxx> |
24 | #include <LDOM_BasicText.hxx> |
25 | #include <LDOM_CharReference.hxx> |
d9ff84e8 |
26 | #include <TCollection_ExtendedString.hxx> |
4ff92abe |
27 | #include <OSD_OpenFile.hxx> |
7fd59977 |
28 | |
29 | #include <fcntl.h> |
57c28b61 |
30 | #ifdef _MSC_VER |
7fd59977 |
31 | #include <io.h> |
32 | #else |
33 | #include <unistd.h> |
34 | #endif |
35 | |
36 | //======================================================================= |
37 | //function : ~LDOMParser |
38 | //purpose : |
39 | //======================================================================= |
40 | |
41 | LDOMParser::~LDOMParser() |
42 | { |
43 | if (myReader) delete myReader; |
44 | } |
45 | |
46 | //======================================================================= |
47 | //function : ReadRecord |
48 | //purpose : Take the next lexical element from XML stream |
49 | //======================================================================= |
50 | |
51 | #ifdef LDOM_PARSER_TRACE |
52 | static |
53 | #else |
54 | inline |
55 | #endif |
56 | LDOM_XmlReader::RecordType ReadRecord (LDOM_XmlReader& aReader, |
4ff92abe |
57 | Standard_IStream& theIStream, |
7fd59977 |
58 | LDOM_OSStream& aData) |
59 | { |
60 | #ifdef LDOM_PARSER_TRACE |
61 | static aCounter = 0; |
62 | ++ aCounter; |
63 | #endif |
4ff92abe |
64 | const LDOM_XmlReader::RecordType aType = aReader.ReadRecord (theIStream, aData); |
7fd59977 |
65 | #ifdef LDOM_PARSER_TRACE |
66 | static FILE * ff = NULL; |
67 | TCollection_AsciiString aTraceFileName; |
57c28b61 |
68 | #ifdef _WIN32 |
7fd59977 |
69 | aTraceFileName = TCollection_AsciiString (getenv("TEMP")) + "\\ldom.trace"; |
70 | #else |
71 | aTraceFileName = "/tmp/ldom.trace"; |
72 | #endif |
73 | ff = fopen (aTraceFileName.ToCString(),ff ? "at": "wt"); |
74 | const char * aDataType; |
75 | switch (aType) { |
76 | case LDOM_XmlReader::XML_UNKNOWN: aDataType= "XML_UNKNOWN "; break; |
77 | case LDOM_XmlReader::XML_HEADER: aDataType= "XML_HEADER "; break; |
78 | case LDOM_XmlReader::XML_DOCTYPE: aDataType= "XML_DOCTYPE "; break; |
79 | case LDOM_XmlReader::XML_COMMENT: aDataType= "XML_COMMENT "; break; |
80 | case LDOM_XmlReader::XML_START_ELEMENT: aDataType= "XML_START_ELEMENT"; break; |
81 | case LDOM_XmlReader::XML_END_ELEMENT: aDataType= "XML_END_ELEMENT "; break; |
82 | case LDOM_XmlReader::XML_FULL_ELEMENT: aDataType= "XML_FULL_ELEMENT "; break; |
83 | case LDOM_XmlReader::XML_TEXT: aDataType= "XML_TEXT "; break; |
84 | case LDOM_XmlReader::XML_CDATA: aDataType= "XML_CDATA "; break; |
85 | case LDOM_XmlReader::XML_EOF: aDataType= "XML_EOF "; |
86 | } |
87 | char * aStr = aData.str(); |
88 | fprintf (ff, "%5d %s: %s\n", aCounter, aDataType, aStr); |
89 | delete [] aStr; |
90 | fclose (ff); |
91 | #endif |
92 | return aType; |
93 | } |
94 | |
95 | //======================================================================= |
96 | //function : GetError |
97 | //purpose : Return text describing a parsing error |
98 | //======================================================================= |
99 | |
100 | const TCollection_AsciiString& LDOMParser::GetError |
101 | (TCollection_AsciiString& aData) const |
102 | { |
103 | char * aStr =(char *)myCurrentData.str(); |
104 | aData = aStr; |
105 | delete [] aStr; |
106 | return myError; |
107 | } |
108 | |
109 | //======================================================================= |
8f34d47e |
110 | //function : GetBOM |
111 | //purpose : Returns the byte order mark defined at the start of a stream |
112 | //======================================================================= |
113 | |
114 | LDOM_OSStream::BOMType LDOMParser::GetBOM() const |
115 | { |
116 | if (myReader) |
117 | return myReader->GetBOM(); |
118 | return LDOM_OSStream::BOM_UNDEFINED; |
119 | } |
120 | |
121 | //======================================================================= |
7fd59977 |
122 | //function : parse |
123 | //purpose : |
124 | //======================================================================= |
125 | |
04232180 |
126 | Standard_Boolean LDOMParser::parse (std::istream& anInput, |
5fce1605 |
127 | const Standard_Boolean theTagPerStep, |
128 | const Standard_Boolean theWithoutRoot) |
7fd59977 |
129 | { |
130 | // Open the DOM Document |
131 | myDocument = new LDOM_MemManager (20000); |
132 | myError.Clear(); |
133 | |
134 | // Create the Reader instance |
135 | if (myReader) delete myReader; |
5fce1605 |
136 | myReader = new LDOM_XmlReader (myDocument, myError, theTagPerStep); |
7fd59977 |
137 | |
138 | // Parse |
5fce1605 |
139 | return ParseDocument (anInput, theWithoutRoot); |
7fd59977 |
140 | } |
141 | |
142 | //======================================================================= |
143 | //function : parse |
144 | //purpose : |
145 | //======================================================================= |
146 | |
147 | Standard_Boolean LDOMParser::parse (const char * const aFileName) |
148 | { |
4ff92abe |
149 | std::ifstream aFileStream; |
150 | OSD_OpenStream (aFileStream, aFileName, std::ios::in); |
7fd59977 |
151 | |
4ff92abe |
152 | if (aFileStream.good()) |
153 | { |
154 | return parse (aFileStream); |
155 | } |
156 | else |
157 | { |
7fd59977 |
158 | myError = "Fatal XML error: Cannot open XML file"; |
159 | return Standard_True; |
160 | } |
7fd59977 |
161 | } |
162 | |
163 | //======================================================================= |
164 | //function : ParseDocument |
165 | //purpose : parse the whole document (abstracted from the XML source) |
166 | //======================================================================= |
167 | |
04232180 |
168 | Standard_Boolean LDOMParser::ParseDocument (std::istream& theIStream, const Standard_Boolean theWithoutRoot) |
7fd59977 |
169 | { |
170 | Standard_Boolean isError = Standard_False; |
171 | Standard_Boolean isElement = Standard_False; |
7fd59977 |
172 | Standard_Boolean isDoctype = Standard_False; |
173 | |
5fce1605 |
174 | Standard_Boolean isInsertFictRootElement = Standard_False; |
175 | |
302f96fb |
176 | for(;;) { |
5fce1605 |
177 | LDOM_XmlReader::RecordType aType = (theWithoutRoot && !isInsertFictRootElement ? |
178 | LDOM_XmlReader::XML_START_ELEMENT : |
179 | ReadRecord (*myReader, theIStream, myCurrentData)); |
7fd59977 |
180 | switch (aType) { |
181 | case LDOM_XmlReader::XML_HEADER: |
182 | if (isDoctype || isElement) { |
183 | myError = "Unexpected XML declaration"; |
184 | isError = Standard_True; |
185 | break; |
186 | } |
7fd59977 |
187 | continue; |
188 | case LDOM_XmlReader::XML_DOCTYPE: |
189 | if (isElement) { |
190 | myError = "Unexpected DOCTYPE declaration"; |
191 | isError = Standard_True; |
192 | break; |
193 | } |
194 | isDoctype = Standard_True; |
b1811c1d |
195 | continue; |
7fd59977 |
196 | case LDOM_XmlReader::XML_COMMENT: |
197 | continue; |
198 | case LDOM_XmlReader::XML_FULL_ELEMENT: |
199 | if (isElement == Standard_False) { |
200 | isElement = Standard_True; |
201 | myDocument -> myRootElement = &myReader -> GetElement (); |
202 | if (startElement()) { |
203 | isError = Standard_True; |
204 | myError = "User abort at startElement()"; |
205 | break; |
206 | } |
207 | if (endElement()) { |
208 | isError = Standard_True; |
209 | myError = "User abort at endElement()"; |
210 | break; |
211 | } |
212 | continue; |
213 | } |
b1811c1d |
214 | isError = Standard_True; |
215 | myError = "Expected comment or end-of-file"; |
216 | break; |
7fd59977 |
217 | case LDOM_XmlReader::XML_START_ELEMENT: |
218 | if (isElement == Standard_False) { |
219 | isElement = Standard_True; |
5fce1605 |
220 | |
221 | if (theWithoutRoot && !isInsertFictRootElement) |
222 | { |
223 | isInsertFictRootElement = Standard_True; |
224 | |
225 | // create fiction root element |
226 | TCollection_AsciiString aFicName ("document"); |
227 | myReader->CreateElement (aFicName.ToCString(), aFicName.Length()); |
228 | } |
229 | |
230 | myDocument->myRootElement = &myReader->GetElement(); |
231 | |
7fd59977 |
232 | if (startElement()) { |
233 | isError = Standard_True; |
234 | myError = "User abort at startElement()"; |
235 | break; |
236 | } |
4ff92abe |
237 | isError = ParseElement (theIStream); |
7fd59977 |
238 | if (isError) break; |
239 | continue; |
240 | } |
241 | isError = Standard_True; |
242 | myError = "Expected comment or end-of-file"; |
b1811c1d |
243 | break; |
7fd59977 |
244 | case LDOM_XmlReader::XML_END_ELEMENT: |
245 | if (endElement()) { |
246 | isError = Standard_True; |
247 | myError = "User abort at endElement()"; |
248 | } |
b1811c1d |
249 | break; |
7fd59977 |
250 | case LDOM_XmlReader::XML_EOF: |
251 | break; |
252 | case LDOM_XmlReader::XML_UNKNOWN: |
253 | if (isElement) { |
254 | default: |
255 | myError = "Unexpected data beyond the Document Element"; |
256 | } |
257 | isError = Standard_True; |
258 | } |
259 | break; |
260 | } |
261 | return isError; |
262 | } |
263 | |
264 | //======================================================================= |
265 | //function : ParseElement |
266 | //purpose : parse one element, given the type of its XML presentation |
267 | //======================================================================= |
268 | |
4ff92abe |
269 | Standard_Boolean LDOMParser::ParseElement (Standard_IStream& theIStream) |
7fd59977 |
270 | { |
271 | Standard_Boolean isError = Standard_False; |
272 | const LDOM_BasicElement * aParent = &myReader->GetElement(); |
273 | const LDOM_BasicNode * aLastChild = NULL; |
302f96fb |
274 | for(;;) { |
7fd59977 |
275 | LDOM_Node::NodeType aLocType; |
276 | LDOMBasicString aTextValue; |
277 | char *aTextStr; |
4ff92abe |
278 | LDOM_XmlReader::RecordType aType = ReadRecord (* myReader, theIStream, myCurrentData); |
7fd59977 |
279 | switch (aType) { |
280 | case LDOM_XmlReader::XML_UNKNOWN: |
281 | isError = Standard_True; |
282 | break; |
283 | case LDOM_XmlReader::XML_FULL_ELEMENT: |
284 | aParent -> AppendChild (&myReader -> GetElement(), aLastChild); |
285 | if (startElement()) { |
286 | isError = Standard_True; |
287 | myError = "User abort at startElement()"; |
288 | break; |
289 | } |
290 | if (endElement()) { |
291 | isError = Standard_True; |
292 | myError = "User abort at endElement()"; |
293 | break; |
294 | } |
295 | break; |
296 | case LDOM_XmlReader::XML_START_ELEMENT: |
297 | aParent -> AppendChild (&myReader -> GetElement(), aLastChild); |
298 | if (startElement()) { |
299 | isError = Standard_True; |
300 | myError = "User abort at startElement()"; |
301 | break; |
302 | } |
4ff92abe |
303 | isError = ParseElement (theIStream); |
7fd59977 |
304 | break; |
305 | case LDOM_XmlReader::XML_END_ELEMENT: |
306 | { |
307 | Standard_CString aParentName = Standard_CString(aParent->GetTagName()); |
308 | aTextStr = (char *)myCurrentData.str(); |
309 | if (strcmp(aTextStr, aParentName) != 0) { |
310 | myError = "Expected end tag \'"; |
311 | myError += aParentName; |
312 | myError += "\'"; |
313 | isError = Standard_True; |
314 | } |
315 | else if (endElement()) { |
316 | isError = Standard_True; |
317 | myError = "User abort at endElement()"; |
318 | } |
319 | delete [] aTextStr; |
320 | } |
321 | return isError; |
322 | case LDOM_XmlReader::XML_TEXT: |
323 | aLocType = LDOM_Node::TEXT_NODE; |
324 | { |
325 | Standard_Integer aTextLen; |
326 | aTextStr = LDOM_CharReference::Decode ((char *)myCurrentData.str(), aTextLen); |
327 | // try to convert to integer |
328 | if (IsDigit(aTextStr[0])) { |
329 | if (LDOM_XmlReader::getInteger (aTextValue, aTextStr, |
330 | aTextStr + aTextLen)) |
331 | aTextValue = LDOMBasicString (aTextStr, aTextLen, myDocument); |
332 | } else |
333 | aTextValue = LDOMBasicString (aTextStr, aTextLen, myDocument); |
334 | } |
335 | goto create_text_node; |
336 | case LDOM_XmlReader::XML_COMMENT: |
337 | aLocType = LDOM_Node::COMMENT_NODE; |
338 | { |
339 | Standard_Integer aTextLen; |
340 | aTextStr = LDOM_CharReference::Decode ((char *)myCurrentData.str(), aTextLen); |
341 | aTextValue = LDOMBasicString (aTextStr, aTextLen, myDocument); |
342 | } |
343 | goto create_text_node; |
344 | case LDOM_XmlReader::XML_CDATA: |
345 | aLocType = LDOM_Node::CDATA_SECTION_NODE; |
346 | aTextStr = (char *)myCurrentData.str(); |
347 | aTextValue = LDOMBasicString(aTextStr,myCurrentData.Length(),myDocument); |
348 | create_text_node: |
349 | { |
350 | LDOM_BasicNode& aTextNode = |
351 | LDOM_BasicText::Create (aLocType, aTextValue, myDocument); |
352 | aParent -> AppendChild (&aTextNode, aLastChild); |
353 | } |
354 | delete [] aTextStr; |
355 | break; |
356 | case LDOM_XmlReader::XML_EOF: |
357 | myError = "Inexpected end of file"; |
358 | isError = Standard_True; |
359 | break; |
360 | default: ; |
361 | } |
362 | if (isError) break; |
363 | } |
364 | return isError; |
365 | } |
366 | |
367 | //======================================================================= |
368 | //function : startElement |
369 | //purpose : virtual hook on 'StartElement' event for descendant classes |
370 | //======================================================================= |
371 | |
372 | Standard_Boolean LDOMParser::startElement () |
373 | { |
374 | return Standard_False; |
375 | } |
376 | |
377 | //======================================================================= |
378 | //function : endElement |
379 | //purpose : virtual hook on 'EndElement' event for descendant classes |
380 | //======================================================================= |
381 | |
382 | Standard_Boolean LDOMParser::endElement () |
383 | { |
384 | return Standard_False; |
385 | } |
386 | |
387 | //======================================================================= |
388 | //function : getCurrentElement |
389 | //purpose : |
390 | //======================================================================= |
391 | |
392 | LDOM_Element LDOMParser::getCurrentElement () const |
393 | { |
394 | return LDOM_Element (myReader -> GetElement(), myDocument); |
395 | } |
396 | |
397 | //======================================================================= |
398 | //function : getDocument |
399 | //purpose : |
400 | //======================================================================= |
401 | |
402 | LDOM_Document LDOMParser::getDocument () |
403 | { |
404 | return myDocument -> Self(); |
405 | } |
406 | |