7fd59977 |
1 | // File: LDOMParser.cxx |
2 | // Created: 20.07.01 14:58:24 |
3 | // Author: Alexander GRIGORIEV |
4 | // Copyright: OpenCascade 2001 |
5 | // History: AGV 060302: Input from istream |
6 | // AGV 130302: Return error if there are data after the root element |
7 | |
8 | //#define LDOM_PARSER_TRACE |
9 | |
10 | #include <LDOMParser.hxx> |
11 | #include <LDOM_MemManager.hxx> |
12 | #include <LDOM_XmlReader.hxx> |
13 | #include <LDOM_BasicText.hxx> |
14 | #include <LDOM_CharReference.hxx> |
15 | |
16 | #include <fcntl.h> |
17 | #ifdef WNT |
18 | #include <io.h> |
19 | #else |
20 | #include <unistd.h> |
21 | #endif |
22 | |
23 | //======================================================================= |
24 | //function : ~LDOMParser |
25 | //purpose : |
26 | //======================================================================= |
27 | |
28 | LDOMParser::~LDOMParser() |
29 | { |
30 | if (myReader) delete myReader; |
31 | } |
32 | |
33 | //======================================================================= |
34 | //function : ReadRecord |
35 | //purpose : Take the next lexical element from XML stream |
36 | //======================================================================= |
37 | |
38 | #ifdef LDOM_PARSER_TRACE |
39 | static |
40 | #else |
41 | inline |
42 | #endif |
43 | LDOM_XmlReader::RecordType ReadRecord (LDOM_XmlReader& aReader, |
44 | LDOM_OSStream& aData) |
45 | { |
46 | #ifdef LDOM_PARSER_TRACE |
47 | static aCounter = 0; |
48 | ++ aCounter; |
49 | #endif |
50 | const LDOM_XmlReader::RecordType aType = aReader.ReadRecord (aData); |
51 | #ifdef LDOM_PARSER_TRACE |
52 | static FILE * ff = NULL; |
53 | TCollection_AsciiString aTraceFileName; |
54 | #ifdef WNT |
55 | aTraceFileName = TCollection_AsciiString (getenv("TEMP")) + "\\ldom.trace"; |
56 | #else |
57 | aTraceFileName = "/tmp/ldom.trace"; |
58 | #endif |
59 | ff = fopen (aTraceFileName.ToCString(),ff ? "at": "wt"); |
60 | const char * aDataType; |
61 | switch (aType) { |
62 | case LDOM_XmlReader::XML_UNKNOWN: aDataType= "XML_UNKNOWN "; break; |
63 | case LDOM_XmlReader::XML_HEADER: aDataType= "XML_HEADER "; break; |
64 | case LDOM_XmlReader::XML_DOCTYPE: aDataType= "XML_DOCTYPE "; break; |
65 | case LDOM_XmlReader::XML_COMMENT: aDataType= "XML_COMMENT "; break; |
66 | case LDOM_XmlReader::XML_START_ELEMENT: aDataType= "XML_START_ELEMENT"; break; |
67 | case LDOM_XmlReader::XML_END_ELEMENT: aDataType= "XML_END_ELEMENT "; break; |
68 | case LDOM_XmlReader::XML_FULL_ELEMENT: aDataType= "XML_FULL_ELEMENT "; break; |
69 | case LDOM_XmlReader::XML_TEXT: aDataType= "XML_TEXT "; break; |
70 | case LDOM_XmlReader::XML_CDATA: aDataType= "XML_CDATA "; break; |
71 | case LDOM_XmlReader::XML_EOF: aDataType= "XML_EOF "; |
72 | } |
73 | char * aStr = aData.str(); |
74 | fprintf (ff, "%5d %s: %s\n", aCounter, aDataType, aStr); |
75 | delete [] aStr; |
76 | fclose (ff); |
77 | #endif |
78 | return aType; |
79 | } |
80 | |
81 | //======================================================================= |
82 | //function : GetError |
83 | //purpose : Return text describing a parsing error |
84 | //======================================================================= |
85 | |
86 | const TCollection_AsciiString& LDOMParser::GetError |
87 | (TCollection_AsciiString& aData) const |
88 | { |
89 | char * aStr =(char *)myCurrentData.str(); |
90 | aData = aStr; |
91 | delete [] aStr; |
92 | return myError; |
93 | } |
94 | |
95 | //======================================================================= |
96 | //function : parse |
97 | //purpose : |
98 | //======================================================================= |
99 | |
100 | Standard_Boolean LDOMParser::parse (istream& anInput) |
101 | { |
102 | // Open the DOM Document |
103 | myDocument = new LDOM_MemManager (20000); |
104 | myError.Clear(); |
105 | |
106 | // Create the Reader instance |
107 | if (myReader) delete myReader; |
108 | myReader = new LDOM_XmlReader (anInput, myDocument, myError); |
109 | |
110 | // Parse |
111 | return ParseDocument(); |
112 | } |
113 | |
114 | //======================================================================= |
115 | //function : parse |
116 | //purpose : |
117 | //======================================================================= |
118 | |
119 | Standard_Boolean LDOMParser::parse (const char * const aFileName) |
120 | { |
121 | // Open the DOM Document |
122 | myDocument = new LDOM_MemManager (20000); |
123 | myError.Clear (); |
124 | |
125 | // Open the file |
126 | int aFile = open (aFileName, O_RDONLY); |
127 | if (aFile < 0) { |
128 | myError = "Fatal XML error: Cannot open XML file"; |
129 | return Standard_True; |
130 | } |
131 | |
132 | // Create the Reader instance |
133 | if (myReader) delete myReader; |
134 | myReader = new LDOM_XmlReader (aFile, myDocument, myError); |
135 | |
136 | // Parse |
137 | Standard_Boolean isError = ParseDocument(); |
138 | close (aFile); |
139 | return isError; |
140 | } |
141 | |
142 | //======================================================================= |
143 | //function : ParseDocument |
144 | //purpose : parse the whole document (abstracted from the XML source) |
145 | //======================================================================= |
146 | |
147 | Standard_Boolean LDOMParser::ParseDocument () |
148 | { |
149 | Standard_Boolean isError = Standard_False; |
150 | Standard_Boolean isElement = Standard_False; |
151 | Standard_Boolean isHeader = Standard_False; |
152 | Standard_Boolean isDoctype = Standard_False; |
153 | |
154 | while (1) { |
155 | LDOM_XmlReader::RecordType aType = ReadRecord (*myReader, myCurrentData); |
156 | switch (aType) { |
157 | case LDOM_XmlReader::XML_HEADER: |
158 | if (isDoctype || isElement) { |
159 | myError = "Unexpected XML declaration"; |
160 | isError = Standard_True; |
161 | break; |
162 | } |
163 | isHeader = Standard_True; |
164 | continue; |
165 | case LDOM_XmlReader::XML_DOCTYPE: |
166 | if (isElement) { |
167 | myError = "Unexpected DOCTYPE declaration"; |
168 | isError = Standard_True; |
169 | break; |
170 | } |
171 | isDoctype = Standard_True; |
172 | case LDOM_XmlReader::XML_COMMENT: |
173 | continue; |
174 | case LDOM_XmlReader::XML_FULL_ELEMENT: |
175 | if (isElement == Standard_False) { |
176 | isElement = Standard_True; |
177 | myDocument -> myRootElement = &myReader -> GetElement (); |
178 | if (startElement()) { |
179 | isError = Standard_True; |
180 | myError = "User abort at startElement()"; |
181 | break; |
182 | } |
183 | if (endElement()) { |
184 | isError = Standard_True; |
185 | myError = "User abort at endElement()"; |
186 | break; |
187 | } |
188 | continue; |
189 | } |
190 | case LDOM_XmlReader::XML_START_ELEMENT: |
191 | if (isElement == Standard_False) { |
192 | isElement = Standard_True; |
193 | myDocument -> myRootElement = &myReader -> GetElement (); |
194 | if (startElement()) { |
195 | isError = Standard_True; |
196 | myError = "User abort at startElement()"; |
197 | break; |
198 | } |
199 | isError = ParseElement (); |
200 | if (isError) break; |
201 | continue; |
202 | } |
203 | isError = Standard_True; |
204 | myError = "Expected comment or end-of-file"; |
205 | case LDOM_XmlReader::XML_END_ELEMENT: |
206 | if (endElement()) { |
207 | isError = Standard_True; |
208 | myError = "User abort at endElement()"; |
209 | } |
210 | case LDOM_XmlReader::XML_EOF: |
211 | break; |
212 | case LDOM_XmlReader::XML_UNKNOWN: |
213 | if (isElement) { |
214 | default: |
215 | myError = "Unexpected data beyond the Document Element"; |
216 | } |
217 | isError = Standard_True; |
218 | } |
219 | break; |
220 | } |
221 | return isError; |
222 | } |
223 | |
224 | //======================================================================= |
225 | //function : ParseElement |
226 | //purpose : parse one element, given the type of its XML presentation |
227 | //======================================================================= |
228 | |
229 | Standard_Boolean LDOMParser::ParseElement () |
230 | { |
231 | Standard_Boolean isError = Standard_False; |
232 | const LDOM_BasicElement * aParent = &myReader->GetElement(); |
233 | const LDOM_BasicNode * aLastChild = NULL; |
234 | while (1) { |
235 | LDOM_Node::NodeType aLocType; |
236 | LDOMBasicString aTextValue; |
237 | char *aTextStr; |
238 | LDOM_XmlReader::RecordType aType = ReadRecord (* myReader, myCurrentData); |
239 | switch (aType) { |
240 | case LDOM_XmlReader::XML_UNKNOWN: |
241 | isError = Standard_True; |
242 | break; |
243 | case LDOM_XmlReader::XML_FULL_ELEMENT: |
244 | aParent -> AppendChild (&myReader -> GetElement(), aLastChild); |
245 | if (startElement()) { |
246 | isError = Standard_True; |
247 | myError = "User abort at startElement()"; |
248 | break; |
249 | } |
250 | if (endElement()) { |
251 | isError = Standard_True; |
252 | myError = "User abort at endElement()"; |
253 | break; |
254 | } |
255 | break; |
256 | case LDOM_XmlReader::XML_START_ELEMENT: |
257 | aParent -> AppendChild (&myReader -> GetElement(), aLastChild); |
258 | if (startElement()) { |
259 | isError = Standard_True; |
260 | myError = "User abort at startElement()"; |
261 | break; |
262 | } |
263 | isError = ParseElement (); |
264 | break; |
265 | case LDOM_XmlReader::XML_END_ELEMENT: |
266 | { |
267 | Standard_CString aParentName = Standard_CString(aParent->GetTagName()); |
268 | aTextStr = (char *)myCurrentData.str(); |
269 | if (strcmp(aTextStr, aParentName) != 0) { |
270 | myError = "Expected end tag \'"; |
271 | myError += aParentName; |
272 | myError += "\'"; |
273 | isError = Standard_True; |
274 | } |
275 | else if (endElement()) { |
276 | isError = Standard_True; |
277 | myError = "User abort at endElement()"; |
278 | } |
279 | delete [] aTextStr; |
280 | } |
281 | return isError; |
282 | case LDOM_XmlReader::XML_TEXT: |
283 | aLocType = LDOM_Node::TEXT_NODE; |
284 | { |
285 | Standard_Integer aTextLen; |
286 | aTextStr = LDOM_CharReference::Decode ((char *)myCurrentData.str(), aTextLen); |
287 | // try to convert to integer |
288 | if (IsDigit(aTextStr[0])) { |
289 | if (LDOM_XmlReader::getInteger (aTextValue, aTextStr, |
290 | aTextStr + aTextLen)) |
291 | aTextValue = LDOMBasicString (aTextStr, aTextLen, myDocument); |
292 | } else |
293 | aTextValue = LDOMBasicString (aTextStr, aTextLen, myDocument); |
294 | } |
295 | goto create_text_node; |
296 | case LDOM_XmlReader::XML_COMMENT: |
297 | aLocType = LDOM_Node::COMMENT_NODE; |
298 | { |
299 | Standard_Integer aTextLen; |
300 | aTextStr = LDOM_CharReference::Decode ((char *)myCurrentData.str(), aTextLen); |
301 | aTextValue = LDOMBasicString (aTextStr, aTextLen, myDocument); |
302 | } |
303 | goto create_text_node; |
304 | case LDOM_XmlReader::XML_CDATA: |
305 | aLocType = LDOM_Node::CDATA_SECTION_NODE; |
306 | aTextStr = (char *)myCurrentData.str(); |
307 | aTextValue = LDOMBasicString(aTextStr,myCurrentData.Length(),myDocument); |
308 | create_text_node: |
309 | { |
310 | LDOM_BasicNode& aTextNode = |
311 | LDOM_BasicText::Create (aLocType, aTextValue, myDocument); |
312 | aParent -> AppendChild (&aTextNode, aLastChild); |
313 | } |
314 | delete [] aTextStr; |
315 | break; |
316 | case LDOM_XmlReader::XML_EOF: |
317 | myError = "Inexpected end of file"; |
318 | isError = Standard_True; |
319 | break; |
320 | default: ; |
321 | } |
322 | if (isError) break; |
323 | } |
324 | return isError; |
325 | } |
326 | |
327 | //======================================================================= |
328 | //function : startElement |
329 | //purpose : virtual hook on 'StartElement' event for descendant classes |
330 | //======================================================================= |
331 | |
332 | Standard_Boolean LDOMParser::startElement () |
333 | { |
334 | return Standard_False; |
335 | } |
336 | |
337 | //======================================================================= |
338 | //function : endElement |
339 | //purpose : virtual hook on 'EndElement' event for descendant classes |
340 | //======================================================================= |
341 | |
342 | Standard_Boolean LDOMParser::endElement () |
343 | { |
344 | return Standard_False; |
345 | } |
346 | |
347 | //======================================================================= |
348 | //function : getCurrentElement |
349 | //purpose : |
350 | //======================================================================= |
351 | |
352 | LDOM_Element LDOMParser::getCurrentElement () const |
353 | { |
354 | return LDOM_Element (myReader -> GetElement(), myDocument); |
355 | } |
356 | |
357 | //======================================================================= |
358 | //function : getDocument |
359 | //purpose : |
360 | //======================================================================= |
361 | |
362 | LDOM_Document LDOMParser::getDocument () |
363 | { |
364 | return myDocument -> Self(); |
365 | } |
366 | |