// Created on: 2001-07-20
// Created by: Alexander GRIGORIEV
-// Copyright (c) 2001-2012 OPEN CASCADE SAS
+// Copyright (c) 2001-2014 OPEN CASCADE SAS
//
-// The content of this file is subject to the Open CASCADE Technology Public
-// License Version 6.5 (the "License"). You may not use the content of this file
-// except in compliance with the License. Please obtain a copy of the License
-// at http://www.opencascade.org and read it completely before using this file.
+// This file is part of Open CASCADE Technology software library.
//
-// The Initial Developer of the Original Code is Open CASCADE S.A.S., having its
-// main offices at: 1, place des Freres Montgolfier, 78280 Guyancourt, France.
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License version 2.1 as published
+// by the Free Software Foundation, with special exception defined in the file
+// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
+// distribution for complete text of the license and disclaimer of any warranty.
//
-// The Original Code and all software distributed under the License is
-// distributed on an "AS IS" basis, without warranty of any kind, and the
-// Initial Developer hereby disclaims all such warranties, including without
-// limitation, any warranties of merchantability, fitness for a particular
-// purpose or non-infringement. Please see the License for the specific terms
-// and conditions governing the rights and limitations under the License.
-
-//AGV 060302: Input from istream
+// Alternatively, this file may be used under the terms of Open CASCADE
+// commercial license or contractual agreement.
+
+//AGV 060302: Input from std::istream
// AGV 130302: bug corr: was error if strlen(root_elem_name) < 7
#include <LDOM_XmlReader.hxx>
#include <string.h>
#include <errno.h>
-#ifdef WNT
+#ifdef _MSC_VER
#include <io.h>
#else
#include <unistd.h>
//#include <ctype.h>
const int XML_MIN_BUFFER = 10;
-const int MAX_ATTRIBUTES = 512;
-const int FILE_NONVALUE = -1;
typedef enum {
STATE_WAITING = 0,
//purpose : Constructor (file descriptor)
//=======================================================================
-LDOM_XmlReader::LDOM_XmlReader (const int aFileDes,
- const Handle(LDOM_MemManager)& aDocument,
- TCollection_AsciiString& anErrorString)
- : myEOF (Standard_False),
- myFileDes (aFileDes),
-#ifdef WNT
- myIStream (cin), // one quirk of MSVC6.0: can't initialise by 0
-#else
- myIStream (* (istream *) UndefinedHandleAddress),
-#endif
- myError (anErrorString),
- myDocument (aDocument),
- myPtr (&myBuffer[0]),
- myEndPtr (&myBuffer[0])
-{}
-
-//=======================================================================
-//function : LDOM_XmlReader()
-//purpose : Constructor (istream)
-//=======================================================================
-
-LDOM_XmlReader::LDOM_XmlReader (istream& anInput,
- const Handle(LDOM_MemManager)& aDocument,
- TCollection_AsciiString& anErrorString)
- : myEOF (Standard_False),
- myFileDes (FILE_NONVALUE),
- myIStream (anInput),
- myError (anErrorString),
- myDocument (aDocument),
- myPtr (&myBuffer[0]),
- myEndPtr (&myBuffer[0])
-{}
+LDOM_XmlReader::LDOM_XmlReader ( // builds a reader whose internal buffer holds no unparsed bytes yet
+ const Handle(LDOM_MemManager)& theDocument,
+ TCollection_AsciiString& theErrorString,
+ const Standard_Boolean theTagPerStep)
+: myEOF (Standard_False), // ReadRecord sets this once the stream yields no more bytes
+ myError (theErrorString), // error text; cleared at the start of every ReadRecord call
+ myDocument (theDocument), // passed to LDOM_BasicElement::Create by CreateElement
+ myElement (NULL), // assigned later by CreateElement
+ myLastChild(NULL),
+ myPtr (&myBuffer[0]), // current parse position
+ myEndPtr (&myBuffer[0]), // equal to myPtr: zero bytes available until the first read
+ myTagPerStep (theTagPerStep), // when true, ReadRecord fetches input with getline up to '>'
+ myBOM (LDOM_OSStream::BOM_UNDEFINED) // stays undefined unless ReadRecord detects a BOM at stream start
+{
+}
//=======================================================================
//function : ReadRecord
//purpose : Read a record from XML file
//=======================================================================
-LDOM_XmlReader::RecordType LDOM_XmlReader::ReadRecord
- (LDOM_OSStream& theData)
+LDOM_XmlReader::RecordType LDOM_XmlReader::ReadRecord (Standard_IStream& theIStream,
+ LDOM_OSStream& theData)
{
theData.Clear();
myError.Clear();
const char * aStartData = NULL, * aNameEnd = NULL, * aPtr;
LDOMBasicString anAttrName, anAttrValue;
char anAttDelimiter = '\0';
+ Standard_Boolean aHasRead = Standard_False;
+ Standard_Boolean isFileStart = !myEOF && theIStream.tellg() == std::iostream::pos_type(0);
for(;;) {
// Check if the current file buffer is exhausted
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// There should always be some bytes available in the buffer for analysis
Standard_Integer aBytesRest = (Standard_Integer)(myEndPtr - myPtr);
- if (aBytesRest < XML_MIN_BUFFER) {
- if (myEOF == Standard_True) {
+ if (aBytesRest < XML_MIN_BUFFER)
+ {
+ if (myEOF == Standard_True)
+ {
if (aBytesRest <= 0)
break; // END of processing
- } else {
- // If we are reading some data, save the beginning and preserve the state
+ }
+ else if (myTagPerStep && aHasRead)
+ {
+ // in myTagPerStep mode, we should parse the buffer to the end before
+ // getting more characters from the stream.
+ }
+ else
+ {
+ // If we are reading some data, save the beginning and preserve the state
if (aStartData /* && aState != STATE_WAITING */) {
if (myPtr > aStartData)
theData.rdbuf()->sputn(aStartData, myPtr - aStartData);
aStartData = &myBuffer[0];
}
- // Copy the rest of file data to the beginning of buffer
+ // Copy the rest of file data to the beginning of buffer
if (aBytesRest > 0)
- memcpy (&myBuffer[0], myPtr, aBytesRest);
+ {
+ // do not use memcpy here because aBytesRest may be greater than myPtr-myBuffer, so, overlap
+ memmove (&myBuffer[0], myPtr, aBytesRest);
+ }
- // Read the full buffer and reset start and end buffer pointers
+ // Read the full buffer and reset start and end buffer pointers
myPtr = &myBuffer[0];
Standard_Size aNBytes;
- if (myFileDes != FILE_NONVALUE)
- aNBytes = read (myFileDes, &myBuffer[aBytesRest],
- XML_BUFFER_SIZE - aBytesRest);
- else {
- myIStream.read (&myBuffer[aBytesRest],
- XML_BUFFER_SIZE - aBytesRest);
- aNBytes = (Standard_Size)myIStream.gcount();
+
+ if (myTagPerStep)
+ {
+ theIStream.getline (&myBuffer[aBytesRest], XML_BUFFER_SIZE - aBytesRest, '>');
+ aHasRead = Standard_True;
}
+ else
+ {
+ theIStream.read (&myBuffer[aBytesRest], XML_BUFFER_SIZE - aBytesRest);
+ }
+ aNBytes = (Standard_Size)theIStream.gcount();
+
if (aNBytes == 0)
+ {
myEOF = Standard_True; // END-OF-FILE
+ }
+ else if (myTagPerStep)
+ {
+ // replace \0 (being inserted by getline method) with >
+ myBuffer[aBytesRest + aNBytes - 1] = '>';
+ }
myEndPtr = &myBuffer[aBytesRest + aNBytes];
myBuffer[aBytesRest + aNBytes] = '\0';
}
}
+ if (isFileStart)
+ {
+ isFileStart = Standard_False;
+ // check for BOM block
+ Standard_Utf8UChar aFirstChar = Standard_Utf8UChar(myPtr[0]);
+ switch(aFirstChar) {
+ case 0xEF:
+ if (Standard_Utf8UChar(myPtr[1]) == 0xBB && Standard_Utf8UChar(myPtr[2]) == 0xBF)
+ {
+ myBOM = LDOM_OSStream::BOM_UTF8;
+ myPtr += 3;
+ }
+ break;
+ case 0xFE:
+ if (Standard_Utf8UChar(myPtr[1]) == 0xFF)
+ {
+ myBOM = LDOM_OSStream::BOM_UTF16BE;
+ myPtr += 2;
+ }
+ break;
+ case 0xFF:
+ if (Standard_Utf8UChar(myPtr[1]) == 0xFE)
+ {
+ if (myPtr[2] == 0 && myPtr[3] == 0)
+ {
+ myBOM = LDOM_OSStream::BOM_UTF32LE;
+ myPtr += 4;
+ }
+ else
+ {
+ myBOM = LDOM_OSStream::BOM_UTF16LE;
+ myPtr += 2;
+ }
+ }
+ break;
+ case 0x00:
+ if (myPtr[1] == 0 && Standard_Utf8UChar(myPtr[2]) == 0xFE && Standard_Utf8UChar(myPtr[3]) == 0xFF)
+ {
+ myBOM = LDOM_OSStream::BOM_UTF32BE;
+ myPtr += 4;
+ }
+ break;
+      case 0x2B:
+        // UTF-7 BOM: "+/v" followed by one of '+', '/', '8' or '9'
+        if (myPtr[1] == 47 && myPtr[2] == 118 &&
+            (myPtr[3] == 43 || myPtr[3] == 47 || myPtr[3] == 56 || myPtr[3] == 57))
+        {
+          myBOM = LDOM_OSStream::BOM_UTF7;
+          // The five-byte form is "+/v8-": the '-' must be looked up in the byte
+          // that follows '8'. The former test compared myPtr[3] with both 56 and 45,
+          // which can never hold, so the trailing '-' was left in the data.
+          if (myPtr[3] == 56 && myPtr[4] == 45)
+            myPtr += 5;
+          else
+            myPtr += 4;
+        }
+        break;
+ case 0xF7:
+ if (myPtr[1] == 100 && myPtr[2] == 76)
+ {
+ myBOM = LDOM_OSStream::BOM_UTF1;
+ myPtr += 3;
+ }
+ break;
+ case 0xDD:
+ if (myPtr[1] == 115 && myPtr[2] == 102 && myPtr[3] == 115)
+ {
+ myBOM = LDOM_OSStream::BOM_UTFEBCDIC;
+ myPtr += 4;
+ }
+ break;
+ case 0x0E:
+ if (Standard_Utf8UChar(myPtr[1]) == 0xFE && Standard_Utf8UChar(myPtr[2]) == 0xFF)
+ {
+ myBOM = LDOM_OSStream::BOM_SCSU;
+ myPtr += 3;
+ }
+ break;
+ case 0xFB:
+ if (Standard_Utf8UChar(myPtr[1]) == 0xEE && myPtr[2] == 40)
+ {
+ myBOM = LDOM_OSStream::BOM_BOCU1;
+ myPtr += 3;
+ }
+ break;
+ case 0x84:
+ if (myPtr[1] == 49 && Standard_Utf8UChar(myPtr[2]) == 0x95 && myPtr[3] == 51)
+ {
+ myBOM = LDOM_OSStream::BOM_GB18030;
+ myPtr += 4;
+ }
+ break;
+ }
+ if (myBOM != LDOM_OSStream::BOM_UNDEFINED)
+ continue;
+ }
// Check the character data
switch (aState) {
} // otherwise ERROR
} // end of switch
myError = "Unknown XML object: ";
- myError += TCollection_AsciiString ((const Standard_CString)myPtr,
- XML_MIN_BUFFER);
+ myError += TCollection_AsciiString (myPtr, XML_MIN_BUFFER);
return XML_UNKNOWN;
case '\0':
if (myEOF == Standard_True) continue;
+ Standard_FALLTHROUGH
default:
// Limitation: we do not treat '&' as special character
aPtr = (const char *) memchr (myPtr, '<', myEndPtr - myPtr);
aState = STATE_TEXT;
aStartData = myPtr;
myPtr = myEndPtr;
+ aHasRead = Standard_False;
} // end of checking in STATE_WAITING
continue;
return XML_HEADER;
}
myPtr = myEndPtr - 1;
+ aHasRead = Standard_False;
continue;
// Checking the characters in STATE_DOCTYPE, seek for "]>" sequence
}
}
myPtr = myEndPtr - 1;
+ aHasRead = Standard_False;
continue;
state_doctype_markup:
return XML_DOCTYPE;
}
myPtr = myEndPtr - 1;
+ aHasRead = Standard_False;
continue;
// Checking the characters in STATE_COMMENT, seek for "-->" sequence
}
}
myPtr = myEndPtr - 2;
+ aHasRead = Standard_False;
continue;
// Checking the characters in STATE_TEXT, seek for "<"
return XML_TEXT;
}
myPtr = myEndPtr;
+ aHasRead = Standard_False;
continue;
// Checking the characters in STATE_CDATA, seek for "]]"
return XML_CDATA;
}
myPtr = myEndPtr - 1;
+ aHasRead = Standard_False;
continue;
// Checking the characters in STATE_ELEMENT, seek the end of TagName
myError = "Improper element tag termination";
else {
myPtr += 2;
-#ifdef DEB
+#ifdef OCCT_DEBUG
theData.Clear();
theData << myElement->GetTagName();
#endif
return XML_UNKNOWN;
}
++ myPtr;
-#ifdef DEB
+#ifdef OCCT_DEBUG
theData.Clear();
theData << myElement->GetTagName();
#endif
switch (myPtr[0]) {
case '=' :
aState = STATE_ATTRIBUTE_VALUE;
+ Standard_FALLTHROUGH
case ' ' :
case '\t':
case '\n':
myPtr = aPtr + 1;
aStartData = NULL;
aState = STATE_ATTRIBUTE_NAME;
- } else
+ }
+ else {
myPtr = myEndPtr;
+ aHasRead = Standard_False;
+ }
continue;
}
// Checking the characters in STATE_ELEMENT_END, seek for ">"
return XML_END_ELEMENT;
}
myPtr = myEndPtr;
+ aHasRead = Standard_False;
continue;
}
}
aNameEnd = aPtr;
return Standard_False;
}
+ Standard_FALLTHROUGH
case '.' :
case '-' :
case '_' :
return aResult;
}
+//=======================================================================
+//function : CreateElement
+//purpose : Create an element with LDOM_BasicElement::Create (theName, theLen, myDocument) and keep its address in myElement
+//=======================================================================
+void LDOM_XmlReader::CreateElement( const char *theName, const Standard_Integer theLen )
+{
+ myElement = &LDOM_BasicElement::Create (theName, theLen, myDocument); // replaces any previously stored element pointer
+}
+
//=======================================================================
//function : getInteger
//purpose : Try to initialize theValue as Integer; return False on success