// Alternatively, this file may be used under the terms of Open CASCADE
// commercial license or contractual agreement.
-//AGV 060302: Input from istream
+//AGV 060302: Input from std::istream
// AGV 130302: bug corr: was error if strlen(root_elem_name) < 7
#include <LDOM_XmlReader.hxx>
myLastChild(NULL),
myPtr (&myBuffer[0]),
myEndPtr (&myBuffer[0]),
- myTagPerStep (theTagPerStep)
+ myTagPerStep (theTagPerStep),
+ myBOM (LDOM_OSStream::BOM_UNDEFINED)
{
}
LDOMBasicString anAttrName, anAttrValue;
char anAttDelimiter = '\0';
Standard_Boolean aHasRead = Standard_False;
+ Standard_Boolean isFileStart = !myEOF && theIStream.tellg() == std::iostream::pos_type(0);
for(;;) {
// Check if the current file buffer is exhausted
}
else
{
- // If we are reading some data, save the beginning and preserve the state
+ // If we are reading some data, save the beginning and preserve the state
if (aStartData /* && aState != STATE_WAITING */) {
if (myPtr > aStartData)
theData.rdbuf()->sputn(aStartData, myPtr - aStartData);
aStartData = &myBuffer[0];
}
- // Copy the rest of file data to the beginning of buffer
+ // Copy the rest of file data to the beginning of buffer
if (aBytesRest > 0)
- memcpy (&myBuffer[0], myPtr, aBytesRest);
+ {
+ // Use memmove, not memcpy: when aBytesRest is greater than myPtr-myBuffer the source and destination ranges overlap
+ memmove (&myBuffer[0], myPtr, aBytesRest);
+ }
- // Read the full buffer and reset start and end buffer pointers
+ // Read the full buffer and reset start and end buffer pointers
myPtr = &myBuffer[0];
Standard_Size aNBytes;
myBuffer[aBytesRest + aNBytes] = '\0';
}
}
+ if (isFileStart)
+ {
+   // Probe for a byte-order mark (BOM) only once: the flag is cleared right
+   // away so later iterations of the enclosing loop skip this block.
+   isFileStart = Standard_False;
+   // check for BOM block
+   // The first byte selects the candidate signature; the remaining bytes are
+   // then compared one by one. On a match myBOM records the kind and myPtr is
+   // advanced past the signature.
+   Standard_Utf8UChar aFirstChar = Standard_Utf8UChar(myPtr[0]);
+   switch(aFirstChar) {
+   case 0xEF: // EF BB BF
+     if (Standard_Utf8UChar(myPtr[1]) == 0xBB && Standard_Utf8UChar(myPtr[2]) == 0xBF)
+     {
+       myBOM = LDOM_OSStream::BOM_UTF8;
+       myPtr += 3;
+     }
+     break;
+   case 0xFE: // FE FF
+     if (Standard_Utf8UChar(myPtr[1]) == 0xFF)
+     {
+       myBOM = LDOM_OSStream::BOM_UTF16BE;
+       myPtr += 2;
+     }
+     break;
+   case 0xFF: // FF FE [00 00]
+     if (Standard_Utf8UChar(myPtr[1]) == 0xFE)
+     {
+       // FF FE followed by two zero bytes is taken as the 4-byte UTF-32LE
+       // signature; otherwise only the 2-byte UTF-16LE one is consumed.
+       if (myPtr[2] == 0 && myPtr[3] == 0)
+       {
+         myBOM = LDOM_OSStream::BOM_UTF32LE;
+         myPtr += 4;
+       }
+       else
+       {
+         myBOM = LDOM_OSStream::BOM_UTF16LE;
+         myPtr += 2;
+       }
+     }
+     break;
+   case 0x00: // 00 00 FE FF
+     if (myPtr[1] == 0 && Standard_Utf8UChar(myPtr[2]) == 0xFE && Standard_Utf8UChar(myPtr[3]) == 0xFF)
+     {
+       myBOM = LDOM_OSStream::BOM_UTF32BE;
+       myPtr += 4;
+     }
+     break;
+   case 0x2B: // 2B 2F 76 followed by one of 2B, 2F, 38, 39
+     if (myPtr[1] == 47 && myPtr[2] == 118 &&
+         (myPtr[3] == 43 || myPtr[3] == 47 || myPtr[3] == 56 || myPtr[3] == 57))
+     {
+       myBOM = LDOM_OSStream::BOM_UTF7;
+       // Five-byte form: 2B 2F 76 38 2D. The fifth byte has to be tested at
+       // index 4; comparing myPtr[3] with both 56 and 45 could never be true,
+       // so the trailing 2D byte was never skipped.
+       if (myPtr[3] == 56 && myPtr[4] == 45)
+         myPtr += 5;
+       else
+         myPtr += 4;
+     }
+     break;
+   case 0xF7: // F7 64 4C
+     if (myPtr[1] == 100 && myPtr[2] == 76)
+     {
+       myBOM = LDOM_OSStream::BOM_UTF1;
+       myPtr += 3;
+     }
+     break;
+   case 0xDD: // DD 73 66 73
+     if (myPtr[1] == 115 && myPtr[2] == 102 && myPtr[3] == 115)
+     {
+       myBOM = LDOM_OSStream::BOM_UTFEBCDIC;
+       myPtr += 4;
+     }
+     break;
+   case 0x0E: // 0E FE FF
+     if (Standard_Utf8UChar(myPtr[1]) == 0xFE && Standard_Utf8UChar(myPtr[2]) == 0xFF)
+     {
+       myBOM = LDOM_OSStream::BOM_SCSU;
+       myPtr += 3;
+     }
+     break;
+   case 0xFB: // FB EE 28
+     if (Standard_Utf8UChar(myPtr[1]) == 0xEE && myPtr[2] == 40)
+     {
+       myBOM = LDOM_OSStream::BOM_BOCU1;
+       myPtr += 3;
+     }
+     break;
+   case 0x84: // 84 31 95 33
+     if (myPtr[1] == 49 && Standard_Utf8UChar(myPtr[2]) == 0x95 && myPtr[3] == 51)
+     {
+       myBOM = LDOM_OSStream::BOM_GB18030;
+       myPtr += 4;
+     }
+     break;
+   }
+   // A signature was recognised and skipped: go back to the top of the
+   // enclosing for(;;) loop instead of interpreting the current character.
+   if (myBOM != LDOM_OSStream::BOM_UNDEFINED)
+     continue;
+ }
// Check the character data
switch (aState) {
} // otherwise ERROR
} // end of switch
myError = "Unknown XML object: ";
- myError += TCollection_AsciiString ((const Standard_CString)myPtr,
- XML_MIN_BUFFER);
+ myError += TCollection_AsciiString (myPtr, XML_MIN_BUFFER);
return XML_UNKNOWN;
case '\0':
if (myEOF == Standard_True) continue;
+ Standard_FALLTHROUGH
default:
// Limitation: we do not treat '&' as special character
aPtr = (const char *) memchr (myPtr, '<', myEndPtr - myPtr);
switch (myPtr[0]) {
case '=' :
aState = STATE_ATTRIBUTE_VALUE;
+ Standard_FALLTHROUGH
case ' ' :
case '\t':
case '\n':
aNameEnd = aPtr;
return Standard_False;
}
+ Standard_FALLTHROUGH
case '.' :
case '-' :
case '_' :