// Alternatively, this file may be used under the terms of Open CASCADE
// commercial license or contractual agreement.
-//AGV 060302: Input from istream
+//AGV 060302: Input from std::istream
// AGV 130302: bug corr: was error if strlen(root_elem_name) < 7
#include <LDOM_XmlReader.hxx>
//#include <ctype.h>
const int XML_MIN_BUFFER = 10;
-const int FILE_NONVALUE = -1;
typedef enum {
STATE_WAITING = 0,
// Constructor: sets the reader to its initial state - end-of-file not reached,
// no current element or last child, and an empty read buffer.
LDOM_XmlReader::LDOM_XmlReader (
const Handle(LDOM_MemManager)& theDocument,
- TCollection_AsciiString& theErrorString)
+ TCollection_AsciiString& theErrorString,
+ const Standard_Boolean theTagPerStep)
: myEOF (Standard_False),
myError (theErrorString), // myError is initialized from theErrorString
myDocument (theDocument),
myElement (NULL),
myLastChild(NULL),
myPtr (&myBuffer[0]), // read position starts at the beginning of myBuffer
- myEndPtr (&myBuffer[0])
+ myEndPtr (&myBuffer[0]), // equal to myPtr, i.e. no buffered bytes yet
+ myTagPerStep (theTagPerStep), // when true, the reading loop fetches input with getline() up to '>'
+ myBOM (LDOM_OSStream::BOM_UNDEFINED) // stays undefined until the file-start BOM check recognizes a signature
{
}
const char * aStartData = NULL, * aNameEnd = NULL, * aPtr;
LDOMBasicString anAttrName, anAttrValue;
char anAttDelimiter = '\0';
+ Standard_Boolean aHasRead = Standard_False;
+ Standard_Boolean isFileStart = !myEOF && theIStream.tellg() == std::iostream::pos_type(0);
for(;;) {
// Check if the current file buffer is exhausted
// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
// There should always be some bytes available in the buffer for analysis
Standard_Integer aBytesRest = (Standard_Integer)(myEndPtr - myPtr);
- if (aBytesRest < XML_MIN_BUFFER) {
- if (myEOF == Standard_True) {
+ if (aBytesRest < XML_MIN_BUFFER)
+ {
+ if (myEOF == Standard_True)
+ {
if (aBytesRest <= 0)
break; // END of processing
- } else {
- // If we are reading some data, save the beginning and preserve the state
+ }
+ else if (myTagPerStep && aHasRead)
+ {
+ // in myTagPerStep mode, we should parse the buffer to the end before
+ // getting more characters from the stream.
+ }
+ else
+ {
+ // If we are reading some data, save the beginning and preserve the state
if (aStartData /* && aState != STATE_WAITING */) {
if (myPtr > aStartData)
theData.rdbuf()->sputn(aStartData, myPtr - aStartData);
aStartData = &myBuffer[0];
}
- // Copy the rest of file data to the beginning of buffer
+ // Copy the rest of file data to the beginning of buffer
if (aBytesRest > 0)
- memcpy (&myBuffer[0], myPtr, aBytesRest);
+ {
+ // do not use memcpy here because aBytesRest may be greater than myPtr-myBuffer, so, overlap
+ memmove (&myBuffer[0], myPtr, aBytesRest);
+ }
- // Read the full buffer and reset start and end buffer pointers
+ // Read the full buffer and reset start and end buffer pointers
myPtr = &myBuffer[0];
Standard_Size aNBytes;
- theIStream.read (&myBuffer[aBytesRest],
- XML_BUFFER_SIZE - aBytesRest);
- aNBytes = (Standard_Size)theIStream.gcount();
+
+ if (myTagPerStep)
+ {
+ theIStream.getline (&myBuffer[aBytesRest], XML_BUFFER_SIZE - aBytesRest, '>');
+ aHasRead = Standard_True;
+ }
+ else
+ {
+ theIStream.read (&myBuffer[aBytesRest], XML_BUFFER_SIZE - aBytesRest);
+ }
+ aNBytes = (Standard_Size)theIStream.gcount();
+
if (aNBytes == 0)
+ {
myEOF = Standard_True; // END-OF-FILE
+ }
+ else if (myTagPerStep)
+ {
+ // replace \0 (being inserted by getline method) with >
+ myBuffer[aBytesRest + aNBytes - 1] = '>';
+ }
myEndPtr = &myBuffer[aBytesRest + aNBytes];
myBuffer[aBytesRest + aNBytes] = '\0';
}
}
+ if (isFileStart)
+ {
+ isFileStart = Standard_False;
+ // check for BOM block
+ Standard_Utf8UChar aFirstChar = Standard_Utf8UChar(myPtr[0]);
+ switch(aFirstChar) {
+ case 0xEF:
+ if (Standard_Utf8UChar(myPtr[1]) == 0xBB && Standard_Utf8UChar(myPtr[2]) == 0xBF)
+ {
+ myBOM = LDOM_OSStream::BOM_UTF8;
+ myPtr += 3;
+ }
+ break;
+ case 0xFE:
+ if (Standard_Utf8UChar(myPtr[1]) == 0xFF)
+ {
+ myBOM = LDOM_OSStream::BOM_UTF16BE;
+ myPtr += 2;
+ }
+ break;
+ case 0xFF:
+ if (Standard_Utf8UChar(myPtr[1]) == 0xFE)
+ {
+ if (myPtr[2] == 0 && myPtr[3] == 0)
+ {
+ myBOM = LDOM_OSStream::BOM_UTF32LE;
+ myPtr += 4;
+ }
+ else
+ {
+ myBOM = LDOM_OSStream::BOM_UTF16LE;
+ myPtr += 2;
+ }
+ }
+ break;
+ case 0x00:
+ if (myPtr[1] == 0 && Standard_Utf8UChar(myPtr[2]) == 0xFE && Standard_Utf8UChar(myPtr[3]) == 0xFF)
+ {
+ myBOM = LDOM_OSStream::BOM_UTF32BE;
+ myPtr += 4;
+ }
+ break;
+      case 0x2B:
+        // UTF-7 signature: bytes 2B 2F 76 followed by one of 2B, 2F, 38 or 39.
+        // The 38 variant may be followed by a fifth byte 2D ('-') that also
+        // belongs to the signature and must be skipped with it.
+        if (myPtr[1] == 47 && myPtr[2] == 118 &&
+            (myPtr[3] == 43 || myPtr[3] == 47 || myPtr[3] == 56 || myPtr[3] == 57))
+        {
+          myBOM = LDOM_OSStream::BOM_UTF7;
+          // The fifth byte lives at index 4. Testing myPtr[3] against both 56 and 45
+          // can never succeed, which left the trailing '-' in the parsed data.
+          // Reading myPtr[4] is expected to be safe: the four preceding bytes are
+          // non-zero and the buffer is NUL-terminated at myEndPtr, so index 4 is at
+          // worst that terminator.
+          if (myPtr[3] == 56 && myPtr[4] == 45)
+            myPtr += 5;
+          else
+            myPtr += 4;
+        }
+        break;
+ case 0xF7:
+ if (myPtr[1] == 100 && myPtr[2] == 76)
+ {
+ myBOM = LDOM_OSStream::BOM_UTF1;
+ myPtr += 3;
+ }
+ break;
+ case 0xDD:
+ if (myPtr[1] == 115 && myPtr[2] == 102 && myPtr[3] == 115)
+ {
+ myBOM = LDOM_OSStream::BOM_UTFEBCDIC;
+ myPtr += 4;
+ }
+ break;
+ case 0x0E:
+ if (Standard_Utf8UChar(myPtr[1]) == 0xFE && Standard_Utf8UChar(myPtr[2]) == 0xFF)
+ {
+ myBOM = LDOM_OSStream::BOM_SCSU;
+ myPtr += 3;
+ }
+ break;
+ case 0xFB:
+ if (Standard_Utf8UChar(myPtr[1]) == 0xEE && myPtr[2] == 40)
+ {
+ myBOM = LDOM_OSStream::BOM_BOCU1;
+ myPtr += 3;
+ }
+ break;
+ case 0x84:
+ if (myPtr[1] == 49 && Standard_Utf8UChar(myPtr[2]) == 0x95 && myPtr[3] == 51)
+ {
+ myBOM = LDOM_OSStream::BOM_GB18030;
+ myPtr += 4;
+ }
+ break;
+ }
+ if (myBOM != LDOM_OSStream::BOM_UNDEFINED)
+ continue;
+ }
// Check the character data
switch (aState) {
} // otherwise ERROR
} // end of switch
myError = "Unknown XML object: ";
- myError += TCollection_AsciiString ((const Standard_CString)myPtr,
- XML_MIN_BUFFER);
+ myError += TCollection_AsciiString (myPtr, XML_MIN_BUFFER);
return XML_UNKNOWN;
case '\0':
if (myEOF == Standard_True) continue;
+ Standard_FALLTHROUGH
default:
// Limitation: we do not treat '&' as special character
aPtr = (const char *) memchr (myPtr, '<', myEndPtr - myPtr);
aState = STATE_TEXT;
aStartData = myPtr;
myPtr = myEndPtr;
+ aHasRead = Standard_False;
} // end of checking in STATE_WAITING
continue;
return XML_HEADER;
}
myPtr = myEndPtr - 1;
+ aHasRead = Standard_False;
continue;
// Checking the characters in STATE_DOCTYPE, seek for "]>" sequence
}
}
myPtr = myEndPtr - 1;
+ aHasRead = Standard_False;
continue;
state_doctype_markup:
return XML_DOCTYPE;
}
myPtr = myEndPtr - 1;
+ aHasRead = Standard_False;
continue;
// Checking the characters in STATE_COMMENT, seek for "-->" sequence
}
}
myPtr = myEndPtr - 2;
+ aHasRead = Standard_False;
continue;
// Checking the characters in STATE_TEXT, seek for "<"
return XML_TEXT;
}
myPtr = myEndPtr;
+ aHasRead = Standard_False;
continue;
// Checking the characters in STATE_CDATA, seek for "]]"
return XML_CDATA;
}
myPtr = myEndPtr - 1;
+ aHasRead = Standard_False;
continue;
// Checking the characters in STATE_ELEMENT, seek the end of TagName
switch (myPtr[0]) {
case '=' :
aState = STATE_ATTRIBUTE_VALUE;
+ Standard_FALLTHROUGH
case ' ' :
case '\t':
case '\n':
myPtr = aPtr + 1;
aStartData = NULL;
aState = STATE_ATTRIBUTE_NAME;
- } else
+ }
+ else {
myPtr = myEndPtr;
+ aHasRead = Standard_False;
+ }
continue;
}
// Checking the characters in STATE_ELEMENT_END, seek for ">"
return XML_END_ELEMENT;
}
myPtr = myEndPtr;
+ aHasRead = Standard_False;
continue;
}
}
aNameEnd = aPtr;
return Standard_False;
}
+ Standard_FALLTHROUGH
case '.' :
case '-' :
case '_' :
return aResult;
}
+//=======================================================================
+//function : CreateElement
+//purpose : Calls LDOM_BasicElement::Create with theName, theLen (presumably the length of theName - confirm) and myDocument, and stores the address of the result in myElement
+//=======================================================================
+void LDOM_XmlReader::CreateElement( const char *theName, const Standard_Integer theLen )
+{
+ myElement = &LDOM_BasicElement::Create (theName, theLen, myDocument);
+}
+
//=======================================================================
//function : getInteger
//purpose : Try to initialize theValue as Integer; return False on success