1 // Created on: 2002-02-08
2 // Created by: Alexander GRIGORIEV
3 // Copyright (c) 2002-2014 OPEN CASCADE SAS
5 // This file is part of Open CASCADE Technology software library.
7 // This library is free software; you can redistribute it and/or modify it under
8 // the terms of the GNU Lesser General Public License version 2.1 as published
9 // by the Free Software Foundation, with special exception defined in the file
10 // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
11 // distribution for complete text of the license and disclaimer of any warranty.
13 // Alternatively, this file may be used under the terms of Open CASCADE
14 // commercial license or contractual agreement.
#include <LDOM_CharReference.hxx>

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Uncomment the #define below if you want your XML files to contain the
// codes 0xc0-0xff as defined in the Latin-1 code set. Otherwise these codes
// are written numerically as &#x..;
24 //#define LDOM_ALLOW_LATIN_1
// Classification codes stored in LDOM_CharReference::myTab, one per byte
// value. Encode() looks a byte up in myTab and acts on the code it finds.
// The positive ENTI_* codes are also used by Encode() as indices into its
// table of predefined entity references.
const int NORMAL_C  = 0;   // byte is copied to the output unchanged
const int CHAR_REF  = -1;  // byte is written as a numeric reference &#x..;
const int ENTI_AMP  = 1;   // '&'
const int ENTI_LT   = 2;   // '<'
const int ENTI_GT   = 3;   // '>'
const int ENTI_QUOT = 4;   // '"' (replaced by Encode() only when isAttribute is set)
//const int ENTI_APOS = 5; // '\'' (disabled: the matching myTab entry is commented out)
// Replacement text of one predefined entity reference together with its
// length. Encode() keeps a static table of these, indexed by ENTI_* code.
struct entityRef
{
  const char * name;    // replacement text (not owned by this object)
  int          length;  // number of characters in name, terminating NUL excluded

  entityRef (const char * aName, const int aLen) : name(aName), length(aLen) {}
};
44 //=======================================================================
46 //purpose : Convertes entity and character references on input
47 // Always returns the same string (shortened after replacements)
48 //=======================================================================
50 char * LDOM_CharReference::Decode (char * theSrc, Standard_Integer& theLen)
52 #define IS_EQUAL(_ptr,_string) (!memcmp(_ptr, _string, sizeof(_string)-1))
54 char * aSrcPtr = theSrc, * aDstPtr = theSrc;
55 Standard_Integer anIncrCount = 0;
57 char * aPtr = strchr (aSrcPtr, '&');
60 aPtr = strchr (aSrcPtr, '\0');
62 theLen = (Standard_Integer)(aPtr - theSrc);
64 Standard_Integer aByteCount = (Standard_Integer)(aPtr - aSrcPtr);
65 memmove (aDstPtr, aSrcPtr, aByteCount + 1);
66 theLen = (Standard_Integer)(aDstPtr - theSrc) + aByteCount;
70 Standard_Integer aByteCount = (Standard_Integer)(aPtr - aSrcPtr);
71 if (aByteCount > 0 && aDstPtr != aSrcPtr)
72 memmove (aDstPtr, aSrcPtr, aByteCount);
74 if (aSrcPtr[1] == '#') {
77 aDstPtr = aSrcPtr - anIncrCount + 1;
78 if (aSrcPtr[2] == 'x')
79 aChar = strtoul (&aSrcPtr[3], &aNewPtr, 16); // hex encoding
81 aChar = strtoul (&aSrcPtr[2], &aNewPtr, 10); // decimal encoding
82 if (aNewPtr[0] != ';' || aChar == 0 || aChar > 255UL)
83 // Error reading an XML string
85 aDstPtr[-1] = (char) aChar;
86 anIncrCount += (Standard_Integer)(aNewPtr - aSrcPtr);
87 aSrcPtr = &aNewPtr[1];
89 else if (IS_EQUAL(aSrcPtr+1, "amp;")) {
90 aDstPtr = aSrcPtr - anIncrCount + 1;
95 else if (IS_EQUAL(aSrcPtr+1, "lt;")) {
96 aDstPtr = aSrcPtr - anIncrCount + 1;
101 else if (IS_EQUAL(aSrcPtr+1, "gt;")) {
102 aDstPtr = aSrcPtr - anIncrCount + 1;
107 else if (IS_EQUAL(aSrcPtr+1, "quot;")) {
108 aDstPtr = aSrcPtr - anIncrCount + 1;
113 else if (IS_EQUAL(aSrcPtr+1, "apos;")) {
114 aDstPtr = aSrcPtr - anIncrCount + 1;
120 aDstPtr = aSrcPtr - anIncrCount;
121 * aDstPtr++ = * aSrcPtr++;
128 //=======================================================================
130 //purpose : This method takes the input string theSrc and returns:
131 // - the pointer equal to theSrc if there are no replacements, or
132 // - the pointer to a newly allocated string with replacements
133 // The output parameter theLen is assigned to the length of
134 // the returned string (whatever the case)
135 //=======================================================================
137 char * LDOM_CharReference::Encode (const char* theSrc, Standard_Integer& theLen,
138 const Standard_Boolean isAttribute)
140 // Initialising the constants
141 static const struct entityRef entity_ref[6] = {
143 entityRef("&", 5),
144 entityRef("<", 4),
145 entityRef(">", 4),
146 entityRef(""", 6),
147 entityRef("'", 6)
150 const char * endSrc, * ptrSrc = theSrc;
151 char * aDest = (char *) theSrc;
152 Standard_Integer aCount = 0;
153 // Analyse if there is a non-standard character in the string
155 const unsigned int iSrc = (unsigned int ) *(const unsigned char* )ptrSrc;
160 if (myTab[iSrc] != NORMAL_C)
161 if (isAttribute || myTab[iSrc] != ENTI_QUOT)
165 // If there are such, copy the string with replacements
167 theLen = (Standard_Integer)(endSrc - theSrc);
169 char * ptrDest = new char [(endSrc - theSrc) + aCount * 5 + 1];
171 for (ptrSrc = theSrc; ptrSrc < endSrc; ptrSrc++) {
172 const unsigned int iSrc = (unsigned int ) *(const unsigned char* )ptrSrc;
173 const int aCode = myTab[iSrc];
174 if (aCode == NORMAL_C) // normal (regular) character
175 * ptrDest++ = * ptrSrc;
176 else if (aCode == CHAR_REF) { // character reference
177 sprintf (ptrDest, "&#x%02x;", iSrc);
179 } else // predefined entity reference
180 if (isAttribute == Standard_False && aCode == ENTI_QUOT)
181 * ptrDest++ = * ptrSrc;
183 memcpy (ptrDest, entity_ref[aCode].name, entity_ref[aCode].length+1);
184 ptrDest += entity_ref[aCode].length;
187 theLen = (Standard_Integer)(ptrDest - aDest);
193 int LDOM_CharReference::myTab [256] = {
// ENTI_APOS, // 027: '  The apostrophe is never used as a delimiter here
287 NORMAL_C, /* 05c: \ */
NORMAL_C, // 07f: DEL
387 #ifdef LDOM_ALLOW_LATIN_1
519 #endif // LDOM_ALLOW_LATIN_1