1 // Created on: 2002-02-08
2 // Created by: Alexander GRIGORIEV
3 // Copyright (c) 2002-2012 OPEN CASCADE SAS
5 // The content of this file is subject to the Open CASCADE Technology Public
6 // License Version 6.5 (the "License"). You may not use the content of this file
7 // except in compliance with the License. Please obtain a copy of the License
8 // at http://www.opencascade.org and read it completely before using this file.
10 // The Initial Developer of the Original Code is Open CASCADE S.A.S., having its
11 // main offices at: 1, place des Freres Montgolfier, 78280 Guyancourt, France.
13 // The Original Code and all software distributed under the License is
14 // distributed on an "AS IS" basis, without warranty of any kind, and the
15 // Initial Developer hereby disclaims all such warranties, including without
16 // limitation, any warranties of merchantability, fitness for a particular
17 // purpose or non-infringement. Please see the License for the specific terms
18 // and conditions governing the rights and limitations under the License.
21 #include <LDOM_CharReference.hxx>
26 // Uncomment this line if you want your XML files to contain the codes
27 // 0xc0-0xff as defined in the Latin-1 code set. Otherwise these codes are
28 // written numerically as &#x..;
29 //#define LDOM_ALLOW_LATIN_1
// Classification codes held in LDOM_CharReference::myTab (one entry per byte
// value). Encode() looks up every source byte in myTab and acts on the code.
// NORMAL_C: the byte is copied to the output unchanged.
31 const int NORMAL_C = 0;
// CHAR_REF: the byte is written as a hexadecimal character reference "&#x..;".
32 const int CHAR_REF = -1;
// ENTI_*: positive codes; Encode() uses them as an index into its local
// entity_ref[] table to fetch the replacement text.
33 const int ENTI_AMP = 1;
34 const int ENTI_LT = 2;
35 const int ENTI_GT = 3;
// ENTI_QUOT is only replaced when Encode() is called with isAttribute set;
// otherwise the character is copied literally.
36 const int ENTI_QUOT = 4;
// NOTE(review): the only myTab entry visible in this file that mentions
// ENTI_APOS is commented out - confirm whether this code is ever produced.
37 const int ENTI_APOS = 5;
// Stores the replacement text and its length exactly as passed in; the length
// is not recomputed from aName, so the two arguments must agree.
42 entityRef (const char * aName, const int aLen) : name(aName), length(aLen) {}
// Assignment is declared without a body in the visible source - presumably to
// forbid assigning one entityRef to another (TODO confirm).
43 void operator= (const entityRef&);
46 //=======================================================================
48 //purpose : Converts entity and character references on input
49 // Always returns the same string (shortened after replacements)
50 //=======================================================================
// Decodes, in place, the numeric character references (&#NN; and &#xHH;) and
// the entity names amp, lt, gt, quot and apos found in theSrc.
//   theSrc - NUL-terminated buffer; it is overwritten with the decoded text,
//            which is never longer than the input.
//   theLen - output: length of the decoded string.
// NOTE(review): several lines of this function (loop header, else/return
// statements, the stores of the decoded entity characters) are not visible in
// this listing; the comments below describe only the statements that are.
52 char * LDOM_CharReference::Decode (char * theSrc, Standard_Integer& theLen)
// True when the bytes at _ptr match the literal _string; sizeof-1 leaves the
// literal's terminating NUL out of the comparison.
54 #define IS_EQUAL(_ptr,_string) (!memcmp(_ptr, _string, sizeof(_string)-1))
// aSrcPtr is the read cursor, aDstPtr the write cursor; both start at theSrc.
56 char * aSrcPtr = theSrc, * aDstPtr = theSrc;
// Running total of bytes removed so far (how far the output lags the input).
57 Standard_Integer anIncrCount = 0;
// Locate the next candidate reference.
59 char * aPtr = strchr (aSrcPtr, '&');
// Find the terminating NUL instead (presumably when no '&' was found).
62 aPtr = strchr (aSrcPtr, '\0');
// Length measured from the start of the buffer to the terminator.
64 theLen = aPtr - theSrc;
// Move the remaining tail, including its NUL (hence + 1), down to the write
// cursor; the length is what was already written plus that tail.
66 Standard_Integer aByteCount = aPtr - aSrcPtr;
67 memmove (aDstPtr, aSrcPtr, aByteCount + 1);
68 theLen = (aDstPtr - theSrc) + aByteCount;
// Copy the literal run that precedes the '&' down to the write cursor; skipped
// when the run is empty or the cursors still coincide.
72 Standard_Integer aByteCount = aPtr - aSrcPtr;
73 if (aByteCount > 0 && aDstPtr != aSrcPtr)
74 memmove (aDstPtr, aSrcPtr, aByteCount);
// "&#" introduces a numeric character reference.
76 if (aSrcPtr[1] == '#') {
// Place the write cursor one past the slot that receives the decoded byte
// (the byte itself is stored through aDstPtr[-1] below).
79 aDstPtr = aSrcPtr - anIncrCount + 1;
// An 'x' after "&#" selects hexadecimal digits.
80 if (aSrcPtr[2] == 'x')
81 aChar = strtoul (&aSrcPtr[3], &aNewPtr, 16); // hex encoding
83 aChar = strtoul (&aSrcPtr[2], &aNewPtr, 10); // decimal encoding
// The digits must be followed by ';' and the value must lie in 1..255.
84 if (aNewPtr[0] != ';' || aChar == 0 || aChar > 255UL)
85 // Error reading an XML string
// Store the decoded byte, account for the bytes the reference occupied beyond
// that single byte, and resume reading just after the ';'.
87 aDstPtr[-1] = (char) aChar;
88 anIncrCount += aNewPtr - aSrcPtr;
89 aSrcPtr = &aNewPtr[1];
// Named entities: each branch compares the text after '&' with one entity
// name and positions the write cursor the same way as for numeric references.
91 else if (IS_EQUAL(aSrcPtr+1, "amp;")) {
92 aDstPtr = aSrcPtr - anIncrCount + 1;
97 else if (IS_EQUAL(aSrcPtr+1, "lt;")) {
98 aDstPtr = aSrcPtr - anIncrCount + 1;
103 else if (IS_EQUAL(aSrcPtr+1, "gt;")) {
104 aDstPtr = aSrcPtr - anIncrCount + 1;
109 else if (IS_EQUAL(aSrcPtr+1, "quot;")) {
110 aDstPtr = aSrcPtr - anIncrCount + 1;
115 else if (IS_EQUAL(aSrcPtr+1, "apos;")) {
116 aDstPtr = aSrcPtr - anIncrCount + 1;
// Copies the single character at the read cursor to the output unchanged
// (presumably the fallback for an '&' that starts no recognised reference).
122 aDstPtr = aSrcPtr - anIncrCount;
123 * aDstPtr++ = * aSrcPtr++;
130 //=======================================================================
132 //purpose : This method takes the input string theSrc and returns:
133 // - the pointer equal to theSrc if there are no replacements, or
134 // - the pointer to a newly allocated string with replacements
135 // The output parameter theLen is assigned to the length of
136 // the returned string (whatever the case)
137 //=======================================================================
// Escapes the characters of theSrc that myTab marks as needing replacement.
//   theSrc      - input string; scanned up to endSrc.
//   theLen      - output: length of the resulting string.
//   isAttribute - when false, a character classified ENTI_QUOT is left as is.
// When replacements are required the result is built in a buffer obtained
// with new char[]; no release of that buffer is visible in this function, so
// it is presumably the caller's responsibility (TODO confirm).
// NOTE(review): some lines of this function (loop headers, the counter
// increment, the return statements) are not visible in this listing.
139 char * LDOM_CharReference::Encode (const char* theSrc, Standard_Integer& theLen,
140 const Standard_Boolean isAttribute)
142 // Initialising the constants
// Replacement texts indexed by the ENTI_* code read from myTab. Each length
// must equal the number of characters in the text, because the copy below
// transfers length + 1 bytes (the text plus its terminating NUL).
143 static const struct entityRef entity_ref[6] = {
145 entityRef("&amp;", 5),
146 entityRef("&lt;", 4),
147 entityRef("&gt;", 4),
148 entityRef("&quot;", 6),
149 entityRef("&apos;", 6)
152 const char * endSrc, * ptrSrc = theSrc;
// Until a new buffer is needed the destination aliases the source.
153 char * aDest = (char *) theSrc;
// Used below to size the output buffer (5 extra bytes per counted character).
154 Standard_Integer aCount = 0;
155 // Analyse if there is a non-standard character in the string
// The byte is read through unsigned char so it is a valid 0..255 table index.
157 const unsigned int iSrc =
158 (const unsigned int) * (const unsigned char *) ptrSrc;
// A quote character only qualifies when encoding an attribute value.
163 if (myTab[iSrc] != NORMAL_C)
164 if (isAttribute || myTab[iSrc] != ENTI_QUOT)
168 // If there are such, copy the string with replacements
170 theLen = endSrc - theSrc;
// Every replacement visible here is at most 6 bytes for one source byte, i.e.
// 5 extra bytes each, plus one byte for the terminating NUL.
172 char * ptrDest = new char [(endSrc - theSrc) + aCount * 5 + 1];
174 for (ptrSrc = theSrc; ptrSrc < endSrc; ptrSrc++) {
175 const unsigned int iSrc =
176 (const unsigned int) * (const unsigned char *) ptrSrc;
177 const int aCode = myTab[iSrc];
178 if (aCode == NORMAL_C) // normal (regular) character
179 * ptrDest++ = * ptrSrc;
180 else if (aCode == CHAR_REF) { // character reference
// Writes the six characters "&#xHH;" followed by a NUL.
181 sprintf (ptrDest, "&#x%02x;", iSrc);
183 } else // predefined entity reference
// Outside attribute values the quote character is copied literally.
184 if (isAttribute == Standard_False && aCode == ENTI_QUOT)
185 * ptrDest++ = * ptrSrc;
// Copy the entity text with its NUL, then advance by the text length only so
// that the next write overwrites that NUL.
187 memcpy (ptrDest, entity_ref[aCode].name, entity_ref[aCode].length+1);
188 ptrDest += entity_ref[aCode].length;
// Resulting length: distance from the start of the destination to the cursor.
191 theLen = ptrDest - aDest;
197 int LDOM_CharReference::myTab [256] = {
237 // ENTI_APOS, // 027: ' (disabled: the apostrophe is never used as a delimiter here)
326 NORMAL_C, // 07f: DEL
391 #ifdef LDOM_ALLOW_LATIN_1
523 #endif // LDOM_ALLOW_LATIN_1