1 // Created on: 2002-02-08
2 // Created by: Alexander GRIGORIEV
3 // Copyright (c) 2002-2014 OPEN CASCADE SAS
5 // This file is part of Open CASCADE Technology software library.
7 // This library is free software; you can redistribute it and/or modify it under
8 // the terms of the GNU Lesser General Public License version 2.1 as published
9 // by the Free Software Foundation, with special exception defined in the file
10 // OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
11 // distribution for complete text of the license and disclaimer of any warranty.
13 // Alternatively, this file may be used under the terms of Open CASCADE
14 // commercial license or contractual agreement.
16 #include <LDOM_CharReference.hxx>
21 // Uncomment the #define below if you want your XML files to contain the
22 // codes 0xc0-0xff as defined in the Latin-1 code set. Otherwise these codes
23 // are written numerically as &#x..;
24 //#define LDOM_ALLOW_LATIN_1
// Classification codes that Encode() compares against the entries of
// LDOM_CharReference::myTab. Encode() uses any code other than NORMAL_C and
// CHAR_REF directly as an index into its local entity_ref[] table.
26 const int NORMAL_C = 0; // character is copied to the output unchanged
27 const int CHAR_REF = -1; // character is written as a numeric reference "&#x..;"
28 const int ENTI_AMP = 1; // index into entity_ref[]
29 const int ENTI_LT = 2; // index into entity_ref[]
30 const int ENTI_GT = 3; // index into entity_ref[]
31 const int ENTI_QUOT = 4; // index into entity_ref[]; Encode() replaces it only when isAttribute is set
32 //const int ENTI_APOS = 5;
// Constructor: stores the given string pointer and length in the members
// 'name' and 'length' (their declarations are not visible in this excerpt).
37 entityRef (const char * aName, const int aLen) : name(aName), length(aLen) {}
// Assignment operator is declared without a body here - presumably to forbid
// assignment; TODO confirm against the complete struct definition.
38 void operator= (const entityRef&);
41 //=======================================================================
43 //purpose : Converts entity and character references on input
44 // Always returns the same string (shortened after replacements)
45 //=======================================================================
// Decodes numeric character references ("&#NN;" decimal, "&#xHH;" hex) and
// recognises the entity references "&amp;", "&lt;", "&gt;", "&quot;" and
// "&apos;" in theSrc. The read and write pointers both start at theSrc, so
// the text is rewritten inside the caller's own buffer.
//   theSrc - NUL-terminated string to decode; its contents are modified
//   theLen - output: length computed for the decoded string
// NOTE(review): several original lines (braces, the loop header, branch
// headers, return statements and parts of the entity branches) are missing
// from this excerpt, so the control flow linking the statements below cannot
// be fully confirmed from here.
47 char * LDOM_CharReference::Decode (char * theSrc, Standard_Integer& theLen)
// True when the bytes at _ptr start with the string literal _string
// (sizeof - 1 leaves the literal's terminating NUL out of the comparison).
49 #define IS_EQUAL(_ptr,_string) (!memcmp(_ptr, _string, sizeof(_string)-1))
// aSrcPtr is the read position, aDstPtr the write position.
51 char * aSrcPtr = theSrc, * aDstPtr = theSrc;
// Number of bytes by which the write position trails the read position
// (the write position is recomputed below as aSrcPtr - anIncrCount).
52 Standard_Integer anIncrCount = 0;
// Look for the next '&' starting at the read position.
54 char * aPtr = strchr (aSrcPtr, '&');
// Locate the terminating NUL of the string.
57 aPtr = strchr (aSrcPtr, '\0');
// Length measured from the start of the string to the NUL.
59 theLen = (Standard_Integer)(aPtr - theSrc);
// Move the remaining tail, including its NUL (hence + 1), to the write
// position; the resulting length is the bytes already written plus the tail.
61 Standard_Integer aByteCount = (Standard_Integer)(aPtr - aSrcPtr);
62 memmove (aDstPtr, aSrcPtr, aByteCount + 1);
63 theLen = (Standard_Integer)(aDstPtr - theSrc) + aByteCount;
// Move the plain text that precedes the '&' to the write position; skipped
// when there is no such text or when read and write positions coincide.
67 Standard_Integer aByteCount = (Standard_Integer)(aPtr - aSrcPtr);
68 if (aByteCount > 0 && aDstPtr != aSrcPtr)
69 memmove (aDstPtr, aSrcPtr, aByteCount);
// "&#" introduces a numeric character reference.
71 if (aSrcPtr[1] == '#') {
// Write position just past the byte that receives the decoded character.
74 aDstPtr = aSrcPtr - anIncrCount + 1;
75 if (aSrcPtr[2] == 'x')
76 aChar = strtoul (&aSrcPtr[3], &aNewPtr, 16); // hex encoding
78 aChar = strtoul (&aSrcPtr[2], &aNewPtr, 10); // decimal encoding
// The reference is treated as an error unless the number is followed by ';'
// and its value lies in the range 1..255.
79 if (aNewPtr[0] != ';' || aChar == 0 || aChar > 255UL)
80 // Error reading an XML string
// Store the decoded byte in the slot just before the write position.
82 aDstPtr[-1] = (char) aChar;
// The gap between read and write positions grows by (aNewPtr - aSrcPtr).
83 anIncrCount += (Standard_Integer)(aNewPtr - aSrcPtr);
// Continue reading after the terminating ';'.
84 aSrcPtr = &aNewPtr[1];
// Predefined entity references: each branch recomputes the write position in
// the same way as the numeric case above.
86 else if (IS_EQUAL(aSrcPtr+1, "amp;")) {
87 aDstPtr = aSrcPtr - anIncrCount + 1;
92 else if (IS_EQUAL(aSrcPtr+1, "lt;")) {
93 aDstPtr = aSrcPtr - anIncrCount + 1;
98 else if (IS_EQUAL(aSrcPtr+1, "gt;")) {
99 aDstPtr = aSrcPtr - anIncrCount + 1;
104 else if (IS_EQUAL(aSrcPtr+1, "quot;")) {
105 aDstPtr = aSrcPtr - anIncrCount + 1;
110 else if (IS_EQUAL(aSrcPtr+1, "apos;")) {
111 aDstPtr = aSrcPtr - anIncrCount + 1;
// Copy a single character to the write position and advance both pointers.
117 aDstPtr = aSrcPtr - anIncrCount;
118 * aDstPtr++ = * aSrcPtr++;
125 //=======================================================================
127 //purpose : This method takes the input string theSrc and returns:
128 // - the pointer equal to theSrc if there are no replacements, or
129 // - the pointer to a newly allocated string with replacements
130 // The output parameter theLen is assigned to the length of
131 // the returned string (whatever the case)
132 //=======================================================================
// Encodes theSrc for output: characters whose myTab entry is CHAR_REF are
// written as "&#xHH;", characters mapped to an ENTI_* code are replaced by
// the matching entity_ref[] text, and all other characters are copied as is.
//   theSrc      - input string
//   theLen      - output: length of the source string or, once a copy has
//                 been produced, of the encoded result
//   isAttribute - when false, characters classified as ENTI_QUOT are copied
//                 unchanged instead of being replaced
// NOTE(review): several original lines (braces, the scanning loop header, the
// aCount increment, the return statements) are missing from this excerpt.
134 char * LDOM_CharReference::Encode (const char* theSrc, Standard_Integer& theLen,
135 const Standard_Boolean isAttribute)
137 // Initialising the constants
// Table indexed by the ENTI_* codes; each entry pairs a replacement text with
// its length.
// NOTE(review): the literals below look as if their entity text was unescaped
// when this excerpt was extracted - the declared lengths (5, 4, 4, 6, 6) are
// larger than the visible literals and would match "&amp;", "&lt;", "&gt;",
// "&quot;", "&apos;". The memcpy further down copies length+1 bytes from
// 'name', so verify these strings against the repository copy of the file.
138 static const struct entityRef entity_ref[6] = {
140 entityRef("&", 5),
141 entityRef("<", 4),
142 entityRef(">", 4),
143 entityRef(""", 6),
144 entityRef("'", 6)
147 const char * endSrc, * ptrSrc = theSrc;
// Starts out aliasing the input; presumably re-pointed to the new buffer in a
// line not visible here - TODO confirm.
148 char * aDest = (char *) theSrc;
// Presumably counts the characters that need a replacement (its increment is
// not visible in this excerpt); it scales the allocation size below.
149 Standard_Integer aCount = 0;
150 // Analyse if there is a non-standard character in the string
// Read the byte as unsigned so that it can index the 256-entry myTab.
152 const unsigned int iSrc = (unsigned int ) *(const unsigned char* )ptrSrc;
// A character needs replacement when it is not NORMAL_C; an ENTI_QUOT
// character qualifies only when isAttribute is set.
157 if (myTab[iSrc] != NORMAL_C)
158 if (isAttribute || myTab[iSrc] != ENTI_QUOT)
162 // If there are such, copy the string with replacements
// Length of the source string.
164 theLen = (Standard_Integer)(endSrc - theSrc);
// Buffer allocated with new[]: source length, plus 5 extra bytes per counted
// character (one source byte becomes at most 6 output bytes, e.g. "&#xHH;"),
// plus 1 for the terminator.
166 char * ptrDest = new char [(endSrc - theSrc) + aCount * 5 + 1];
168 for (ptrSrc = theSrc; ptrSrc < endSrc; ptrSrc++) {
169 const unsigned int iSrc = (unsigned int ) *(const unsigned char* )ptrSrc;
170 const int aCode = myTab[iSrc];
171 if (aCode == NORMAL_C) // normal (regular) character
172 * ptrDest++ = * ptrSrc;
173 else if (aCode == CHAR_REF) { // character reference
// Write the byte value as a two-digit (zero-padded) hexadecimal reference.
174 sprintf (ptrDest, "&#x%02x;", iSrc);
176 } else // predefined entity reference
// Outside attribute values, characters classified as ENTI_QUOT are copied
// unchanged.
177 if (isAttribute == Standard_False && aCode == ENTI_QUOT)
178 * ptrDest++ = * ptrSrc;
// Copy the entity text plus one extra byte (length+1); ptrDest advances by
// 'length' only, so the next write overwrites that extra byte.
180 memcpy (ptrDest, entity_ref[aCode].name, entity_ref[aCode].length+1);
181 ptrDest += entity_ref[aCode].length;
// Length of the produced string: distance from aDest to the write position.
184 theLen = (Standard_Integer)(ptrDest - aDest);
190 int LDOM_CharReference::myTab [256] = {
230 // ENTI_APOS, // 027: ' We never use the apostrophe as a delimiter here
284 NORMAL_C, /* 05c: \ */
319 NORMAL_C, // 07f:
\7f
384 #ifdef LDOM_ALLOW_LATIN_1
516 #endif // LDOM_ALLOW_LATIN_1