0027960: Configuration - fix compilation of OSD_Directory with MinGW-w64
[occt.git] / src / NCollection / NCollection_UtfString.hxx
CommitLineData
// Created on: 2013-01-28
// Created by: Kirill GAVRILOV
// Copyright (c) 2013-2014 OPEN CASCADE SAS
//
// This file is part of Open CASCADE Technology software library.
//
// This library is free software; you can redistribute it and/or modify it under
// the terms of the GNU Lesser General Public License version 2.1 as published
// by the Free Software Foundation, with special exception defined in the file
// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
// distribution for complete text of the license and disclaimer of any warranty.
//
// Alternatively, this file may be used under the terms of Open CASCADE
// commercial license or contractual agreement.

16#ifndef _NCollection_UtfString_H__
17#define _NCollection_UtfString_H__
18
19#include "NCollection_UtfIterator.hxx"
20
21#include <Standard.hxx>
22
2cb44241 23#include <cstring>
a174a3c5 24#include <cstdlib>
25
26//! This template class represent constant UTF-* string.
27//! String stored in memory continuously, always NULL-terminated
28//! and can be used as standard C-string using ToCString() method.
29//!
30//! Notice that changing the string is not allowed
31//! and any modifications should produce new string.
32template<typename Type>
33class NCollection_UtfString
34{
35
36public:
37
38 NCollection_UtfIterator<Type> Iterator() const
39 {
40 return NCollection_UtfIterator<Type> (myString);
41 }
42
43 //! @return the size of the buffer, excluding NULL-termination symbol
44 Standard_Integer Size() const
45 {
46 return mySize;
47 }
48
49 //! @return the length of the string in Unicode symbols
50 Standard_Integer Length() const
51 {
52 return myLength;
53 }
54
55 //! Retrieve Unicode symbol at specified position.
56 //! Warning! This is a slow access. Iterator should be used for consecutive parsing.
57 //! @param theCharIndex the index of the symbol, should be lesser than Length()
58 //! @return the Unicode symbol value
59 Standard_Utf32Char GetChar (const Standard_Integer theCharIndex) const;
60
61 //! Retrieve string buffer at specified position.
62 //! Warning! This is a slow access. Iterator should be used for consecutive parsing.
63 //! @param theCharIndex the index of the symbol, should be lesser than Length()
64 //! @return the pointer to the symbol
65 const Type* GetCharBuffer (const Standard_Integer theCharIndex) const;
66
67 //! Retrieve Unicode symbol at specified position.
68 //! Warning! This is a slow access. Iterator should be used for consecutive parsing.
69 Standard_Utf32Char operator[] (const Standard_Integer theCharIndex) const
70 {
71 return GetChar (theCharIndex);
72 }
73
74 //! Initialize empty string.
75 NCollection_UtfString();
76
77 //! Copy constructor.
78 //! @param theCopy string to copy.
79 NCollection_UtfString (const NCollection_UtfString& theCopy);
80
81 //! Copy constructor from NULL-terminated UTF-8 string.
82 //! @param theCopyUtf8 NULL-terminated UTF-8 string to copy
83 //! @param theLength the length limit in Unicode symbols (NOT bytes!)
84 NCollection_UtfString (const char* theCopyUtf8,
85 const Standard_Integer theLength = -1);
86
87 //! Copy constructor from NULL-terminated UTF-16 string.
88 //! @param theCopyUtf16 NULL-terminated UTF-16 string to copy
89 //! @param theLength the length limit in Unicode symbols (NOT bytes!)
90 NCollection_UtfString (const Standard_Utf16Char* theCopyUtf16,
91 const Standard_Integer theLength = -1);
92
93 //! Copy constructor from NULL-terminated UTF-32 string.
94 //! @param theCopyUtf32 NULL-terminated UTF-32 string to copy
95 //! @param theLength the length limit in Unicode symbols (NOT bytes!)
96 NCollection_UtfString (const Standard_Utf32Char* theCopyUtf32,
97 const Standard_Integer theLength = -1);
98
15173be5 99#if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) || (defined(_MSC_VER) && _MSC_VER >= 1900)
a174a3c5 100 //! Copy constructor from NULL-terminated wide UTF string.
101 //! @param theCopyUtfWide NULL-terminated wide UTF string to copy
102 //! @param theLength the length limit in Unicode symbols (NOT bytes!)
fb0b0531 103 //!
104 //! This constructor is undefined if Standard_WideChar is the same type as Standard_Utf16Char.
a174a3c5 105 NCollection_UtfString (const Standard_WideChar* theCopyUtfWide,
106 const Standard_Integer theLength = -1);
fb0b0531 107#endif
a174a3c5 108
109 //! Copy from NULL-terminated Unicode string.
110 //! @param theStringUtf NULL-terminated Unicode string
111 //! @param theLength the length limit in Unicode symbols
112 template <typename TypeFrom>
113 void FromUnicode (const TypeFrom* theStringUtf,
114 const Standard_Integer theLength = -1);
115
116 //! Copy from NULL-terminated multibyte string in system locale.
117 //! You should avoid this function unless extreme necessity.
118 //! @param theString NULL-terminated multibyte string
119 //! @param theLength the length limit in Unicode symbols
120 void FromLocale (const char* theString,
121 const Standard_Integer theLength = -1);
122
123 //! Destructor.
124 ~NCollection_UtfString();
125
126 //! Compares this string with another one.
127 bool IsEqual (const NCollection_UtfString& theCompare) const;
128
129 //! Returns the substring.
130 //! @param theStart start index (inclusive) of subString
131 //! @param theEnd end index (exclusive) of subString
132 //! @return the substring
133 NCollection_UtfString SubString (const Standard_Integer theStart,
134 const Standard_Integer theEnd) const;
135
136 //! Returns NULL-terminated Unicode string.
137 //! Should not be modifed or deleted!
138 //! @return (const Type* ) pointer to string
139 const Type* ToCString() const
140 {
141 return myString;
142 }
143
144 //! @return copy in UTF-8 format
145 const NCollection_UtfString<Standard_Utf8Char> ToUtf8() const;
146
147 //! @return copy in UTF-16 format
148 const NCollection_UtfString<Standard_Utf16Char> ToUtf16() const;
149
150 //! @return copy in UTF-32 format
151 const NCollection_UtfString<Standard_Utf32Char> ToUtf32() const;
152
153 //! @return copy in wide format (UTF-16 on Windows and UTF-32 on Linux)
154 const NCollection_UtfString<Standard_WideChar> ToUtfWide() const;
155
156 //! Converts the string into multibyte string.
157 //! You should avoid this function unless extreme necessity.
158 //! @param theBuffer output buffer
159 //! @param theSizeBytes buffer size in bytes
160 //! @return true on success
161 bool ToLocale (char* theBuffer,
162 const Standard_Integer theSizeBytes) const;
163
164 //! @return true if string is empty
165 bool IsEmpty() const
166 {
167 return myString[0] == Type(0);
168 }
169
170 //! Zero string.
171 void Clear();
172
173public: //! @name assign operators
174
175 //! Copy from another string.
176 const NCollection_UtfString& operator= (const NCollection_UtfString& theOther);
177
178 //! Copy from UTF-8 NULL-terminated string.
179 const NCollection_UtfString& operator= (const char* theStringUtf8);
180
181 //! Copy from wchar_t UTF NULL-terminated string.
182 const NCollection_UtfString& operator= (const Standard_WideChar* theStringUtfWide);
183
184 //! Join strings.
185 NCollection_UtfString& operator+= (const NCollection_UtfString& theAppend);
186
187 //! Join two strings.
188 friend NCollection_UtfString operator+ (const NCollection_UtfString& theLeft,
189 const NCollection_UtfString& theRight)
190 {
191 NCollection_UtfString aSumm;
192 strFree (aSumm.myString);
193 aSumm.mySize = theLeft.mySize + theRight.mySize;
194 aSumm.myLength = theLeft.myLength + theRight.myLength;
195 aSumm.myString = strAlloc (aSumm.mySize);
196
197 // copy bytes
198 strCopy ((Standard_Byte* )aSumm.myString, (const Standard_Byte* )theLeft.myString, theLeft.mySize);
199 strCopy ((Standard_Byte* )aSumm.myString + theLeft.mySize, (const Standard_Byte* )theRight.myString, theRight.mySize);
200 return aSumm;
201 }
202
203public: //! @name compare operators
204
205 bool operator== (const NCollection_UtfString& theCompare) const
206 {
207 return IsEqual (theCompare);
208 }
209 bool operator!= (const NCollection_UtfString& theCompare) const;
210
211private: //! @name low-level methods
212
213 //! Compute advance for specified string.
214 //! @param theStringUtf pointer to the NULL-terminated Unicode string
215 //! @param theLengthMax length limit (to cut the string), set to -1 to compute up to NULL-termination symbol
216 //! @param theSizeBytes advance in bytes (out)
217 //! @param theLength string length (out)
218 template<typename TypeFrom>
219 static void strGetAdvance (const TypeFrom* theStringUtf,
220 const Standard_Integer theLengthMax,
221 Standard_Integer& theSizeBytes,
222 Standard_Integer& theLength);
223
224 //! Allocate NULL-terminated string buffer.
225 static Type* strAlloc (const Standard_Size theSizeBytes)
226 {
227 Type* aPtr = (Type* )Standard::Allocate (theSizeBytes + sizeof(Type));
228 if (aPtr != NULL)
229 {
230 // always NULL-terminate the string
231 aPtr[theSizeBytes / sizeof(Type)] = Type(0);
232 }
233 return aPtr;
234 }
235
236 //! Release string buffer and nullify the pointer.
237 static void strFree (Type*& thePtr)
238 {
547702a1 239 Standard::Free (thePtr);
a174a3c5 240 }
241
242 //! Provides bytes interface to avoid incorrect pointer arithmetics.
243 static void strCopy (Standard_Byte* theStrDst,
244 const Standard_Byte* theStrSrc,
245 const Standard_Integer theSizeBytes)
246 {
2cb44241 247 std::memcpy (theStrDst, theStrSrc, (Standard_Size )theSizeBytes);
a174a3c5 248 }
249
250 //! Compare two Unicode strings per-byte.
251 static bool strAreEqual (const Type* theString1,
252 const Standard_Integer theSizeBytes1,
253 const Type* theString2,
254 const Standard_Integer theSizeBytes2)
255 {
256 return (theSizeBytes1 == theSizeBytes2)
2cb44241 257 && (std::memcmp (theString1, theString2, (Standard_Size )theSizeBytes1) == 0);
a174a3c5 258 }
259
260private: //! @name private fields
261
262 Type* myString; //!< string buffer
263 Standard_Integer mySize; //!< buffer size in bytes, excluding NULL-termination symbol
264 Standard_Integer myLength; //!< length of the string in Unicode symbols (cached value, excluding NULL-termination symbol)
265
266};
267
268typedef NCollection_UtfString<Standard_Utf8Char> NCollection_Utf8String;
269typedef NCollection_UtfString<Standard_Utf16Char> NCollection_Utf16String;
270typedef NCollection_UtfString<Standard_Utf32Char> NCollection_Utf32String;
271typedef NCollection_UtfString<Standard_WideChar> NCollection_UtfWideString;
272
// template implementation (inline methods)
274#include "NCollection_UtfString.lxx"
275
276#endif // _NCollection_UtfString_H__