- Introduces helper functions for consistent padded memory allocation across AsciiString and ExtendedString classes
- Improves documentation for UTF-8 conversion methods with clearer buffer allocation guidance
- Adds comprehensive test coverage for string constructors, memory allocation, and UTF-8 conversion functionality
// Alternatively, this file may be used under the terms of Open CASCADE
// commercial license or contractual agreement.
+#include <TCollection_ExtendedString.hxx>
#include <TCollection_AsciiString.hxx>
#include <gtest/gtest.h>
EXPECT_STREQ("abcde", aString.ToCString());
EXPECT_STREQ("fghij", remainder.ToCString());
}
+
+TEST(TCollection_AsciiStringTest, MemoryAllocation)
+{
+  // A string built from a short literal reports exactly its character count.
+  TCollection_AsciiString aLiteral("test");
+  EXPECT_EQ(4, aLiteral.Length());
+
+  // Filler-constructed strings of every size from 0 through 20 keep the requested length.
+  Standard_Integer aCount = 0;
+  while (aCount <= 20)
+  {
+    TCollection_AsciiString aFilled(aCount, 'A');
+    EXPECT_EQ(aCount, aFilled.Length());
+    ++aCount;
+  }
+}
+
+TEST(TCollection_AsciiStringTest, LengthConstructor)
+{
+  // Source text from which prefixes limited by a maximum length are built.
+  const Standard_CString aText = "This is a very long string";
+
+  // A limit of 4 keeps only the first word.
+  TCollection_AsciiString aFirstFour(aText, 4);
+  EXPECT_EQ(4, aFirstFour.Length());
+  EXPECT_STREQ("This", aFirstFour.ToCString());
+
+  // A limit of 7 keeps the first two words.
+  TCollection_AsciiString aFirstSeven(aText, 7);
+  EXPECT_EQ(7, aFirstSeven.Length());
+  EXPECT_STREQ("This is", aFirstSeven.ToCString());
+
+  // A limit larger than the source is expected to yield the whole source (26 characters).
+  TCollection_AsciiString aWhole(aText, 100);
+  EXPECT_EQ(26, aWhole.Length());
+  EXPECT_STREQ(aText, aWhole.ToCString());
+}
+
+TEST(TCollection_AsciiStringTest, ExtendedStringConversion)
+{
+  // Build an AsciiString from an ExtendedString holding plain ASCII text.
+  TCollection_ExtendedString aWideSource("Hello World");
+  TCollection_AsciiString    aNarrowCopy(aWideSource);
+
+  // Both the character count and the content must survive the conversion.
+  EXPECT_EQ(aWideSource.Length(), aNarrowCopy.Length());
+  EXPECT_STREQ("Hello World", aNarrowCopy.ToCString());
+}
+
+TEST(TCollection_AsciiStringTest, NumericalConstructors)
+{
+  // Integer value is rendered as its decimal text.
+  TCollection_AsciiString aFromInteger(42);
+  EXPECT_STREQ("42", aFromInteger.ToCString());
+
+  // Real value: only require that the rendered text contains "3.14",
+  // since the exact formatting of trailing digits is not pinned here.
+  TCollection_AsciiString aFromReal(3.14);
+  const Standard_CString  aRendered = aFromReal.ToCString();
+  EXPECT_TRUE(strstr(aRendered, "3.14") != nullptr);
+}
+
+TEST(TCollection_AsciiStringTest, FillerConstructor)
+{
+  // Five copies of the filler character must produce "*****".
+  TCollection_AsciiString aStars(5, '*');
+  EXPECT_STREQ("*****", aStars.ToCString());
+  EXPECT_EQ(5, aStars.Length());
+}
+
+TEST(TCollection_AsciiStringTest, ConcatenationConstructors)
+{
+  // Common left-hand operand for all three concatenating constructors.
+  TCollection_AsciiString aPrefix("Hello");
+
+  // AsciiString + single character.
+  TCollection_AsciiString aPlusChar(aPrefix, '!');
+  EXPECT_STREQ("Hello!", aPlusChar.ToCString());
+
+  // AsciiString + C string.
+  TCollection_AsciiString aPlusCString(aPrefix, " World");
+  EXPECT_STREQ("Hello World", aPlusCString.ToCString());
+
+  // AsciiString + AsciiString.
+  TCollection_AsciiString aSuffix(" Universe");
+  TCollection_AsciiString aPlusString(aPrefix, aSuffix);
+  EXPECT_STREQ("Hello Universe", aPlusString.ToCString());
+}
+
+TEST(TCollection_AsciiStringTest, EdgeCases)
+{
+  // A default-constructed string and one built from "" must be equal and empty.
+  TCollection_AsciiString aDefaultEmpty;
+  TCollection_AsciiString aLiteralEmpty("");
+
+  EXPECT_TRUE(aDefaultEmpty.IsEqual(aLiteralEmpty));
+  EXPECT_EQ(0, aDefaultEmpty.Length());
+  EXPECT_TRUE(aDefaultEmpty.IsEmpty());
+
+  // Constructing from the null character must also give an empty string.
+  TCollection_AsciiString aFromNullChar('\0');
+  EXPECT_EQ(0, aFromNullChar.Length());
+  EXPECT_TRUE(aFromNullChar.IsEmpty());
+}
+
+TEST(TCollection_AsciiStringTest, LargeStrings)
+{
+  // A 1000-character filled string exercises allocation well beyond the small sizes.
+  const Standard_Integer  aCharCount = 1000;
+  TCollection_AsciiString aBigString(aCharCount, 'X');
+
+  EXPECT_EQ(aCharCount, aBigString.Length());
+
+  // Value() is 1-based: check the first and the last character.
+  EXPECT_EQ('X', aBigString.Value(1));
+  EXPECT_EQ('X', aBigString.Value(aCharCount));
+}
+
+TEST(TCollection_AsciiStringTest, PaddingSafety)
+{
+  // Lengths 1..16 span several multiples of the 4-byte padding step.
+  for (Standard_Integer aLen = 1; aLen <= 16; ++aLen)
+  {
+    TCollection_AsciiString aFilled(aLen, 'A');
+    EXPECT_EQ(aLen, aFilled.Length());
+
+    const Standard_CString aRaw = aFilled.ToCString();
+
+    // The terminator must sit immediately after the last character.
+    EXPECT_EQ('\0', aRaw[aLen]);
+
+    // Every character before the terminator must be the filler.
+    for (Standard_Integer aPos = 0; aPos < aLen; ++aPos)
+    {
+      EXPECT_EQ('A', aRaw[aPos]);
+    }
+  }
+}
TCollection_AsciiString asciiResult(aString);
EXPECT_STREQ("HellXX WXXrld", asciiResult.ToCString());
}
+
+TEST(TCollection_ExtendedStringTest, UTF8Conversion)
+{
+  // Test the LengthOfCString() and ToUTF8CString() combination
+  TCollection_ExtendedString aString("Hello World");
+
+  // ASSERT (not EXPECT): the size is used for the allocation below, so a non-positive
+  // value must abort the test before it reaches new[].
+  const Standard_Integer aBufferSize = aString.LengthOfCString();
+  ASSERT_GT(aBufferSize, 0);
+
+  // Allocate buffer with +1 for null terminator (external usage pattern)
+  Standard_PCharacter    aBuffer        = new Standard_Character[aBufferSize + 1];
+  const Standard_Integer anActualLength = aString.ToUTF8CString(aBuffer);
+
+  EXPECT_EQ(aBufferSize, anActualLength);
+  // Index with the allocated size rather than the returned length, so a wrong
+  // return value is reported by the check above instead of causing an out-of-bounds read.
+  EXPECT_EQ('\0', aBuffer[aBufferSize]);
+  EXPECT_STREQ("Hello World", aBuffer);
+
+  delete[] aBuffer;
+}
+
+TEST(TCollection_ExtendedStringTest, UTF8ConversionUnicode)
+{
+  // Test UTF-8 conversion with Unicode characters
+  const Standard_ExtCharacter aUnicodeStr[] =
+    {0x0048, 0x00E9, 0x006C, 0x006C, 0x006F, 0}; // "H(e-acute)llo"
+  TCollection_ExtendedString aString(aUnicodeStr);
+
+  // ASSERT (not EXPECT): the size is used for the allocation below, so an unexpected
+  // value must abort the test before it reaches new[].
+  const Standard_Integer aBufferSize = aString.LengthOfCString();
+  ASSERT_GT(aBufferSize, 5); // Should be more than 5 due to UTF-8 encoding
+
+  Standard_PCharacter    aBuffer        = new Standard_Character[aBufferSize + 1];
+  const Standard_Integer anActualLength = aString.ToUTF8CString(aBuffer);
+
+  EXPECT_EQ(aBufferSize, anActualLength);
+  // Index with the allocated size rather than the returned length, so a wrong
+  // return value is reported by the check above instead of causing an out-of-bounds read.
+  EXPECT_EQ('\0', aBuffer[aBufferSize]);
+
+  delete[] aBuffer;
+}
+
+TEST(TCollection_ExtendedStringTest, WideCharConstructor)
+{
+  // Construct from a wide-character literal and check that something was stored.
+  const Standard_WideChar*   aWideLiteral = L"Wide string test";
+  TCollection_ExtendedString aFromWide(aWideLiteral);
+
+  EXPECT_FALSE(aFromWide.IsEmpty());
+  EXPECT_GT(aFromWide.Length(), 0);
+}
+
+TEST(TCollection_ExtendedStringTest, NumericalConstructors)
+{
+  // Integer value: convert to AsciiString to compare the rendered digits.
+  TCollection_ExtendedString aWideFromInt(42);
+  TCollection_AsciiString    aNarrowFromInt(aWideFromInt);
+  EXPECT_STREQ("42", aNarrowFromInt.ToCString());
+
+  // Real value: only require that the rendered text contains "3.14",
+  // since the exact formatting of trailing digits is not pinned here.
+  TCollection_ExtendedString aWideFromReal(3.14);
+  TCollection_AsciiString    aNarrowFromReal(aWideFromReal);
+  const Standard_CString     aRendered = aNarrowFromReal.ToCString();
+  EXPECT_TRUE(strstr(aRendered, "3.14") != nullptr);
+}
+
+TEST(TCollection_ExtendedStringTest, FillerConstructor)
+{
+  // Five copies of the filler character.
+  TCollection_ExtendedString aWideFilled(5, 'X');
+  EXPECT_EQ(5, aWideFilled.Length());
+
+  // Compare the content through an AsciiString conversion.
+  TCollection_AsciiString aNarrowFilled(aWideFilled);
+  EXPECT_STREQ("XXXXX", aNarrowFilled.ToCString());
+}
+
+TEST(TCollection_ExtendedStringTest, ExtendedCharConstructor)
+{
+  // Build a one-character string from a single non-ASCII extended character.
+  const Standard_ExtCharacter aEuroSign = 0x20AC; // Euro symbol
+  TCollection_ExtendedString  aSingle(aEuroSign);
+
+  EXPECT_EQ(1, aSingle.Length());
+  EXPECT_FALSE(aSingle.IsAscii());
+  EXPECT_EQ(aEuroSign, aSingle.Value(1));
+}
+
+TEST(TCollection_ExtendedStringTest, UnicodeCharacters)
+{
+  // Code units taken from different Unicode ranges.
+  const Standard_ExtCharacter aCapitalA  = 0x0041; // 'A'
+  const Standard_ExtCharacter anEAcute   = 0x00E9; // 'e-acute'
+  const Standard_ExtCharacter aEuroSign  = 0x20AC; // Euro symbol
+  const Standard_ExtCharacter aCJKSymbol = 0x4E2D; // Chinese character
+
+  // Zero-terminated sequence used to construct the string.
+  const Standard_ExtCharacter aSequence[] = {aCapitalA, anEAcute, aEuroSign, aCJKSymbol, 0};
+  TCollection_ExtendedString  aMixed(aSequence);
+
+  // The string as a whole is not ASCII and holds four characters.
+  EXPECT_EQ(4, aMixed.Length());
+
+  // Each character must be stored unchanged at its 1-based position.
+  EXPECT_EQ(aCapitalA, aMixed.Value(1));
+  EXPECT_EQ(anEAcute, aMixed.Value(2));
+  EXPECT_EQ(aEuroSign, aMixed.Value(3));
+  EXPECT_EQ(aCJKSymbol, aMixed.Value(4));
+  EXPECT_FALSE(aMixed.IsAscii());
+}
+
+TEST(TCollection_ExtendedStringTest, AsciiDetection)
+{
+  // Plain ASCII text must be detected as ASCII.
+  TCollection_ExtendedString aPlainText("Simple ASCII");
+  EXPECT_TRUE(aPlainText.IsAscii());
+
+  // A single Euro symbol after 'A' must make the string non-ASCII.
+  const Standard_ExtCharacter aWithEuro[] = {0x0041, 0x20AC, 0}; // A + Euro
+  TCollection_ExtendedString  aMixedText(aWithEuro);
+  EXPECT_FALSE(aMixedText.IsAscii());
+}
+
+TEST(TCollection_ExtendedStringTest, EmptyStringHandling)
+{
+  // Test empty string operations
+  TCollection_ExtendedString anEmptyString;
+  EXPECT_EQ(0, anEmptyString.Length());
+  EXPECT_TRUE(anEmptyString.IsEmpty());
+  EXPECT_EQ(0, anEmptyString.LengthOfCString());
+
+  // One-byte buffer pre-filled with a non-zero sentinel: the terminator check below
+  // can then only pass if ToUTF8CString() actually wrote the '\0'. Stack storage
+  // avoids both reading uninitialised heap memory and a manual delete[].
+  Standard_Character  aStorage[1] = {'#'};
+  Standard_PCharacter aBuffer     = aStorage; // ToUTF8CString() takes the pointer by reference
+
+  const Standard_Integer aLength = anEmptyString.ToUTF8CString(aBuffer);
+  EXPECT_EQ(0, aLength);
+  EXPECT_EQ('\0', aStorage[0]);
+}
+
+TEST(TCollection_ExtendedStringTest, ConversionRoundTrip)
+{
+  // Text that goes AsciiString -> ExtendedString -> AsciiString.
+  const Standard_CString aText = "Test conversion with special chars: !@#$%";
+
+  TCollection_AsciiString    aNarrowStart(aText);
+  TCollection_ExtendedString aWideMiddle(aNarrowStart);
+  TCollection_AsciiString    aNarrowEnd(aWideMiddle);
+
+  // Content must be unchanged after the round trip.
+  EXPECT_STREQ(aText, aNarrowEnd.ToCString());
+
+  // Length must be preserved at every stage.
+  const Standard_Integer aStartLength = aNarrowStart.Length();
+  EXPECT_EQ(aStartLength, aWideMiddle.Length());
+  EXPECT_EQ(aStartLength, aNarrowEnd.Length());
+}
+
+TEST(TCollection_ExtendedStringTest, LargeStrings)
+{
+  // A 1000-character filled string exercises allocation well beyond the small sizes.
+  const Standard_Integer     aCharCount = 1000;
+  TCollection_ExtendedString aBigString(aCharCount, 'A');
+
+  EXPECT_EQ(aCharCount, aBigString.Length());
+
+  // Value() is 1-based: check the first and the last character.
+  EXPECT_EQ('A', aBigString.Value(1));
+  EXPECT_EQ('A', aBigString.Value(aCharCount));
+
+  EXPECT_TRUE(aBigString.IsAscii());
+}
+
+TEST(TCollection_ExtendedStringTest, MemoryAllocation)
+{
+  // Lengths 1..16 span several multiples of the 4-byte padding step.
+  for (Standard_Integer aLen = 1; aLen <= 16; ++aLen)
+  {
+    TCollection_ExtendedString aFilled(aLen, 'X');
+    EXPECT_EQ(aLen, aFilled.Length());
+    EXPECT_EQ('X', aFilled.Value(1));
+
+    // For a single-character string the last character is the first one, already checked.
+    if (aLen == 1)
+    {
+      continue;
+    }
+    EXPECT_EQ('X', aFilled.Value(aLen));
+  }
+}
+
+TEST(TCollection_ExtendedStringTest, MultiByteCString)
+{
+  // Construct from a C string with the multibyte flag enabled.
+  const Standard_CString     aSourceText = "Multi-byte test";
+  TCollection_ExtendedString aFromMultiByte(aSourceText, Standard_True);
+
+  // Only require that the result is non-empty.
+  EXPECT_FALSE(aFromMultiByte.IsEmpty());
+  EXPECT_GT(aFromMultiByte.Length(), 0);
+}
+
+TEST(TCollection_ExtendedStringTest, BoundaryValues)
+{
+  // Code units on either side of the ASCII / non-ASCII boundaries.
+  // Note: OCCT's IsAnAscii considers 0x00-0xFF as ASCII (full 8-bit range)
+  const Standard_ExtCharacter aSevenBitMax = 0x007F;
+  const Standard_ExtCharacter anEightBitMax = 0x00FF;
+  const Standard_ExtCharacter aFirstBeyond8Bit = 0x0100;
+  const Standard_ExtCharacter aBMPMax = 0xFFFF;
+
+  // 0x007F: last 7-bit character, reported as ASCII.
+  TCollection_ExtendedString aSevenBitString(aSevenBitMax);
+  EXPECT_EQ(1, aSevenBitString.Length());
+  EXPECT_TRUE(aSevenBitString.IsAscii());
+
+  // 0x00FF: last 8-bit character, still reported as ASCII by OCCT.
+  TCollection_ExtendedString anEightBitString(anEightBitMax);
+  EXPECT_EQ(1, anEightBitString.Length());
+  EXPECT_TRUE(anEightBitString.IsAscii());
+
+  // 0x0100: first character outside the 8-bit range, not ASCII.
+  TCollection_ExtendedString aBeyond8BitString(aFirstBeyond8Bit);
+  EXPECT_EQ(1, aBeyond8BitString.Length());
+  EXPECT_FALSE(aBeyond8BitString.IsAscii());
+
+  // 0xFFFF: largest 16-bit code unit, not ASCII.
+  TCollection_ExtendedString aBMPMaxString(aBMPMax);
+  EXPECT_EQ(1, aBMPMaxString.Length());
+  EXPECT_FALSE(aBMPMaxString.IsAscii());
+}
namespace
{
static char THE_DEFAULT_CHAR_STRING[1] = {'\0'};
+
+//! Calculate the padded allocation size in bytes for a string of theLength characters.
+//! The result is (theLength + 4) rounded down to a multiple of 4, which always leaves
+//! between +1 and +4 bytes beyond theLength: room for the null terminator plus some
+//! extra buffer, aligned to a 4-byte boundary.
+//! @param theLength number of characters to store (without null terminator)
+//! @return number of bytes to allocate
+inline Standard_Size calculatePaddedSize(const int theLength)
+{
+  // Widen before adding: doing "theLength + 4" in int is signed overflow (undefined
+  // behavior) for lengths close to INT_MAX. For all other inputs the result is unchanged.
+  const Standard_Size aLength = static_cast<Standard_Size>(theLength);
+  return (aLength + 4) & ~static_cast<Standard_Size>(0x3);
}
+} // namespace
// ----------------------------------------------------------------------------
// Create an empty AsciiString
else
{
// create UTF-8 string
+ // Note: allocate() adds padding (theLength + 4) & ~0x3, so no need for +1 for null terminator
allocate(astring.LengthOfCString());
astring.ToUTF8CString(mystring);
}
}
else
{
- const Standard_Size aRoundSize = (theLength + 4) & ~0x3;
+ const Standard_Size aRoundSize = calculatePaddedSize(theLength);
mystring = static_cast<Standard_PCharacter>(Standard::AllocateOptimal(aRoundSize));
mystring[mylength] = '\0';
}
{
if (mystring == THE_DEFAULT_CHAR_STRING)
{
- const Standard_Size aRoundSize = (theLength + 4) & ~0x3;
+ // Use same padding strategy as allocate() for consistency
+ const Standard_Size aRoundSize = calculatePaddedSize(theLength);
mystring = static_cast<Standard_PCharacter>(Standard::AllocateOptimal(aRoundSize));
}
else
{
- mystring = static_cast<Standard_PCharacter>(Standard::Reallocate(mystring, theLength + 1));
+ // For existing allocations, use padding size with Standard::Reallocate
+ const Standard_Size aRoundSize = calculatePaddedSize(theLength);
+ mystring = static_cast<Standard_PCharacter>(Standard::Reallocate(mystring, aRoundSize));
}
mystring[theLength] = '\0';
}
{
static Standard_ExtCharacter THE_DEFAULT_EXT_CHAR_STRING[1] = {0};
+//! Calculate the padded allocation size in bytes for ExtendedString (2-byte characters).
+//! Reserves theLength + 1 characters (the extra one is for the null terminator) and
+//! rounds the byte count up to a multiple of 4.
+//! @param theLength number of characters to store (without null terminator)
+//! @return number of bytes to allocate
+inline Standard_Size calculatePaddedSize(const int theLength)
+{
+  // Widen before adding: doing "theLength + 1" in int is signed overflow (undefined
+  // behavior) for theLength == INT_MAX. For all other inputs the result is unchanged.
+  const Standard_Size aCharCount = static_cast<Standard_Size>(theLength) + 1;
+  return (aCharCount * sizeof(Standard_ExtCharacter) + 3) & ~static_cast<Standard_Size>(0x3);
+}
+
//! Returns the number of 16-bit code units in Unicode string
template <typename T>
static Standard_Integer nbSymbols(const T* theUtfString)
{
return THE_DEFAULT_EXT_CHAR_STRING;
}
- const Standard_Size aRoundSize = (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3;
+ const Standard_Size aRoundSize = calculatePaddedSize(theLength);
Standard_ExtCharacter* aString =
static_cast<Standard_PExtCharacter>(Standard::AllocateOptimal(aRoundSize));
NCollection_UtfWideIter anIterRead(theUtfString);
{
return THE_DEFAULT_EXT_CHAR_STRING;
}
- const Standard_Size aRoundSize = (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3;
+ const Standard_Size aRoundSize = calculatePaddedSize(theLength);
Standard_ExtCharacter* aString =
static_cast<Standard_PExtCharacter>(Standard::AllocateOptimal(aRoundSize));
const Standard_Integer aSize = theLength * sizeof(Standard_ExtCharacter);
return Standard_True;
}
-//----------------------------------------------------------------------------
-// Returns expected CString length in UTF8 coding.
-//----------------------------------------------------------------------------
+//=================================================================================================
+
Standard_Integer TCollection_ExtendedString::LengthOfCString() const
{
Standard_Integer aSizeBytes = 0;
}
else
{
- const Standard_Size aRoundSize = (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3;
+ const Standard_Size aRoundSize = calculatePaddedSize(theLength);
mystring = static_cast<Standard_PExtCharacter>(Standard::AllocateOptimal(aRoundSize));
mystring[mylength] = '\0';
}
{
if (mystring == THE_DEFAULT_EXT_CHAR_STRING)
{
- const Standard_Size aRoundSize =
- (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3;
+ const Standard_Size aRoundSize = calculatePaddedSize(theLength);
mystring = static_cast<Standard_PExtCharacter>(Standard::AllocateOptimal(aRoundSize));
}
else
{
- mystring = static_cast<Standard_PExtCharacter>(
- Standard::Reallocate(mystring, (theLength + 1) * sizeof(Standard_ExtCharacter)));
+ // For reallocate, use padded size for consistency
+ const Standard_Size aRoundSize = calculatePaddedSize(theLength);
+ mystring = static_cast<Standard_PExtCharacter>(Standard::Reallocate(mystring, aRoundSize));
}
mystring[theLength] = 0;
}
//! <theCString> should be allocated before call!
Standard_EXPORT Standard_Integer ToUTF8CString(Standard_PCharacter& theCString) const;
- //! Returns expected CString length in UTF8 coding.
- //! It can be used for memory calculation before converting
- //! to CString containing symbols in UTF8 coding.
+ //! Returns expected CString length in UTF8 coding (like strlen, without null terminator).
+ //! It can be used for memory calculation before converting to CString containing symbols in UTF8
+ //! coding. For external allocation, use: char* buf = new char[str.LengthOfCString() + 1];
Standard_EXPORT Standard_Integer LengthOfCString() const;
private: