From: Pasukhin Dmitry Date: Fri, 8 Aug 2025 09:31:54 +0000 (+0100) Subject: Foundation Classes - Documentation update for TCollection (#665) X-Git-Url: http://git.dev.opencascade.org/gitweb/?a=commitdiff_plain;h=3c14e29dfc507e76c39011370b73d907783ddc12;p=occt.git Foundation Classes - Documentation update for TCollection (#665) - Introduces helper functions for consistent padded memory allocation across AsciiString and ExtendedString classes - Improves documentation for UTF-8 conversion methods with clearer buffer allocation guidance - Adds comprehensive test coverage for string constructors, memory allocation, and UTF-8 conversion functionality --- diff --git a/src/FoundationClasses/TKernel/GTests/TCollection_AsciiString_Test.cxx b/src/FoundationClasses/TKernel/GTests/TCollection_AsciiString_Test.cxx index 7249dd9ff6..75b02fb8f9 100644 --- a/src/FoundationClasses/TKernel/GTests/TCollection_AsciiString_Test.cxx +++ b/src/FoundationClasses/TKernel/GTests/TCollection_AsciiString_Test.cxx @@ -11,6 +11,7 @@ // Alternatively, this file may be used under the terms of Open CASCADE // commercial license or contractual agreement. +#include #include #include @@ -231,3 +232,130 @@ TEST(TCollection_AsciiStringTest, Split) EXPECT_STREQ("abcde", aString.ToCString()); EXPECT_STREQ("fghij", remainder.ToCString()); } + +TEST(TCollection_AsciiStringTest, MemoryAllocation) +{ + // Test memory allocation with various string lengths + TCollection_AsciiString aString1("test"); + EXPECT_EQ(4, aString1.Length()); + + // Test allocation with different lengths + for (Standard_Integer anIdx = 0; anIdx <= 20; ++anIdx) + { + TCollection_AsciiString aStr(anIdx, 'A'); + EXPECT_EQ(anIdx, aStr.Length()); + } +} + +TEST(TCollection_AsciiStringTest, LengthConstructor) +{ + // Test constructor with string and maximum length + const Standard_CString aSourceString = "This is a very long string"; + + TCollection_AsciiString aString1(aSourceString, 4); + EXPECT_EQ(4, aString1.Length()); + EXPECT_STREQ("This", aString1.ToCString()); + + TCollection_AsciiString aString2(aSourceString, 7); + EXPECT_EQ(7, aString2.Length()); + EXPECT_STREQ("This is", aString2.ToCString()); + + // Test with length exceeding source string + TCollection_AsciiString aString3(aSourceString, 100); + EXPECT_EQ(26, aString3.Length()); + EXPECT_STREQ(aSourceString, aString3.ToCString()); +} + +TEST(TCollection_AsciiStringTest, ExtendedStringConversion) +{ + // Test conversion from ExtendedString + TCollection_ExtendedString anExtString("Hello World"); + TCollection_AsciiString anAsciiString(anExtString); + + EXPECT_EQ(anExtString.Length(), anAsciiString.Length()); + EXPECT_STREQ("Hello World", anAsciiString.ToCString()); +} + +TEST(TCollection_AsciiStringTest, NumericalConstructors) +{ + // Test integer constructor + TCollection_AsciiString anIntString(42); + EXPECT_STREQ("42", anIntString.ToCString()); + + // Test real constructor + TCollection_AsciiString aRealString(3.14); + const Standard_CString aRealCStr = aRealString.ToCString(); + EXPECT_TRUE(strstr(aRealCStr, "3.14") != NULL); +} + +TEST(TCollection_AsciiStringTest, FillerConstructor) +{ + // Test constructor with length and filler character + TCollection_AsciiString aFilledString(5, '*'); + EXPECT_EQ(5, aFilledString.Length()); + EXPECT_STREQ("*****", aFilledString.ToCString()); +} + +TEST(TCollection_AsciiStringTest, ConcatenationConstructors) +{ + // Test string + character constructor + TCollection_AsciiString aBaseString("Hello"); + TCollection_AsciiString aStringWithChar(aBaseString, '!'); + EXPECT_STREQ("Hello!", aStringWithChar.ToCString()); + + // Test string + C string constructor + TCollection_AsciiString aStringWithCStr(aBaseString, " World"); + EXPECT_STREQ("Hello World", aStringWithCStr.ToCString()); + + // Test string + string constructor + TCollection_AsciiString aSecondString(" Universe"); + TCollection_AsciiString aCombinedString(aBaseString, aSecondString); + EXPECT_STREQ("Hello Universe", aCombinedString.ToCString()); +} + +TEST(TCollection_AsciiStringTest, EdgeCases) +{ + // Test empty string operations + TCollection_AsciiString anEmptyString1; + TCollection_AsciiString anEmptyString2(""); + + EXPECT_TRUE(anEmptyString1.IsEqual(anEmptyString2)); + EXPECT_EQ(0, anEmptyString1.Length()); + EXPECT_TRUE(anEmptyString1.IsEmpty()); + + // Test null character handling + TCollection_AsciiString aNullCharString('\0'); + EXPECT_EQ(0, aNullCharString.Length()); + EXPECT_TRUE(aNullCharString.IsEmpty()); +} + +TEST(TCollection_AsciiStringTest, LargeStrings) +{ + // Test with large strings to verify memory allocation + const Standard_Integer aLargeSize = 1000; + TCollection_AsciiString aLargeString(aLargeSize, 'X'); + + EXPECT_EQ(aLargeSize, aLargeString.Length()); + EXPECT_EQ('X', aLargeString.Value(1)); + EXPECT_EQ('X', aLargeString.Value(aLargeSize)); +} + +TEST(TCollection_AsciiStringTest, PaddingSafety) +{ + // Test that internal padding works correctly for various lengths + for (Standard_Integer anIdx = 1; anIdx <= 16; ++anIdx) + { + TCollection_AsciiString aTestString(anIdx, 'A'); + EXPECT_EQ(anIdx, aTestString.Length()); + + // Verify null termination + const Standard_CString aCString = aTestString.ToCString(); + EXPECT_EQ('\0', aCString[anIdx]); + + // Verify content + for (Standard_Integer aCharIdx = 0; aCharIdx < anIdx; ++aCharIdx) + { + EXPECT_EQ('A', aCString[aCharIdx]); + } + } +} diff --git a/src/FoundationClasses/TKernel/GTests/TCollection_ExtendedString_Test.cxx b/src/FoundationClasses/TKernel/GTests/TCollection_ExtendedString_Test.cxx index a261578ad7..2088fe054e 100644 --- a/src/FoundationClasses/TKernel/GTests/TCollection_ExtendedString_Test.cxx +++ b/src/FoundationClasses/TKernel/GTests/TCollection_ExtendedString_Test.cxx @@ -205,3 +205,211 @@ TEST(TCollection_ExtendedStringTest, ChangeAll) TCollection_AsciiString asciiResult(aString); EXPECT_STREQ("HellXX WXXrld", asciiResult.ToCString()); } + +TEST(TCollection_ExtendedStringTest, UTF8Conversion) +{ + // Test the LengthOfCString() and ToUTF8CString() combination + TCollection_ExtendedString aString("Hello World"); + + Standard_Integer aBufferSize = aString.LengthOfCString(); + EXPECT_GT(aBufferSize, 0); + + // Allocate buffer with +1 for null terminator (external usage pattern) + Standard_PCharacter aBuffer = new Standard_Character[aBufferSize + 1]; + Standard_Integer anActualLength = aString.ToUTF8CString(aBuffer); + + EXPECT_EQ(aBufferSize, anActualLength); + EXPECT_EQ('\0', aBuffer[anActualLength]); + EXPECT_STREQ("Hello World", aBuffer); + + delete[] aBuffer; +} + +TEST(TCollection_ExtendedStringTest, UTF8ConversionUnicode) +{ + // Test UTF-8 conversion with Unicode characters + const Standard_ExtCharacter aUnicodeStr[] = + {0x0048, 0x00E9, 0x006C, 0x006C, 0x006F, 0}; // "H(e-acute)llo" + TCollection_ExtendedString aString(aUnicodeStr); + + Standard_Integer aBufferSize = aString.LengthOfCString(); + EXPECT_GT(aBufferSize, 5); // Should be more than 5 due to UTF-8 encoding + + Standard_PCharacter aBuffer = new Standard_Character[aBufferSize + 1]; + Standard_Integer anActualLength = aString.ToUTF8CString(aBuffer); + + EXPECT_EQ(aBufferSize, anActualLength); + EXPECT_EQ('\0', aBuffer[anActualLength]); + + delete[] aBuffer; +} + +TEST(TCollection_ExtendedStringTest, WideCharConstructor) +{ + // Test constructor with wide characters + const Standard_WideChar* aWideStr = L"Wide string test"; + TCollection_ExtendedString aString(aWideStr); + + EXPECT_GT(aString.Length(), 0); + EXPECT_FALSE(aString.IsEmpty()); +} + +TEST(TCollection_ExtendedStringTest, NumericalConstructors) +{ + // Test integer constructor + TCollection_ExtendedString anIntString(42); + TCollection_AsciiString anAsciiFromInt(anIntString); + EXPECT_STREQ("42", anAsciiFromInt.ToCString()); + + // Test real constructor + TCollection_ExtendedString aRealString(3.14); + TCollection_AsciiString anAsciiFromReal(aRealString); + const Standard_CString aRealCStr = anAsciiFromReal.ToCString(); + EXPECT_TRUE(strstr(aRealCStr, "3.14") != NULL); +} + +TEST(TCollection_ExtendedStringTest, FillerConstructor) +{ + // Test constructor with length and filler character + TCollection_ExtendedString aFilledString(5, 'X'); + EXPECT_EQ(5, aFilledString.Length()); + + TCollection_AsciiString anAsciiFromFilled(aFilledString); + EXPECT_STREQ("XXXXX", anAsciiFromFilled.ToCString()); +} + +TEST(TCollection_ExtendedStringTest, ExtendedCharConstructor) +{ + // Test constructor with ExtendedCharacter + const Standard_ExtCharacter aEuroChar = 0x20AC; // Euro symbol + TCollection_ExtendedString aString(aEuroChar); + + EXPECT_EQ(1, aString.Length()); + EXPECT_FALSE(aString.IsAscii()); + EXPECT_EQ(aEuroChar, aString.Value(1)); +} + +TEST(TCollection_ExtendedStringTest, UnicodeCharacters) +{ + // Test various Unicode characters + const Standard_ExtCharacter aLatinA = 0x0041; // 'A' + const Standard_ExtCharacter aLatinE = 0x00E9; // 'e-acute' + const Standard_ExtCharacter aEuro = 0x20AC; // Euro symbol + const Standard_ExtCharacter aCJK = 0x4E2D; // Chinese character + + const Standard_ExtCharacter aUnicodeStr[] = {aLatinA, aLatinE, aEuro, aCJK, 0}; + TCollection_ExtendedString aString(aUnicodeStr); + + EXPECT_EQ(4, aString.Length()); + EXPECT_EQ(aLatinA, aString.Value(1)); + EXPECT_EQ(aLatinE, aString.Value(2)); + EXPECT_EQ(aEuro, aString.Value(3)); + EXPECT_EQ(aCJK, aString.Value(4)); + EXPECT_FALSE(aString.IsAscii()); +} + +TEST(TCollection_ExtendedStringTest, AsciiDetection) +{ + // Test ASCII detection + TCollection_ExtendedString anAsciiString("Simple ASCII"); + EXPECT_TRUE(anAsciiString.IsAscii()); + + const Standard_ExtCharacter aNonAsciiStr[] = {0x0041, 0x20AC, 0}; // A + Euro + TCollection_ExtendedString aNonAsciiString(aNonAsciiStr); + EXPECT_FALSE(aNonAsciiString.IsAscii()); +} + +TEST(TCollection_ExtendedStringTest, EmptyStringHandling) +{ + // Test empty string operations + TCollection_ExtendedString anEmptyString; + EXPECT_EQ(0, anEmptyString.Length()); + EXPECT_TRUE(anEmptyString.IsEmpty()); + EXPECT_EQ(0, anEmptyString.LengthOfCString()); + + Standard_PCharacter aBuffer = new Standard_Character[1]; + Standard_Integer aLength = anEmptyString.ToUTF8CString(aBuffer); + EXPECT_EQ(0, aLength); + EXPECT_EQ('\0', aBuffer[0]); + + delete[] aBuffer; +} + +TEST(TCollection_ExtendedStringTest, ConversionRoundTrip) +{ + // Test AsciiString <-> ExtendedString conversion + const Standard_CString anOriginalStr = "Test conversion with special chars: !@#$%"; + + TCollection_AsciiString anAsciiOriginal(anOriginalStr); + TCollection_ExtendedString anExtendedConverted(anAsciiOriginal); + TCollection_AsciiString anAsciiRoundTrip(anExtendedConverted); + + EXPECT_STREQ(anOriginalStr, anAsciiRoundTrip.ToCString()); + EXPECT_EQ(anAsciiOriginal.Length(), anExtendedConverted.Length()); + EXPECT_EQ(anAsciiOriginal.Length(), anAsciiRoundTrip.Length()); +} + +TEST(TCollection_ExtendedStringTest, LargeStrings) +{ + // Test with large strings + const Standard_Integer aLargeSize = 1000; + TCollection_ExtendedString aLargeString(aLargeSize, 'A'); + + EXPECT_EQ(aLargeSize, aLargeString.Length()); + EXPECT_EQ('A', aLargeString.Value(1)); + EXPECT_EQ('A', aLargeString.Value(aLargeSize)); + EXPECT_TRUE(aLargeString.IsAscii()); +} + +TEST(TCollection_ExtendedStringTest, MemoryAllocation) +{ + // Test memory allocation with various string lengths + for (Standard_Integer anIdx = 1; anIdx <= 16; ++anIdx) + { + TCollection_ExtendedString aTestString(anIdx, 'X'); + EXPECT_EQ(anIdx, aTestString.Length()); + EXPECT_EQ('X', aTestString.Value(1)); + + if (anIdx > 1) + { + EXPECT_EQ('X', aTestString.Value(anIdx)); + } + } +} + +TEST(TCollection_ExtendedStringTest, MultiByteCString) +{ + // Test constructor with multibyte flag + const Standard_CString aMultiByteStr = "Multi-byte test"; + TCollection_ExtendedString aString(aMultiByteStr, Standard_True); + + EXPECT_GT(aString.Length(), 0); + EXPECT_FALSE(aString.IsEmpty()); +} + +TEST(TCollection_ExtendedStringTest, BoundaryValues) +{ + // Test boundary Unicode values + // Note: OCCT's IsAnAscii considers 0x00-0xFF as ASCII (full 8-bit range) + const Standard_ExtCharacter aLastStandardAscii = 0x007F; + const Standard_ExtCharacter aLastOCCTAscii = 0x00FF; + const Standard_ExtCharacter aFirstExtended = 0x0100; + const Standard_ExtCharacter aMaxBMP = 0xFFFF; + + // Test individual characters + TCollection_ExtendedString aStringLastStandardAscii(aLastStandardAscii); + EXPECT_EQ(1, aStringLastStandardAscii.Length()); + EXPECT_TRUE(aStringLastStandardAscii.IsAscii()); + + TCollection_ExtendedString aStringLastOCCTAscii(aLastOCCTAscii); + EXPECT_EQ(1, aStringLastOCCTAscii.Length()); + EXPECT_TRUE(aStringLastOCCTAscii.IsAscii()); + + TCollection_ExtendedString aStringFirstExtended(aFirstExtended); + EXPECT_EQ(1, aStringFirstExtended.Length()); + EXPECT_FALSE(aStringFirstExtended.IsAscii()); + + TCollection_ExtendedString aStringMaxBMP(aMaxBMP); + EXPECT_EQ(1, aStringMaxBMP.Length()); + EXPECT_FALSE(aStringMaxBMP.IsAscii()); +} diff --git a/src/FoundationClasses/TKernel/TCollection/TCollection_AsciiString.cxx b/src/FoundationClasses/TKernel/TCollection/TCollection_AsciiString.cxx index 5e51c871b9..6c8f9b2c32 100644 --- a/src/FoundationClasses/TKernel/TCollection/TCollection_AsciiString.cxx +++ b/src/FoundationClasses/TKernel/TCollection/TCollection_AsciiString.cxx @@ -30,7 +30,15 @@ namespace { static char THE_DEFAULT_CHAR_STRING[1] = {'\0'}; + +//! Calculate padded allocation size: minimum +1 byte guaranteed, up to +4 bytes +//! This provides automatic space for null terminator and some extra buffer, aligned to 4-byte +//! boundary +inline Standard_Size calculatePaddedSize(const int theLength) +{ + return (theLength + 4) & ~0x3; // Always guarantees at least +1 byte, up to +4 bytes } +} // namespace // ---------------------------------------------------------------------------- // Create an empty AsciiString @@ -214,6 +222,7 @@ TCollection_AsciiString::TCollection_AsciiString(const TCollection_ExtendedStrin else { // create UTF-8 string + // Note: allocate() adds padding (theLength + 4) & ~0x3, so no need for +1 for null terminator allocate(astring.LengthOfCString()); astring.ToUTF8CString(mystring); } @@ -1340,7 +1349,7 @@ void TCollection_AsciiString::allocate(const int theLength) } else { - const Standard_Size aRoundSize = (theLength + 4) & ~0x3; + const Standard_Size aRoundSize = calculatePaddedSize(theLength); mystring = static_cast(Standard::AllocateOptimal(aRoundSize)); mystring[mylength] = '\0'; } @@ -1354,12 +1363,15 @@ void TCollection_AsciiString::reallocate(const int theLength) { if (mystring == THE_DEFAULT_CHAR_STRING) { - const Standard_Size aRoundSize = (theLength + 4) & ~0x3; + // Use same padding strategy as allocate() for consistency + const Standard_Size aRoundSize = calculatePaddedSize(theLength); mystring = static_cast(Standard::AllocateOptimal(aRoundSize)); } else { - mystring = static_cast(Standard::Reallocate(mystring, theLength + 1)); + // For existing allocations, use padding size with Standard::Reallocate + const Standard_Size aRoundSize = calculatePaddedSize(theLength); + mystring = static_cast(Standard::Reallocate(mystring, aRoundSize)); } mystring[theLength] = '\0'; } diff --git a/src/FoundationClasses/TKernel/TCollection/TCollection_ExtendedString.cxx b/src/FoundationClasses/TKernel/TCollection/TCollection_ExtendedString.cxx index 3ac022b514..418a79f6ac 100644 --- a/src/FoundationClasses/TKernel/TCollection/TCollection_ExtendedString.cxx +++ b/src/FoundationClasses/TKernel/TCollection/TCollection_ExtendedString.cxx @@ -25,6 +25,13 @@ namespace { static Standard_ExtCharacter THE_DEFAULT_EXT_CHAR_STRING[1] = {0}; +//! Calculate padded allocation size for ExtendedString (2-byte characters) +//! Guarantees at least +1 character space for null terminator, aligned to 4-byte boundary +inline Standard_Size calculatePaddedSize(const int theLength) +{ + return (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3; +} + //! Returns the number of 16-bit code units in Unicode string template static Standard_Integer nbSymbols(const T* theUtfString) @@ -48,7 +55,7 @@ inline Standard_ExtCharacter* Standard_UNUSED fromWideString(const Standard_Wide { return THE_DEFAULT_EXT_CHAR_STRING; } - const Standard_Size aRoundSize = (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3; + const Standard_Size aRoundSize = calculatePaddedSize(theLength); Standard_ExtCharacter* aString = static_cast(Standard::AllocateOptimal(aRoundSize)); NCollection_UtfWideIter anIterRead(theUtfString); @@ -73,7 +80,7 @@ inline Standard_ExtCharacter* Standard_UNUSED { return THE_DEFAULT_EXT_CHAR_STRING; } - const Standard_Size aRoundSize = (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3; + const Standard_Size aRoundSize = calculatePaddedSize(theLength); Standard_ExtCharacter* aString = static_cast(Standard::AllocateOptimal(aRoundSize)); const Standard_Integer aSize = theLength * sizeof(Standard_ExtCharacter); @@ -901,9 +908,8 @@ Standard_Boolean TCollection_ExtendedString::ConvertToUnicode(const Standard_CSt return Standard_True; } -//---------------------------------------------------------------------------- -// Returns expected CString length in UTF8 coding. -//---------------------------------------------------------------------------- +//================================================================================================= + Standard_Integer TCollection_ExtendedString::LengthOfCString() const { Standard_Integer aSizeBytes = 0; @@ -947,7 +953,7 @@ void TCollection_ExtendedString::allocate(const int theLength) } else { - const Standard_Size aRoundSize = (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3; + const Standard_Size aRoundSize = calculatePaddedSize(theLength); mystring = static_cast(Standard::AllocateOptimal(aRoundSize)); mystring[mylength] = '\0'; } @@ -961,14 +967,14 @@ void TCollection_ExtendedString::reallocate(const int theLength) { if (mystring == THE_DEFAULT_EXT_CHAR_STRING) { - const Standard_Size aRoundSize = - (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3; + const Standard_Size aRoundSize = calculatePaddedSize(theLength); mystring = static_cast(Standard::AllocateOptimal(aRoundSize)); } else { - mystring = static_cast( - Standard::Reallocate(mystring, (theLength + 1) * sizeof(Standard_ExtCharacter))); + // For reallocate, use padded size for consistency + const Standard_Size aRoundSize = calculatePaddedSize(theLength); + mystring = static_cast(Standard::Reallocate(mystring, aRoundSize)); } mystring[theLength] = 0; } diff --git a/src/FoundationClasses/TKernel/TCollection/TCollection_ExtendedString.hxx b/src/FoundationClasses/TKernel/TCollection/TCollection_ExtendedString.hxx index 5cc27e4a85..5085c4cefe 100644 --- a/src/FoundationClasses/TKernel/TCollection/TCollection_ExtendedString.hxx +++ b/src/FoundationClasses/TKernel/TCollection/TCollection_ExtendedString.hxx @@ -364,9 +364,9 @@ public: //! should be allocated before call! Standard_EXPORT Standard_Integer ToUTF8CString(Standard_PCharacter& theCString) const; - //! Returns expected CString length in UTF8 coding. - //! It can be used for memory calculation before converting - //! to CString containing symbols in UTF8 coding. + //! Returns expected CString length in UTF8 coding (like strlen, without null terminator). + //! It can be used for memory calculation before converting to CString containing symbols in UTF8 + //! coding. For external allocation, use: char* buf = new char[str.LengthOfCString() + 1]; Standard_EXPORT Standard_Integer LengthOfCString() const; private: