OCCT Git - occt.git/commitdiff
Foundation Classes - Documentation update for TCollection (#665)
author Pasukhin Dmitry <dpasukhi@opencascade.com>
Fri, 8 Aug 2025 09:31:54 +0000 (10:31 +0100)
committer GitHub <noreply@github.com>
Fri, 8 Aug 2025 09:31:54 +0000 (10:31 +0100)
- Introduces helper functions for consistent padded memory allocation across AsciiString and ExtendedString classes
- Improves documentation for UTF-8 conversion methods with clearer buffer allocation guidance
- Adds comprehensive test coverage for string constructors, memory allocation, and UTF-8 conversion functionality

src/FoundationClasses/TKernel/GTests/TCollection_AsciiString_Test.cxx
src/FoundationClasses/TKernel/GTests/TCollection_ExtendedString_Test.cxx
src/FoundationClasses/TKernel/TCollection/TCollection_AsciiString.cxx
src/FoundationClasses/TKernel/TCollection/TCollection_ExtendedString.cxx
src/FoundationClasses/TKernel/TCollection/TCollection_ExtendedString.hxx

index 7249dd9ff6a6916ba6b50a0835d30ad5c902f759..75b02fb8f921b799599224a0b8df739a785e4b99 100644 (file)
@@ -11,6 +11,7 @@
 // Alternatively, this file may be used under the terms of Open CASCADE
 // commercial license or contractual agreement.
 
+#include <TCollection_ExtendedString.hxx>
 #include <TCollection_AsciiString.hxx>
 
 #include <gtest/gtest.h>
@@ -231,3 +232,130 @@ TEST(TCollection_AsciiStringTest, Split)
   EXPECT_STREQ("abcde", aString.ToCString());
   EXPECT_STREQ("fghij", remainder.ToCString());
 }
+
+TEST(TCollection_AsciiStringTest, MemoryAllocation)
+{
+  // Construct from a C string literal and check the reported length
+  TCollection_AsciiString aString1("test");
+  EXPECT_EQ(4, aString1.Length());
+
+  // Filler constructor for every length from 0 (empty string) to 20
+  for (Standard_Integer anIdx = 0; anIdx <= 20; ++anIdx)
+  {
+    TCollection_AsciiString aStr(anIdx, 'A');
+    EXPECT_EQ(anIdx, aStr.Length());
+  }
+}
+
+TEST(TCollection_AsciiStringTest, LengthConstructor)
+{
+  // Constructor taking a C string and a length: only the leading characters are expected
+  const Standard_CString aSourceString = "This is a very long string";
+
+  TCollection_AsciiString aString1(aSourceString, 4);
+  EXPECT_EQ(4, aString1.Length());
+  EXPECT_STREQ("This", aString1.ToCString());
+
+  TCollection_AsciiString aString2(aSourceString, 7);
+  EXPECT_EQ(7, aString2.Length());
+  EXPECT_STREQ("This is", aString2.ToCString());
+
+  // A length beyond the 26-character source is expected to yield the whole string
+  TCollection_AsciiString aString3(aSourceString, 100);
+  EXPECT_EQ(26, aString3.Length());
+  EXPECT_STREQ(aSourceString, aString3.ToCString());
+}
+
+TEST(TCollection_AsciiStringTest, ExtendedStringConversion)
+{
+  // Build an AsciiString from an ASCII-only ExtendedString; length and content must match
+  TCollection_ExtendedString anExtString("Hello World");
+  TCollection_AsciiString    anAsciiString(anExtString);
+
+  EXPECT_EQ(anExtString.Length(), anAsciiString.Length());
+  EXPECT_STREQ("Hello World", anAsciiString.ToCString());
+}
+
+TEST(TCollection_AsciiStringTest, NumericalConstructors)
+{
+  // Integer constructor: expects the decimal text "42"
+  TCollection_AsciiString anIntString(42);
+  EXPECT_STREQ("42", anIntString.ToCString());
+
+  // Real constructor: only the presence of "3.14" is checked, not the exact formatting
+  TCollection_AsciiString aRealString(3.14);
+  const Standard_CString  aRealCStr = aRealString.ToCString();
+  EXPECT_TRUE(strstr(aRealCStr, "3.14") != NULL);
+}
+
+TEST(TCollection_AsciiStringTest, FillerConstructor)
+{
+  // Constructor with length and filler character: expects five '*' characters
+  TCollection_AsciiString aFilledString(5, '*');
+  EXPECT_EQ(5, aFilledString.Length());
+  EXPECT_STREQ("*****", aFilledString.ToCString());
+}
+
+TEST(TCollection_AsciiStringTest, ConcatenationConstructors)
+{
+  // (string, character) constructor: expects the character appended to the string
+  TCollection_AsciiString aBaseString("Hello");
+  TCollection_AsciiString aStringWithChar(aBaseString, '!');
+  EXPECT_STREQ("Hello!", aStringWithChar.ToCString());
+
+  // (string, C string) constructor: expects the C string appended to the string
+  TCollection_AsciiString aStringWithCStr(aBaseString, " World");
+  EXPECT_STREQ("Hello World", aStringWithCStr.ToCString());
+
+  // (string, string) constructor: expects the second string appended to the first
+  TCollection_AsciiString aSecondString(" Universe");
+  TCollection_AsciiString aCombinedString(aBaseString, aSecondString);
+  EXPECT_STREQ("Hello Universe", aCombinedString.ToCString());
+}
+
+TEST(TCollection_AsciiStringTest, EdgeCases)
+{
+  // Default-constructed and ""-constructed strings are expected to be equal and empty
+  TCollection_AsciiString anEmptyString1;
+  TCollection_AsciiString anEmptyString2("");
+
+  EXPECT_TRUE(anEmptyString1.IsEqual(anEmptyString2));
+  EXPECT_EQ(0, anEmptyString1.Length());
+  EXPECT_TRUE(anEmptyString1.IsEmpty());
+
+  // Constructing from the '\0' character is expected to give an empty string
+  TCollection_AsciiString aNullCharString('\0');
+  EXPECT_EQ(0, aNullCharString.Length());
+  EXPECT_TRUE(aNullCharString.IsEmpty());
+}
+
+TEST(TCollection_AsciiStringTest, LargeStrings)
+{
+  // 1000-character filler string: check the length and the characters at both ends
+  const Standard_Integer  aLargeSize = 1000;
+  TCollection_AsciiString aLargeString(aLargeSize, 'X');
+
+  EXPECT_EQ(aLargeSize, aLargeString.Length());
+  EXPECT_EQ('X', aLargeString.Value(1));
+  EXPECT_EQ('X', aLargeString.Value(aLargeSize));
+}
+
+TEST(TCollection_AsciiStringTest, PaddingSafety)
+{
+  // For lengths 1..16, check length, terminator and content of filler-constructed strings
+  for (Standard_Integer anIdx = 1; anIdx <= 16; ++anIdx)
+  {
+    TCollection_AsciiString aTestString(anIdx, 'A');
+    EXPECT_EQ(anIdx, aTestString.Length());
+
+    // The byte right after the last character must be the null terminator
+    const Standard_CString aCString = aTestString.ToCString();
+    EXPECT_EQ('\0', aCString[anIdx]);
+
+    // Every character before the terminator must be the filler 'A'
+    for (Standard_Integer aCharIdx = 0; aCharIdx < anIdx; ++aCharIdx)
+    {
+      EXPECT_EQ('A', aCString[aCharIdx]);
+    }
+  }
+}
index a261578ad758a5643e2753b4205c30f116b7520f..2088fe054e302bbd650114707e255a73e350b959 100644 (file)
@@ -205,3 +205,211 @@ TEST(TCollection_ExtendedStringTest, ChangeAll)
   TCollection_AsciiString asciiResult(aString);
   EXPECT_STREQ("HellXX WXXrld", asciiResult.ToCString());
 }
+
+TEST(TCollection_ExtendedStringTest, UTF8Conversion)
+{
+  // Size a buffer with LengthOfCString(), then fill it with ToUTF8CString()
+  TCollection_ExtendedString aString("Hello World");
+
+  Standard_Integer aBufferSize = aString.LengthOfCString();
+  EXPECT_GT(aBufferSize, 0);
+
+  // The reported length excludes the null terminator, so the caller adds one byte for it
+  Standard_PCharacter aBuffer        = new Standard_Character[aBufferSize + 1];
+  Standard_Integer    anActualLength = aString.ToUTF8CString(aBuffer);
+
+  EXPECT_EQ(aBufferSize, anActualLength);
+  EXPECT_EQ('\0', aBuffer[anActualLength]);
+  EXPECT_STREQ("Hello World", aBuffer);
+
+  delete[] aBuffer;
+}
+
+TEST(TCollection_ExtendedStringTest, UTF8ConversionUnicode)
+{
+  // UTF-8 conversion of a string containing one non-ASCII character
+  const Standard_ExtCharacter aUnicodeStr[] =
+    {0x0048, 0x00E9, 0x006C, 0x006C, 0x006F, 0}; // 'H', U+00E9 (e-acute), 'l', 'l', 'o'
+  TCollection_ExtendedString aString(aUnicodeStr);
+
+  Standard_Integer aBufferSize = aString.LengthOfCString();
+  EXPECT_GT(aBufferSize, 5); // U+00E9 takes two UTF-8 bytes, so more than 5 bytes in total
+
+  Standard_PCharacter aBuffer        = new Standard_Character[aBufferSize + 1];
+  Standard_Integer    anActualLength = aString.ToUTF8CString(aBuffer);
+
+  EXPECT_EQ(aBufferSize, anActualLength);
+  EXPECT_EQ('\0', aBuffer[anActualLength]);
+
+  delete[] aBuffer;
+}
+
+TEST(TCollection_ExtendedStringTest, WideCharConstructor)
+{
+  // Construct from a wide-character literal; only non-emptiness is checked
+  const Standard_WideChar*   aWideStr = L"Wide string test";
+  TCollection_ExtendedString aString(aWideStr);
+
+  EXPECT_GT(aString.Length(), 0);
+  EXPECT_FALSE(aString.IsEmpty());
+}
+
+TEST(TCollection_ExtendedStringTest, NumericalConstructors)
+{
+  // Integer constructor: the value converted back to AsciiString is expected to read "42"
+  TCollection_ExtendedString anIntString(42);
+  TCollection_AsciiString    anAsciiFromInt(anIntString);
+  EXPECT_STREQ("42", anAsciiFromInt.ToCString());
+
+  // Real constructor: only the presence of "3.14" is checked, not the exact formatting
+  TCollection_ExtendedString aRealString(3.14);
+  TCollection_AsciiString    anAsciiFromReal(aRealString);
+  const Standard_CString     aRealCStr = anAsciiFromReal.ToCString();
+  EXPECT_TRUE(strstr(aRealCStr, "3.14") != NULL);
+}
+
+TEST(TCollection_ExtendedStringTest, FillerConstructor)
+{
+  // Constructor with length and filler character: expects five 'X' characters
+  TCollection_ExtendedString aFilledString(5, 'X');
+  EXPECT_EQ(5, aFilledString.Length());
+
+  TCollection_AsciiString anAsciiFromFilled(aFilledString);
+  EXPECT_STREQ("XXXXX", anAsciiFromFilled.ToCString());
+}
+
+TEST(TCollection_ExtendedStringTest, ExtendedCharConstructor)
+{
+  // Single-character constructor with a non-ASCII code unit
+  const Standard_ExtCharacter aEuroChar = 0x20AC; // U+20AC, euro sign
+  TCollection_ExtendedString  aString(aEuroChar);
+
+  EXPECT_EQ(1, aString.Length());
+  EXPECT_FALSE(aString.IsAscii());
+  EXPECT_EQ(aEuroChar, aString.Value(1));
+}
+
+TEST(TCollection_ExtendedStringTest, UnicodeCharacters)
+{
+  // Four code units of increasing value must be stored and read back unchanged
+  const Standard_ExtCharacter aLatinA = 0x0041; // U+0041, 'A'
+  const Standard_ExtCharacter aLatinE = 0x00E9; // U+00E9, 'e' with acute accent
+  const Standard_ExtCharacter aEuro   = 0x20AC; // U+20AC, euro sign
+  const Standard_ExtCharacter aCJK    = 0x4E2D; // U+4E2D, a CJK ideograph
+
+  const Standard_ExtCharacter aUnicodeStr[] = {aLatinA, aLatinE, aEuro, aCJK, 0};
+  TCollection_ExtendedString  aString(aUnicodeStr);
+
+  EXPECT_EQ(4, aString.Length());
+  EXPECT_EQ(aLatinA, aString.Value(1));
+  EXPECT_EQ(aLatinE, aString.Value(2));
+  EXPECT_EQ(aEuro, aString.Value(3));
+  EXPECT_EQ(aCJK, aString.Value(4));
+  EXPECT_FALSE(aString.IsAscii());
+}
+
+TEST(TCollection_ExtendedStringTest, AsciiDetection)
+{
+  // A plain ASCII literal must report IsAscii(); a string containing U+20AC must not
+  TCollection_ExtendedString anAsciiString("Simple ASCII");
+  EXPECT_TRUE(anAsciiString.IsAscii());
+
+  const Standard_ExtCharacter aNonAsciiStr[] = {0x0041, 0x20AC, 0}; // 'A' + euro sign
+  TCollection_ExtendedString  aNonAsciiString(aNonAsciiStr);
+  EXPECT_FALSE(aNonAsciiString.IsAscii());
+}
+
+TEST(TCollection_ExtendedStringTest, EmptyStringHandling)
+{
+  // Default-constructed string: zero length, empty, zero UTF-8 length, terminator-only output
+  TCollection_ExtendedString anEmptyString;
+  EXPECT_EQ(0, anEmptyString.Length());
+  EXPECT_TRUE(anEmptyString.IsEmpty());
+  EXPECT_EQ(0, anEmptyString.LengthOfCString());
+
+  Standard_PCharacter aBuffer = new Standard_Character[1];
+  Standard_Integer    aLength = anEmptyString.ToUTF8CString(aBuffer);
+  EXPECT_EQ(0, aLength);
+  EXPECT_EQ('\0', aBuffer[0]);
+
+  delete[] aBuffer;
+}
+
+TEST(TCollection_ExtendedStringTest, ConversionRoundTrip)
+{
+  // AsciiString -> ExtendedString -> AsciiString must preserve content and length
+  const Standard_CString anOriginalStr = "Test conversion with special chars: !@#$%";
+
+  TCollection_AsciiString    anAsciiOriginal(anOriginalStr);
+  TCollection_ExtendedString anExtendedConverted(anAsciiOriginal);
+  TCollection_AsciiString    anAsciiRoundTrip(anExtendedConverted);
+
+  EXPECT_STREQ(anOriginalStr, anAsciiRoundTrip.ToCString());
+  EXPECT_EQ(anAsciiOriginal.Length(), anExtendedConverted.Length());
+  EXPECT_EQ(anAsciiOriginal.Length(), anAsciiRoundTrip.Length());
+}
+
+TEST(TCollection_ExtendedStringTest, LargeStrings)
+{
+  // 1000-character filler string: check length, both end characters and ASCII detection
+  const Standard_Integer     aLargeSize = 1000;
+  TCollection_ExtendedString aLargeString(aLargeSize, 'A');
+
+  EXPECT_EQ(aLargeSize, aLargeString.Length());
+  EXPECT_EQ('A', aLargeString.Value(1));
+  EXPECT_EQ('A', aLargeString.Value(aLargeSize));
+  EXPECT_TRUE(aLargeString.IsAscii());
+}
+
+TEST(TCollection_ExtendedStringTest, MemoryAllocation)
+{
+  // Filler constructor for lengths 1..16: check the length plus the first and last characters
+  for (Standard_Integer anIdx = 1; anIdx <= 16; ++anIdx)
+  {
+    TCollection_ExtendedString aTestString(anIdx, 'X');
+    EXPECT_EQ(anIdx, aTestString.Length());
+    EXPECT_EQ('X', aTestString.Value(1));
+
+    if (anIdx > 1)
+    {
+      EXPECT_EQ('X', aTestString.Value(anIdx));
+    }
+  }
+}
+
+TEST(TCollection_ExtendedStringTest, MultiByteCString)
+{
+  // Constructor with the multi-byte flag set to Standard_True; only non-emptiness is checked
+  const Standard_CString     aMultiByteStr = "Multi-byte test";
+  TCollection_ExtendedString aString(aMultiByteStr, Standard_True);
+
+  EXPECT_GT(aString.Length(), 0);
+  EXPECT_FALSE(aString.IsEmpty());
+}
+
+TEST(TCollection_ExtendedStringTest, BoundaryValues)
+{
+  // Single-character strings at the 0x007F / 0x00FF / 0x0100 / 0xFFFF boundaries
+  // Note: IsAscii() is expected to be true up to 0x00FF (full 8-bit range), not only up to 0x007F
+  const Standard_ExtCharacter aLastStandardAscii = 0x007F;
+  const Standard_ExtCharacter aLastOCCTAscii     = 0x00FF;
+  const Standard_ExtCharacter aFirstExtended     = 0x0100;
+  const Standard_ExtCharacter aMaxBMP            = 0xFFFF;
+
+  // Each value is checked in its own one-character string
+  TCollection_ExtendedString aStringLastStandardAscii(aLastStandardAscii);
+  EXPECT_EQ(1, aStringLastStandardAscii.Length());
+  EXPECT_TRUE(aStringLastStandardAscii.IsAscii());
+
+  TCollection_ExtendedString aStringLastOCCTAscii(aLastOCCTAscii);
+  EXPECT_EQ(1, aStringLastOCCTAscii.Length());
+  EXPECT_TRUE(aStringLastOCCTAscii.IsAscii());
+
+  TCollection_ExtendedString aStringFirstExtended(aFirstExtended);
+  EXPECT_EQ(1, aStringFirstExtended.Length());
+  EXPECT_FALSE(aStringFirstExtended.IsAscii());
+
+  TCollection_ExtendedString aStringMaxBMP(aMaxBMP);
+  EXPECT_EQ(1, aStringMaxBMP.Length());
+  EXPECT_FALSE(aStringMaxBMP.IsAscii());
+}
index 5e51c871b9f420f45b4770a9658c7770588fce25..6c8f9b2c32a9e9a3e789506ac03e41894af03a40 100644 (file)
 namespace
 {
 static char THE_DEFAULT_CHAR_STRING[1] = {'\0'};
+
+//! Returns the padded allocation size in bytes for a string of theLength characters:
+//! the smallest multiple of 4 strictly greater than theLength, i.e. theLength plus 1 to 4
+//! bytes, so there is always room for the null terminator (assumes theLength >= 0)
+inline Standard_Size calculatePaddedSize(const int theLength)
+{
+  return (theLength + 4) & ~0x3; // clears the two low bits: result is a multiple of 4
 }
+} // namespace
 
 // ----------------------------------------------------------------------------
 // Create an empty AsciiString
@@ -214,6 +222,7 @@ TCollection_AsciiString::TCollection_AsciiString(const TCollection_ExtendedStrin
   else
   {
     // create UTF-8 string
+    // Note: allocate() adds padding (theLength + 4) & ~0x3, so no need for +1 for null terminator
     allocate(astring.LengthOfCString());
     astring.ToUTF8CString(mystring);
   }
@@ -1340,7 +1349,7 @@ void TCollection_AsciiString::allocate(const int theLength)
   }
   else
   {
-    const Standard_Size aRoundSize = (theLength + 4) & ~0x3;
+    const Standard_Size aRoundSize = calculatePaddedSize(theLength);
     mystring           = static_cast<Standard_PCharacter>(Standard::AllocateOptimal(aRoundSize));
     mystring[mylength] = '\0';
   }
@@ -1354,12 +1363,15 @@ void TCollection_AsciiString::reallocate(const int theLength)
   {
     if (mystring == THE_DEFAULT_CHAR_STRING)
     {
-      const Standard_Size aRoundSize = (theLength + 4) & ~0x3;
+      // Use same padding strategy as allocate() for consistency
+      const Standard_Size aRoundSize = calculatePaddedSize(theLength);
       mystring = static_cast<Standard_PCharacter>(Standard::AllocateOptimal(aRoundSize));
     }
     else
     {
-      mystring = static_cast<Standard_PCharacter>(Standard::Reallocate(mystring, theLength + 1));
+      // For existing allocations, use padding size with Standard::Reallocate
+      const Standard_Size aRoundSize = calculatePaddedSize(theLength);
+      mystring = static_cast<Standard_PCharacter>(Standard::Reallocate(mystring, aRoundSize));
     }
     mystring[theLength] = '\0';
   }
index 3ac022b5148b5272de361a225c890e5aaebdb959..418a79f6ac97675144c3c3e7ccb9d1195caab3bb 100644 (file)
@@ -25,6 +25,13 @@ namespace
 {
 static Standard_ExtCharacter THE_DEFAULT_EXT_CHAR_STRING[1] = {0};
 
+//! Returns the allocation size in bytes for theLength characters plus one terminating
+//! character (sizeof(Standard_ExtCharacter) bytes each), rounded up to a multiple of 4
+inline Standard_Size calculatePaddedSize(const int theLength)
+{
+  return (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3;
+}
+
 //! Returns the number of 16-bit code units in Unicode string
 template <typename T>
 static Standard_Integer nbSymbols(const T* theUtfString)
@@ -48,7 +55,7 @@ inline Standard_ExtCharacter* Standard_UNUSED fromWideString(const Standard_Wide
   {
     return THE_DEFAULT_EXT_CHAR_STRING;
   }
-  const Standard_Size aRoundSize = (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3;
+  const Standard_Size    aRoundSize = calculatePaddedSize(theLength);
   Standard_ExtCharacter* aString =
     static_cast<Standard_PExtCharacter>(Standard::AllocateOptimal(aRoundSize));
   NCollection_UtfWideIter anIterRead(theUtfString);
@@ -73,7 +80,7 @@ inline Standard_ExtCharacter* Standard_UNUSED
   {
     return THE_DEFAULT_EXT_CHAR_STRING;
   }
-  const Standard_Size aRoundSize = (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3;
+  const Standard_Size    aRoundSize = calculatePaddedSize(theLength);
   Standard_ExtCharacter* aString =
     static_cast<Standard_PExtCharacter>(Standard::AllocateOptimal(aRoundSize));
   const Standard_Integer aSize = theLength * sizeof(Standard_ExtCharacter);
@@ -901,9 +908,8 @@ Standard_Boolean TCollection_ExtendedString::ConvertToUnicode(const Standard_CSt
   return Standard_True;
 }
 
-//----------------------------------------------------------------------------
-// Returns expected CString length in UTF8 coding.
-//----------------------------------------------------------------------------
+//=================================================================================================
+
 Standard_Integer TCollection_ExtendedString::LengthOfCString() const
 {
   Standard_Integer aSizeBytes = 0;
@@ -947,7 +953,7 @@ void TCollection_ExtendedString::allocate(const int theLength)
   }
   else
   {
-    const Standard_Size aRoundSize = (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3;
+    const Standard_Size aRoundSize = calculatePaddedSize(theLength);
     mystring           = static_cast<Standard_PExtCharacter>(Standard::AllocateOptimal(aRoundSize));
     mystring[mylength] = '\0';
   }
@@ -961,14 +967,14 @@ void TCollection_ExtendedString::reallocate(const int theLength)
   {
     if (mystring == THE_DEFAULT_EXT_CHAR_STRING)
     {
-      const Standard_Size aRoundSize =
-        (((theLength + 1) * sizeof(Standard_ExtCharacter)) + 3) & ~0x3;
+      const Standard_Size aRoundSize = calculatePaddedSize(theLength);
       mystring = static_cast<Standard_PExtCharacter>(Standard::AllocateOptimal(aRoundSize));
     }
     else
     {
-      mystring = static_cast<Standard_PExtCharacter>(
-        Standard::Reallocate(mystring, (theLength + 1) * sizeof(Standard_ExtCharacter)));
+      // For reallocate, use padded size for consistency
+      const Standard_Size aRoundSize = calculatePaddedSize(theLength);
+      mystring = static_cast<Standard_PExtCharacter>(Standard::Reallocate(mystring, aRoundSize));
     }
     mystring[theLength] = 0;
   }
index 5cc27e4a851770b636385cac9fe5a46951ca13c0..5085c4cefe297233e8fbef6c1de0714635c1ef83 100644 (file)
@@ -364,9 +364,9 @@ public:
   //! <theCString> should be allocated before call!
   Standard_EXPORT Standard_Integer ToUTF8CString(Standard_PCharacter& theCString) const;
 
-  //! Returns expected CString length in UTF8 coding.
-  //! It can be used for  memory  calculation  before converting
-  //! to CString containing symbols in UTF8 coding.
+  //! Returns expected CString length in UTF8 coding (like strlen, without null terminator).
+  //! It can be used for memory calculation before converting to CString containing symbols in UTF8
+  //! coding. For external allocation, use: char* buf = new char[str.LengthOfCString() + 1];
   Standard_EXPORT Standard_Integer LengthOfCString() const;
 
 private: