OCCT Git - occt.git/commitdiff
Data Exchange, Step Export - Preserving control directives (#601)
author Pasukhin Dmitry <dpasukhi@opencascade.com>
Sat, 12 Jul 2025 16:05:39 +0000 (17:05 +0100)
committer GitHub <noreply@github.com>
Sat, 12 Jul 2025 16:05:39 +0000 (17:05 +0100)
- Introduced `CleanTextForSend` static helper with detailed documentation.
- Updated `StepData_StepWriter::Send` to use the new helper and simplified quoting/line‐wrapping logic.
- Added comprehensive GTests for `CleanTextForSend` and updated the test suite configuration.

src/DataExchange/TKDESTEP/GTests/FILES.cmake
src/DataExchange/TKDESTEP/GTests/StepData_StepWriter_Test.cxx [new file with mode: 0644]
src/DataExchange/TKDESTEP/StepData/StepData_StepWriter.cxx
src/DataExchange/TKDESTEP/StepData/StepData_StepWriter.hxx

index 478b5cfcb5d778e8182c8d6120ac61c25f5eea1d..88e540224417891817cecc583fa23e69341d0428 100644 (file)
@@ -3,6 +3,7 @@ set(OCCT_TKDESTEP_GTests_FILES_LOCATION "${CMAKE_CURRENT_LIST_DIR}")
 
 set(OCCT_TKDESTEP_GTests_FILES
     STEPConstruct_RenderingProperties_Test.cxx
+    StepData_StepWriter_Test.cxx
     StepTidy_BaseTestFixture.pxx
     StepTidy_Axis2Placement3dReducer_Test.cxx
     StepTidy_CartesianPointReducer_Test.cxx
diff --git a/src/DataExchange/TKDESTEP/GTests/StepData_StepWriter_Test.cxx b/src/DataExchange/TKDESTEP/GTests/StepData_StepWriter_Test.cxx
new file mode 100644 (file)
index 0000000..2685300
--- /dev/null
@@ -0,0 +1,160 @@
+// Copyright (c) 2025 OPEN CASCADE SAS
+//
+// This file is part of Open CASCADE Technology software library.
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License version 2.1 as published
+// by the Free Software Foundation, with special exception defined in the file
+// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
+// distribution for complete text of the license and disclaimer of any warranty.
+//
+// Alternatively, this file may be used under the terms of Open CASCADE
+// commercial license or contractual agreement.
+
+#include <StepData_StepWriter.hxx>
+#include <TCollection_AsciiString.hxx>
+
+#include <gtest/gtest.h>
+
+// Plain characters that have to be escaped: apostrophe, backslash, newline and tab.
+TEST(StepData_StepWriterTest, CleanTextForSend_BasicEscaping)
+{
+  // An apostrophe is written twice
+  const TCollection_AsciiString aQuoteText("text with 'single quotes'");
+  const TCollection_AsciiString aQuoteOut(StepData_StepWriter::CleanTextForSend(aQuoteText));
+  EXPECT_STREQ(aQuoteOut.ToCString(), "text with ''single quotes''");
+
+  // A backslash outside of any directive is written twice
+  const TCollection_AsciiString aSlashText("path\\with\\backslashes");
+  const TCollection_AsciiString aSlashOut(StepData_StepWriter::CleanTextForSend(aSlashText));
+  EXPECT_STREQ(aSlashOut.ToCString(), "path\\\\with\\\\backslashes");
+
+  // A newline character becomes the N directive
+  const TCollection_AsciiString aNewlineText("line1\nline2");
+  const TCollection_AsciiString aNewlineOut(StepData_StepWriter::CleanTextForSend(aNewlineText));
+  EXPECT_STREQ(aNewlineOut.ToCString(), "line1\\N\\line2");
+
+  // A tab character becomes the T directive
+  const TCollection_AsciiString aTabText("text\twith\ttabs");
+  const TCollection_AsciiString aTabOut(StepData_StepWriter::CleanTextForSend(aTabText));
+  EXPECT_STREQ(aTabOut.ToCString(), "text\\T\\with\\T\\tabs");
+}
+
+// Test CleanTextForSend with control directives preservation
+TEST(StepData_StepWriterTest, CleanTextForSend_ControlDirectivePreservation)
+{
+  // Test \X\ control directive preservation
+  TCollection_AsciiString anInput1("text with \\XA7\\ section sign");
+  TCollection_AsciiString aResult1 = StepData_StepWriter::CleanTextForSend(anInput1);
+  EXPECT_STREQ(aResult1.ToCString(), "text with \\XA7\\ section sign");
+
+  // Test \X2\ control directive preservation
+  TCollection_AsciiString anInput2("\\X2\\03C0\\X0\\ is pi");
+  TCollection_AsciiString aResult2 = StepData_StepWriter::CleanTextForSend(anInput2);
+  EXPECT_STREQ(aResult2.ToCString(), "\\X2\\03C0\\X0\\ is pi");
+
+  // Test \X4\ control directive preservation
+  TCollection_AsciiString anInput3("emoji \\X4\\001F600\\X0\\ face");
+  TCollection_AsciiString aResult3 = StepData_StepWriter::CleanTextForSend(anInput3);
+  EXPECT_STREQ(aResult3.ToCString(), "emoji \\X4\\001F600\\X0\\ face");
+
+  // Test \S\ control directive preservation
+  TCollection_AsciiString anInput4("text with \\S\\ directive");
+  TCollection_AsciiString aResult4 = StepData_StepWriter::CleanTextForSend(anInput4);
+  EXPECT_STREQ(aResult4.ToCString(), "text with \\S\\ directive");
+
+  // Test \P\ control directive preservation
+  TCollection_AsciiString anInput5("\\PA\\ code page setting");
+  TCollection_AsciiString aResult5 = StepData_StepWriter::CleanTextForSend(anInput5);
+  EXPECT_STREQ(aResult5.ToCString(), "\\PA\\ code page setting");
+}
+
+// N and T directives that are already spelled out in the input are not escaped again.
+TEST(StepData_StepWriterTest, CleanTextForSend_ExistingDirectivePreservation)
+{
+  // Input already holding an N directive
+  const TCollection_AsciiString aNewlineText("line1\\N\\line2");
+  const TCollection_AsciiString aNewlineOut(StepData_StepWriter::CleanTextForSend(aNewlineText));
+  EXPECT_STREQ(aNewlineOut.ToCString(), "line1\\N\\line2");
+
+  // Input already holding T directives
+  const TCollection_AsciiString aTabText("text\\T\\with\\T\\tab");
+  const TCollection_AsciiString aTabOut(StepData_StepWriter::CleanTextForSend(aTabText));
+  EXPECT_STREQ(aTabOut.ToCString(), "text\\T\\with\\T\\tab");
+}
+
+// Directives and characters needing escape may appear side by side in one string.
+TEST(StepData_StepWriterTest, CleanTextForSend_MixedContent)
+{
+  // Apostrophes next to a directive are doubled, the directive is kept
+  const TCollection_AsciiString aQuoteText("see \\XA7\\ section and 'quotes'");
+  const TCollection_AsciiString aQuoteOut(StepData_StepWriter::CleanTextForSend(aQuoteText));
+  EXPECT_STREQ(aQuoteOut.ToCString(), "see \\XA7\\ section and ''quotes''");
+
+  // A lone backslash next to a directive is doubled, the directive is kept
+  const TCollection_AsciiString aSlashText("\\XA7\\ and path\\file");
+  const TCollection_AsciiString aSlashOut(StepData_StepWriter::CleanTextForSend(aSlashText));
+  EXPECT_STREQ(aSlashOut.ToCString(), "\\XA7\\ and path\\\\file");
+
+  // X2 directive, apostrophes and a tab character in the same input
+  const TCollection_AsciiString aBlendText("prefix \\X2\\03B103B2\\X0\\ 'text' with\ttab");
+  const TCollection_AsciiString aBlendOut(StepData_StepWriter::CleanTextForSend(aBlendText));
+  EXPECT_STREQ(aBlendOut.ToCString(), "prefix \\X2\\03B103B2\\X0\\ ''text'' with\\T\\tab");
+}
+
+// Boundary inputs: nothing at all, nothing but apostrophes, nothing but directives.
+TEST(StepData_StepWriterTest, CleanTextForSend_EdgeCases)
+{
+  // Empty input gives empty output
+  const TCollection_AsciiString anEmptyText("");
+  const TCollection_AsciiString anEmptyOut(StepData_StepWriter::CleanTextForSend(anEmptyText));
+  EXPECT_STREQ(anEmptyOut.ToCString(), "");
+
+  // Two apostrophes give four
+  const TCollection_AsciiString aQuoteText("''");
+  const TCollection_AsciiString aQuoteOut(StepData_StepWriter::CleanTextForSend(aQuoteText));
+  EXPECT_STREQ(aQuoteOut.ToCString(), "''''");
+
+  // A single directive and nothing else
+  const TCollection_AsciiString aSingleText("\\XA7\\");
+  const TCollection_AsciiString aSingleOut(StepData_StepWriter::CleanTextForSend(aSingleText));
+  EXPECT_STREQ(aSingleOut.ToCString(), "\\XA7\\");
+
+  // Two directives directly after each other
+  const TCollection_AsciiString aPairText("\\XA7\\\\XB6\\");
+  const TCollection_AsciiString aPairOut(StepData_StepWriter::CleanTextForSend(aPairText));
+  EXPECT_STREQ(aPairOut.ToCString(), "\\XA7\\\\XB6\\");
+}
+
+// Text that only resembles a directive is escaped like any other text.
+TEST(StepData_StepWriterTest, CleanTextForSend_MalformedInput)
+{
+  // Backslash followed by X only: the backslash is doubled, apostrophes are doubled
+  const TCollection_AsciiString aBareText("incomplete \\X and 'quotes'");
+  const TCollection_AsciiString aBareOut(StepData_StepWriter::CleanTextForSend(aBareText));
+  EXPECT_STREQ(aBareOut.ToCString(), "incomplete \\\\X and ''quotes''");
+
+  // Directive start without the closing backslash: the backslash is doubled
+  const TCollection_AsciiString anOpenText("partial \\XA and more");
+  const TCollection_AsciiString anOpenOut(StepData_StepWriter::CleanTextForSend(anOpenText));
+  EXPECT_STREQ(anOpenOut.ToCString(), "partial \\\\XA and more");
+}
+
+// X2 and X4 directives are kept together with their hex payload and the closing X0.
+TEST(StepData_StepWriterTest, CleanTextForSend_HexSequenceDetection)
+{
+  // X2 directive carrying several code units
+  const TCollection_AsciiString aWideText("\\X2\\03B103B203B3\\X0\\");
+  const TCollection_AsciiString aWideOut(StepData_StepWriter::CleanTextForSend(aWideText));
+  EXPECT_STREQ(aWideOut.ToCString(), "\\X2\\03B103B203B3\\X0\\");
+
+  // X4 directive carrying several code units
+  const TCollection_AsciiString aQuadText("\\X4\\001F600001F638\\X0\\");
+  const TCollection_AsciiString aQuadOut(StepData_StepWriter::CleanTextForSend(aQuadText));
+  EXPECT_STREQ(aQuadOut.ToCString(), "\\X4\\001F600001F638\\X0\\");
+
+  // Ordinary text before and after an X2 directive
+  const TCollection_AsciiString aFramedText("start \\X2\\03C0\\X0\\ end");
+  const TCollection_AsciiString aFramedOut(StepData_StepWriter::CleanTextForSend(aFramedText));
+  EXPECT_STREQ(aFramedOut.ToCString(), "start \\X2\\03C0\\X0\\ end");
+}
\ No newline at end of file
index b90f9ce3a5dd65b69180db27e239675624ec2e41..c519c0cc1a0ba2ea42bec27fd80d2b8bfb0484f4 100644 (file)
@@ -828,122 +828,67 @@ void StepData_StepWriter::Send(const Standard_Real val)
 void StepData_StepWriter::Send(const TCollection_AsciiString& val)
 {
   AddParam();
-  TCollection_AsciiString aval(val); // on duplique pour trafiquer si besoin
-  Standard_Integer        nb = aval.Length();
-  Standard_Integer        nn = nb;
-  aval.AssignCat('\''); // comme cela, Insert(i+1) est OK
+  // Escape special characters of val; control directives already present in it are kept
+  TCollection_AsciiString aVal = CleanTextForSend(val);
+  Standard_Integer        aNn  = aVal.Length();
 
-  //    Conversion des Caracteres speciaux
-  for (Standard_Integer i = nb; i > 0; i--)
-  {
-    char uncar = aval.Value(i);
-    if (uncar == '\'')
-    {
-      aval.Insert(i + 1, '\'');
-      nn++;
-      continue;
-    }
-    if (uncar == '\\')
-    {
-      aval.Insert(i + 1, '\\');
-      nn++;
-      continue;
-    }
-    if (uncar == '\n')
-    {
-      aval.SetValue(i, '\\');
-      aval.Insert(i + 1, '\\');
-      aval.Insert(i + 1, 'N');
-      nn += 2;
-      continue;
-    }
-    if (uncar == '\t')
-    {
-      aval.SetValue(i, '\\');
-      aval.Insert(i + 1, '\\');
-      aval.Insert(i + 1, 'T');
-      nn += 2;
-      continue;
-    }
-  }
-  //: i2 abv 31 Aug 98: ProSTEP TR9: avoid wrapping text or do it at spaces
-  aval.Insert(1, '\'');
-  nn += 2;
+  // Enclose the escaped text in apostrophes; aNn is the length including both of them
+  aVal.Insert(1, '\'');
+  aVal.AssignCat('\'');
+  aNn += 2;
 
-  //: i2  AddString ("\'",1); nn ++;
+  //: i2 abv 31 Aug 98: ProSTEP TR9: avoid wrapping text or do it at spaces
 
   //    Attention au depassement des 72 caracteres
-  if (thecurr.CanGet(nn))
-    AddString(aval, 0);
+  if (thecurr.CanGet(aNn))
+    AddString(aVal, 0);
   //: i2
   else
   {
     thefile->Append(thecurr.Moved());
-    Standard_Integer indst = thelevel * 2;
+    Standard_Integer anIndst = thelevel * 2;
     if (theindent)
-      indst += theindval;
-    if (indst + nn <= StepLong)
-      thecurr.SetInitial(indst);
+      anIndst += theindval;
+    if (anIndst + aNn <= StepLong)
+      thecurr.SetInitial(anIndst);
     else
       thecurr.SetInitial(0);
-    if (thecurr.CanGet(nn))
-      AddString(aval, 0);
+    if (thecurr.CanGet(aNn))
+      AddString(aVal, 0);
     else
     {
-      while (nn > 0)
+      while (aNn > 0)
       {
-        if (nn <= StepLong)
+        if (aNn <= StepLong)
         {
-          thecurr.Add(aval); // Ca yet, on a tout epuise
+          thecurr.Add(aVal); // Done: the remaining text is at most StepLong characters
           thecurr.FreezeInitial();
           break;
         }
-        Standard_Integer stop = StepLong; // position of last separator
-        for (; stop > 0 && aval.Value(stop) != ' '; stop--)
+        Standard_Integer aStop = StepLong; // split at last ' ', else '\\', else '_', else StepLong
+        for (; aStop > 0 && aVal.Value(aStop) != ' '; aStop--)
           ;
-        if (!stop)
+        if (!aStop)
         {
-          stop = StepLong;
-          for (; stop > 0 && aval.Value(stop) != '\\'; stop--)
+          aStop = StepLong;
+          for (; aStop > 0 && aVal.Value(aStop) != '\\'; aStop--)
             ;
-          if (!stop)
+          if (!aStop)
           {
-            stop = StepLong;
-            for (; stop > 0 && aval.Value(stop) != '_'; stop--)
+            aStop = StepLong;
+            for (; aStop > 0 && aVal.Value(aStop) != '_'; aStop--)
               ;
-            if (!stop)
-              stop = StepLong;
+            if (!aStop)
+              aStop = StepLong;
           }
         }
-        TCollection_AsciiString bval = aval.Split(stop);
-        thefile->Append(new TCollection_HAsciiString(aval));
-        aval = bval;
-        nn -= stop;
+        TCollection_AsciiString aBval = aVal.Split(aStop);
+        thefile->Append(new TCollection_HAsciiString(aVal));
+        aVal = aBval;
+        aNn -= aStop;
       }
     }
   }
-  /* //:i2
-    else {
-      //    Il faut tronconner ...  lignes limitees a 72 caracteres (StepLong)
-      Standard_Integer ncurr = thecurr.Length();
-      Standard_Integer nbuff = StepLong - ncurr;
-      thecurr.Add (aval.ToCString(),nbuff);
-      thefile->Append(thecurr.Moved());
-      aval.Remove(1,nbuff);
-      nn -= nbuff;
-      while (nn > 0) {
-        if (nn <= StepLong) {
-      thecurr.Add (aval);  // Ca yet, on a tout epuise
-      thecurr.FreezeInitial();
-      break;
-        }
-        TCollection_AsciiString bval = aval.Split(StepLong);
-        thefile->Append(new TCollection_HAsciiString(bval));
-        nn -= StepLong;
-      }
-    }
-  //:i2 */
-  //  thecurr.Add('\'');   deja mis dans aval au debut
 }
 
 //=================================================================================================
@@ -1214,3 +1159,124 @@ Standard_Boolean StepData_StepWriter::Print(Standard_OStream& S)
 
   return isGood;
 }
+
+//=================================================================================================
+
+TCollection_AsciiString StepData_StepWriter::CleanTextForSend(
+  const TCollection_AsciiString& theText)
+{
+  TCollection_AsciiString aResult;
+  const Standard_Integer  aNb = theText.Length();
+
+  // Process characters from beginning to end
+  for (Standard_Integer anI = 1; anI <= aNb; anI++)
+  {
+    const char anUncar = theText.Value(anI);
+
+    // Check if we're at the start of a control directive
+    Standard_Boolean anIsDirective    = Standard_False;
+    Standard_Integer aDirectiveLength = 0;
+
+    if (anUncar == '\\' && anI <= aNb)
+    {
+
+      // Check for \X2\ and \X4\ patterns first (need exactly 4 characters: \X2\)
+      if (anI + 3 <= aNb && theText.Value(anI + 1) == 'X' && theText.Value(anI + 3) == '\\')
+      {
+        const char aThirdChar = theText.Value(anI + 2);
+
+        // \X2, \X4, \X0 patterns - special control sequences
+        if (aThirdChar == '2' || aThirdChar == '4' || aThirdChar == '0')
+        {
+          anIsDirective    = Standard_True;
+          aDirectiveLength = 4; // Basic directive length: \X2\, \X4\, \X0\
+
+          // For \X2 and \X4, find the terminating \X0 sequence
+          if (aThirdChar == '2' || aThirdChar == '4')
+          {
+            Standard_Integer aJ = anI + 4;
+            while (aJ <= aNb - 3)
+            {
+              if (theText.Value(aJ) == '\\' && theText.Value(aJ + 1) == 'X'
+                  && theText.Value(aJ + 2) == '0' && theText.Value(aJ + 3) == '\\')
+              {
+                aDirectiveLength = (aJ + 4) - anI; // Include the \X0 sequence
+                break;
+              }
+              aJ++;
+            }
+          }
+        }
+      }
+      // Check for \X{HH}\ pattern (need exactly 5 characters: \X{HH}\)
+      else if (anI + 4 <= aNb && theText.Value(anI + 1) == 'X' && theText.Value(anI + 4) == '\\')
+      {
+        const char aThirdChar  = theText.Value(anI + 2);
+        const char aFourthChar = theText.Value(anI + 3);
+
+        // Regular \X{HH}\ pattern - check for two hex characters
+        if (std::isxdigit(aThirdChar) && std::isxdigit(aFourthChar))
+        {
+          anIsDirective    = Standard_True;
+          aDirectiveLength = 5; // Control directive with two hex chars
+        }
+      }
+      // Check for \S, \N, \T patterns (need exactly 3 characters: \S\)
+      else if (anI + 2 <= aNb && theText.Value(anI + 2) == '\\')
+      {
+        const char aSecondChar = theText.Value(anI + 1);
+        if (aSecondChar == 'S' || aSecondChar == 'N' || aSecondChar == 'T')
+        {
+          anIsDirective    = Standard_True;
+          aDirectiveLength = 3; // Simple directive pattern
+        }
+      }
+      // Check for \P{char}\ patterns (need exactly 4 characters: \P{char}\)
+      else if (anI + 3 <= aNb && theText.Value(anI + 1) == 'P' && theText.Value(anI + 3) == '\\')
+      {
+        const char aSecondChar = theText.Value(anI + 2);
+        if (std::isalpha(aSecondChar))
+        {
+          anIsDirective    = Standard_True;
+          aDirectiveLength = 4; // P directive with parameter
+        }
+      }
+    }
+
+    if (anIsDirective)
+    {
+      // Copy the entire directive as-is
+      for (Standard_Integer aJ = 0; aJ < aDirectiveLength; aJ++)
+      {
+        aResult += theText.Value(anI + aJ);
+      }
+      anI += aDirectiveLength - 1; // Move past directive (loop will increment by 1)
+    }
+    else
+    {
+      // Process non-directive characters
+      if (anUncar == '\'')
+      {
+        aResult += "''"; // Double the quote
+      }
+      else if (anUncar == '\\')
+      {
+        aResult += "\\\\"; // Double the backslash
+      }
+      else if (anUncar == '\n')
+      {
+        aResult += "\\N\\"; // Convert to directive
+      }
+      else if (anUncar == '\t')
+      {
+        aResult += "\\T\\"; // Convert to directive
+      }
+      else
+      {
+        aResult += anUncar; // Copy as-is
+      }
+    }
+  }
+
+  return aResult;
+}
index b5885085a4e8fd40a4d0b56bad7b256bebc9fc80..8e684738367da3d36f36a436eb897a5f7ff7b296 100644 (file)
@@ -267,6 +267,37 @@ public:
   //! then clears it
   Standard_EXPORT Standard_Boolean Print(Standard_OStream& S);
 
+  //! Static helper function to prepare text for STEP file output while preserving
+  //! existing ISO 10303-21 control directives.
+  //!
+  //! Quotes, backslashes, newline and tab characters are escaped as required by the STEP
+  //! file format, while control directives already present in the input are copied unchanged.
+  //! The apostrophes enclosing a STEP string are not added by this function.
+  //!
+  //! Supported control directive patterns that are preserved:
+  //! - \X{HH}\ : Single byte character encoding (U+0000 to U+00FF)
+  //! - \X2\{HHHH}...\X0\ : UTF-16 character encoding
+  //! - \X4\{HHHHHHHH}...\X0\ : UTF-32 character encoding
+  //! - \S\ : Latin codepoint character with current code page
+  //! - \P{A-I}\ : Code page control directive
+  //! - \N\ : Newline directive (preserved as-is)
+  //! - \T\ : Tab directive (preserved as-is)
+  //!
+  //! Character escaping performed (only on non-directive content):
+  //! - Single quote (') -> two single quotes ('')
+  //! - Backslash (\) -> two backslashes (\\)
+  //! - Newline character -> \N\ directive
+  //! - Tab character -> \T\ directive
+  //!
+  //! Example:
+  //!   Input:  "text with \XA7\ and 'quotes'"
+  //!   Output: "text with \XA7\ and ''quotes''"
+  //!
+  //! @param theText The input text string to be processed
+  //! @return Processed text with preserved control directives and escaped special characters
+  Standard_EXPORT static TCollection_AsciiString CleanTextForSend(
+    const TCollection_AsciiString& theText);
+
 protected:
 private:
   //! adds a string to current line; first flushes it if full