From: Pasukhin Dmitry
Date: Sat, 12 Jul 2025 16:05:39 +0000 (+0100)
Subject: Data Exchange, Step Export - Preserving control directives (#601)
X-Git-Url: http://git.dev.opencascade.org/gitweb/?a=commitdiff_plain;h=08f6de3affd1b5034a858dd838cb1331203978c8;p=occt.git

Data Exchange, Step Export - Preserving control directives (#601)

- Introduced `CleanTextForSend` static helper with detailed documentation.
- Updated `StepData_StepWriter::Send` to use the new helper and simplified quoting/line‐wrapping logic.
- Added comprehensive GTests for `CleanTextForSend` and updated the test suite configuration.
---

diff --git a/src/DataExchange/TKDESTEP/GTests/FILES.cmake b/src/DataExchange/TKDESTEP/GTests/FILES.cmake
index 478b5cfcb5..88e5402244 100644
--- a/src/DataExchange/TKDESTEP/GTests/FILES.cmake
+++ b/src/DataExchange/TKDESTEP/GTests/FILES.cmake
@@ -3,6 +3,7 @@ set(OCCT_TKDESTEP_GTests_FILES_LOCATION "${CMAKE_CURRENT_LIST_DIR}")
 
 set(OCCT_TKDESTEP_GTests_FILES
   STEPConstruct_RenderingProperties_Test.cxx
+  StepData_StepWriter_Test.cxx
   StepTidy_BaseTestFixture.pxx
   StepTidy_Axis2Placement3dReducer_Test.cxx
   StepTidy_CartesianPointReducer_Test.cxx
diff --git a/src/DataExchange/TKDESTEP/GTests/StepData_StepWriter_Test.cxx b/src/DataExchange/TKDESTEP/GTests/StepData_StepWriter_Test.cxx
new file mode 100644
index 0000000000..2685300b2a
--- /dev/null
+++ b/src/DataExchange/TKDESTEP/GTests/StepData_StepWriter_Test.cxx
@@ -0,0 +1,165 @@
+// Copyright (c) 2025 OPEN CASCADE SAS
+//
+// This file is part of Open CASCADE Technology software library.
+//
+// This library is free software; you can redistribute it and/or modify it under
+// the terms of the GNU Lesser General Public License version 2.1 as published
+// by the Free Software Foundation, with special exception defined in the file
+// OCCT_LGPL_EXCEPTION.txt. Consult the file LICENSE_LGPL_21.txt included in OCCT
+// distribution for complete text of the license and disclaimer of any warranty.
+//
+// Alternatively, this file may be used under the terms of Open CASCADE
+// commercial license or contractual agreement.
+
+#include <StepData_StepWriter.hxx>
+#include <TCollection_AsciiString.hxx>
+
+#include <gtest/gtest.h>
+
+// Test CleanTextForSend with basic character escaping
+TEST(StepData_StepWriterTest, CleanTextForSend_BasicEscaping)
+{
+  // Test single quote escaping
+  TCollection_AsciiString anInput1("text with 'single quotes'");
+  TCollection_AsciiString aResult1 = StepData_StepWriter::CleanTextForSend(anInput1);
+  EXPECT_STREQ(aResult1.ToCString(), "text with ''single quotes''");
+
+  // Test backslash escaping
+  TCollection_AsciiString anInput2("path\\with\\backslashes");
+  TCollection_AsciiString aResult2 = StepData_StepWriter::CleanTextForSend(anInput2);
+  EXPECT_STREQ(aResult2.ToCString(), "path\\\\with\\\\backslashes");
+
+  // Test newline escaping
+  TCollection_AsciiString anInput3("line1\nline2");
+  TCollection_AsciiString aResult3 = StepData_StepWriter::CleanTextForSend(anInput3);
+  EXPECT_STREQ(aResult3.ToCString(), "line1\\N\\line2");
+
+  // Test tab escaping
+  TCollection_AsciiString anInput4("text\twith\ttabs");
+  TCollection_AsciiString aResult4 = StepData_StepWriter::CleanTextForSend(anInput4);
+  EXPECT_STREQ(aResult4.ToCString(), "text\\T\\with\\T\\tabs");
+}
+
+// Test CleanTextForSend with control directives preservation
+TEST(StepData_StepWriterTest, CleanTextForSend_ControlDirectivePreservation)
+{
+  // Test \X\ control directive preservation
+  TCollection_AsciiString anInput1("text with \\XA7\\ section sign");
+  TCollection_AsciiString aResult1 = StepData_StepWriter::CleanTextForSend(anInput1);
+  EXPECT_STREQ(aResult1.ToCString(), "text with \\XA7\\ section sign");
+
+  // Test \X2\ control directive preservation
+  TCollection_AsciiString anInput2("\\X2\\03C0\\X0\\ is pi");
+  TCollection_AsciiString aResult2 = StepData_StepWriter::CleanTextForSend(anInput2);
+  EXPECT_STREQ(aResult2.ToCString(), "\\X2\\03C0\\X0\\ is pi");
+
+  // Test \X4\ control directive preservation
+  TCollection_AsciiString anInput3("emoji \\X4\\001F600\\X0\\ face");
+  TCollection_AsciiString aResult3 = StepData_StepWriter::CleanTextForSend(anInput3);
+  EXPECT_STREQ(aResult3.ToCString(), "emoji \\X4\\001F600\\X0\\ face");
+
+  // Test \S\ control directive preservation
+  TCollection_AsciiString anInput4("text with \\S\\ directive");
+  TCollection_AsciiString aResult4 = StepData_StepWriter::CleanTextForSend(anInput4);
+  EXPECT_STREQ(aResult4.ToCString(), "text with \\S\\ directive");
+
+  // Test \P\ control directive preservation
+  TCollection_AsciiString anInput5("\\PA\\ code page setting");
+  TCollection_AsciiString aResult5 = StepData_StepWriter::CleanTextForSend(anInput5);
+  EXPECT_STREQ(aResult5.ToCString(), "\\PA\\ code page setting");
+}
+
+// Test CleanTextForSend with existing \N\ and \T\ directive preservation
+TEST(StepData_StepWriterTest, CleanTextForSend_ExistingDirectivePreservation)
+{
+  // Test existing \N\ directive preservation
+  TCollection_AsciiString anInput1("line1\\N\\line2");
+  TCollection_AsciiString aResult1 = StepData_StepWriter::CleanTextForSend(anInput1);
+  EXPECT_STREQ(aResult1.ToCString(), "line1\\N\\line2");
+
+  // Test existing \T\ directive preservation
+  TCollection_AsciiString anInput2("text\\T\\with\\T\\tab");
+  TCollection_AsciiString aResult2 = StepData_StepWriter::CleanTextForSend(anInput2);
+  EXPECT_STREQ(aResult2.ToCString(), "text\\T\\with\\T\\tab");
+}
+
+// Test CleanTextForSend with mixed content
+TEST(StepData_StepWriterTest, CleanTextForSend_MixedContent)
+{
+  // Test quotes outside control directives
+  TCollection_AsciiString anInput1("see \\XA7\\ section and 'quotes'");
+  TCollection_AsciiString aResult1 = StepData_StepWriter::CleanTextForSend(anInput1);
+  EXPECT_STREQ(aResult1.ToCString(), "see \\XA7\\ section and ''quotes''");
+
+  // Test backslashes outside control directives
+  TCollection_AsciiString anInput2("\\XA7\\ and path\\file");
+  TCollection_AsciiString aResult2 = StepData_StepWriter::CleanTextForSend(anInput2);
+  EXPECT_STREQ(aResult2.ToCString(), "\\XA7\\ and path\\\\file");
+
+  // Test complex mixture
+  TCollection_AsciiString anInput3("prefix \\X2\\03B103B2\\X0\\ 'text' with\ttab");
+  TCollection_AsciiString aResult3 = StepData_StepWriter::CleanTextForSend(anInput3);
+  EXPECT_STREQ(aResult3.ToCString(), "prefix \\X2\\03B103B2\\X0\\ ''text'' with\\T\\tab");
+}
+
+// Test CleanTextForSend with edge cases
+TEST(StepData_StepWriterTest, CleanTextForSend_EdgeCases)
+{
+  // Test empty string
+  TCollection_AsciiString anInput1("");
+  TCollection_AsciiString aResult1 = StepData_StepWriter::CleanTextForSend(anInput1);
+  EXPECT_STREQ(aResult1.ToCString(), "");
+
+  // Test string with only quotes
+  TCollection_AsciiString anInput2("''");
+  TCollection_AsciiString aResult2 = StepData_StepWriter::CleanTextForSend(anInput2);
+  EXPECT_STREQ(aResult2.ToCString(), "''''");
+
+  // Test string with only control directive
+  TCollection_AsciiString anInput3("\\XA7\\");
+  TCollection_AsciiString aResult3 = StepData_StepWriter::CleanTextForSend(anInput3);
+  EXPECT_STREQ(aResult3.ToCString(), "\\XA7\\");
+
+  // Test consecutive control directives
+  TCollection_AsciiString anInput4("\\XA7\\\\XB6\\");
+  TCollection_AsciiString aResult4 = StepData_StepWriter::CleanTextForSend(anInput4);
+  EXPECT_STREQ(aResult4.ToCString(), "\\XA7\\\\XB6\\");
+}
+
+// Test CleanTextForSend with malformed but safe input
+TEST(StepData_StepWriterTest, CleanTextForSend_MalformedInput)
+{
+  // Test incomplete control directive (should be treated as regular text)
+  TCollection_AsciiString anInput1("incomplete \\X and 'quotes'");
+  TCollection_AsciiString aResult1 = StepData_StepWriter::CleanTextForSend(anInput1);
+  EXPECT_STREQ(aResult1.ToCString(), "incomplete \\\\X and ''quotes''");
+
+  // Test partial control directive
+  TCollection_AsciiString anInput2("partial \\XA and more");
+  TCollection_AsciiString aResult2 = StepData_StepWriter::CleanTextForSend(anInput2);
+  EXPECT_STREQ(aResult2.ToCString(), "partial \\\\XA and more");
+
+  // Test non-hex payload between \X2\ and \X0\ (special characters must still be escaped)
+  TCollection_AsciiString anInput3("\\X2\\it's\\X0\\");
+  TCollection_AsciiString aResult3 = StepData_StepWriter::CleanTextForSend(anInput3);
+  EXPECT_STREQ(aResult3.ToCString(), "\\X2\\it''s\\X0\\");
+}
+
+// Test CleanTextForSend hex sequence detection
+TEST(StepData_StepWriterTest, CleanTextForSend_HexSequenceDetection)
+{
+  // Test valid hex sequences in \X2\ directive
+  TCollection_AsciiString anInput1("\\X2\\03B103B203B3\\X0\\");
+  TCollection_AsciiString aResult1 = StepData_StepWriter::CleanTextForSend(anInput1);
+  EXPECT_STREQ(aResult1.ToCString(), "\\X2\\03B103B203B3\\X0\\");
+
+  // Test valid hex sequences in \X4\ directive
+  TCollection_AsciiString anInput2("\\X4\\001F600001F638\\X0\\");
+  TCollection_AsciiString aResult2 = StepData_StepWriter::CleanTextForSend(anInput2);
+  EXPECT_STREQ(aResult2.ToCString(), "\\X4\\001F600001F638\\X0\\");
+
+  // Test text around hex sequences
+  TCollection_AsciiString anInput3("start \\X2\\03C0\\X0\\ end");
+  TCollection_AsciiString aResult3 = StepData_StepWriter::CleanTextForSend(anInput3);
+  EXPECT_STREQ(aResult3.ToCString(), "start \\X2\\03C0\\X0\\ end");
+}
\ No newline at end of file
diff --git a/src/DataExchange/TKDESTEP/StepData/StepData_StepWriter.cxx b/src/DataExchange/TKDESTEP/StepData/StepData_StepWriter.cxx
index b90f9ce3a5..c519c0cc1a 100644
--- a/src/DataExchange/TKDESTEP/StepData/StepData_StepWriter.cxx
+++ b/src/DataExchange/TKDESTEP/StepData/StepData_StepWriter.cxx
@@ -828,122 +828,67 @@ void StepData_StepWriter::Send(const Standard_Real val)
 void StepData_StepWriter::Send(const TCollection_AsciiString& val)
 {
   AddParam();
-  TCollection_AsciiString aval(val); // on duplique pour trafiquer si besoin
-  Standard_Integer nb = aval.Length();
-  Standard_Integer nn = nb;
-  aval.AssignCat('\''); // comme cela, Insert(i+1) est OK
+  // Use helper function to clean text while preserving control directives
+  TCollection_AsciiString aVal = CleanTextForSend(val);
+  Standard_Integer aNn = aVal.Length();
 
-  // Conversion des Caracteres speciaux
-  for (Standard_Integer i = nb; i > 0; i--)
-  {
-    char uncar = aval.Value(i);
-    if (uncar == '\'')
-    {
-      aval.Insert(i + 1, '\'');
-      nn++;
-      continue;
-    }
-    if (uncar == '\\')
-    {
-      aval.Insert(i + 1, '\\');
-      nn++;
-      continue;
-    }
-    if (uncar == '\n')
-    {
-      aval.SetValue(i, '\\');
-      aval.Insert(i + 1, '\\');
-      aval.Insert(i + 1, 'N');
-      nn += 2;
-      continue;
-    }
-    if (uncar == '\t')
-    {
-      aval.SetValue(i, '\\');
-      aval.Insert(i + 1, '\\');
-      aval.Insert(i + 1, 'T');
-      nn += 2;
-      continue;
-    }
-  }
-  //: i2 abv 31 Aug 98: ProSTEP TR9: avoid wrapping text or do it at spaces
-  aval.Insert(1, '\'');
-  nn += 2;
+  // Add surrounding quotes
+  aVal.Insert(1, '\'');
+  aVal.AssignCat('\'');
+  aNn += 2;
 
-  //: i2 AddString ("\'",1); nn ++;
+  //: i2 abv 31 Aug 98: ProSTEP TR9: avoid wrapping text or do it at spaces
 
   // Attention au depassement des 72 caracteres
-  if (thecurr.CanGet(nn))
-    AddString(aval, 0);
+  if (thecurr.CanGet(aNn))
+    AddString(aVal, 0);
   //: i2
   else
   {
     thefile->Append(thecurr.Moved());
-    Standard_Integer indst = thelevel * 2;
+    Standard_Integer anIndst = thelevel * 2;
     if (theindent)
-      indst += theindval;
-    if (indst + nn <= StepLong)
-      thecurr.SetInitial(indst);
+      anIndst += theindval;
+    if (anIndst + aNn <= StepLong)
+      thecurr.SetInitial(anIndst);
     else
       thecurr.SetInitial(0);
-    if (thecurr.CanGet(nn))
-      AddString(aval, 0);
+    if (thecurr.CanGet(aNn))
+      AddString(aVal, 0);
     else
     {
-      while (nn > 0)
+      while (aNn > 0)
       {
-        if (nn <= StepLong)
+        if (aNn <= StepLong)
         {
-          thecurr.Add(aval); // Ca yet, on a tout epuise
+          thecurr.Add(aVal); // that's it, everything has been consumed
           thecurr.FreezeInitial();
           break;
         }
-        Standard_Integer stop = StepLong; // position of last separator
-        for (; stop > 0 && aval.Value(stop) != ' '; stop--)
+        Standard_Integer aStop = StepLong; // position of last separator
+        for (; aStop > 0 && aVal.Value(aStop) != ' '; aStop--)
           ;
-        if (!stop)
+        if (!aStop)
         {
-          stop = StepLong;
-          for (; stop > 0 && aval.Value(stop) != '\\'; stop--)
+          aStop = StepLong;
+          for (; aStop > 0 && aVal.Value(aStop) != '\\'; aStop--)
             ;
-          if (!stop)
+          if (!aStop)
           {
-            stop = StepLong;
-            for (; stop > 0 && aval.Value(stop) != '_'; stop--)
+            aStop = StepLong;
+            for (; aStop > 0 && aVal.Value(aStop) != '_'; aStop--)
               ;
-            if (!stop)
-              stop = StepLong;
+            if (!aStop)
+              aStop = StepLong;
           }
         }
-        TCollection_AsciiString bval = aval.Split(stop);
-        thefile->Append(new TCollection_HAsciiString(aval));
-        aval = bval;
-        nn -= stop;
+        TCollection_AsciiString aBval = aVal.Split(aStop);
+        thefile->Append(new TCollection_HAsciiString(aVal));
+        aVal = aBval;
+        aNn -= aStop;
       }
     }
   }
-  /* //:i2
-  else {
-    // Il faut tronconner ... lignes limitees a 72 caracteres (StepLong)
-    Standard_Integer ncurr = thecurr.Length();
-    Standard_Integer nbuff = StepLong - ncurr;
-    thecurr.Add (aval.ToCString(),nbuff);
-    thefile->Append(thecurr.Moved());
-    aval.Remove(1,nbuff);
-    nn -= nbuff;
-    while (nn > 0) {
-      if (nn <= StepLong) {
-        thecurr.Add (aval); // Ca yet, on a tout epuise
-        thecurr.FreezeInitial();
-        break;
-      }
-      TCollection_AsciiString bval = aval.Split(StepLong);
-      thefile->Append(new TCollection_HAsciiString(bval));
-      nn -= StepLong;
-    }
-  }
-  //:i2 */
-  // thecurr.Add('\''); deja mis dans aval au debut
 }
 
 //=================================================================================================
@@ -1214,3 +1159,111 @@ Standard_Boolean StepData_StepWriter::Print(Standard_OStream& S)
 
   return isGood;
 }
+
+//=================================================================================================
+
+TCollection_AsciiString StepData_StepWriter::CleanTextForSend(
+  const TCollection_AsciiString& theText)
+{
+  TCollection_AsciiString aResult;
+  const Standard_Integer aNb = theText.Length();
+
+  // Single forward pass; anI is the 1-based index of the next unprocessed character.
+  for (Standard_Integer anI = 1; anI <= aNb; anI++)
+  {
+    const char anUncar = theText.Value(anI);
+
+    // Length of the control directive starting at anI; 0 means "not a directive".
+    Standard_Integer aDirectiveLength = 0;
+
+    if (anUncar == '\\')
+    {
+      // \X2\, \X4\ and \X0\ markers (4 characters each).
+      if (anI + 3 <= aNb && theText.Value(anI + 1) == 'X' && theText.Value(anI + 3) == '\\')
+      {
+        const char aThirdChar = theText.Value(anI + 2);
+        if (aThirdChar == '2' || aThirdChar == '4' || aThirdChar == '0')
+        {
+          aDirectiveLength = 4;
+
+          if (aThirdChar != '0')
+          {
+            // Preserve the whole \X2\...\X0\ (or \X4\...\X0\) span only when the payload
+            // consists of hex digits and is immediately closed by \X0\. Otherwise only
+            // the 4-character marker is kept and the payload is escaped as ordinary
+            // text, so a stray quote or backslash cannot leak into the output unescaped.
+            Standard_Integer aJ = anI + 4;
+            while (aJ <= aNb && std::isxdigit(static_cast<unsigned char>(theText.Value(aJ))))
+            {
+              aJ++;
+            }
+            if (aJ + 3 <= aNb && theText.Value(aJ) == '\\' && theText.Value(aJ + 1) == 'X'
+                && theText.Value(aJ + 2) == '0' && theText.Value(aJ + 3) == '\\')
+            {
+              aDirectiveLength = (aJ + 4) - anI; // include the closing \X0\ marker
+            }
+          }
+        }
+      }
+      // \X{HH}\ : single byte given as two hex digits (5 characters).
+      else if (anI + 4 <= aNb && theText.Value(anI + 1) == 'X' && theText.Value(anI + 4) == '\\')
+      {
+        // The classification functions of <cctype> have undefined behavior for negative
+        // arguments, so the (possibly signed) char is converted to unsigned char first.
+        const unsigned char aThirdChar = static_cast<unsigned char>(theText.Value(anI + 2));
+        const unsigned char aFourthChar = static_cast<unsigned char>(theText.Value(anI + 3));
+        if (std::isxdigit(aThirdChar) && std::isxdigit(aFourthChar))
+        {
+          aDirectiveLength = 5;
+        }
+      }
+      // \S\, \N\ and \T\ (3 characters).
+      else if (anI + 2 <= aNb && theText.Value(anI + 2) == '\\')
+      {
+        const char aSecondChar = theText.Value(anI + 1);
+        if (aSecondChar == 'S' || aSecondChar == 'N' || aSecondChar == 'T')
+        {
+          aDirectiveLength = 3;
+        }
+      }
+      // \P{letter}\ : code page selection (4 characters).
+      else if (anI + 3 <= aNb && theText.Value(anI + 1) == 'P' && theText.Value(anI + 3) == '\\')
+      {
+        if (std::isalpha(static_cast<unsigned char>(theText.Value(anI + 2))))
+        {
+          aDirectiveLength = 4;
+        }
+      }
+    }
+
+    if (aDirectiveLength > 0)
+    {
+      // Copy the directive verbatim and continue right after it
+      // (the loop increment steps over its last character).
+      aResult += theText.SubString(anI, anI + aDirectiveLength - 1);
+      anI += aDirectiveLength - 1;
+      continue;
+    }
+
+    switch (anUncar)
+    {
+      case '\'':
+        aResult += "''"; // double the quote
+        break;
+      case '\\':
+        aResult += "\\\\"; // double the backslash
+        break;
+      case '\n':
+        aResult += "\\N\\"; // newline directive
+        break;
+      case '\t':
+        aResult += "\\T\\"; // tab directive
+        break;
+      default:
+        aResult += anUncar;
+        break;
+    }
+  }
+
+  return aResult;
+}
diff --git a/src/DataExchange/TKDESTEP/StepData/StepData_StepWriter.hxx b/src/DataExchange/TKDESTEP/StepData/StepData_StepWriter.hxx
index b5885085a4..8e68473836 100644
--- a/src/DataExchange/TKDESTEP/StepData/StepData_StepWriter.hxx
+++ b/src/DataExchange/TKDESTEP/StepData/StepData_StepWriter.hxx
@@ -267,6 +267,41 @@ public:
   //! then clears it
   Standard_EXPORT Standard_Boolean Print(Standard_OStream& S);
 
+  //! Static helper function to prepare text for STEP file output while preserving
+  //! existing ISO 10303-21 control directives.
+  //!
+  //! This function processes input text and escapes special characters (quotes, backslashes,
+  //! newlines, tabs) for STEP file format compliance, while carefully preserving any existing
+  //! control directives that may already be present in the input string.
+  //!
+  //! Supported control directive patterns that are preserved:
+  //! - \X{HH}\ : Single byte character encoding (U+0000 to U+00FF)
+  //! - \X2\{HHHH}...\X0\ : UTF-16 character encoding
+  //! - \X4\{HHHHHHHH}...\X0\ : UTF-32 character encoding
+  //! - \S\ : Latin codepoint character with current code page
+  //! - \P{A-I}\ : Code page control directive (any alphabetic character is accepted)
+  //! - \N\ : Newline directive (preserved as-is)
+  //! - \T\ : Tab directive (preserved as-is)
+  //!
+  //! A \X2\ or \X4\ span is preserved as a whole only if its payload consists of hexadecimal
+  //! digits and is closed by \X0\; otherwise only the opening marker is preserved and the
+  //! remaining characters are escaped as ordinary text.
+  //!
+  //! Character escaping performed (only on non-directive content):
+  //! - Single quote (') -> doubled single quote ('')
+  //! - Backslash (\) -> double backslash (\\)
+  //! - Newline character -> \N\ directive
+  //! - Tab character -> \T\ directive
+  //!
+  //! Example:
+  //! Input: "text with \XA7\ and 'quotes'"
+  //! Output: "text with \XA7\ and ''quotes''"
+  //!
+  //! @param theText The input text string to be processed
+  //! @return Processed text with preserved control directives and escaped special characters
+  Standard_EXPORT static TCollection_AsciiString CleanTextForSend(
+    const TCollection_AsciiString& theText);
+
 protected:
 private:
   //! adds a string to current line; first flushes it if full