diff --git a/ClosedXML/ClosedXML.csproj b/ClosedXML/ClosedXML.csproj index 12bf76a..1136f31 100644 --- a/ClosedXML/ClosedXML.csproj +++ b/ClosedXML/ClosedXML.csproj @@ -70,6 +70,7 @@ + diff --git a/ClosedXML/Excel/XLWorkbook_Load.cs b/ClosedXML/Excel/XLWorkbook_Load.cs index d3b05dc..6db27f6 100644 --- a/ClosedXML/Excel/XLWorkbook_Load.cs +++ b/ClosedXML/Excel/XLWorkbook_Load.cs @@ -5,6 +5,7 @@ using System.Globalization; using System.IO; using System.Linq; +using ClosedXML.Utils; using DocumentFormat.OpenXml; using DocumentFormat.OpenXml.Packaging; using DocumentFormat.OpenXml.Spreadsheet; @@ -913,7 +914,7 @@ } if(!hasRuns) - xlCell._cellValue = sharedString.Text.InnerText; + xlCell._cellValue = XmlEncoder.DecodeString(sharedString.Text.InnerText); #region Load PhoneticProperties diff --git a/ClosedXML/Excel/XLWorkbook_Save.cs b/ClosedXML/Excel/XLWorkbook_Save.cs index 998c2ef..172a843 100644 --- a/ClosedXML/Excel/XLWorkbook_Save.cs +++ b/ClosedXML/Excel/XLWorkbook_Save.cs @@ -36,6 +36,7 @@ using System.Xml; using System.Xml.Linq; using System.Text; +using ClosedXML.Utils; using Anchor = DocumentFormat.OpenXml.Vml.Spreadsheet.Anchor; using Field = DocumentFormat.OpenXml.Spreadsheet.Field; using Run = DocumentFormat.OpenXml.Spreadsheet.Run; @@ -823,7 +824,7 @@ EndingBaseIndex = (UInt32)p.End }; - var text = new Text {Text = p.Text}; + var text = new Text { Text = p.Text }; if (p.Text.PreserveSpaces()) text.Space = SpaceProcessingModeValues.Preserve; @@ -867,7 +868,7 @@ { var s = c.Value.ToString(); var sharedStringItem = new SharedStringItem(); - var text = new Text {Text = s}; + var text = new Text {Text = XmlEncoder.EncodeString(s)}; if (!s.Trim().Equals(s)) text.Space = SpaceProcessingModeValues.Preserve; sharedStringItem.Append(text); diff --git a/ClosedXML/Utils/XmlEncoder.cs b/ClosedXML/Utils/XmlEncoder.cs new file mode 100644 index 0000000..5c42ed9 --- /dev/null +++ b/ClosedXML/Utils/XmlEncoder.cs @@ -0,0 +1,47 @@ +using System.Text; +using System.Xml; + +namespace ClosedXML.Utils +{ + public static class XmlEncoder + { + /// + /// Checks if a character is not allowed to the XML Spec http://www.w3.org/TR/REC-xml/#charsets + /// + /// Input Character + /// Returns false if the character is invalid according to the XML specification, and will not be + /// escaped by an XmlWriter. + public static bool IsXmlChar(char ch) + { + return (((ch >= 0x0020 && ch <= 0xD7FF) || + (ch >= 0xE000 && ch <= 0xFFFD) || + ch == 0x0009 || ch == 0x000A || + ch == 0x000D)); + } + + public static string EncodeString(string encodeStr) + { + if (encodeStr == null) return null; + + var newString = new StringBuilder(); + + foreach (var ch in encodeStr) + { + if (IsXmlChar(ch)) //this method is new in .NET 4 + { + newString.Append(ch); + } + else + { + newString.Append(XmlConvert.EncodeName(ch.ToString())); + } + } + return newString.ToString(); + } + + public static string DecodeString(string decodeStr) + { + return XmlConvert.DecodeName(decodeStr); + } + } +} diff --git a/ClosedXML_Net3.5/ClosedXML_Net3.5.csproj b/ClosedXML_Net3.5/ClosedXML_Net3.5.csproj index 8040570..dc7a265 100644 --- a/ClosedXML_Net3.5/ClosedXML_Net3.5.csproj +++ b/ClosedXML_Net3.5/ClosedXML_Net3.5.csproj @@ -817,6 +817,9 @@ Utils\GraphicsUtils.cs + + Utils\XmlEncoder.cs + XLHelper.cs diff --git a/ClosedXML_Tests/ClosedXML_Tests.csproj b/ClosedXML_Tests/ClosedXML_Tests.csproj index 5ea0151..022bf3d 100644 --- a/ClosedXML_Tests/ClosedXML_Tests.csproj +++ b/ClosedXML_Tests/ClosedXML_Tests.csproj @@ -74,6 +74,7 @@ + diff --git a/ClosedXML_Tests/Excel/Cells/XLCellTests.cs b/ClosedXML_Tests/Excel/Cells/XLCellTests.cs index 1415703..750af3a 100644 --- a/ClosedXML_Tests/Excel/Cells/XLCellTests.cs +++ b/ClosedXML_Tests/Excel/Cells/XLCellTests.cs @@ -1,10 +1,11 @@ -using System; +using ClosedXML.Excel; +using NUnit.Framework; +using System; using System.Collections.Generic; using System.Globalization; +using System.IO; using System.Linq; using System.Threading; -using ClosedXML.Excel; -using NUnit.Framework; namespace ClosedXML_Tests { @@ -56,7 +57,7 @@ { IXLWorksheet ws = new XLWorkbook().Worksheets.Add("Sheet1"); IXLCell cell = ws.Cell("A1"); - var doubleList = new List {1.0/0.0}; + var doubleList = new List { 1.0 / 0.0 }; cell.Value = doubleList.AsEnumerable(); Assert.AreNotEqual(XLCellValues.Number, cell.DataType); @@ -67,7 +68,7 @@ { IXLWorksheet ws = new XLWorkbook().Worksheets.Add("Sheet1"); IXLCell cell = ws.Cell("A1"); - var doubleList = new List {0.0/0.0}; + var doubleList = new List { 0.0 / 0.0 }; cell.Value = doubleList.AsEnumerable(); Assert.AreNotEqual(XLCellValues.Number, cell.DataType); @@ -77,7 +78,7 @@ public void InsertData1() { IXLWorksheet ws = new XLWorkbook().Worksheets.Add("Sheet1"); - IXLRange range = ws.Cell(2, 2).InsertData(new[] {"a", "b", "c"}); + IXLRange range = ws.Cell(2, 2).InsertData(new[] { "a", "b", "c" }); Assert.AreEqual("'Sheet1'!B2:B4", range.ToString()); } @@ -345,5 +346,25 @@ var actual = (DateTime)cell.Value; Assert.AreEqual(expected, actual); } + + [Test] + public void TestInvalidXmlCharacters() + { + byte[] data; + + using (var stream = new MemoryStream()) + { + var wb = new XLWorkbook(); + wb.AddWorksheet("Sheet1").FirstCell().SetValue("\u0018"); + wb.SaveAs(stream); + data = stream.ToArray(); + } + + using (var stream = new MemoryStream(data)) + { + var wb = new XLWorkbook(stream); + Assert.AreEqual("\u0018", wb.Worksheets.First().FirstCell().Value); + } + } } -} \ No newline at end of file +} diff --git a/ClosedXML_Tests/Excel/Misc/XmlEncoderTests.cs b/ClosedXML_Tests/Excel/Misc/XmlEncoderTests.cs new file mode 100644 index 0000000..d6bac0a --- /dev/null +++ b/ClosedXML_Tests/Excel/Misc/XmlEncoderTests.cs @@ -0,0 +1,33 @@ +using ClosedXML.Utils; +using NUnit.Framework; + +namespace ClosedXML_Tests.Excel +{ + [TestFixture] + public class XmlEncoderTest + { + [Test] + public void TestControlChars() + { + Assert.AreEqual("_x0001_ _x0002_ _x0003_ _x0004_", XmlEncoder.EncodeString("\u0001 \u0002 \u0003 \u0004")); + Assert.AreEqual("_x0005_ _x0006_ _x0007_ _x0008_", XmlEncoder.EncodeString("\u0005 \u0006 \u0007 \u0008")); + Assert.AreEqual("\u0001 \u0002 \u0003 \u0004", XmlEncoder.DecodeString("_x0001_ _x0002_ _x0003_ _x0004_")); + Assert.AreEqual("\u0005 \u0006 \u0007 \u0008", XmlEncoder.DecodeString("_x0005_ _x0006_ _x0007_ _x0008_")); + } + + [Test] + public void TestIsXmlChar() + { + Assert.AreEqual(false, XmlEncoder.IsXmlChar('\u0001')); + Assert.AreEqual(false, XmlEncoder.IsXmlChar('\u0005')); + Assert.AreEqual(false, XmlEncoder.IsXmlChar('\u0007')); + Assert.AreEqual(false, XmlEncoder.IsXmlChar('\u0008')); + Assert.AreEqual(true, XmlEncoder.IsXmlChar('J')); + Assert.AreEqual(true, XmlEncoder.IsXmlChar('+')); + Assert.AreEqual(true, XmlEncoder.IsXmlChar('S')); + Assert.AreEqual(true, XmlEncoder.IsXmlChar('4')); + Assert.AreEqual(true, XmlEncoder.IsXmlChar('!')); + Assert.AreEqual(true, XmlEncoder.IsXmlChar('$')); + } + } +}