diff --git a/samples/xlsx/TestIssue915.xlsx b/samples/xlsx/TestIssue915.xlsx new file mode 100644 index 00000000..476fd0ef Binary files /dev/null and b/samples/xlsx/TestIssue915.xlsx differ diff --git a/src/MiniExcel/Csv/CsvReader.cs b/src/MiniExcel/Csv/CsvReader.cs index 39562333..36ce18e5 100644 --- a/src/MiniExcel/Csv/CsvReader.cs +++ b/src/MiniExcel/Csv/CsvReader.cs @@ -37,6 +37,9 @@ public IEnumerable> Query(bool useHeaderRow, string string row; for (var rowIndex = 1; (row = reader.ReadLine()) != null; rowIndex++) { + if (string.IsNullOrWhiteSpace(row)) + continue; + string finalRow = row; if (_config.ReadLineBreaksWithinQuotes) { diff --git a/src/MiniExcel/SaveByTemplate/ExcelOpenXmlTemplate.Impl.cs b/src/MiniExcel/SaveByTemplate/ExcelOpenXmlTemplate.Impl.cs index 7e87a156..dc6da1d7 100644 --- a/src/MiniExcel/SaveByTemplate/ExcelOpenXmlTemplate.Impl.cs +++ b/src/MiniExcel/SaveByTemplate/ExcelOpenXmlTemplate.Impl.cs @@ -140,10 +140,13 @@ internal partial class ExcelOpenXmlTemplate private static readonly Regex _templateRegex = TemplateRegex(); [GeneratedRegex(@".*?\{\{.*?\}\}.*?")] private static partial Regex NonTemplateRegex(); private static readonly Regex _nonTemplateRegex = TemplateRegex(); + [GeneratedRegex(@"<(?:x:)?v>\s*")] private static partial Regex EmptyVTagRegexImpl(); + private static readonly Regex _emptyVTagRegex = EmptyVTagRegexImpl(); #else private static readonly Regex _cellRegex = new Regex("([A-Z]+)([0-9]+)", RegexOptions.Compiled); private static readonly Regex _templateRegex = new Regex(@"\{\{(.*?)\}\}", RegexOptions.Compiled); private static readonly Regex _nonTemplateRegex = new Regex(@".*?\{\{.*?\}\}.*?", RegexOptions.Compiled); + private static readonly Regex _emptyVTagRegex = new Regex(@"<(?:x:)?v>\s*", RegexOptions.Compiled); #endif private void GenerateSheetXmlImplByUpdateMode(ZipArchiveEntry sheetZipEntry, Stream stream, Stream sheetStream, IDictionary inputMaps, IDictionary sharedStrings, bool mergeCells = false) @@ -324,6 +327,15 @@ private void WriteSheetXml(Stream outputFileStream, XmlDocument doc, XmlNode she phoneticPr.ParentNode.RemoveChild(phoneticPr); } + // Extract autoFilter - must be written before mergeCells and phoneticPr per ECMA-376 + var autoFilter = doc.SelectSingleNode("/x:worksheet/x:autoFilter", _ns); + var autoFilterXml = string.Empty; + if (autoFilter != null) + { + autoFilterXml = autoFilter.OuterXml; + autoFilter.ParentNode.RemoveChild(autoFilter); + } + var contents = doc.InnerXml.Split(new[] { $"<{prefix}sheetData>{{{{{{{{{{{{split}}}}}}}}}}}}" }, StringSplitOptions.None); using (var writer = new StreamWriter(outputFileStream, Encoding.UTF8)) @@ -514,6 +526,15 @@ private void WriteSheetXml(Stream outputFileStream, XmlDocument doc, XmlNode she writer.Write($""); + // ECMA-376 element order: sheetData → autoFilter → mergeCells → phoneticPr → conditionalFormatting + + // 1. autoFilter (must come before mergeCells) + if (!string.IsNullOrEmpty(autoFilterXml)) + { + writer.Write(CleanXml(autoFilterXml, endPrefix)); + } + + // 2. mergeCells if (_newXMergeCellInfos.Count != 0) { writer.Write($"<{prefix}mergeCells count=\"{_newXMergeCellInfos.Count}\">"); @@ -524,14 +545,16 @@ private void WriteSheetXml(Stream outputFileStream, XmlDocument doc, XmlNode she writer.Write($""); } + // 3. PhoneticPr if (!string.IsNullOrEmpty(phoneticPrXml)) { - writer.Write(phoneticPrXml); + writer.Write(CleanXml(phoneticPrXml, endPrefix)); } + // 4. conditionalFormatting if (newConditionalFormatRanges.Count != 0) { - writer.Write(string.Join(string.Empty, newConditionalFormatRanges.Select(cf => cf.Node.OuterXml))); + writer.Write(CleanXml(string.Join(string.Empty, newConditionalFormatRanges.Select(cf => cf.Node.OuterXml)), endPrefix)); } writer.Write(contents[1]); @@ -548,12 +571,23 @@ private void GenerateCellValues(string endPrefix, StreamWriter writer, ref int r var cleanOuterXmlOpen = CleanXml(outerXmlOpen, endPrefix); // https://github.com/mini-software/MiniExcel/issues/771 Saving by template introduces unintended value replication in each row #771 - var notFirstRowElement = rowElement.Clone(); + var notFirstRowElement = rowElement.Clone(); foreach (XmlElement c in notFirstRowElement.SelectNodes("x:c", _ns)) { - var v = c.SelectSingleNode("x:v", _ns); - if (v != null && !_nonTemplateRegex.IsMatch(v.InnerText)) - v.InnerText = string.Empty; + // Try first (for t="n"/t="b" cells), then (for t="inlineStr" cells) + var vTag = c.SelectSingleNode("x:v", _ns); + if (vTag != null) + { + if (!_nonTemplateRegex.IsMatch(vTag.InnerText)) + vTag.InnerText = string.Empty; + } + else + { + // Handle inline string cells + var t = c.SelectSingleNode("x:is/x:t", _ns); + if (t != null && !_nonTemplateRegex.IsMatch(t.InnerText)) + t.InnerText = string.Empty; + } } foreach (var item in rowInfo.CellIEnumerableValues) @@ -694,7 +728,7 @@ private void GenerateCellValues(string endPrefix, StreamWriter writer, ref int r else { cellValueStr = ExcelOpenXmlUtils.EncodeXML(cellValue?.ToString()); - if (!isDictOrTable && TypeHelper.IsNumericType(type)) + if (TypeHelper.IsNumericType(type)) { if (decimal.TryParse(cellValueStr, out var decimalValue)) cellValueStr = decimalValue.ToString(CultureInfo.InvariantCulture); @@ -712,6 +746,9 @@ private void GenerateCellValues(string endPrefix, StreamWriter writer, ref int r substXmlRow = rowXml.ToString(); substXmlRow = _templateRegex.Replace(substXmlRow, MatchDelegate); + + // Cleanup empty tags which defaults to invalid XML + substXmlRow = _emptyVTagRegex.Replace(substXmlRow, ""); } rowXml.Clear(); @@ -744,9 +781,14 @@ private void GenerateCellValues(string endPrefix, StreamWriter writer, ref int r var mergeBaseRowIndex = newRowIndex; newRowIndex += rowInfo.IEnumerableMercell?.Height ?? 1; + // Replace {{$rowindex}} in the already-built substXmlRow + rowXml.Replace("{{$rowindex}}", mergeBaseRowIndex.ToString()); + // replace formulas ProcessFormulas(rowXml, newRowIndex); - writer.Write(CleanXml(rowXml, endPrefix)); + + var finalXml = CleanXml(rowXml, endPrefix).ToString(); + writer.Write(finalXml); //mergecells if (rowInfo.RowMercells == null) @@ -936,11 +978,11 @@ private void ProcessFormulas(StringBuilder rowXml, int rowIndex) continue; /* Target: - - SUM(C2:C7) - + + SUM(C2:C7) + */ - var vs = c.SelectNodes("x:v", _ns); + var vs = c.SelectNodes("x:is", _ns); foreach (XmlElement v in vs) { if (!v.InnerText.StartsWith("$=")) @@ -975,7 +1017,8 @@ private static string ConvertToDateTimeString(PropertyInfo propInfo, object cell private static string CleanXml(string xml, string endPrefix) => CleanXml(new StringBuilder(xml), endPrefix).ToString(); private static StringBuilder CleanXml(StringBuilder xml, string endPrefix) => xml .Replace("xmlns:x14ac=\"http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac\"", "") - .Replace($"xmlns{endPrefix}=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"", ""); + .Replace($"xmlns{endPrefix}=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"", "") + .Replace("xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\"", ""); private static void ReplaceSharedStringsToStr(IDictionary sharedStrings, XmlNodeList rows) { @@ -996,14 +1039,100 @@ private static void ReplaceSharedStringsToStr(IDictionary sharedStr if (sharedStrings == null || !sharedStrings.TryGetValue(int.Parse(v.InnerText), out var shared)) continue; - // change type = str and replace its value - //TODO: remove sharedstring? - v.InnerText = shared; - c.SetAttribute("t", "str"); - } + // change type = inlineStr and replace its value + // Use the same prefix as the source element to handle namespaced documents (e.g., x:v -> x:is, x:t) + var prefix = v.Prefix; + c.RemoveChild(v); + + var isNode = string.IsNullOrEmpty(prefix) + ? c.OwnerDocument.CreateElement("is", Config.SpreadsheetmlXmlns) + : c.OwnerDocument.CreateElement(prefix, "is", Config.SpreadsheetmlXmlns); + + var tNode = string.IsNullOrEmpty(prefix) + ? c.OwnerDocument.CreateElement("t", Config.SpreadsheetmlXmlns) + : c.OwnerDocument.CreateElement(prefix, "t", Config.SpreadsheetmlXmlns); + + tNode.InnerText = shared; + isNode.AppendChild(tNode); + c.AppendChild(isNode); + + c.RemoveAttribute("t"); + c.SetAttribute("t", "inlineStr"); } } } + private static void SetCellType(XmlElement c, string type) + { + if (type == "str") type = "inlineStr"; // Force inlineStr for strings + + // Determine the prefix used in this document (e.g., "x" for x:c, x:v, etc.) + var prefix = c.Prefix; + + if (type == "inlineStr") + { + // Ensure ... + c.SetAttribute("t", "inlineStr"); + var v = c.SelectSingleNode("x:v", _ns); + + if (v != null) + { + var text = v.InnerText; + c.RemoveChild(v); + + var isNode = string.IsNullOrEmpty(prefix) + ? c.OwnerDocument.CreateElement("is", Config.SpreadsheetmlXmlns) + : c.OwnerDocument.CreateElement(prefix, "is", Config.SpreadsheetmlXmlns); + + var tNode = string.IsNullOrEmpty(prefix) + ? c.OwnerDocument.CreateElement("t", Config.SpreadsheetmlXmlns) + : c.OwnerDocument.CreateElement(prefix, "t", Config.SpreadsheetmlXmlns); + + tNode.InnerText = text; + isNode.AppendChild(tNode); + c.AppendChild(isNode); + } + else if (c.SelectSingleNode("x:is", _ns) == null) + { + // Create empty if neither nor exists + var isNode = string.IsNullOrEmpty(prefix) + ? c.OwnerDocument.CreateElement("is", Config.SpreadsheetmlXmlns) + : c.OwnerDocument.CreateElement(prefix, "is", Config.SpreadsheetmlXmlns); + + var tNode = string.IsNullOrEmpty(prefix) + ? c.OwnerDocument.CreateElement("t", Config.SpreadsheetmlXmlns) + : c.OwnerDocument.CreateElement(prefix, "t", Config.SpreadsheetmlXmlns); + + isNode.AppendChild(tNode); + c.AppendChild(isNode); + } + } + else + { + // Ensure ... + // For numbers/booleans, we remove 't' attribute to let it be default (number) + // or we could set it to 'n' explicitly, but removing is safer for general number types + if (type == "b") + c.SetAttribute("t", "b"); + else + c.RemoveAttribute("t"); + + var isNode = c.SelectSingleNode("x:is", _ns); + if (isNode != null) + { + var tNode = isNode.SelectSingleNode("x:t", _ns); + var text = tNode?.InnerText ?? string.Empty; + c.RemoveChild(isNode); + + var v = string.IsNullOrEmpty(prefix) + ? c.OwnerDocument.CreateElement("v", Config.SpreadsheetmlXmlns) + : c.OwnerDocument.CreateElement(prefix, "v", Config.SpreadsheetmlXmlns); + + v.InnerText = text; + c.AppendChild(v); + } + } + } + private void UpdateDimensionAndGetRowsInfo(IDictionary inputMaps, XmlDocument doc, XmlNodeList rows, bool changeRowIndex = true) { string[] refs; @@ -1053,7 +1182,7 @@ private void UpdateDimensionAndGetRowsInfo(IDictionary inputMaps c.SetAttribute("r", $"{StringHelper.GetLetters(r)}{{{{$rowindex}}}}"); } - var v = c.SelectSingleNode("x:v", _ns); + var v = c.SelectSingleNode("x:v", _ns) ?? c.SelectSingleNode("x:is/x:t", _ns); if (v?.InnerText == null) continue; @@ -1176,19 +1305,19 @@ private void UpdateDimensionAndGetRowsInfo(IDictionary inputMaps if (isMultiMatch) { - c.SetAttribute("t", "str"); + SetCellType(c, "str"); } else if (TypeHelper.IsNumericType(type) && !type.IsEnum) { - c.SetAttribute("t", "n"); + SetCellType(c, "n"); } else if (Type.GetTypeCode(type) == TypeCode.Boolean) { - c.SetAttribute("t", "b"); + SetCellType(c, "b"); } else if (Type.GetTypeCode(type) == TypeCode.DateTime) { - c.SetAttribute("t", "str"); + SetCellType(c, "str"); } break; @@ -1228,19 +1357,19 @@ private void UpdateDimensionAndGetRowsInfo(IDictionary inputMaps if (isMultiMatch) { - c.SetAttribute("t", "str"); + SetCellType(c, "str"); } else if (TypeHelper.IsNumericType(type) && !type.IsEnum) { - c.SetAttribute("t", "n"); + SetCellType(c, "n"); } else if (Type.GetTypeCode(type) == TypeCode.Boolean) { - c.SetAttribute("t", "b"); + SetCellType(c, "b"); } else if (Type.GetTypeCode(type) == TypeCode.DateTime) { - c.SetAttribute("t", "str"); + SetCellType(c, "str"); } } else @@ -1248,16 +1377,16 @@ private void UpdateDimensionAndGetRowsInfo(IDictionary inputMaps var cellValueStr = cellValue?.ToString(); // value did encodexml, so don't duplicate encode value (https://gitee.com/dotnetchina/MiniExcel/issues/I4DQUN) if (isMultiMatch || cellValue is string) // if matchs count over 1 need to set type=str (https://user-images.githubusercontent.com/12729184/114530109-39d46d00-9c7d-11eb-8f6b-52ad8600aca3.png) { - c.SetAttribute("t", "str"); + SetCellType(c, "str"); } else if (decimal.TryParse(cellValueStr, out var outV)) { - c.SetAttribute("t", "n"); + SetCellType(c, "n"); cellValueStr = outV.ToString(CultureInfo.InvariantCulture); } else if (cellValue is bool b) { - c.SetAttribute("t", "b"); + SetCellType(c, "b"); cellValueStr = b ? "1" : "0"; } else if (cellValue is DateTime timestamp) @@ -1266,6 +1395,13 @@ private void UpdateDimensionAndGetRowsInfo(IDictionary inputMaps cellValueStr = timestamp.ToString("yyyy-MM-dd HH:mm:ss"); } + if (string.IsNullOrEmpty(cellValueStr) && string.IsNullOrEmpty(c.GetAttribute("t"))) + { + SetCellType(c, "str"); + } + + // Re-acquire v after SetCellType may have changed DOM structure + v = c.SelectSingleNode("x:v", _ns) ?? c.SelectSingleNode("x:is/x:t", _ns); v.InnerText = v.InnerText.Replace($"{{{{{propNames[0]}}}}}", cellValueStr); //TODO: auto check type and set value } } diff --git a/tests/MiniExcelTests/MiniExcelIssueTests.cs b/tests/MiniExcelTests/MiniExcelIssueTests.cs index 0cf4a112..fe5a0922 100644 --- a/tests/MiniExcelTests/MiniExcelIssueTests.cs +++ b/tests/MiniExcelTests/MiniExcelIssueTests.cs @@ -1416,9 +1416,8 @@ public void TestIssueI4DQUN() MiniExcel.SaveAsByTemplate(path.ToString(), templatePath, value); var sheetXml = Helpers.GetZipFileContent(path.ToString(), "xl/worksheets/sheet1.xml"); - Assert.Contains("Hello & World < , > , \" , '", sheetXml); - Assert.Contains("Hello & Value < , > , \" , '", sheetXml); - } + Assert.Contains("Hello & World < , > , \" , '", sheetXml); + Assert.Contains("Hello & Value < , > , \" , '", sheetXml); } /// /// [SaveAs default theme support filter mode · Issue #190 · mini-software/MiniExcel](https://github.com/mini-software/MiniExcel/issues/190) @@ -4762,4 +4761,57 @@ public void TestIssue888_ShouldIgnoreFrame() Assert.Equal(dataInSheet, dataRead); } -} \ No newline at end of file + + class NameAgeTuple + { + public string? Name { get; set; } + public int Age { get; set; } + } + + [Fact] + public void Issue914() + { + var csv = + """ + Name,Age + Jack,22 + + + Sam,33 + + Henry,44 + + """u8; + + using var ms = new MemoryStream([..csv]); + var result = ms.Query(excelType: ExcelType.CSV).ToList(); + + Assert.Equal(3, result.Count); + Assert.Equal("Sam", result[1].Name); + Assert.Equal(44, result[2].Age); + } + + [Fact] + public void TestIssue915() + { + var templatePath = PathHelper.GetFile("xlsx/TestIssue915.xlsx"); + var value = new Dictionary + { + ["Data"] = new[] + { + new { Name = "Hill", Altitude = 6m }, + new { Name = "Mount", Altitude = 7.4m }, + new { Name = "Peak", Altitude = 8.6m } + } + }; + + using var path = AutoDeletingPath.Create(); + MiniExcel.SaveAsByTemplate(path.ToString(), templatePath, value); + + var result = MiniExcel.Query(path.ToString(), true).ToList(); + + Assert.Equal(6, result[0].Altitude); + Assert.Equal(7.4, result[1].Altitude); + Assert.Equal(8.6, result[2].Altitude); + } +}