Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

DocumentAssembler, OpenXmlRegex, UnicodeMapper bugfixes #46

Open
wants to merge 12 commits into
base: vNext
Choose a base branch
from
63 changes: 63 additions & 0 deletions OpenXmlPowerTools.Tests/DocumentAssemblerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,24 @@ public void DA259(string name, string data, bool err)
Assert.Equal(4, brCount);
}

[Fact]
public void DA240()
{
    // Assembles the whitespace template, then verifies that inserted content which
    // begins or ends with white space keeps that white space in the first paragraph.
    const string templateName = "DA240-Whitespace.docx";
    const string expectedText = "Content may or may not have spaces: he/she; he, she; he and she.";

    DA101(templateName, "DA240-Whitespace.xml", false);

    string assembledPath = Path.Combine(
        TestUtil.TempDir.FullName,
        templateName.Replace(".docx", "-processed-by-DocumentAssembler.docx"));
    var assembledDocx = new FileInfo(assembledPath);
    var afterAssembling = new WmlDocument(assembledDocx.FullName);

    // First check: XElement.Value is the direct concatenation of every W.t element.
    // It is fast, but it does not apply xml:space="preserve" handling the way Word
    // does when rendering, so on its own it is not a faithful check.
    var bodyForRawText = afterAssembling.MainDocumentPart.Element(W.body);
    string concatenatedText = bodyForRawText.Elements(W.p).First().Value;
    Assert.Equal(expectedText, concatenatedText);

    // Second check: go through UnicodeMapper.RunToString (via InnerText), which has
    // been enhanced to take xml:space="preserve" into account.
    var bodyForRenderedText = afterAssembling.MainDocumentPart.Element(W.body);
    string renderedText = InnerText(bodyForRenderedText.Elements(W.p).First());
    Assert.Equal(expectedText, renderedText);
}

[Theory]
[InlineData("DA024-TrackedRevisions.docx", "DA-Data.xml")]
public void DA102_Throws(string name, string data)
Expand All @@ -190,6 +208,42 @@ public void DA102_Throws(string name, string data)
});
}

[Fact]
public void DATemplateMaior()
{
    // Regression test: this template used to make OpenXmlRegex misbehave when replacing
    // fields in paragraphs that contained lastRenderedPageBreak XML elements. Fixes to
    // UnicodeMapper and OpenXmlRegex addressed it.
    const string templateName = "DA-TemplateMaior.docx";
    DA101(templateName, "DA-templateMaior.xml", false);

    string assembledPath = Path.Combine(
        TestUtil.TempDir.FullName,
        templateName.Replace(".docx", "-processed-by-DocumentAssembler.docx"));
    var assembledDocx = new FileInfo(assembledPath);
    var afterAssembling = new WmlDocument(assembledDocx.FullName);

    // The concatenated text of the main document part must not contain a '>' character.
    string documentText = afterAssembling.MainDocumentPart.Value;
    bool containsClosingBracket = documentText.Contains(">");

    Assert.False(containsClosingBracket, "Found > on text");
}

[Fact]
public void DAXmlError()
{
    // Regression test: prior to the bug fixes, assembling this template threw
    //   System.ArgumentException: hexadecimal value 0x01 is an invalid character.
    // The test passes as long as assembly and saving complete without throwing.
    const string templateName = "DA-xmlerror.docx";
    const string dataName = "DA-xmlerror.xml";

    var testFilesDir = new DirectoryInfo("../../../../TestFiles/");
    var templateFile = new FileInfo(Path.Combine(testFilesDir.FullName, templateName));
    var xmlFile = new FileInfo(Path.Combine(testFilesDir.FullName, dataName));

    var template = new WmlDocument(templateFile.FullName);
    var data = XElement.Load(xmlFile.FullName);

    // The template-error flag is received but not checked by this test.
    var assembled = DocumentAssembler.AssembleDocument(template, data, out var templateError);

    string outputName = templateFile.Name.Replace(".docx", "-processed-by-DocumentAssembler.docx");
    var outputFile = new FileInfo(Path.Combine(TestUtil.TempDir.FullName, outputName));
    assembled.SaveAs(outputFile.FullName);
}

[Theory]
[InlineData("DA025-TemplateDocument.docx", "DA-Data.xml", false)]
public void DA103_UseXmlDocument(string name, string data, bool err)
Expand Down Expand Up @@ -221,6 +275,14 @@ public void DA103_UseXmlDocument(string name, string data, bool err)
Assert.Equal(err, returnedTemplateError);
}

// Builds the text of a container by converting each descendant run (W.r) with
// UnicodeMapper.RunToString and concatenating the results. Runs whose direct
// parent is a W.del element are left out.
private static string InnerText(XContainer e)
{
    var runTexts =
        from run in e.Descendants(W.r)
        where run.Parent.Name != W.del
        select UnicodeMapper.RunToString(run);

    return runTexts.StringConcatenate();
}

private static List<string> s_ExpectedErrors = new List<string>()
{
"The 'http://schemas.openxmlformats.org/wordprocessingml/2006/main:evenHBand' attribute is not declared.",
Expand All @@ -237,6 +299,7 @@ public void DA103_UseXmlDocument(string name, string data, bool err)
"The 'http://schemas.openxmlformats.org/wordprocessingml/2006/main:noVBand' attribute is not declared.",
"The 'http://schemas.openxmlformats.org/wordprocessingml/2006/main:oddHBand' attribute is not declared.",
"The 'http://schemas.openxmlformats.org/wordprocessingml/2006/main:oddVBand' attribute is not declared.",
"The attribute 'http://schemas.openxmlformats.org/wordprocessingml/2006/main:name' has invalid value 'useWord2013TrackBottomHyphenation'. The Enumeration constraint failed.",
};
}
}
Expand Down
43 changes: 43 additions & 0 deletions OpenXmlPowerTools.Tests/OpenXmlRegexTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,21 @@ public class OpenXmlRegexTests
</w:body>
</w:document>";

// Minimal WordprocessingML document with a single paragraph made of two runs.
// The second run starts with a w:lastRenderedPageBreak element, and the run
// boundary falls inside the word "LineBreaks" ("...NaturalLi" + "neBreaks..."),
// so a match on "LineBreak" has to span both runs.
private const string LastRenderedPageBreakXmlString =
@"<w:document xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:body>
<w:p>
<w:r>
<w:t>ThisIsAParagraphContainingNoNaturalLi</w:t>
</w:r>
<w:r>
<w:lastRenderedPageBreak/>
<w:t>neBreaksSoTheLineBreakIsForced.</w:t>
</w:r>
</w:p>
</w:body>
</w:document>";

private static string InnerText(XContainer e)
{
return e.Descendants(W.r)
Expand Down Expand Up @@ -380,6 +395,34 @@ public void CanReplaceTextWithFields()
Assert.Equal("As stated in Article {__1} and this Section {__1.1}, this is described in Exhibit 4.", innerText);
}
}

[Fact]
public void CanMatchDespiteLastRenderedPageBreaks()
{
    // Sanity check: before any replacement, the paragraph text reads as one
    // continuous string even though it is split across two runs.
    XDocument partDocument = XDocument.Parse(LastRenderedPageBreakXmlString);
    XElement paragraphBefore = partDocument.Descendants(W.p).Last();
    string textBefore = InnerText(paragraphBefore);

    Assert.Equal("ThisIsAParagraphContainingNoNaturalLineBreaksSoTheLineBreakIsForced.", textBefore);

    using (var stream = new MemoryStream())
    {
        using (WordprocessingDocument wordDocument = WordprocessingDocument.Create(stream, DocumentType))
        {
            MainDocumentPart mainPart = wordDocument.AddMainDocumentPart();
            mainPart.PutXDocument(partDocument);

            // Replace every "LineBreak" with "LB"; the first occurrence straddles
            // the run that begins with w:lastRenderedPageBreak.
            IEnumerable<XElement> paragraphs = partDocument.Descendants(W.p);
            var pattern = new Regex("LineBreak");
            int replacements = OpenXmlRegex.Replace(paragraphs, pattern, "LB", null);

            XElement paragraphAfter = partDocument.Descendants(W.p).Last();
            string textAfter = InnerText(paragraphAfter);

            Assert.Equal(2, replacements);
            Assert.Equal("ThisIsAParagraphContainingNoNaturalLBsSoTheLBIsForced.", textAfter);
        }
    }
}

}
}

Expand Down
2 changes: 1 addition & 1 deletion OpenXmlPowerTools.Tests/PtUtilTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace OxPt
{
public class PtUtilTests
{
[Theory(Skip = "This is failing on AppVeyor")]
[Theory]
[InlineData("PU/PU001-Test001.mht")]
public void PU001(string name)
{
Expand Down
34 changes: 31 additions & 3 deletions OpenXmlPowerTools.Tests/SpreadsheetWriterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -286,20 +286,48 @@ public void SW002_AllDataTypes()
{
Cells = new Sw.CellDfn[]
{
new Sw.CellDfn {
CellDataType = Sw.CellDataType.String,
Value = "date (t:d, mm-dd-yy)",
},
new Sw.CellDfn {
CellDataType = Sw.CellDataType.Date,
Value = new DateTime(2012, 1, 8),
Value = new DateTime(2012, 1, 8).ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fff"),
FormatCode = "mm-dd-yy",
},
}
},
new Sw.RowDfn
{
Cells = new Sw.CellDfn[]
{
new Sw.CellDfn {
CellDataType = Sw.CellDataType.String,
Value = "date (t:d, d-mmm-yy)",
},
new Sw.CellDfn {
CellDataType = Sw.CellDataType.Date,
Value = new DateTime(2012, 1, 9),
FormatCode = "mm-dd-yy",
Value = new DateTime(2012, 1, 9).ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fff"),
FormatCode = "d-mmm-yy",
Bold = true,
HorizontalCellAlignment = Sw.HorizontalCellAlignment.Center,
},
}
},
new Sw.RowDfn
{
Cells = new Sw.CellDfn[]
{
new Sw.CellDfn {
CellDataType = Sw.CellDataType.String,
Value = "date (t:d)",
},
new Sw.CellDfn {
CellDataType = Sw.CellDataType.Date,
Value = new DateTime(2012, 1, 11).ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fff"),
},
}
},
}
}
}
Expand Down
34 changes: 34 additions & 0 deletions OpenXmlPowerTools.Tests/UnicodeMapperTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,40 @@ public void CanStringifySymbols()
Assert.Equal(symFromChar1.ToString(SaveOptions.None), symFromChar2.ToString(SaveOptions.None));
Assert.Equal(symFromChar1.ToString(SaveOptions.None), symFromChar3.ToString(SaveOptions.None));
}

[Fact]
public void HonorsXmlSpace()
{
    // Converts each run of the last paragraph with UnicodeMapper.RunToString and
    // checks the concatenated text against the expected whitespace handling.
    XDocument partDocument = XDocument.Parse(PreserveSpacingXmlString);
    XElement lastParagraph = partDocument.Descendants(W.p).Last();

    var runTexts = lastParagraph
        .Descendants(W.r)
        .Select(UnicodeMapper.RunToString);
    string actualText = runTexts.StringConcatenate();

    Assert.Equal(@"The following space is retained: but this one is not:. Similarly these two lines should have only a space between them: Line 1! Line 2!", actualText);
}

// Test fixture for HonorsXmlSpace: a single paragraph with four runs. The first and
// third w:t elements carry xml:space="preserve" and end with a space; the second
// w:t ends with a space but has no xml:space attribute; the fourth w:t has no
// xml:space attribute and spreads "Line 1!" and "Line 2!" over separate lines.
private const string PreserveSpacingXmlString =
@"<w:document xmlns:w=""http://schemas.openxmlformats.org/wordprocessingml/2006/main"">
<w:body>
<w:p>
<w:r>
<w:t xml:space=""preserve"">The following space is retained: </w:t>
</w:r>
<w:r>
<w:t>but this one is not: </w:t>
</w:r>
<w:r>
<w:t xml:space=""preserve"">. Similarly these two lines should have only a space between them: </w:t>
</w:r>
<w:r>
<w:t>
Line 1!
Line 2!
</w:t>
</w:r>
</w:p>
</w:body>
</w:document>";
}
}

Expand Down
3 changes: 2 additions & 1 deletion OpenXmlPowerTools/ChartUpdater.cs
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,8 @@ private static void UpdateEmbeddedWorkbook(ChartPart chartPart, ChartData chartD
var embeddedSpreadsheet = chartPart.GetPartById(embeddedSpreadsheetRid);
if (embeddedSpreadsheet != null)
{
using (SpreadsheetDocument sDoc = SpreadsheetDocument.Open(embeddedSpreadsheet.GetStream(), true))
using (Stream spreadsheetStream = embeddedSpreadsheet.GetStream())
using (SpreadsheetDocument sDoc = SpreadsheetDocument.Open(spreadsheetStream, true))
{
var workbookPart = sDoc.WorkbookPart;
var wbRoot = workbookPart.GetXDocument().Root;
Expand Down
17 changes: 15 additions & 2 deletions OpenXmlPowerTools/DocumentAssembler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,7 @@ static object ContentReplacementTransform(XNode node, XElement data, TemplateErr
p.Add(new XElement(W.r,
para.Elements(W.r).Elements(W.rPr).FirstOrDefault(),
(p.Elements().Count() > 1) ? new XElement(W.br) : null,
new XElement(W.t, line)));
new XElement(W.t, GetXmlSpaceAttribute(line), line)));
}
return p;
}
Expand All @@ -635,7 +635,7 @@ static object ContentReplacementTransform(XNode node, XElement data, TemplateErr
list.Add(new XElement(W.r,
run.Elements().Where(e => e.Name != W.t),
(list.Count > 0) ? new XElement(W.br) : null,
new XElement(W.t, line)));
new XElement(W.t, GetXmlSpaceAttribute(line), line)));
}
return list;
}
Expand Down Expand Up @@ -853,5 +853,18 @@ private static string EvaluateXPathToString(XElement element, string xPath, bool
return xPathSelectResult.ToString();

}

/// <summary>
/// Decides whether a text element needs an <c>xml:space="preserve"</c> attribute.
/// </summary>
/// <param name="textOfTextElement">The text that will be placed in the text element; may be null or empty.</param>
/// <returns>
/// A new <c>xml:space="preserve"</c> attribute when the text starts or ends with a
/// white-space character; otherwise <c>null</c> (including for null or empty text).
/// </returns>
private static XAttribute GetXmlSpaceAttribute(string textOfTextElement)
{
    // Null or empty text has no leading or trailing character to inspect.
    if (string.IsNullOrEmpty(textOfTextElement))
    {
        return null;
    }

    char firstChar = textOfTextElement[0];
    char lastChar = textOfTextElement[textOfTextElement.Length - 1];
    bool hasEdgeWhiteSpace = char.IsWhiteSpace(firstChar) || char.IsWhiteSpace(lastChar);

    return hasEdgeWhiteSpace
        ? new XAttribute(XNamespace.Xml + "space", "preserve")
        : null;
}
}
}
2 changes: 1 addition & 1 deletion OpenXmlPowerTools/OpenXmlRegex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ private static object WmlSearchAndReplaceTransform(XNode node, Regex regex, stri
if (element.Name == W.r)
{
return element.Elements()
.Where(e => e.Name != W.rPr)
.Where(e => e.Name != W.rPr && e.Name != W.lastRenderedPageBreak)
.Select(e => e.Name == W.t
? ((string) e).Select(c =>
new XElement(W.r,
Expand Down
Loading