Skip to content

Commit 1afedd0

Browse files
committed
acpt: add Paragraph inner-content scenarios
1 parent 08ee10a commit 1afedd0

File tree

3 files changed

+156
-25
lines changed

3 files changed

+156
-25
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
Feature: Access paragraph inner-content including hyperlinks
2+
In order to extract paragraph content with high fidelity
3+
As a developer using python-docx
4+
I need to access differentiated paragraph content in document order
5+
6+
7+
@wip
8+
Scenario Outline: Paragraph.contains_page_break reports presence of page-break
9+
Given a paragraph having <zero-or-more> rendered page breaks
10+
Then paragraph.contains_page_break is <value>
11+
12+
Examples: Paragraph.contains_page_break cases
13+
| zero-or-more | value |
14+
| no | False |
15+
| one | True |
16+
| two | True |
17+
18+
19+
@wip
20+
Scenario Outline: Paragraph.hyperlinks contains Hyperlink for each link in paragraph
21+
Given a paragraph having <zero-or-more> hyperlinks
22+
Then paragraph.hyperlinks has length <value>
23+
And paragraph.hyperlinks contains only Hyperlink instances
24+
25+
Examples: Paragraph.hyperlinks cases
26+
| zero-or-more | value |
27+
| no | 0 |
28+
| one | 1 |
29+
| three | 3 |
30+
31+
32+
@wip
33+
Scenario: Paragraph.iter_inner_content() generates the paragraph's runs and hyperlinks
34+
Given a paragraph having three hyperlinks
35+
Then paragraph.iter_inner_content() generates the paragraph runs and hyperlinks
36+
37+
38+
@wip
39+
Scenario Outline: Paragraph.rendered_page_breaks contains paragraph RenderedPageBreaks
40+
Given a paragraph having <zero-or-more> rendered page breaks
41+
Then paragraph.rendered_page_breaks has length <value>
42+
And paragraph.rendered_page_breaks contains only RenderedPageBreak instances
43+
44+
Examples: Paragraph.rendered_page_breaks cases
45+
| zero-or-more | value |
46+
| no | 0 |
47+
| one | 1 |
48+
| two | 2 |

features/steps/paragraph.py

Lines changed: 108 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
"""Step implementations for paragraph-related features."""
22

3+
from __future__ import annotations
4+
5+
from typing import Any
6+
37
from behave import given, then, when
8+
from behave.runner import Context
49

510
from docx import Document
6-
from docx.enum.text import WD_ALIGN_PARAGRAPH
11+
from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
712
from docx.text.parfmt import ParagraphFormat
813

914
from helpers import saved_docx_path, test_docx, test_text
@@ -12,7 +17,7 @@
1217

1318

1419
@given("a document containing three paragraphs")
15-
def given_a_document_containing_three_paragraphs(context):
20+
def given_a_document_containing_three_paragraphs(context: Context):
1621
document = Document()
1722
document.add_paragraph("foo")
1823
document.add_paragraph("bar")
@@ -21,7 +26,7 @@ def given_a_document_containing_three_paragraphs(context):
2126

2227

2328
@given("a paragraph having {align_type} alignment")
24-
def given_a_paragraph_align_type_alignment(context, align_type):
29+
def given_a_paragraph_align_type_alignment(context: Context, align_type: str):
2530
paragraph_idx = {
2631
"inherited": 0,
2732
"left": 1,
@@ -34,7 +39,7 @@ def given_a_paragraph_align_type_alignment(context, align_type):
3439

3540

3641
@given("a paragraph having {style_state} style")
37-
def given_a_paragraph_having_style(context, style_state):
42+
def given_a_paragraph_having_style(context: Context, style_state: str):
3843
paragraph_idx = {
3944
"no specified": 0,
4045
"a missing": 1,
@@ -45,8 +50,30 @@ def given_a_paragraph_having_style(context, style_state):
4550
context.paragraph = document.paragraphs[paragraph_idx]
4651

4752

53+
@given("a paragraph having {zero_or_more} hyperlinks")
def given_a_paragraph_having_hyperlinks(context: Context, zero_or_more: str):
    """Pick the paragraph of the "par-hyperlinks" fixture named by the step text.

    `zero_or_more` is the count word captured from the step ("no", "one" or
    "three"); it is translated to a paragraph position in the fixture document.
    Any other word raises `KeyError` before the document is opened. Both the
    opened document and the selected paragraph are stored on `context`.
    """
    position_by_count = {"no": 0, "one": 1, "three": 2}
    position = position_by_count[zero_or_more]
    fixture = Document(test_docx("par-hyperlinks"))
    context.document = fixture
    context.paragraph = fixture.paragraphs[position]
62+
63+
64+
@given("a paragraph having {zero_or_more} rendered page breaks")
def given_a_paragraph_having_rendered_page_breaks(context: Context, zero_or_more: str):
    """Pick the paragraph of the "par-rendered-page-breaks" fixture for the step.

    `zero_or_more` is the count word captured from the step ("no", "one" or
    "two"); it is translated to a paragraph position in the fixture document.
    Any other word raises `KeyError` before the document is opened. Only the
    selected paragraph is stored on `context`.
    """
    position_by_count = {"no": 0, "one": 1, "two": 2}
    position = position_by_count[zero_or_more]
    fixture = Document(test_docx("par-rendered-page-breaks"))
    context.paragraph = fixture.paragraphs[position]
73+
74+
4875
@given("a paragraph with content and formatting")
49-
def given_a_paragraph_with_content_and_formatting(context):
76+
def given_a_paragraph_with_content_and_formatting(context: Context):
5077
document = Document(test_docx("par-known-paragraphs"))
5178
context.paragraph = document.paragraphs[0]
5279

@@ -55,12 +82,12 @@ def given_a_paragraph_with_content_and_formatting(context):
5582

5683

5784
@when("I add a run to the paragraph")
58-
def when_add_new_run_to_paragraph(context):
85+
def when_add_new_run_to_paragraph(context: Context):
5986
context.run = context.p.add_run()
6087

6188

6289
@when("I assign a {style_type} to paragraph.style")
63-
def when_I_assign_a_style_type_to_paragraph_style(context, style_type):
90+
def when_I_assign_a_style_type_to_paragraph_style(context: Context, style_type: str):
6491
paragraph = context.paragraph
6592
style = context.style = context.document.styles["Heading 1"]
6693
style_spec = {
@@ -71,34 +98,88 @@ def when_I_assign_a_style_type_to_paragraph_style(context, style_type):
7198

7299

73100
@when("I clear the paragraph content")
74-
def when_I_clear_the_paragraph_content(context):
101+
def when_I_clear_the_paragraph_content(context: Context):
75102
context.paragraph.clear()
76103

77104

78105
@when("I insert a paragraph above the second paragraph")
79-
def when_I_insert_a_paragraph_above_the_second_paragraph(context):
106+
def when_I_insert_a_paragraph_above_the_second_paragraph(context: Context):
80107
paragraph = context.document.paragraphs[1]
81108
paragraph.insert_paragraph_before("foobar", "Heading1")
82109

83110

84111
@when("I set the paragraph text")
85-
def when_I_set_the_paragraph_text(context):
112+
def when_I_set_the_paragraph_text(context: Context):
86113
context.paragraph.text = "bar\tfoo\r"
87114

88115

89116
# then =====================================================
90117

91118

119+
@then("paragraph.contains_page_break is {value}")
def then_paragraph_contains_page_break_is_value(context: Context, value: str):
    """Check `paragraph.contains_page_break` against the boolean named by `value`.

    `value` must be the literal text "True" or "False" as written in the
    feature file's examples table; any other text raises `KeyError`.
    """
    found = context.paragraph.contains_page_break
    # -- the property is read before the lookup, so a bad `value` surfaces second --
    wanted = {"True": True, "False": False}[value]
    assert found == wanted, f"expected: {wanted}, got: {found}"
126+
127+
128+
@then("paragraph.hyperlinks contains only Hyperlink instances")
def then_paragraph_hyperlinks_contains_only_Hyperlink_instances(context: Context):
    """Assert every item in `paragraph.hyperlinks` is of a class named "Hyperlink".

    Items are matched on class name rather than with `isinstance()`; no
    `Hyperlink` import is visible in this module's import block. An empty
    collection passes. On failure the message lists the class name of every
    item so the offending entries can be identified from the behave report.
    """
    type_names = [type(item).__name__ for item in context.paragraph.hyperlinks]
    unexpected = [name for name in type_names if name != "Hyperlink"]
    assert not unexpected, f"expected only Hyperlink instances, got: {type_names}"
133+
134+
135+
@then("paragraph.hyperlinks has length {value}")
def then_paragraph_hyperlinks_has_length(context: Context, value: str):
    """Assert `paragraph.hyperlinks` contains exactly `value` items.

    `value` is the decimal count captured from the step text; text that is not
    an integer raises `ValueError`. On failure the message reports both the
    expected and the actual length.
    """
    expected_value = int(value)
    actual_value = len(context.paragraph.hyperlinks)
    assert (
        actual_value == expected_value
    ), f"expected: {expected_value}, got: {actual_value}"
139+
140+
141+
@then("paragraph.iter_inner_content() generates the paragraph runs and hyperlinks")
def then_paragraph_iter_inner_content_generates_runs_and_hyperlinks(context: Context):
    """Assert `iter_inner_content()` yields the expected Run/Hyperlink sequence.

    Items are compared on class name only. The expected sequence is seven
    items, alternating "Run" and "Hyperlink" and both starting and ending with
    "Run". On failure the message shows the expected and the actual sequence,
    since behave does not expand the operands of a bare `assert`.
    """
    expected = ["Run", "Hyperlink", "Run", "Hyperlink", "Run", "Hyperlink", "Run"]
    actual = [type(item).__name__ for item in context.paragraph.iter_inner_content()]
    assert actual == expected, f"expected: {expected}, got: {actual}"
152+
153+
92154
@then("paragraph.paragraph_format is its ParagraphFormat object")
93-
def then_paragraph_paragraph_format_is_its_parfmt_object(context):
155+
def then_paragraph_paragraph_format_is_its_parfmt_object(context: Context):
94156
paragraph = context.paragraph
95157
paragraph_format = paragraph.paragraph_format
96158
assert isinstance(paragraph_format, ParagraphFormat)
97159
assert paragraph_format.element is paragraph._element
98160

99161

162+
@then("paragraph.rendered_page_breaks has length {value}")
def then_paragraph_rendered_page_breaks_has_length(context: Context, value: str):
    """Check that `paragraph.rendered_page_breaks` holds exactly `value` items.

    `value` is the decimal count captured from the step text; text that is not
    an integer raises `ValueError` after the collection has been measured.
    """
    found = len(context.paragraph.rendered_page_breaks)
    wanted = int(value)
    assert found == wanted, f"got: {found}, expected: {wanted}"
169+
170+
171+
@then("paragraph.rendered_page_breaks contains only RenderedPageBreak instances")
def then_paragraph_rendered_page_breaks_contains_only_RenderedPageBreak_instances(
    context: Context,
):
    """Assert every rendered page-break item is of a class named "RenderedPageBreak".

    Items are matched on class name rather than with `isinstance()`; no
    `RenderedPageBreak` import is visible in this module's import block. An
    empty collection passes. On failure the message lists the class name of
    every item so the offending entries can be identified.
    """
    type_names = [
        type(item).__name__ for item in context.paragraph.rendered_page_breaks
    ]
    unexpected = [name for name in type_names if name != "RenderedPageBreak"]
    assert (
        not unexpected
    ), f"expected only RenderedPageBreak instances, got: {type_names}"
179+
180+
100181
@then("paragraph.style is {value_key}")
101-
def then_paragraph_style_is_value(context, value_key):
182+
def then_paragraph_style_is_value(context: Context, value_key: str):
102183
styles = context.document.styles
103184
expected_value = {
104185
"Normal": styles["Normal"],
@@ -110,12 +191,12 @@ def then_paragraph_style_is_value(context, value_key):
110191

111192

112193
@then("the document contains four paragraphs")
113-
def then_the_document_contains_four_paragraphs(context):
194+
def then_the_document_contains_four_paragraphs(context: Context):
114195
assert len(context.document.paragraphs) == 4
115196

116197

117198
@then("the document contains the text I added")
118-
def then_document_contains_text_I_added(context):
199+
def then_document_contains_text_I_added(context: Context):
119200
document = Document(saved_docx_path)
120201
paragraphs = document.paragraphs
121202
paragraph = paragraphs[-1]
@@ -126,47 +207,49 @@ def then_document_contains_text_I_added(context):
126207

127208

128209
@then("the paragraph alignment property value is {align_value}")
129-
def then_the_paragraph_alignment_prop_value_is_value(context, align_value):
130-
expected_value = {
210+
def then_the_paragraph_alignment_prop_value_is_value(
211+
context: Context, align_value: str
212+
):
213+
expected_value: Any = {
131214
"None": None,
132-
"WD_ALIGN_PARAGRAPH.LEFT": WD_ALIGN_PARAGRAPH.LEFT,
133-
"WD_ALIGN_PARAGRAPH.CENTER": WD_ALIGN_PARAGRAPH.CENTER,
134-
"WD_ALIGN_PARAGRAPH.RIGHT": WD_ALIGN_PARAGRAPH.RIGHT,
215+
"WD_ALIGN_PARAGRAPH.LEFT": WD_PARAGRAPH_ALIGNMENT.LEFT, # pyright: ignore
216+
"WD_ALIGN_PARAGRAPH.CENTER": WD_PARAGRAPH_ALIGNMENT.CENTER, # pyright: ignore
217+
"WD_ALIGN_PARAGRAPH.RIGHT": WD_PARAGRAPH_ALIGNMENT.RIGHT, # pyright: ignore
135218
}[align_value]
136219
assert context.paragraph.alignment == expected_value
137220

138221

139222
@then("the paragraph formatting is preserved")
140-
def then_the_paragraph_formatting_is_preserved(context):
223+
def then_the_paragraph_formatting_is_preserved(context: Context):
141224
paragraph = context.paragraph
142225
assert paragraph.style.name == "Heading 1"
143226

144227

145228
@then("the paragraph has no content")
146-
def then_the_paragraph_has_no_content(context):
229+
def then_the_paragraph_has_no_content(context: Context):
147230
assert context.paragraph.text == ""
148231

149232

150233
@then("the paragraph has the style I set")
151-
def then_the_paragraph_has_the_style_I_set(context):
234+
def then_the_paragraph_has_the_style_I_set(context: Context):
152235
paragraph, expected_style = context.paragraph, context.style
153236
assert paragraph.style == expected_style
154237

155238

156239
@then("the paragraph has the text I set")
157-
def then_the_paragraph_has_the_text_I_set(context):
240+
def then_the_paragraph_has_the_text_I_set(context: Context):
158241
actual = context.paragraph.text
159242
expected = "bar\tfoo\n"
160243
assert actual == expected, f"expected: {expected}, got: {actual}"
161244

162245

163246
@then("the style of the second paragraph matches the style I set")
164-
def then_the_style_of_the_second_paragraph_matches_the_style_I_set(context):
247+
def then_the_style_of_the_second_paragraph_matches_the_style_I_set(context: Context):
165248
second_paragraph = context.document.paragraphs[1]
166249
assert second_paragraph.style.name == "Heading 1"
167250

168251

169252
@then("the text of the second paragraph matches the text I set")
170-
def then_the_text_of_the_second_paragraph_matches_the_text_I_set(context):
253+
def then_the_text_of_the_second_paragraph_matches_the_text_I_set(context: Context):
171254
second_paragraph = context.document.paragraphs[1]
172255
assert second_paragraph.text == "foobar"
12.1 KB
Binary file not shown.

0 commit comments

Comments
 (0)