@@ -76,16 +76,10 @@ def text(self) -> str:
7676 Inner-content child elements like `w:tab` are translated to their text
7777 equivalent.
7878 """
79- text = ""
80- for child in self :
81- if child .tag == qn ("w:t" ):
82- t_text = child .text
83- text += t_text if t_text is not None else ""
84- elif child .tag == qn ("w:tab" ):
85- text += "\t "
86- elif child .tag in (qn ("w:br" ), qn ("w:cr" )):
87- text += "\n "
88- return text
79+ return "" .join (
80+ str (e )
81+ for e in self .xpath ("w:br | w:cr | w:noBreakHyphen | w:ptab | w:t | w:tab" )
82+ )
8983
9084 @text .setter
9185 def text (self , text : str ):
@@ -104,13 +98,89 @@ def _insert_rPr(self, rPr: CT_RPr) -> CT_RPr:
10498class CT_Br (BaseOxmlElement ):
10599 """`<w:br>` element, indicating a line, page, or column break in a run."""
106100
107- type = OptionalAttribute ("w:type" , ST_BrType , default = "textWrapping" )
101+ type : str | None = OptionalAttribute ( # pyright: ignore[reportGeneralTypeIssues]
102+ "w:type" , ST_BrType , default = "textWrapping"
103+ )
108104 clear = OptionalAttribute ("w:clear" , ST_BrClear )
109105
106+ def __str__ (self ) -> str :
107+ """Text equivalent of this element. Actual value depends on break type.
108+
109+ A line break is translated as "\n ". Column and page breaks produce the empty
110+ string ("").
111+
112+ This allows the text of run inner-content to be accessed in a consistent way
113+ for all run inner-content text elements.
114+ """
115+ return "\n " if self .type == "textWrapping" else ""
116+
117+
118+ class CT_Cr (BaseOxmlElement ):
119+ """`<w:cr>` element, representing a carriage-return (0x0D) character within a run.
120+
121+ In Word, this represents a "soft carriage-return" in the sense that it does not end
122+ the paragraph the way pressing Enter (aka. Return) on the keyboard does. Here the
123+ text equivalent is considered to be newline ("\n ") since in plain-text that's the
124+ closest Python equivalent.
125+
126+ NOTE: this complex-type name does not exist in the schema, where `w:cr` maps to
127+ `CT_Empty`. This name was added to give it distinguished behavior. CT_Empty is used
128+ for many elements.
129+ """
130+
131+ def __str__ (self ) -> str :
132+ """Text equivalent of this element, a single newline ("\n ")."""
133+ return "\n "
134+
135+
136+ class CT_NoBreakHyphen (BaseOxmlElement ):
137+ """`<w:noBreakHyphen>` element, a hyphen ineligible for a line-wrap position.
138+
139+ This maps to a plain-text dash ("-").
140+
141+ NOTE: this complex-type name does not exist in the schema, where `w:noBreakHyphen`
142+ maps to `CT_Empty`. This name was added to give it behavior distinguished from the
143+ many other elements represented in the schema by CT_Empty.
144+ """
145+
146+ def __str__ (self ) -> str :
147+ """Text equivalent of this element, a single dash character ("-")."""
148+ return "-"
149+
150+
151+ class CT_PTab (BaseOxmlElement ):
152+ """`<w:ptab>` element, representing an absolute-position tab character within a run.
153+
154+ This character advances the rendering position to the specified position regardless
155+ of any tab-stops, perhaps for layout of a table-of-contents (TOC) or similar.
156+ """
157+
158+ def __str__ (self ) -> str :
159+ """Text equivalent of this element, a single tab ("\t ") character.
160+
161+ This allows the text of run inner-content to be accessed in a consistent way
162+ for all run inner-content text elements.
163+ """
164+ return "\t "
165+
166+
167+ # -- CT_Tab functionality is provided by CT_TabStop which also uses `w:tab` tag. That
168+ # -- element class provides the __str__() method for this empty element, unconditionally
169+ # -- returning "\t".
170+
110171
111172class CT_Text (BaseOxmlElement ):
112173 """`<w:t>` element, containing a sequence of characters within a run."""
113174
175+ def __str__ (self ) -> str :
176+ """Text contained in this element, the empty string if it has no content.
177+
178+ This method allows this run inner-content element to be queried for its text
179+ the same way as other run-content elements are. In particular, this never
180+ returns None, as etree._Element does when there is no content.
181+ """
182+ return self .text or ""
183+
114184
115185# ------------------------------------------------------------------------------------
116186# Utility
0 commit comments