1-
21import udapi .block .msf .phrase
32from enum import Enum
43
54AUXES_HAVE = ['ter' , 'haber' , 'avere' ]
65AUXES_BE = ['estar' , 'essere' ]
6+ MODALS = ['poder' , 'deber' , 'querer' , 'saber' , # Spanish + Portuguese
7+ 'potere' , 'dovere' , 'volere' , 'sapere' ] # Italian
78
89class Aspect (str , Enum ):
910 IMP = 'Imp'
@@ -28,15 +29,21 @@ def process_node(self, node):
2829 cop = [x for x in node .children if x .udeprel == 'cop' ]
2930
3031 # only expl or expl:pv, no expl:impers or expl:pass
31- refl = [x for x in node .children if x .lemma == 'se' and x .upos == 'PRON' and x .udeprel == 'expl' and x .udeprel != 'expl:impers' and x .udeprel != 'expl:pass' ]
32+ refl = [x for x in node .children if x .lemma == 'se' and x .upos == 'PRON' and x .udeprel == 'expl' and x .deprel != 'expl:impers' and x .deprel != 'expl:pass' ]
3233
3334 if refl :
3435 expl = 'Pv'
3536 else :
3637 expl = None
3738
3839 if cop :
39- auxes = [x for x in node .children if x .udeprel == 'aux' ]
40+ # find auxiliary verbs, modal verbs, and auxiliary verbs related to modal verbs among the children of the content verb and separate them from each other
41+ auxes , modals , modal_auxes = self .find_auxes (node )
42+
43+ if modals :
44+ # we consider modals themselves to be separate verb forms
45+ self .process_modal_verbs (modals , modal_auxes )
46+
4047 if auxes :
4148 self .process_periphrastic_verb_forms (cop [0 ], auxes , refl , auxes + cop , node )
4249 else :
@@ -45,9 +52,11 @@ def process_node(self, node):
4552 return
4653
4754 if node .upos == 'VERB' : #TODO maybe add "or node.feats['VerbForm'] == 'Part'"?
48- auxes = [x for x in node .children if x .udeprel == 'aux' ]
49- aux_pass = [x for x in node .children if x .deprel == 'aux:pass' ]
50- auxes_without_pass = [x for x in node .children if x .udeprel == 'aux' and x .deprel != 'aux:pass' ]
55+
56+ # find auxiliary verbs, modal verbs, and auxiliary verbs related to modals among the children of the content verb and separate them from each other
57+ auxes , modals , modal_auxes = self .find_auxes (node )
58+ aux_pass = [x for x in auxes if x .deprel == 'aux:pass' ]
59+ auxes_without_pass = [x for x in auxes if x .deprel != 'aux:pass' ]
5160
5261 # infinitive with a subject is a subjunctive
5362 subj = [x for x in node .children if x .udeprel == 'subj' ]
@@ -65,147 +74,202 @@ def process_node(self, node):
6574 )
6675 return
6776
77+ if modals :
78+ # we consider modals themselves to be separate verb forms
79+ self .process_modal_verbs (modals , modal_auxes )
80+
6881 if not auxes :
69- phrase_ords = [node .ord ] + [r .ord for r in refl ]
82+ phrase_ords = [node .ord ] + [r .ord for r in refl ]
83+ phrase_ords .sort ()
84+
85+ self .process_simple_verb_forms (node , expl , phrase_ords , node )
86+
87+
88+ else :
89+ # no passive auxiliaries
90+ if not aux_pass :
91+ self .process_periphrastic_verb_forms (node , auxes , refl , auxes , node )
92+
93+ # head verb has only passive auxiliary and no more other auxiliaries
94+ elif not auxes_without_pass :
95+ phrase_ords = [node .ord ] + [x .ord for x in auxes ] + [r .ord for r in refl ]
7096 phrase_ords .sort ()
7197
72- # Portuguese
73- # presente -> PhraseTense=Pres, PhraseAspect=''
74- # Futuro do presente -> PhraseTense=Fut, PhraseAspect=''
75-
76- # Spanish
77- # presente -> PhraseTense=Pres, PhraseAspect=''
78- # futuro simple -> PhraseTense=Fut, PhraseAspect=''
79-
80- # Italian
81- # presente -> PhraseTense=Pres, PhraseAspect=''
82- # futuro semplice -> PhraseTense=Fut, PhraseAspect=''
83-
84- aspect = ''
85- tense = node .feats ['Tense' ]
86-
87- if node .feats ['Mood' ] == 'Ind' :
88-
89- # Portuguese
90- # pretérito imperfeito -> PhraseTense=Past, PhraseAspect=Imp
91-
92- # Spanish
93- # pretérito imperfecto -> PhraseTense=Past, PhraseAspect=Imp
94-
95- # Italian
96- # imperfetto -> PhraseTense=Past, PhraseAspect=Imp
97- if node .feats ['Tense' ] == 'Imp' :
98- tense = Tense .PAST .value
99- aspect = Aspect .IMP .value
100-
101- # Portuguese
102- # pretérito perfeito -> PhraseTense=Past, PhraseAspect=Perf
103-
104- # Spanish
105- # pretérito perfecto -> PhraseTense=Past, PhraseAspect=Perf
106-
107- # Italian
108- # pass remoto -> PhraseTense=Past, PhraseAspect=Perf
109- if node .feats ['Tense' ] == 'Past' :
110- aspect = Aspect .PERF .value
111-
112- # Portuguese
113- # pretérito mais que perfeito simples -> PhraseTense=Past, PhraseAspect=Pqp
114- if node .feats ['Tense' ] == 'Pqp' :
115- tense = Tense .PAST .value
116- aspect = Aspect .PQP .value
117-
118- # Portuguese
119- # subjunctive presente -> PhraseTense=Pres, PhraseAspect=''
120- # subjunctive futuro -> PhraseTense=Fut, PhraseAspect=''
98+ # TODO phrase-level features are currently determined based on the first passive auxiliary, but there can be more than one passive auxiliary
99+ self .process_simple_verb_forms (auxes [0 ], expl , phrase_ords , node )
121100
122- # Spanish
123- # subjunctive presente -> PhraseTense=Pres, PhraseAspect=''
124- # subjunctive futuro -> PhraseTense=Fut, PhraseAspect='' TODO not annotated in treebanks?
101+ # head verb has passive auxiliary and also other auxiliaries
102+ else :
103+ self . process_periphrastic_verb_forms ( aux_pass [ 0 ], auxes_without_pass , refl , auxes , node )
125104
126- # Italian
127- # Congiuntivo presente -> PhraseTense=Pres, PhraseAspect=''
128- if node .feats [ 'Mood' ] == 'Sub' :
105+ def find_auxes ( self , node ):
106+ """
107+ Find all auxiliaries among node.children and classify them.
129108
130- if node . feats [ 'Tense' ] == 'Past' :
131- aspect = Aspect . IMP . value
109+ Parameters :
110+ node (udapi.core.node.Node): head word, look for auxiliaries in its children
132111
133- # Portuguese
134- # subjunctive pretérito imperfeito -> PhraseTense=Past, PhraseAspect=Imp
112+ Returns:
113+ tuple: a classification of auxiliaries consisting of:
114+ - auxiliaries directly modifying the node,
115+ - modal verbs,
116+ - auxiliaries modifying a modal verb.
117+ """
135118
136- # Spanish
137- # Pretérito imperfecto -> PhraseTense=Past, PhraseAspect=Imp
119+ node_auxes = []
120+ modals = []
121+ modal_auxes = []
138122
139- # Italian
140- # Congiuntivo imperfetto -> PhraseTense=Past, PhraseAspect=Imp
141- if node .feats ['Tense' ] == 'Imp' :
142- tense = Tense .PAST .value
143- aspect = Aspect .IMP .value
123+ for child in node .children :
124+ if child .udeprel == 'aux' :
125+ if child .lemma in MODALS :
126+ modals .append (child )
127+ modal_auxes = node_auxes # auxiliaries found so far are assumed to modify the modal verb (they come before it)
128+ node_auxes = []
129+ else :
130+ node_auxes .append (child )
144131
145- # Portuguese
146- # Futuro do pretérito (cnd) -> PhraseTense=Pres, PhraseAspect='', PhraseMood=Cnd
132+ return node_auxes , modals , modal_auxes
133+
134+ def process_modal_verbs (self , modals , modal_auxes ):
135+ """
136+ Annotates modal verb forms with the Phrase* attributes.
137+ The modal verbs are kept as a single verb form, without including the infinitive of the content word.
147138
148- # Spanish
149- # pospretérito (cnd) -> PhraseTense=Pres, PhraseAspect='', PhraseMood=Cnd
139+ Parameters:
140+ modals (list): all modal verbs among the children of the head content verb (the code currently assumes there is only one).
141+ modal_auxes (list): auxiliaries of the modal verb(s)
142+
143+ """
150144
151- # Italian
152- # Condizionale presente -> PhraseTense=Pres, PhraseAspect='', PhraseMood=Cnd
153- if node .feats ['Mood' ] == 'Cnd' :
154- aspect = ''
155- tense = Tense .PRES .value
145+ if not modal_auxes :
146+ self .process_simple_verb_forms (modals [0 ], '' , [modals [0 ].ord ], modals [0 ])
156147
157-
158- self .write_node_info (node ,
159- person = node .feats ['Person' ],
160- aspect = aspect ,
161- number = node .feats ['Number' ],
162- mood = node .feats ['Mood' ],
163- form = node .feats ['VerbForm' ],
164- tense = tense ,
165- gender = node .feats ['Gender' ],
166- voice = node .feats ['Voice' ],
167- expl = expl ,
168- ords = phrase_ords
169- )
148+ else :
149+ self .process_periphrastic_verb_forms (modals [0 ], modal_auxes , [], modal_auxes , modals [0 ])
170150
171151
172- else :
173- # no passive auxiliaries
174- if not aux_pass :
175- self .process_periphrastic_verb_forms (node , auxes , refl , auxes , node )
152+ def process_simple_verb_forms (self , node , expl , phrase_ords , head_node ):
153+ """
154+ Annotate simple verb forms or passive verb forms that contain only a passive auxiliary.
176155
177- # head verb has one passive auxiliary and no more other auxiliaries
178- # TODO complete the tenses and aspects for individual verb forms
179- elif not auxes_without_pass :
180- phrase_ords = [node .ord ] + [x .ord for x in auxes ] + [r .ord for r in refl ]
181- phrase_ords .sort ()
156+ Parameters:
157+ node (udapi.core.node.Node): The relevant node. If there is no passive construction, this is the head verb. If the head verb is passive, this is the passive auxiliary.
158+ expl (str): The value of the PhraseExpl attribute.
159+ phrase_ords (list[int]): The ord values of all member words of the verb form.
160+ head_node (udapi.core.node.Node): The node that should receive the Phrase* attributes, i.e., the head of the phrase.
161+ """
182162
183- self .write_node_info (node ,
184- person = aux_pass [0 ].feats ['Person' ],
185- number = aux_pass [0 ].feats ['Number' ],
186- mood = aux_pass [0 ].feats ['Mood' ],
187- form = 'Fin' ,
188- tense = aux_pass [0 ].feats ['Tense' ],
189- gender = node .feats ['Gender' ],
190- voice = 'Pass' ,
191- expl = expl ,
192- ords = phrase_ords
193- )
163+ # Portuguese
164+ # presente -> PhraseTense=Pres, PhraseAspect=''
165+ # Futuro do presente -> PhraseTense=Fut, PhraseAspect=''
166+
167+ # Spanish
168+ # presente -> PhraseTense=Pres, PhraseAspect=''
169+ # futuro simple -> PhraseTense=Fut, PhraseAspect=''
170+
171+ # Italian
172+ # presente -> PhraseTense=Pres, PhraseAspect=''
173+ # futuro semplice -> PhraseTense=Fut, PhraseAspect=''
174+
175+ aspect = ''
176+ tense = node .feats ['Tense' ]
177+
178+ if node .feats ['Mood' ] == 'Ind' :
179+
180+ # Portuguese
181+ # pretérito imperfeito -> PhraseTense=Past, PhraseAspect=Imp
182+
183+ # Spanish
184+ # pretérito imperfecto -> PhraseTense=Past, PhraseAspect=Imp
185+
186+ # Italian
187+ # imperfetto -> PhraseTense=Past, PhraseAspect=Imp
188+ if node .feats ['Tense' ] == 'Imp' :
189+ tense = Tense .PAST .value
190+ aspect = Aspect .IMP .value
191+
192+ # Portuguese
193+ # pretérito perfeito -> PhraseTense=Past, PhraseAspect=Perf
194+
195+ # Spanish
196+ # pretérito perfecto -> PhraseTense=Past, PhraseAspect=Perf
197+
198+ # Italian
199+ # passato remoto -> PhraseTense=Past, PhraseAspect=Perf
200+ if node .feats ['Tense' ] == 'Past' :
201+ aspect = Aspect .PERF .value
202+
203+ # Portuguese
204+ # pretérito mais que perfeito simples -> PhraseTense=Past, PhraseAspect=Pqp
205+ if node .feats ['Tense' ] == 'Pqp' :
206+ tense = Tense .PAST .value
207+ aspect = Aspect .PQP .value
208+
209+ # Portuguese
210+ # subjunctive presente -> PhraseTense=Pres, PhraseAspect=''
211+ # subjunctive futuro -> PhraseTense=Fut, PhraseAspect=''
212+
213+ # Spanish
214+ # subjunctive presente -> PhraseTense=Pres, PhraseAspect=''
215+ # subjunctive futuro -> PhraseTense=Fut, PhraseAspect='' TODO not annotated in treebanks?
216+
217+ # Italian
218+ # Congiuntivo presente -> PhraseTense=Pres, PhraseAspect=''
219+ if node .feats ['Mood' ] == 'Sub' :
220+
221+ if node .feats ['Tense' ] == 'Past' :
222+ aspect = Aspect .IMP .value
223+
224+ # Portuguese
225+ # subjunctive pretérito imperfeito -> PhraseTense=Past, PhraseAspect=Imp
226+
227+ # Spanish
228+ # Pretérito imperfecto -> PhraseTense=Past, PhraseAspect=Imp
229+
230+ # Italian
231+ # Congiuntivo imperfetto -> PhraseTense=Past, PhraseAspect=Imp
232+ if node .feats ['Tense' ] == 'Imp' :
233+ tense = Tense .PAST .value
234+ aspect = Aspect .IMP .value
235+
236+ # Portuguese
237+ # Futuro do pretérito (cnd) -> PhraseTense=Pres, PhraseAspect='', PhraseMood=Cnd
238+
239+ # Spanish
240+ # pospretérito (cnd) -> PhraseTense=Pres, PhraseAspect='', PhraseMood=Cnd
241+
242+ # Italian
243+ # Condizionale presente -> PhraseTense=Pres, PhraseAspect='', PhraseMood=Cnd
244+ if node .feats ['Mood' ] == 'Cnd' :
245+ aspect = ''
246+ tense = Tense .PRES .value
247+
248+
249+ self .write_node_info (head_node ,
250+ person = node .feats ['Person' ],
251+ aspect = aspect ,
252+ number = node .feats ['Number' ],
253+ mood = node .feats ['Mood' ],
254+ form = node .feats ['VerbForm' ],
255+ tense = tense ,
256+ gender = head_node .feats ['Gender' ],
257+ voice = head_node .feats ['Voice' ],
258+ expl = expl ,
259+ ords = phrase_ords
260+ )
194261
195- # head verb has passive auxiliary and also other auxiliaries
196- else :
197- self .process_periphrastic_verb_forms (aux_pass [0 ], auxes_without_pass , refl , auxes , node )
198262
199263 def process_periphrastic_verb_forms (self , node , auxes , refl , all_auxes , head_node ):
200264 """
201- Parameters
202- - node: if there is no passive then the node is the head verb, if the head verb is in the passive, then the node is the passive auxiliary
203- - auxes: list of all auxiliaries except the passive auxes
204- - refl: list of reflexives which should be included into the periphrastic phrase
205- - all_auxes: list of all auxiliaries (passive auxes are included)
206- - head_node: the node which should have the Phrase* attributes, i. e. the head of the phrase
265+ Annotate periphrastic verb forms with the Phrase* attributes.
207266
208- annotates periphrastic verb forms with the Phrase* attributes
267+ Parameters:
268+ node (udapi.core.node.Node): The relevant node. If there is no passive construction, this is the head verb. If the head verb is passive, this is the passive auxiliary.
269+ auxes (list[udapi.core.node.Node]): All auxiliaries except the passive auxiliaries.
270+ refl (list[udapi.core.node.Node]): Reflexives that should be included in the periphrastic phrase.
271+ all_auxes (list[udapi.core.node.Node]): All auxiliaries, including the passive auxiliaries.
272+ head_node (udapi.core.node.Node): The node that should receive the Phrase* attributes, i.e., the head of the phrase.
209273 """
210274
211275 if refl :
@@ -216,7 +280,7 @@ def process_periphrastic_verb_forms(self, node, auxes, refl, all_auxes, head_nod
216280 if len (auxes ) == 1 :
217281 # Cnd
218282 if auxes [0 ].feats ['Mood' ] == 'Cnd' and (node .feats ['VerbForm' ] == 'Part' or node .feats ['VerbForm' ] == 'Ger' ):
219- phrase_ords = [head_node .ord ] + [x .ord for x in all_auxes ] + [r .ord for r in refl ] + [ r . ord for r in refl ]
283+ phrase_ords = [head_node .ord ] + [x .ord for x in all_auxes ] + [r .ord for r in refl ]
220284 phrase_ords .sort ()
221285
222286 # Portuguese
0 commit comments