Skip to content

Commit 8ebbab3

Browse files
authored
Merge pull request #130 from lenka-krippnerova/master
Improve handling of negation detection in Slavic blocks
2 parents 7459d35 + ac391d5 commit 8ebbab3

File tree

9 files changed

+285
-143
lines changed

9 files changed

+285
-143
lines changed

udapi/block/msf/phrase.py

Lines changed: 66 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,31 @@ def process_node(self, node):
3030
'animacy':'PhraseAnimacy',
3131
'ords':'Phrase'
3232
}
33+
34+
# Lookup table for negative particles that belong to a fixed expression.
# Key:   lemma of a negative particle.
# Value: lemmas of the children that may be attached to that particle with
#        the 'fixed' relation.
# A particle matching an entry here is part of a lexicalized expression
# (the kind of expression that means e.g. "never") and therefore must not
# be included in the phrase.
negation_fixed = {
    # Belarusian
    'ні': ['раз'],
    'ня': ['толькі'],

    # Upper Sorbian
    'nic': ['naposledku'],

    # Polish
    'nie': ['mało'],

    # Pomak
    'néma': ['kak'],

    # Slovenian
    'ne': ['le'],

    # Russian and Old East Slavic
    'не': ['то', 'токмо'],
    'ни': ['в', 'раз', 'шатко'],
    'нет': ['нет'],
}
3358

3459
def write_node_info(self, node,
3560
tense = None,
@@ -51,12 +76,46 @@ def write_node_info(self, node,
5176
if val != None:
5277
node.misc[self.dictionary[key]] = val
5378

54-
def get_polarity(self, node, neg):
55-
if node.feats['Polarity'] != "":
56-
return node.feats['Polarity']
57-
if len(neg) == 0:
58-
return None
59-
return 'Neg'
79+
def has_fixed_children(self, node):
    """
    Tell whether the node heads a fixed expression listed in self.negation_fixed.

    Returns True if any child of the node attached with the 'fixed' relation
    has a lemma that self.negation_fixed lists under the node's own lemma.
    Returns False otherwise, i.e. also when the node has no 'fixed' child or
    when the node's lemma is not a key of self.negation_fixed.
    """
    listed_lemmas = self.negation_fixed.get(node.lemma, [])
    if not listed_lemmas:
        # the node's lemma is not a known negative particle, nothing can match
        return False

    # Every 'fixed' child is examined, not just the first one: a listed lemma
    # must be found even if another 'fixed' child precedes it.
    return any(
        child.udeprel == 'fixed' and child.lemma in listed_lemmas
        for child in node.children
    )
89+
90+
def get_polarity(self, nodes):
    """
    Derive the polarity of a phrase from the nodes that form it.

    Counts the nodes whose Polarity feature equals 'Neg'. Returns 'Neg' when
    the count is exactly one, and an empty string for any other count
    (no negated node, or more than one).
    """
    negated = sum(1 for member in nodes if member.feats['Polarity'] == 'Neg')

    # An empty string is returned (rather than e.g. 'Pos') for every count
    # other than one; the intent is that no PhrasePolarity attribute
    # is generated in that case.
    return 'Neg' if negated == 1 else ''
106+
107+
def get_negative_particles(self, nodes):
    """
    Collect the negative particles attached to the given nodes.

    A child of any of the nodes is collected when it is a PART with
    Polarity='Neg' attached as 'advmod' and self.has_fixed_children() does
    not report it as the head of a fixed expression from self.negation_fixed.
    The result is a new list, ordered by parent (in the order of `nodes`)
    and then by the order of each parent's children.
    """
    def is_free_negation(child):
        # the has_fixed_children() check comes last so that it is only
        # evaluated for genuine negative particles
        return (
            child.upos == 'PART'
            and child.feats['Polarity'] == 'Neg'
            and child.udeprel == 'advmod'
            and not self.has_fixed_children(child)
        )

    return [
        child
        for parent in nodes
        for child in parent.children
        if is_free_negation(child)
    ]
118+
60119

61120
def get_is_reflex(self,node,refl):
62121
if node.feats['Voice'] == 'Mid':
@@ -75,4 +134,4 @@ def get_voice(self,node,refl):
75134
if self.is_expl_pass(refl):
76135
return 'Pass'
77136
return voice
78-
137+

udapi/block/msf/slavic/conditional.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,16 @@ def process_node(self, node):
1919
# the conditional mood can be formed using the auxiliary verb or some conjunctions (such as 'aby, kdyby...' in Czech)
2020
# so x.udeprel == 'aux' cannot be required here, because the conjunctions would not satisfy it
2121

22-
if len(aux_cnd) > 0 and len(cop) == 0:
22+
if aux_cnd and not cop:
2323
aux = [x for x in node.children if x.udeprel == 'aux' or x.feats['Mood'] == 'Cnd'] # all auxiliary verbs and conjuctions with feats['Mood'] == 'Cnd'
2424
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']
25-
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
25+
26+
phrase_nodes = [node] + aux + refl
2627

27-
phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in refl] + [x.ord for x in neg]
28+
neg = self.get_negative_particles(phrase_nodes)
29+
phrase_nodes += neg
30+
31+
phrase_ords = [x.ord for x in phrase_nodes]
2832
phrase_ords.sort()
2933

3034
auxVerb = aux_cnd[0]
@@ -41,7 +45,7 @@ def process_node(self, node):
4145
form='Fin',
4246
aspect=node.feats['Aspect'],
4347
reflex=self.get_is_reflex(node,refl),
44-
polarity=self.get_polarity(node,neg),
48+
polarity=self.get_polarity(phrase_nodes),
4549
voice=self.get_voice(node, refl),
4650
ords=phrase_ords,
4751
gender=node.feats['Gender'],
@@ -53,15 +57,18 @@ def process_node(self, node):
5357
cop = [x for x in node.children if x.udeprel == 'cop' and (x.feats['VerbForm'] == 'Part' or x.feats['VerbForm'] == 'Fin')]
5458
aux_cnd = [x for x in node.children if x.feats['Mood'] == 'Cnd' or x.deprel=='aux:cnd']
5559

56-
if len(cop) > 0 and len(aux_cnd) > 0:
60+
if cop and aux_cnd:
5761
# a copula can itself have Mood='Cnd' (e.g. in Old East Slavic); to avoid counting such a copula in phrase_ords twice, the aux list requires x.udeprel != 'cop'
5862
aux = [x for x in node.children if (x.udeprel == 'aux' or x.feats['Mood'] == 'Cnd') and x.udeprel != 'cop']
59-
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
6063
prep = [x for x in node.children if x.upos == 'ADP']
6164
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']
6265

66+
phrase_nodes = [node] + aux + prep + refl + cop
67+
neg = self.get_negative_particles(phrase_nodes)
68+
phrase_nodes += neg
69+
6370
copVerb = cop[0]
64-
phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in cop] + [x.ord for x in neg] + [x.ord for x in prep] + [x.ord for x in refl]
71+
phrase_ords = [x.ord for x in phrase_nodes]
6572
phrase_ords.sort()
6673
self.write_node_info(node,
6774
aspect=copVerb.feats['Aspect'],
@@ -70,9 +77,9 @@ def process_node(self, node):
7077
mood='Cnd',
7178
form='Fin',
7279
voice=self.get_voice(copVerb, refl),
73-
polarity=self.get_polarity(copVerb,neg),
80+
polarity=self.get_polarity(phrase_nodes),
7481
reflex=self.get_is_reflex(node, refl),
7582
ords=phrase_ords,
7683
gender=copVerb.feats['Gender'],
7784
animacy=copVerb.feats['Animacy']
78-
)
85+
)

udapi/block/msf/slavic/converb.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,12 @@ def process_node(self, node):
1212
# condition node.upos == 'VERB' to prevent copulas from entering this branch
1313
if node.feats['VerbForm'] == 'Conv' and node.upos == 'VERB':
1414
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']
15-
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
16-
17-
phrase_ords = [node.ord] + [x.ord for x in refl] + [x.ord for x in neg]
15+
16+
phrase_nodes = [node] + refl
17+
neg = self.get_negative_particles(phrase_nodes)
18+
phrase_nodes += neg
19+
20+
phrase_ords = [x.ord for x in phrase_nodes]
1821
phrase_ords.sort()
1922

2023
self.write_node_info(node,
@@ -23,7 +26,7 @@ def process_node(self, node):
2326
form='Conv',
2427
tense=node.feats['Tense'],
2528
aspect=node.feats['Aspect'],
26-
polarity=self.get_polarity(node,neg),
29+
polarity=self.get_polarity(phrase_nodes),
2730
reflex=self.get_is_reflex(node,refl),
2831
ords=phrase_ords,
2932
gender=node.feats['Gender'],
@@ -35,10 +38,13 @@ def process_node(self, node):
3538
elif node.upos == 'ADJ':
3639
aux = [x for x in node.children if x.udeprel == 'aux' and x.feats['VerbForm'] == 'Conv']
3740

38-
if len(aux) > 0:
39-
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
41+
if aux:
4042
auxVerb = aux[0]
41-
phrase_ords = [node.ord] + [x.ord for x in aux] + [x.ord for x in neg]
43+
44+
phrase_nodes = [node] + aux
45+
neg = self.get_negative_particles(phrase_nodes)
46+
phrase_nodes += neg
47+
phrase_ords = [x.ord for x in phrase_nodes]
4248
phrase_ords.sort()
4349

4450
self.write_node_info(node,
@@ -47,7 +53,7 @@ def process_node(self, node):
4753
form='Conv',
4854
tense=auxVerb.feats['Tense'],
4955
aspect=node.feats['Aspect'],
50-
polarity=self.get_polarity(auxVerb,neg),
56+
polarity=self.get_polarity(phrase_nodes),
5157
ords=phrase_ords,
5258
gender=auxVerb.feats['Gender'],
5359
animacy=auxVerb.feats['Animacy'],
@@ -58,13 +64,16 @@ def process_node(self, node):
5864
else:
5965
cop = [x for x in node.children if x.udeprel == 'cop' and x.feats['VerbForm'] == 'Conv']
6066

61-
if len(cop) > 0:
67+
if cop:
6268
prep = [x for x in node.children if x.upos == 'ADP']
63-
neg = [x for x in node.children if x.feats['Polarity'] == 'Neg' and x.upos == 'PART']
6469
refl = [x for x in node.children if x.feats['Reflex'] == 'Yes' and x.udeprel == 'expl']
6570

6671
copVerb = cop[0]
67-
phrase_ords = [node.ord] + [x.ord for x in cop] + [x.ord for x in prep] + [x.ord for x in neg] + [x.ord for x in refl]
72+
73+
phrase_nodes = [node] + cop + prep + refl
74+
neg = self.get_negative_particles(phrase_nodes)
75+
phrase_nodes += neg
76+
phrase_ords = [x.ord for x in phrase_nodes]
6877
phrase_ords.sort()
6978

7079

@@ -76,7 +85,7 @@ def process_node(self, node):
7685
gender=copVerb.feats['Gender'],
7786
animacy=copVerb.feats['Animacy'],
7887
form='Conv',
79-
polarity=self.get_polarity(node,neg),
88+
polarity=self.get_polarity(phrase_nodes),
8089
ords=phrase_ords,
8190
voice=self.get_voice(copVerb, refl)
8291
)

0 commit comments

Comments
 (0)