diff --git a/src/grammar/consts.rs b/src/grammar/consts.rs new file mode 100644 index 0000000..274377a --- /dev/null +++ b/src/grammar/consts.rs @@ -0,0 +1,10 @@ + +pub const EPSILON: char = 'ε'; +pub const STRING_END: char = '$'; + +// NOTE: this could be in conflict with the Terminal symbols, so +// it is MANDATORY that the Terminal doesn't have dots in it! +pub const ITEM_SEP: char = '.'; + +pub type NonTerminal = usize; +pub type Terminal = char; diff --git a/src/grammar/first_follow.rs b/src/grammar/first_follow.rs new file mode 100644 index 0000000..44e636e --- /dev/null +++ b/src/grammar/first_follow.rs @@ -0,0 +1,156 @@ +use std::collections::BTreeSet; + +use crate::grammar::{ + consts::{NonTerminal, Terminal, EPSILON}, + Grammar, Letter, Production +}; + +mod first; +mod follow; + +pub type FirstTable = Vec>; +pub type FollowTable = Vec>; + +pub use first::get_first; + +pub struct FirstFollowTable { + first: FirstTable, + follow: FollowTable, + nullable: Vec, +} + +impl FirstFollowTable { + pub fn get_follow(&self, non_terminal: NonTerminal) -> BTreeSet { + self.follow[non_terminal].clone() + } + + pub fn get_first(&self, letter: &Letter) -> BTreeSet { + first::get_first_letter(&self.first, letter) + } + + /// checks if the rest of the iterator is all nullable. + /// assumes the nullable set has been initialized. 
+ fn is_nullable<'a, T: Iterator>(&self, iter: &mut T) -> bool { + iter.all(|letter| -> bool { + match letter { + Letter::NonTerminal(idx) => self.nullable[*idx], + Letter::Terminal(ch) => *ch == EPSILON, + } + }) + } +} + +impl From<&Grammar> for FirstFollowTable { + fn from(grammar: &Grammar) -> Self { + let num_non_terminal = grammar.get_non_terminal().len(); + + let nullable = compute_nullable(grammar, num_non_terminal); + let first = first::compute_first(grammar, num_non_terminal, &nullable); + let follow = follow::compute_follow(grammar, num_non_terminal, &first); + + FirstFollowTable { + first, + follow, + nullable, + } + } +} + +fn compute_nullable(grammar: &Grammar, num_non_terminal: usize) -> Vec { + let nullable = grammar.get_nullable(); + let mut out = vec![false; num_non_terminal]; + + nullable.iter().for_each(|non_terminal| -> () { + out[*non_terminal] = true; + }); + out +} + +#[cfg(test)] +mod test { + use crate::grammar::consts::STRING_END; + + use super::*; + + fn get_test_grammar() -> Grammar { + // S -> Ab | c + // A -> aA | ε + // S = 0 + // A = 1 + Grammar::new( + 0, + vec![ + Production { + start_symbol: 0, + expand_rule: vec![Letter::NonTerminal(1), Letter::Terminal('b')], + }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('c')], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('a'), Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal(EPSILON)], + }, + ], + ) + } + + #[test] + fn first_test() { + let grammar = get_test_grammar(); + + let first_follow = FirstFollowTable::from(&grammar); + let first = first_follow.get_first(&Letter::NonTerminal(0)); + + assert_eq!(first.len(), 3); + assert!(first.contains(&'a')); + assert!(first.contains(&'b')); + assert!(first.contains(&'c')); + + let first = first_follow.get_first(&Letter::NonTerminal(1)); + assert_eq!(first.len(), 2); + assert!(first.contains(&'a')); + assert!(first.contains(&EPSILON)); + } 
+ + #[test] + fn first_cycle_test() { + let grammar = Grammar::new( + 0, + vec![ + Production { + start_symbol: 0, + expand_rule: vec![Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::NonTerminal(0)], + }, + ], + ); + + let first_follow = FirstFollowTable::from(&grammar); + let first = first_follow.get_first(&Letter::NonTerminal(0)); + + assert_eq!(first.len(), 0); + } + + #[test] + fn follow_test() { + let grammar = get_test_grammar(); + let first_follow = FirstFollowTable::from(&grammar); + + let follow = first_follow.get_follow(0); + assert_eq!(follow.len(), 1); + assert!(follow.contains(&STRING_END)); + + let follow = first_follow.get_follow(1); + assert_eq!(follow.len(), 1); + assert!(follow.contains(&'b')); + } +} diff --git a/src/grammar/first_follow/first.rs b/src/grammar/first_follow/first.rs new file mode 100644 index 0000000..797a025 --- /dev/null +++ b/src/grammar/first_follow/first.rs @@ -0,0 +1,60 @@ +use std::collections::BTreeSet; + +use crate::grammar::{ + consts::{Terminal, EPSILON}, + Grammar, Letter, +}; + +use super::FirstTable; + +pub fn compute_first( + grammar: &Grammar, + num_non_terminal: usize, + nullable: &Vec, +) -> FirstTable { + let mut first_table = vec![BTreeSet::new(); num_non_terminal]; + let productions = grammar.get_productions(); + let mut has_changed = true; + + while has_changed { + has_changed = false; + + productions.iter().for_each(|production| -> () { + has_changed |= production.update_first_table(&mut first_table, &nullable); + }); + } + + (0..num_non_terminal).for_each(|i| -> () { + if nullable[i] { + first_table[i].insert(EPSILON); + } + }); + first_table +} + +pub fn get_first_letter(first: &FirstTable, letter: &Letter) -> BTreeSet { + match letter { + Letter::NonTerminal(idx) => first[*idx].clone(), + Letter::Terminal(ch) => BTreeSet::from([*ch]), + } +} + +//TODO: add tests +pub fn get_first<'a, T>(first: &FirstTable, iter: &mut T) -> BTreeSet +where + T: Iterator, +{ + let 
mut out = BTreeSet::new(); + let mut nullable = true; + for letter in iter { + let mut set = get_first_letter(first, letter); + if !set.remove(&EPSILON) { + nullable = false; + } + out.append(&mut set); + } + if nullable { + out.insert(EPSILON); + } + out +} diff --git a/src/grammar/first_follow/follow.rs b/src/grammar/first_follow/follow.rs new file mode 100644 index 0000000..1304cdd --- /dev/null +++ b/src/grammar/first_follow/follow.rs @@ -0,0 +1,26 @@ +use std::collections::BTreeSet; + +use crate::grammar::{consts::STRING_END, Grammar}; + +use super::{FirstTable, FollowTable}; + +pub fn compute_follow( + grammar: &Grammar, + num_non_terminal: usize, + first: &FirstTable, +) -> FollowTable { + let mut follow_table = vec![BTreeSet::new(); num_non_terminal]; + follow_table[grammar.get_start_symbol()].insert(STRING_END); + + let productions = grammar.get_productions(); + let mut has_changed = true; + + while has_changed { + has_changed = false; + + productions.iter().for_each(|production| { + has_changed |= production.update_follow_table(first, &mut follow_table); + }); + } + follow_table +} diff --git a/src/grammar/grammar.rs b/src/grammar/grammar.rs index cca1948..728c2e0 100644 --- a/src/grammar/grammar.rs +++ b/src/grammar/grammar.rs @@ -1,21 +1,11 @@ -use std::collections::{BTreeSet, BTreeMap}; +use std::collections::{BTreeMap, BTreeSet}; use crate::automata::DFA; +use crate::grammar::consts::{NonTerminal, Terminal, EPSILON, STRING_END}; +use crate::grammar::{Letter, Production}; -pub type NonTerminal = usize; -pub type Terminal = char; - -#[derive(Debug, PartialEq, Clone, PartialOrd, Eq, Ord)] -pub enum Letter { - NonTerminal(NonTerminal), - Terminal(Terminal), -} - -#[derive(Debug, PartialEq)] -pub struct Production { - lhs: NonTerminal, - rhs: Vec, -} +mod helper; +mod semplification; #[derive(Debug, PartialEq)] pub struct Grammar { @@ -28,393 +18,43 @@ pub struct Grammar { nullable: Option>, } -const EPSILON: char = 'ε'; -const STRING_END: char = '$'; - 
-// NOTE: this could be in conflict with the Terminal symbols, so -// it is MANDATORY that the Terminal doesn´t have dots in it! -const ITEM_SEP: char = '.'; - impl Grammar { - pub fn first(&mut self, letter: &Letter) -> BTreeSet { - if let None = self.nullable { - self.nullable = Some(self.get_nullable()); - } - - match letter { - Letter::NonTerminal(non_terminal) => { - let mut first = self._first(non_terminal); - - if self.nullable.as_ref().unwrap().contains(&non_terminal) { - first.insert(EPSILON); - } - - first - } - Letter::Terminal(terminal) => { - let mut first = BTreeSet::new(); - first.insert(*terminal); - first - } - } - } - - fn _first(&self, non_terminal: &NonTerminal) -> BTreeSet { - let nullable = self.nullable.as_ref().unwrap(); - let mut first = BTreeSet::new(); - - for production in self.productions.iter() { - if production.lhs != *non_terminal { - continue; - } - - for letter in production.rhs.iter() { - // we can continue to add more only if previous symbols are nullable - match letter { - Letter::NonTerminal(idx) => { - first.append(&mut self._first(idx)); - if !nullable.contains(idx) { - break; - } - }, - Letter::Terminal(ch) => { - // NOTE: i don't want to insert epsilons, because - // then i should remove them in upper level! 
- if *ch != EPSILON { - first.insert(*ch); - break; - } - } - } - } - } - - first - } - - pub fn follow(&mut self, non_terminal: &NonTerminal) -> BTreeSet { - if let None = self.nullable { - self.nullable = Some(self.get_nullable()); - } - - let num_non_terminal = self.get_non_terminal().len(); - let mut used = vec![0; num_non_terminal]; - - self._follow(non_terminal, &mut used) - } - - fn _follow(&self, non_terminal: &NonTerminal, used: &mut Vec) -> BTreeSet { - if used[*non_terminal] == 1 { - return BTreeSet::new(); - } - used[*non_terminal] = 1; - - let nullable = self.nullable.as_ref().unwrap(); - let mut follow = BTreeSet::new(); - - if *non_terminal == self.start_symbol { - follow.insert(STRING_END); - } - - for production in self.productions.iter() { - for (i, letter) in production.rhs.iter().enumerate() { - match letter { - Letter::NonTerminal(idx) => { - if *idx != *non_terminal { - continue; - } - - // if we are at the end of the production, then we need to add the follow of the lhs - if i == production.rhs.len() - 1 { - follow.append(&mut self._follow(&production.lhs, used)); - } else { - // otherwise we need to add the first of the next symbol - let next_letter = &production.rhs[i + 1]; - match next_letter { - Letter::NonTerminal(idx) => { - follow.append(&mut self._first(idx)); - }, - Letter::Terminal(ch) => { - follow.insert(*ch); - } - } - - // if the whole next symbol is nullable, then we need to add the follow of the lhs - let mut is_nullable = true; - for letter in production.rhs[i + 1..].iter() { - match letter { - Letter::NonTerminal(idx) => { - if !nullable.contains(idx) { - is_nullable = false; - break; - } - }, - Letter::Terminal(ch) => { - if *ch != EPSILON { - is_nullable = false; - break; - } - } - } - } - - if is_nullable { - follow.append(&mut self._follow(&production.lhs, used)); - } - } - }, - Letter::Terminal(_) => {} - } - } - } - - follow - } - - pub fn get_non_terminal(&self) -> BTreeSet { - let mut non_terminals = BTreeSet::new(); 
- for production in self.productions.iter() { - non_terminals.insert(production.lhs); - } - - non_terminals - } - - pub fn get_nullable(&self) -> BTreeSet { - let mut nullable = BTreeSet::new(); - let mut has_changed = true; - while has_changed { - has_changed = false; - for production in self.productions.iter() { - let mut is_nullable = true; - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(idx) => { - if !nullable.contains(idx) { - is_nullable = false; - break; - } - } - Letter::Terminal(ch) => { - if *ch != EPSILON { - is_nullable = false; - break; - } - } - } - } - if is_nullable && !nullable.contains(&production.lhs) { - nullable.insert(production.lhs); - has_changed = true; - } - } - } - - nullable - } - - /// O(m^2) implementation of reachable function, could be optimized - /// but i need to store adjacency list of the graph in grammar, and the - /// use bfs. - pub fn get_reachable(&self) -> BTreeSet { - let mut reachable = BTreeSet::new(); - let mut has_changed = true; - reachable.insert(self.start_symbol); - while has_changed { - has_changed = false; - for production in self.productions.iter() { - if !reachable.contains(&production.lhs) { - continue; - } - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(idx) => { - if !reachable.contains(idx) { - reachable.insert(*idx); - has_changed = true; - } - } - Letter::Terminal(_) => {} - } - } - } - } - - reachable - } - - /// returns set of generator non terminals - /// a non terminal is a generator when it produces some finite - /// string of terminals - pub fn get_generators(&self) -> BTreeSet { - let mut generators = BTreeSet::new(); - for production in self.productions.iter() { - let mut is_generator = true; - for letter in production.rhs.iter() { - match letter { - Letter::NonTerminal(non_terminal) => { - if !generators.contains(non_terminal) { - is_generator = false; - break; - } - } - Letter::Terminal(_) => {} - } - } - if is_generator { - 
generators.insert(production.lhs); - } + pub fn new(start_symbol: NonTerminal, productions: Vec) -> Self { + Grammar { + start_symbol, + productions, + nullable: None, } - - generators - } - - pub fn remove_useless(&mut self) -> () { - // first remove non generators - let generators = self.get_generators(); - - self.productions.retain(|production| { - generators.contains(&production.lhs) && production.rhs.iter().all(|letter| { - match letter { - Letter::NonTerminal(idx) => generators.contains(idx), - Letter::Terminal(_) => true - } - }) - }); - - // then remove non reachable - let reachable = self.get_reachable(); - - self.productions.retain(|production| { - reachable.contains(&production.lhs) && production.rhs.iter().all(|letter| { - match letter { - Letter::NonTerminal(idx) => reachable.contains(idx), - Letter::Terminal(_) => true - } - }) - }); - - // invalidate nullable - self.nullable = None; } - pub fn get_unitary_couples(&self) -> BTreeSet<(NonTerminal, NonTerminal)> { - let non_terminals = self.get_non_terminal(); - let mut unitary_couples = BTreeSet::new(); - let mut has_changed = true; - - for non_terminal in non_terminals { - unitary_couples.insert((non_terminal, non_terminal)); - } - - while has_changed { - has_changed = false; - for production in self.productions.iter() { - if production.rhs.len() != 1 { - continue; - } - let mut to_insert = BTreeSet::new(); - for unitary_couple in unitary_couples.iter() { - if let Letter::NonTerminal(non_term) = production.rhs[0] { - if unitary_couple.1 == production.lhs && - !unitary_couples.contains(&(unitary_couple.0, non_term)) && - !to_insert.contains(&(unitary_couple.0, non_term)) { - to_insert.insert((unitary_couple.0, non_term)); - } - } - } - - if to_insert.len() > 0 { - unitary_couples.append(&mut to_insert); - has_changed = true; - } - } - } - - unitary_couples + pub fn get_start_symbol(&self) -> NonTerminal { + self.start_symbol } - // TODO: this is a very complex function in this moment, it needs refactor 
- // it also has some points were it can be optimized - pub fn remove_unitary_cycles(&mut self) { - let unitary_couples = self.get_unitary_couples(); - - // remove all unitary couples - self.productions.retain(|production| { - if production.rhs.len() != 1 { - return true; - } - - match production.rhs[0] { - Letter::NonTerminal(non_term) => !unitary_couples.contains(&(production.lhs, non_term)), - Letter::Terminal(_) => true - } - }); - - // add corresponding productions - let mut adj_list = self.transitions_to_adj_list(); - for unitary_couple in unitary_couples.iter() { - if unitary_couple.0 == unitary_couple.1 { - continue; - } - - let mut to_insert = adj_list.get(&unitary_couple.1).unwrap().clone(); - - adj_list.entry(unitary_couple.0) - .or_insert(BTreeSet::new()) - .append(&mut to_insert); - } - - // trasform adj list back to transitions - let mut new_transitions = vec![]; - for (non_terminal, transitions) in adj_list.iter() { - for transition in transitions.iter() { - new_transitions.push(Production { - lhs: *non_terminal, - rhs: transition.clone() - }); - } - } - self.productions = new_transitions; - - // invalidate nullable - self.nullable = None; + pub fn get_productions(&self) -> &Vec { + &self.productions } - fn transitions_to_adj_list(&self) -> BTreeMap>> { + pub fn productions_to_adj_list(&self) -> BTreeMap>> { let mut adj_list: BTreeMap>> = BTreeMap::new(); for production in self.productions.iter() { - adj_list.entry(production.lhs) + adj_list + .entry(production.start_symbol) .or_insert(BTreeSet::new()) - .insert(production.rhs.clone()); + .insert(production.expand_rule.clone()); } adj_list } - pub fn get_itemization(&self) -> Vec { - let mut itemized_transitions = vec![]; - for production in self.productions.iter() { - if production.rhs.len() == 1 && production.rhs[0] == Letter::Terminal(EPSILON) { - itemized_transitions.push(Production { - lhs: production.lhs, - rhs: vec![Letter::Terminal(ITEM_SEP)] - }); - continue; - } - - for i in 
0..=production.rhs.len() { - let mut rhs = production.rhs.clone(); - - rhs.insert(i, Letter::Terminal(ITEM_SEP)); - itemized_transitions.push(Production { - lhs: production.lhs, - rhs: rhs - }); - } - } + pub fn add_fake_initial_state(&mut self) -> () { + let new_state = self.get_non_terminal().iter().max().unwrap() + 1; + self.productions.push(Production { + start_symbol: new_state, + expand_rule: vec![Letter::NonTerminal(self.start_symbol)], + }); - itemized_transitions + self.start_symbol = new_state; } } @@ -429,17 +69,23 @@ impl From<&DFA> for Grammar { for (transition_ch, dest) in transitions.iter() { let lhs = idx; let rhs = vec![Letter::Terminal(*transition_ch), Letter::NonTerminal(*dest)]; - productions.push(Production { lhs: lhs, rhs }); + productions.push(Production { + start_symbol: lhs, + expand_rule: rhs, + }); } } for end_state in dfa.get_end_states() { let lhs = *end_state; let rhs = vec![Letter::Terminal(EPSILON)]; - productions.push(Production { lhs: lhs, rhs }); + productions.push(Production { + start_symbol: lhs, + expand_rule: rhs, + }); } - Self { + Self { start_symbol: dfa.get_start_state(), productions, @@ -451,82 +97,31 @@ impl From<&DFA> for Grammar { #[cfg(test)] mod test { use super::*; + use crate::automata::DFA; use crate::map; - fn get_test_grammar() -> Grammar { - // S -> Ab | c - // A -> aA | ε - // S = 0 - // A = 1 - Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b')] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('c')] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, - ], - nullable: None, - } - } - - #[test] - fn test_first() { - let mut grammar = get_test_grammar(); - - let first = grammar.first(&Letter::NonTerminal(0)); - - assert_eq!(first.len(), 3); - assert!(first.contains(&'a')); - assert!(first.contains(&'b')); - 
assert!(first.contains(&'c')); - - } - - #[test] - fn test_follow() { - let mut grammar = get_test_grammar(); - - let follow = grammar.follow(&0); - assert_eq!(follow.len(), 1); - assert!(follow.contains(&STRING_END)); - - let follow = grammar.follow(&1); - assert_eq!(follow.len(), 1); - assert!(follow.contains(&'b')); - } - - #[test] - fn test_nullable() { - let grammar = get_test_grammar(); - - let nullable = grammar.get_nullable(); - assert_eq!(nullable.len(), 1); - assert!(nullable.contains(&1)); - } - #[test] fn test_dfa_conversion() { // this dfa should recognize ba* let dfa: DFA = DFA::from_state( 3, - 0, - vec![1], + 0, + vec![1], vec![ - map! { + map! { 'a' => 2, 'b' => 1 }, - map! { + map! { 'a' => 1, 'b' => 2 }, - map! { + map! { 'a' => 2, 'b' => 2 }, - ], - None + ], + None, ); let grammar = Grammar::from(&dfa); @@ -535,118 +130,38 @@ mod test { let result = Grammar { start_symbol: 0, productions: vec![ - Production { lhs: 0, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(2)] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('b'), Letter::NonTerminal(2)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, - ], - nullable: None, - }; - - assert_eq!(grammar, result); - } - - #[test] - fn test_remove_useless() { - let mut grammar = { - // S -> AB | a - // B -> b - - // S = 0 - // B = 1 - // A = 2 - - Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::NonTerminal(2)] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('b')] }, - ], - nullable: None, - } - }; - - grammar.remove_useless(); 
- - let result = Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, - ], - nullable: None, - }; - - assert_eq!(grammar, result); - } - - #[test] - fn test_remove_unitary_cycles() { - // E -> E + T | T - // T -> T * F | F - // F -> (E) | a - - let mut grammar = Grammar { - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(0), Letter::Terminal('+'), Letter::NonTerminal(1)] }, - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, - Production { lhs: 1, rhs: vec![Letter::NonTerminal(2)] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('a')] }, - ], - nullable: None, - }; - - let result = Grammar { - // E -> E + T | T * F | (E) | a - // T -> T * F | (E) | a - // F -> (E) | a - start_symbol: 0, - productions: vec![ - Production { lhs: 0, rhs: vec![Letter::NonTerminal(0), Letter::Terminal('+'), Letter::NonTerminal(1)] }, - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('a')] }, - Production { lhs: 1, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('*'), Letter::NonTerminal(2)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a')] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, - Production { lhs: 2, rhs: vec![Letter::Terminal('a')] }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('a'), Letter::NonTerminal(2)], + }, + Production { + start_symbol: 0, + 
expand_rule: vec![Letter::Terminal('b'), Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('a'), Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('b'), Letter::NonTerminal(2)], + }, + Production { + start_symbol: 2, + expand_rule: vec![Letter::Terminal('a'), Letter::NonTerminal(2)], + }, + Production { + start_symbol: 2, + expand_rule: vec![Letter::Terminal('b'), Letter::NonTerminal(2)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal(EPSILON)], + }, ], nullable: None, }; - grammar.remove_unitary_cycles(); - assert_eq!(grammar, result); } - - #[test] - fn test_itemization() { - let grammar = get_test_grammar(); - - let items = grammar.get_itemization(); - - let result_productions = vec![ - Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1), Letter::Terminal('b')] }, - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal(ITEM_SEP), Letter::Terminal('b')] }, - Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b'), Letter::Terminal(ITEM_SEP)] }, - - Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('c')] }, - Production { lhs: 0, rhs: vec![Letter::Terminal('c'), Letter::Terminal(ITEM_SEP)] }, - - Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('a'), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1), Letter::Terminal(ITEM_SEP)] }, - Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP)] }, - ]; - - assert!(items.iter().all(|item| result_productions.contains(item))); - assert!(result_productions.iter().all(|item| items.contains(item))); - } -} \ No newline at end of file +} diff --git a/src/grammar/grammar/helper.rs b/src/grammar/grammar/helper.rs 
new file mode 100644 index 0000000..56381b3 --- /dev/null +++ b/src/grammar/grammar/helper.rs @@ -0,0 +1,184 @@ +//! This file contains some general helper functions used +//! to implement grammar simplification and the FIRST and FOLLOW sets +use std::collections::BTreeSet; + +use super::{Grammar, Letter, NonTerminal, Production, EPSILON}; + +impl Grammar { + pub fn get_non_terminal(&self) -> BTreeSet { + let mut non_terminals = BTreeSet::new(); + for production in self.productions.iter() { + non_terminals.insert(production.start_symbol); + } + + non_terminals + } + + pub fn get_nullable(&self) -> BTreeSet { + let mut nullable = BTreeSet::new(); + let mut has_changed = true; + while has_changed { + has_changed = false; + for production in self.productions.iter() { + if production.check_is_nullable(&nullable) + && !nullable.contains(&production.start_symbol) + { + nullable.insert(production.start_symbol); + has_changed = true; + } + } + } + + nullable + } + + /// O(m^2) implementation of the reachable function; it could be optimized, + /// but that requires storing the adjacency list of the graph in the grammar + /// and then using BFS. 
+ pub fn get_reachable(&self) -> BTreeSet { + let mut reachable = BTreeSet::new(); + let mut has_changed = true; + reachable.insert(self.start_symbol); + while has_changed { + has_changed = false; + self.productions.iter().for_each(|production| -> () { + if !reachable.contains(&production.start_symbol) { + return; + } + for letter in production.expand_rule.iter() { + match letter { + Letter::NonTerminal(idx) => { + if !reachable.contains(idx) { + reachable.insert(*idx); + has_changed = true; + } + } + Letter::Terminal(_) => {} + } + } + }); + } + + reachable + } + + /// returns set of generator non terminals + /// a non terminal is a generator when it produces some finite + /// string of terminals + /// This is still O(m^2) implementation, could be optimized + pub fn get_generators(&self) -> BTreeSet { + let mut generators = BTreeSet::new(); + let mut has_changed = true; + + while has_changed { + has_changed = false; + + self.productions.iter().for_each(|production| -> () { + let mut is_generator = true; + production.expand_rule.iter().for_each(|letter| -> () { + match letter { + Letter::NonTerminal(non_terminal) => { + if !generators.contains(non_terminal) { + is_generator = false; + return; + } + } + Letter::Terminal(_) => {} + } + }); + + if is_generator { + // `insert` returns true only when the set actually grew; flagging a + // change unconditionally would keep this fixpoint loop running forever + has_changed |= generators.insert(production.start_symbol); + } + }); + } + + generators + } + + /// returns set of unitary couples of non terminals + /// a unitary couple is a couple of non terminals (A, B) such that + /// A -> B is a production in the grammar or A -> C, C -> B is a production + /// (aka it's transitive and reflexive) + pub fn get_unitary_couples(&self) -> BTreeSet<(NonTerminal, NonTerminal)> { + let non_terminals = self.get_non_terminal(); + let mut unitary_couples = BTreeSet::new(); + let mut has_changed = true; + + for non_terminal in non_terminals { + unitary_couples.insert((non_terminal, non_terminal)); + } + + while has_changed { + has_changed = false; + for production in 
self.productions.iter() { + if production.expand_rule.len() != 1 { + continue; + } + let mut to_insert = BTreeSet::new(); + for unitary_couple in unitary_couples.iter() { + if let Letter::NonTerminal(non_term) = production.expand_rule[0] { + if unitary_couple.1 == production.start_symbol + && !unitary_couples.contains(&(unitary_couple.0, non_term)) + && !to_insert.contains(&(unitary_couple.0, non_term)) + { + to_insert.insert((unitary_couple.0, non_term)); + } + } + } + + if to_insert.len() > 0 { + unitary_couples.append(&mut to_insert); + has_changed = true; + } + } + } + + unitary_couples + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::grammar::Production; + + fn get_test_grammar() -> Grammar { + // S -> Ab | c + // A -> aA | ε + // S = 0 + // A = 1 + Grammar { + start_symbol: 0, + productions: vec![ + Production { + start_symbol: 0, + expand_rule: vec![Letter::NonTerminal(1), Letter::Terminal('b')], + }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('c')], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('a'), Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal(EPSILON)], + }, + ], + nullable: None, + } + } + + #[test] + fn test_nullable() { + let grammar = get_test_grammar(); + + let nullable = grammar.get_nullable(); + assert_eq!(nullable.len(), 1); + assert!(nullable.contains(&1)); + } +} diff --git a/src/grammar/grammar/semplification.rs b/src/grammar/grammar/semplification.rs new file mode 100644 index 0000000..6b742d1 --- /dev/null +++ b/src/grammar/grammar/semplification.rs @@ -0,0 +1,256 @@ +use std::collections::BTreeSet; + +use super::{Grammar, Letter, Production}; + +impl Grammar { + pub fn remove_useless(&mut self) -> () { + // first remove non generators + let generators = self.get_generators(); + + self.productions.retain(|production| { + generators.contains(&production.start_symbol) + && production.expand_rule.iter().all(|letter| 
match letter { + Letter::NonTerminal(idx) => generators.contains(idx), + Letter::Terminal(_) => true, + }) + }); + + // then remove non reachable + let reachable = self.get_reachable(); + + self.productions.retain(|production| { + reachable.contains(&production.start_symbol) + && production.expand_rule.iter().all(|letter| match letter { + Letter::NonTerminal(idx) => reachable.contains(idx), + Letter::Terminal(_) => true, + }) + }); + + // invalidate nullable + self.nullable = None; + } + + // TODO: this function is quite complex at the moment and needs a refactor; + // there are also some points where it can be optimized + pub fn remove_unitary_cycles(&mut self) { + let unitary_couples = self.get_unitary_couples(); + + // remove all unitary couples + self.productions.retain(|production| { + if production.expand_rule.len() != 1 { + return true; + } + + match production.expand_rule[0] { + Letter::NonTerminal(non_term) => { + !unitary_couples.contains(&(production.start_symbol, non_term)) + } + Letter::Terminal(_) => true, + } + }); + + // add corresponding productions + let mut adj_list = self.productions_to_adj_list(); + for unitary_couple in unitary_couples.iter() { + if unitary_couple.0 == unitary_couple.1 { + continue; + } + + // the target may have no production left once the unit rules are gone + // (e.g. A -> B, B -> A); it then contributes nothing instead of panicking + let mut to_insert = adj_list.get(&unitary_couple.1).cloned().unwrap_or_default(); + + adj_list + .entry(unitary_couple.0) + .or_insert(BTreeSet::new()) + .append(&mut to_insert); + } + + // transform the adjacency list back into productions + let mut new_transitions = vec![]; + for (non_terminal, transitions) in adj_list.iter() { + for transition in transitions.iter() { + new_transitions.push(Production { + start_symbol: *non_terminal, + expand_rule: transition.clone(), + }); + } + } + self.productions = new_transitions; + + // invalidate nullable + self.nullable = None; + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_remove_useless() { + let mut grammar = { + // S -> AB | a + // B -> b + + // S = 0 + // B = 1 + // A = 2 + + Grammar { + 
start_symbol: 0, + productions: vec![ + Production { + start_symbol: 0, + expand_rule: vec![Letter::NonTerminal(1), Letter::NonTerminal(2)], + }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('a')], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('b')], + }, + ], + nullable: None, + } + }; + + grammar.remove_useless(); + + let result = Grammar { + start_symbol: 0, + productions: vec![Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('a')], + }], + nullable: None, + }; + + assert_eq!(grammar, result); + } + + #[test] + fn test_remove_unitary_cycles() { + // E -> E + T | T + // T -> T * F | F + // F -> (E) | a + + let mut grammar = Grammar { + start_symbol: 0, + productions: vec![ + Production { + start_symbol: 0, + expand_rule: vec![ + Letter::NonTerminal(0), + Letter::Terminal('+'), + Letter::NonTerminal(1), + ], + }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::NonTerminal(1)], + }, + Production { + start_symbol: 1, + expand_rule: vec![ + Letter::NonTerminal(1), + Letter::Terminal('*'), + Letter::NonTerminal(2), + ], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::NonTerminal(2)], + }, + Production { + start_symbol: 2, + expand_rule: vec![ + Letter::Terminal('('), + Letter::NonTerminal(0), + Letter::Terminal(')'), + ], + }, + Production { + start_symbol: 2, + expand_rule: vec![Letter::Terminal('a')], + }, + ], + nullable: None, + }; + + let result = Grammar { + // E -> E + T | T * F | (E) | a + // T -> T * F | (E) | a + // F -> (E) | a + start_symbol: 0, + productions: vec![ + Production { + start_symbol: 0, + expand_rule: vec![ + Letter::NonTerminal(0), + Letter::Terminal('+'), + Letter::NonTerminal(1), + ], + }, + Production { + start_symbol: 0, + expand_rule: vec![ + Letter::NonTerminal(1), + Letter::Terminal('*'), + Letter::NonTerminal(2), + ], + }, + Production { + start_symbol: 0, + expand_rule: vec![ + Letter::Terminal('('), + Letter::NonTerminal(0), 
+ Letter::Terminal(')'), + ], + }, + Production { + start_symbol: 0, + expand_rule: vec![Letter::Terminal('a')], + }, + Production { + start_symbol: 1, + expand_rule: vec![ + Letter::NonTerminal(1), + Letter::Terminal('*'), + Letter::NonTerminal(2), + ], + }, + Production { + start_symbol: 1, + expand_rule: vec![ + Letter::Terminal('('), + Letter::NonTerminal(0), + Letter::Terminal(')'), + ], + }, + Production { + start_symbol: 1, + expand_rule: vec![Letter::Terminal('a')], + }, + Production { + start_symbol: 2, + expand_rule: vec![ + Letter::Terminal('('), + Letter::NonTerminal(0), + Letter::Terminal(')'), + ], + }, + Production { + start_symbol: 2, + expand_rule: vec![Letter::Terminal('a')], + }, + ], + nullable: None, + }; + + grammar.remove_unitary_cycles(); + + assert_eq!(grammar, result); + } +} + diff --git a/src/grammar/item.rs b/src/grammar/item.rs new file mode 100644 index 0000000..4a68806 --- /dev/null +++ b/src/grammar/item.rs @@ -0,0 +1,405 @@ +use std::collections::{BTreeSet, BTreeMap, VecDeque}; + +use crate::grammar::{Grammar, Letter, Production}; +use crate::grammar::consts::{EPSILON, ITEM_SEP, Terminal, NonTerminal}; + +use super::consts::STRING_END; + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct Item { + production: Production, + look_ahead: Option, +} + +impl Item { + pub fn get_itemization(productions: &Vec) -> Vec { + let mut itemized_transitions = vec![]; + for production in productions.iter() { + itemized_transitions.append(&mut Item::itemize(production)); + } + + itemized_transitions + } + + /// itemizes a single production + pub fn itemize(production: &Production) -> Vec { + let mut itemized_prod = vec![]; + + if production.rhs.len() == 1 && production.rhs[0] == Letter::Terminal(EPSILON) { + itemized_prod.push(Production { + lhs: production.lhs, + rhs: vec![Letter::Terminal(ITEM_SEP)] + }); + return itemized_prod; + } + + for i in 0..=production.rhs.len() { + let mut rhs = production.rhs.clone(); + + rhs.insert(i, 
Letter::Terminal(ITEM_SEP)); + itemized_prod.push(Production { + lhs: production.lhs, + rhs: rhs + }); + } + + itemized_prod + } + + /// adds an ITEM_SEP to the beginning of each production + fn add_initial_sep(productions: BTreeMap>>) + -> BTreeMap>> { + let mut result_prod = BTreeMap::new(); + for (non_terminal, set) in productions.into_iter() { + let mut letters = set.into_iter().collect::>>(); + letters.iter_mut().for_each(|letter| { + letter.insert(0, Letter::Terminal(ITEM_SEP)); + }); + + result_prod.insert(non_terminal, letters.into_iter().collect::>>()); + } + + result_prod + } + + /// return the closure of the set **productions** in the input + /// with the given look_ahead + /// production is the adjacency list of all the productions + pub fn closure( + items: &BTreeSet, + grammar: &mut Grammar, + ) -> BTreeSet { + let mut closure_items = (*items).clone(); + let mut used_non_term = vec![false; grammar.get_non_terminal().len()]; + let dot_production = Self::add_initial_sep(grammar.productions_to_adj_list()); + + // the non terminals to explore are seen as a BFS that expands to other non terminals + // when it sees an arc (e.g. a non terminal after a SEP). 
+ let mut closure_queue = Self::compute_closure_queue(items, &mut used_non_term); + + // apply the closure to all the non terminals in closure_queue + while let Some((non_terminal, letter_first)) = closure_queue.pop_front() { + dot_production.get(&non_terminal) + .unwrap() + .iter() + .for_each(|rhs| { + // with dot_production, the dot is always at 0, so the first letter is 1 + let (non_term_opt, look_ahead) = Self::get_next_closure_non_term(rhs); + + match non_term_opt { + Some(non_term) => { + if !used_non_term[non_term] { + used_non_term[non_term] = true; + + match look_ahead { + Some(_) => { + if let None = letter_first { + closure_queue.push_back((non_term, None)); + } else { + closure_queue.push_back((non_term, look_ahead)) + } + }, + None => closure_queue.push_back((non_term, None)), + } + } + } + None => {} + } + + let production = Production { + lhs: non_terminal, + rhs: rhs.clone(), + }; + + // Closure with first only if the preceding look_ahead has been set! + // this should save some computation time. 
+ if let None = letter_first { + closure_items.insert(Item { + production: production, + look_ahead: None, + }); + return; + } + + let first = letter_first.as_ref().unwrap(); + let first_letter_set = grammar.first(first); + + first_letter_set.iter().for_each(|look_ahead| { + closure_items.insert(Item { + production: production.clone(), + look_ahead: Some(*look_ahead), + }); + }); + }); + } + + closure_items + } + + /// queue of non_terminal to explore and next letter for first + fn compute_closure_queue(items: &BTreeSet, used_non_term: &mut Vec) + -> VecDeque<(NonTerminal, Option)> { + let mut non_terminals: VecDeque<(NonTerminal, Option)> = VecDeque::new(); + items.iter().for_each(|item| { + let item_sep_pos = item.production.rhs.iter() + .position(|letter| *letter == Letter::Terminal(ITEM_SEP)); + if item_sep_pos.is_none() { + return; + } + + let item_sep_pos = item_sep_pos.unwrap(); + if item_sep_pos == item.production.rhs.len() - 1 { + return; + } + + let next_letter = &item.production.rhs[item_sep_pos + 1]; + + if let Letter::NonTerminal(non_terminal) = next_letter { + if used_non_term[*non_terminal] { + return; + } + used_non_term[*non_terminal] = true; + + if let None = item.look_ahead { + non_terminals.push_back((*non_terminal, None)); + } else if item_sep_pos < item.production.rhs.len() - 2 { + non_terminals.push_back((*non_terminal, Some(item.production.rhs[item_sep_pos + 2].clone()))); + } else { + non_terminals.push_back((*non_terminal, Some(Letter::Terminal(STRING_END)))); + } + } + }); + + non_terminals + } + + /// this function assumes the dot is at the beginning of the production + /// and returns the non terminal after the dot, with the look ahead letter, + /// if there is one. 
+ fn get_next_closure_non_term(rhs: &Vec) -> (Option, Option) { + let first_non_term = Production::get_nth_if_non_terminal(rhs, 1); + let second_letter = Production::get_nth(rhs, 2); + + match first_non_term { + Some(non_term) => { + match second_letter { + Some(letter) => (Some(*non_term), Some(letter.clone())), + None => (Some(*non_term), None), + } + } + None => (None, None), + } + } + + pub fn goto(items: &BTreeSet, letter: &Letter) -> BTreeSet { + let mut goto_items: BTreeSet = BTreeSet::new(); + + items.iter().for_each(|item| { + let item_sep_pos = item.production.rhs.iter().position(|letter| *letter == Letter::Terminal(ITEM_SEP)); + if item_sep_pos.is_none() { + return; + } + + let item_sep_pos = item_sep_pos.unwrap(); + if item_sep_pos == item.production.rhs.len() - 1 { + return; + } + + if &item.production.rhs[item_sep_pos + 1] != letter { + return; + } + + goto_items.insert({ + let mut new_item = item.clone(); + new_item.production.rhs[item_sep_pos] = new_item.production.rhs[item_sep_pos + 1].clone(); + new_item.production.rhs[item_sep_pos + 1] = Letter::Terminal(ITEM_SEP); + + new_item + }); + }); + + goto_items + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::set; + #[test] + fn test_itemization() { + let productions = vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('c')] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal(EPSILON)] }, + ]; + + let items = Item::get_itemization(&productions); + + let result_productions = vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1), Letter::Terminal('b')] }, + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal(ITEM_SEP), Letter::Terminal('b')] }, + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::Terminal('b'), Letter::Terminal(ITEM_SEP)] }, + + 
Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('c')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('c'), Letter::Terminal(ITEM_SEP)] }, + + Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('a'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a'), Letter::NonTerminal(1), Letter::Terminal(ITEM_SEP)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP)] }, + ]; + + assert!(items.iter().all(|item| result_productions.contains(item))); + assert!(result_productions.iter().all(|item| items.contains(item))); + } + + #[test] + fn closure_0 () { + // S -> (S) + // S -> A + // A -> a + let mut grammar = Grammar::new( + 0, + vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('a')] }, + ], + ); + grammar.add_fake_initial_state(); + + let mut start_item = set![Item { + production: Production { lhs: 2, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0)] }, + look_ahead: None, + }]; + let closure = Item::closure(&mut start_item, &mut grammar).into_iter() + .map(|item| item.production) + .collect::>(); + + let result = vec![ + Production { lhs: 2, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0)] }, + Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('a')] }, + ]; + + assert!(closure.iter().all(|item| result.contains(item))); + assert!(result.iter().all(|item| closure.contains(item))); + } + + #[test] + fn closure_1() { + // S -> CC + // C 
-> cC + // C -> d + + let mut grammar = Grammar::new( + 0, + vec![ + Production { lhs: 0, rhs: vec![Letter::NonTerminal(1), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('c'), Letter::NonTerminal(1)] }, + Production { lhs: 1, rhs: vec![Letter::Terminal('d')] }, + ], + ); + + grammar.add_fake_initial_state(); + + let mut start_item = set![Item { + production: Production { lhs: 2, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0)] }, + look_ahead: Some(STRING_END), + }]; + + let closure: Vec = Item::closure(&mut start_item, &mut grammar).into_iter().collect(); + + let result = vec![ + Item { + production: Production { lhs: 2, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0)] }, + look_ahead: Some(STRING_END), + }, + Item { + production: Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(1), Letter::NonTerminal(1)] }, + look_ahead: Some(STRING_END), + }, + Item { + production: Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('c'), Letter::NonTerminal(1)] }, + look_ahead: Some('c'), + }, + Item { + production: Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('c'), Letter::NonTerminal(1)] }, + look_ahead: Some('d'), + }, + Item { + production: Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('d')] }, + look_ahead: Some('c'), + }, + Item { + production: Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('d')] }, + look_ahead: Some('d'), + }, + ]; + + assert!(closure.iter().all(|item| result.contains(item))); + assert!(result.iter().all(|item| closure.contains(item))); + } + + #[test] + fn goto() { + // S -> (S) + // S -> () + let mut grammar = Grammar::new( + 0, + vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::Terminal(')')] }, + ], + ); + 
grammar.add_fake_initial_state(); + + let mut start_item = set![Item { + production: Production { lhs: 1, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0)] }, + look_ahead: None, + }]; + + let closure = Item::closure(&mut start_item, &mut grammar); + let goto = Item::goto(&closure, &Letter::Terminal('(')).into_iter() + .map(|item| item.production) + .collect::>(); + + // result should be + // S -> (.S) + // S -> (.) + let result = vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::Terminal(ITEM_SEP), Letter::Terminal(')')] }, + ]; + + assert!(goto.iter().all(|item| result.contains(item))); + assert!(result.iter().all(|item| goto.contains(item))); + + // SECOND PART OF TEST, APPLY CLOSURE TO GOTO'S OUTPUT + let mut goto_items = goto.into_iter() + .map(|item| Item { + production: item, + look_ahead: None, + }) + .collect::>(); + + let closure = Item::closure(&mut goto_items, &mut grammar) + .into_iter() + .map(|item| item.production) + .collect::>(); + + let result = vec![ + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::Terminal(ITEM_SEP), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal('('), Letter::Terminal(ITEM_SEP), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('('), Letter::NonTerminal(0), Letter::Terminal(')')] }, + Production { lhs: 0, rhs: vec![Letter::Terminal(ITEM_SEP), Letter::Terminal('('), Letter::Terminal(')')] }, + ]; + + assert!(closure.iter().all(|item| result.contains(item))); + assert!(result.iter().all(|item| closure.contains(item))); + + } +} \ No newline at end of file diff --git a/src/grammar/mod.rs b/src/grammar/mod.rs index 4b8b37a..12c12f3 100644 --- a/src/grammar/mod.rs +++ b/src/grammar/mod.rs @@ -1,3 +1,10 @@ mod grammar; +// mod item; +pub mod consts; 
+mod first_follow; +mod production; -pub use grammar::*; \ No newline at end of file +pub use grammar::*; +// pub use item::*; +pub use first_follow::*; +pub use production::*; diff --git a/src/grammar/production.rs b/src/grammar/production.rs new file mode 100644 index 0000000..50ac997 --- /dev/null +++ b/src/grammar/production.rs @@ -0,0 +1,37 @@ +use crate::grammar::consts::{NonTerminal, Terminal}; + +mod helper; + +#[derive(Debug, PartialEq, Clone, PartialOrd, Eq, Ord)] +pub enum Letter { + NonTerminal(NonTerminal), + Terminal(Terminal), +} + +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord)] +pub struct Production { + pub start_symbol: NonTerminal, + pub expand_rule: Vec, +} + +impl Production { + pub fn get_nth_if_non_terminal(letters: &Vec, n: usize) -> Option<&NonTerminal> { + if n >= letters.len() { + return None; + } + + match &letters[n] { + Letter::NonTerminal(idx) => Some(idx), + _ => None, + } + } + + pub fn get_nth(letters: &Vec, n: usize) -> Option<&Letter> { + if n >= letters.len() { + return None; + } + + Some(&letters[n]) + } +} + diff --git a/src/grammar/production/helper.rs b/src/grammar/production/helper.rs new file mode 100644 index 0000000..19c87ef --- /dev/null +++ b/src/grammar/production/helper.rs @@ -0,0 +1,81 @@ +use std::collections::{BTreeSet, VecDeque}; + +use crate::grammar::{ + consts::{NonTerminal, EPSILON}, + get_first, FollowTable, +}; + +use super::{Letter, Production}; +use crate::grammar::first_follow::FirstTable; + +impl Production { + pub fn check_is_nullable(&self, nullable: &BTreeSet) -> bool { + for letter in self.expand_rule.iter() { + match letter { + Letter::NonTerminal(idx) => { + if !nullable.contains(idx) { + return false; + } + } + Letter::Terminal(ch) => { + if *ch != EPSILON { + return false; + } + } + } + } + true + } + + pub fn update_first_table(&self, first_table: &mut FirstTable, nullable: &Vec) -> bool { + let mut has_changed = false; + for letter in self.expand_rule.iter() { + match letter { + 
Letter::NonTerminal(idx) => { + let mut set_to_join = first_table[*idx].clone(); + + if !set_to_join.is_subset(&first_table[self.start_symbol]) { + first_table[self.start_symbol].append(&mut set_to_join); + has_changed = true; + } + if !nullable[*idx] { + break; + } + } + Letter::Terminal(ch) => { + if *ch != EPSILON { + has_changed |= first_table[self.start_symbol].insert(*ch); + break; + } + } + } + } + has_changed + } + + /// updates the follow table with the given production + /// returns true if the follow table has changed, false otherwise + pub fn update_follow_table(&self, first: &FirstTable, follow: &mut FollowTable) -> bool { + let mut has_changed = false; + let mut production = self + .expand_rule + .clone() + .into_iter() + .collect::>(); + + for letter in self.expand_rule.iter() { + production.pop_front(); + if let Letter::NonTerminal(idx) = letter { + let mut res = get_first(first, &mut production.iter()); + if res.remove(&EPSILON) { + res.append(&mut follow[self.start_symbol].clone()); + } + if !res.is_subset(&follow[*idx]) { + follow[*idx].append(&mut res); + has_changed = true; + } + } + } + has_changed + } +} diff --git a/src/main.rs b/src/main.rs index db54ac0..06280dd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,10 +1,10 @@ #![allow(non_snake_case)] mod app; -mod utils; -mod display; mod automata; -mod grammar; +mod display; mod error; +mod grammar; +mod utils; #[macro_use] mod macros; @@ -31,7 +31,7 @@ fn main() { let mut web_options = eframe::WebOptions::default(); web_options.default_theme = eframe::Theme::Dark; web_options.follow_system_theme = false; - + eframe::start_web( "the_canvas_id", // hardcode it web_options,