55- Frequent itemsets
66- Association rules with minimum confidence and lift
77
8- WIKI:https://en.wikipedia.org/wiki/Apriori_algorithm
8+ WIKI: https://en.wikipedia.org/wiki/Apriori_algorithm
99"""
1010
from collections import defaultdict
from itertools import combinations
from typing import Dict, List, Set, Tuple
1314
def load_data() -> List[List[str]]:
    """
    Return a small sample transaction dataset.

    Each inner list is one transaction, i.e. the item names bought together.
    The lists are built anew on every call, so the caller may mutate the
    result without affecting later calls.

    >>> data = load_data()
    >>> len(data)
    4
    >>> 'milk' in data[0]
    True
    """
    return [
        ["milk"],
        ["milk", "butter"],
        ["milk", "bread"],
        ["milk", "bread", "chips"],
    ]
2227
2328
class Apriori:
    """Apriori frequent-itemset miner with support, confidence and lift filtering.

    Mining runs eagerly in the constructor. Afterwards ``itemsets`` holds one
    ``{itemset: support}`` dict per itemset size (size 1 first) and ``rules``
    holds ``(antecedent, consequent, confidence, lift)`` tuples for every rule
    that meets both the confidence and the lift threshold.

    >>> model = Apriori([["a", "b"], ["a"], ["a", "b"]], min_support=0.5)
    >>> model.itemsets[0][frozenset({"a"})]
    1.0
    >>> len(model.itemsets)
    2
    >>> model.confidence(frozenset({"b"}), frozenset({"a"}))
    1.0
    """

    def __init__(
        self,
        transactions: List[List[str]],
        min_support: float = 0.25,
        min_confidence: float = 0.5,
        min_lift: float = 1.0,
    ) -> None:
        """Store the thresholds and immediately mine itemsets and rules.

        Args:
            transactions: One list of item names per transaction. Each is
                converted to a set, so duplicate items within a transaction
                count once.
            min_support: Minimum fraction of transactions an itemset must
                appear in to be considered frequent.
            min_confidence: Minimum confidence a rule must reach to be kept.
            min_lift: Minimum lift a rule must reach to be kept.
        """
        self.transactions: List[Set[str]] = [set(t) for t in transactions]
        self.min_support: float = min_support
        self.min_confidence: float = min_confidence
        self.min_lift: float = min_lift
        self.itemsets: List[Dict[frozenset, float]] = []
        self.rules: List[Tuple[frozenset, frozenset, float, float]] = []

        self.find_frequent_itemsets()
        self.generate_association_rules()

    def _get_support(self, itemset: frozenset) -> float:
        """Return the fraction of transactions that contain ``itemset``.

        Returns 0.0 when there are no transactions at all, instead of
        dividing by zero.

        >>> Apriori([])._get_support(frozenset({"a"}))
        0.0
        """
        if not self.transactions:
            return 0.0
        hits = sum(1 for t in self.transactions if itemset.issubset(t))
        return hits / len(self.transactions)

    def confidence(self, antecedent: frozenset, consequent: frozenset) -> float:
        """Return the confidence of the rule ``antecedent -> consequent``.

        Confidence is support(antecedent | consequent) / support(antecedent);
        it is 0.0 when the antecedent never occurs.
        """
        support_antecedent: float = self._get_support(antecedent)
        support_both: float = self._get_support(antecedent | consequent)
        return support_both / support_antecedent if support_antecedent > 0 else 0.0

    def lift(self, antecedent: frozenset, consequent: frozenset) -> float:
        """Return the lift of the rule ``antecedent -> consequent``.

        Lift is confidence(antecedent -> consequent) / support(consequent);
        it is 0.0 when the consequent never occurs.
        """
        support_consequent: float = self._get_support(consequent)
        conf: float = self.confidence(antecedent, consequent)
        return conf / support_consequent if support_consequent > 0 else 0.0

    def find_frequent_itemsets(self) -> List[Dict[frozenset, float]]:
        """Compute all frequent itemsets level by level and return them.

        Level ``k`` candidates are unions of two frequent ``(k-1)``-itemsets
        whose every ``(k-1)``-subset is itself frequent. Only non-empty
        levels are stored. ``self.itemsets`` is rebuilt on every call, so
        calling this method again does not duplicate levels.

        Returns:
            ``self.itemsets``: one ``{itemset: support}`` dict per size.
        """
        # Rebuild in place so repeated calls do not append duplicate levels.
        self.itemsets.clear()

        item_counts: Dict[frozenset, int] = defaultdict(int)
        for transaction in self.transactions:
            for item in transaction:
                item_counts[frozenset([item])] += 1

        # item_counts is empty when there are no transactions, so the
        # division below is never reached with total == 0.
        total: int = len(self.transactions)
        current: Dict[frozenset, float] = {
            itemset: count / total
            for itemset, count in item_counts.items()
            if count / total >= self.min_support
        }

        size: int = 2
        while current:
            self.itemsets.append(current)

            candidates: Set[frozenset] = set()
            for left, right in combinations(current, 2):
                union = left | right
                # Keep the union only if it grows by exactly one item and
                # all of its (size - 1)-subsets are frequent.
                if len(union) == size and all(
                    frozenset(subset) in current
                    for subset in combinations(union, size - 1)
                ):
                    candidates.add(union)

            # Each support lookup scans every transaction; do it once per
            # candidate.
            frequent: Dict[frozenset, float] = {}
            for candidate in candidates:
                support = self._get_support(candidate)
                if support >= self.min_support:
                    frequent[candidate] = support

            current = frequent
            size += 1

        return self.itemsets

    def generate_association_rules(
        self,
    ) -> List[Tuple[frozenset, frozenset, float, float]]:
        """Derive rules from the frequent itemsets and return them.

        Every frequent itemset with at least two items is split into each
        non-empty antecedent / consequent pair. A rule is kept when its
        confidence is at least ``min_confidence`` and its lift is at least
        ``min_lift``. Rules already present in ``self.rules`` are not added
        again, so repeated calls do not create duplicates.

        Returns:
            ``self.rules``: ``(antecedent, consequent, confidence, lift)``
            tuples.
        """
        # A set gives O(1) duplicate checks; the list preserves rule order.
        seen: Set[Tuple[frozenset, frozenset, float, float]] = set(self.rules)
        for level in self.itemsets:
            for itemset in level:
                if len(itemset) < 2:
                    continue
                for size in range(1, len(itemset)):
                    for items in combinations(itemset, size):
                        antecedent_set: frozenset = frozenset(items)
                        consequent_set: frozenset = itemset - antecedent_set
                        conf: float = self.confidence(antecedent_set, consequent_set)
                        if conf < self.min_confidence:
                            continue
                        lft: float = self.lift(antecedent_set, consequent_set)
                        if lft < self.min_lift:
                            continue
                        rule: Tuple[frozenset, frozenset, float, float] = (
                            antecedent_set,
                            consequent_set,
                            conf,
                            lft,
                        )
                        if rule not in seen:
                            seen.add(rule)
                            self.rules.append(rule)
        return self.rules
100127
101128
@@ -104,8 +131,10 @@ def generate_association_rules(self):
104131
105132 doctest .testmod ()
106133
107- transactions = load_data ()
108- model = Apriori (transactions , min_support = 0.25 , min_confidence = 0.1 , min_lift = 0.0 )
134+ transactions : List [List [str ]] = load_data ()
135+ model : Apriori = Apriori (
136+ transactions , min_support = 0.25 , min_confidence = 0.1 , min_lift = 0.0
137+ )
109138
110139 print ("Frequent itemsets:" )
111140 for level in model .itemsets :
@@ -114,7 +143,8 @@ def generate_association_rules(self):
114143
115144 print ("\n Association Rules:" )
116145 for rule in model .rules :
117- antecedent , consequent , conf , lift = rule
118- print (f"{ set (antecedent )} -> { set (consequent )} , conf={ conf :.2f} , lift={ lift :.2f} " )
119-
120-
146+ antecedent , consequent , conf , lift_value = rule
147+ print (
148+ f"{ set (antecedent )} -> { set (consequent )} , "
149+ f"conf={ conf :.2f} , lift={ lift_value :.2f} "
150+ )
0 commit comments