1313
1414class DecisionTreeClassifier :
1515 """A Decision Tree Classifier built from scratch.
16-
16+
1717 This tree uses information gain (entropy-based) for splitting decisions.
18-
18+
1919 Attributes:
2020 max_depth: Maximum depth of the tree
2121 min_samples_split: Minimum samples required to split a node
@@ -31,12 +31,14 @@ def __init__(self, max_depth=10, min_samples_split=2, n_features=None):
3131
3232 def fit (self , X , y ):
3333 """Build the decision tree.
34-
34+
3535 Args:
3636 X: Training features, shape (n_samples, n_features)
3737 y: Training labels, shape (n_samples,)
3838 """
39- self .n_features = X .shape [1 ] if not self .n_features else min (self .n_features , X .shape [1 ])
39+ self .n_features = (
40+ X .shape [1 ] if not self .n_features else min (self .n_features , X .shape [1 ])
41+ )
4042 self .tree = self ._grow_tree (X , y )
4143
4244 def _grow_tree (self , X , y , depth = 0 ):
@@ -45,17 +47,21 @@ def _grow_tree(self, X, y, depth=0):
4547 n_labels = len (np .unique (y ))
4648
4749 # Stopping criteria
48- if depth >= self .max_depth or n_labels == 1 or n_samples < self .min_samples_split :
50+ if (
51+ depth >= self .max_depth
52+ or n_labels == 1
53+ or n_samples < self .min_samples_split
54+ ):
4955 leaf_value = self ._most_common_label (y )
50- return {' leaf' : True , ' value' : leaf_value }
56+ return {" leaf" : True , " value" : leaf_value }
5157
5258 # Find best split
5359 feat_idxs = np .random .choice (n_features , self .n_features , replace = False )
5460 best_feat , best_thresh = self ._best_split (X , y , feat_idxs )
5561
5662 if best_feat is None :
5763 leaf_value = self ._most_common_label (y )
58- return {' leaf' : True , ' value' : leaf_value }
64+ return {" leaf" : True , " value" : leaf_value }
5965
6066 # Split the data
6167 left_idxs = X [:, best_feat ] <= best_thresh
@@ -66,11 +72,11 @@ def _grow_tree(self, X, y, depth=0):
6672 right = self ._grow_tree (X [right_idxs ], y [right_idxs ], depth + 1 )
6773
6874 return {
69- ' leaf' : False ,
70- ' feature' : best_feat ,
71- ' threshold' : best_thresh ,
72- ' left' : left ,
73- ' right' : right
75+ " leaf" : False ,
76+ " feature" : best_feat ,
77+ " threshold" : best_thresh ,
78+ " left" : left ,
79+ " right" : right ,
7480 }
7581
7682 def _best_split (self , X , y , feat_idxs ):
@@ -127,44 +133,44 @@ def _most_common_label(self, y):
127133
128134 def predict (self , X ):
129135 """Predict class labels for samples in X.
130-
136+
131137 Args:
132138 X: Features, shape (n_samples, n_features)
133-
139+
134140 Returns:
135141 Predicted labels, shape (n_samples,)
136142 """
137143 return np .array ([self ._traverse_tree (x , self .tree ) for x in X ])
138144
139145 def _traverse_tree (self , x , node ):
140146 """Traverse the tree to make a prediction for a single sample."""
141- if node [' leaf' ]:
142- return node [' value' ]
147+ if node [" leaf" ]:
148+ return node [" value" ]
143149
144- if x [node [' feature' ]] <= node [' threshold' ]:
145- return self ._traverse_tree (x , node [' left' ])
146- return self ._traverse_tree (x , node [' right' ])
150+ if x [node [" feature" ]] <= node [" threshold" ]:
151+ return self ._traverse_tree (x , node [" left" ])
152+ return self ._traverse_tree (x , node [" right" ])
147153
148154
149155class RandomForestClassifier :
150156 """Random Forest Classifier built from scratch.
151-
157+
152158 Random Forest is an ensemble learning method that constructs multiple
153159 decision trees during training and outputs the mode of the classes
154160 (classification) of the individual trees.
155-
161+
156162 Features:
157163 - Bootstrap sampling (bagging) to create diverse trees
158164 - Random feature selection at each split
159165 - Majority voting for final predictions
160-
166+
161167 Attributes:
162168 n_estimators: Number of trees in the forest
163169 max_depth: Maximum depth of each tree
164170 min_samples_split: Minimum samples required to split a node
165171 n_features: Number of features to consider for best split
166172 trees: List of trained decision trees
167-
173+
168174 Example:
169175 >>> from sklearn.datasets import make_classification
170176 >>> from sklearn.model_selection import train_test_split
@@ -186,9 +192,11 @@ class RandomForestClassifier:
186192 >>> print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
187193 """
188194
189- def __init__ (self , n_estimators = 100 , max_depth = 10 , min_samples_split = 2 , n_features = None ):
195+ def __init__ (
196+ self , n_estimators = 100 , max_depth = 10 , min_samples_split = 2 , n_features = None
197+ ):
190198 """Initialize Random Forest Classifier.
191-
199+
192200 Args:
193201 n_estimators: Number of trees in the forest (default: 100)
194202 max_depth: Maximum depth of each tree (default: 10)
@@ -204,17 +212,17 @@ def __init__(self, n_estimators=100, max_depth=10, min_samples_split=2, n_featur
204212
205213 def fit (self , X , y ):
206214 """Build a forest of trees from the training set (X, y).
207-
215+
208216 Args:
209217 X: Training features, shape (n_samples, n_features)
210218 y: Training labels, shape (n_samples,)
211-
219+
212220 Returns:
213221 self: Fitted classifier
214222 """
215223 self .trees = []
216224 n_features = X .shape [1 ]
217-
225+
218226 # Default to sqrt of total features if not specified
219227 if self .n_features is None :
220228 self .n_features = int (np .sqrt (n_features ))
@@ -223,24 +231,24 @@ def fit(self, X, y):
223231 tree = DecisionTreeClassifier (
224232 max_depth = self .max_depth ,
225233 min_samples_split = self .min_samples_split ,
226- n_features = self .n_features
234+ n_features = self .n_features ,
227235 )
228236 X_sample , y_sample = self ._bootstrap_sample (X , y )
229237 tree .fit (X_sample , y_sample )
230238 self .trees .append (tree )
231-
239+
232240 return self
233241
234242 def _bootstrap_sample (self , X , y ):
235243 """Create a bootstrap sample from the dataset.
236-
244+
237245 Bootstrap sampling randomly samples with replacement from the dataset.
238246 This creates diverse training sets for each tree.
239-
247+
240248 Args:
241249 X: Features, shape (n_samples, n_features)
242250 y: Labels, shape (n_samples,)
243-
251+
244252 Returns:
245253 X_sample: Bootstrap sample of features
246254 y_sample: Bootstrap sample of labels
@@ -251,19 +259,19 @@ def _bootstrap_sample(self, X, y):
251259
252260 def predict (self , X ):
253261 """Predict class labels for samples in X.
254-
262+
255263 Uses majority voting: each tree votes for a class, and the
256264 class with the most votes becomes the final prediction.
257-
265+
258266 Args:
259267 X: Features, shape (n_samples, n_features)
260-
268+
261269 Returns:
262270 Predicted labels, shape (n_samples,)
263271 """
264272 # Get predictions from all trees
265273 tree_preds = np .array ([tree .predict (X ) for tree in self .trees ])
266-
274+
267275 # Majority voting: transpose to get predictions per sample
268276 # then find most common prediction for each sample
269277 tree_preds = np .swapaxes (tree_preds , 0 , 1 )
@@ -287,11 +295,7 @@ def _most_common_label(self, y):
287295
288296 # Generate sample classification dataset
289297 X , y = make_classification (
290- n_samples = 1000 ,
291- n_features = 20 ,
292- n_informative = 15 ,
293- n_redundant = 5 ,
294- random_state = 42
298+ n_samples = 1000 , n_features = 20 , n_informative = 15 , n_redundant = 5 , random_state = 42
295299 )
296300
297301 # Split the data
@@ -307,9 +311,7 @@ def _most_common_label(self, y):
307311 # Train Random Forest Classifier
308312 print ("Training Random Forest Classifier..." )
309313 rf_classifier = RandomForestClassifier (
310- n_estimators = 10 ,
311- max_depth = 10 ,
312- min_samples_split = 2
314+ n_estimators = 10 , max_depth = 10 , min_samples_split = 2
313315 )
314316 rf_classifier .fit (X_train , y_train )
315317 print ("Training complete!" )
0 commit comments