"""Manage a CSV-backed student dataset with pandas.

Recovered from the diff that introduced
``Zooid_Vis/bin/data/student_db_mange.py``.

``StudentDataBase`` loads the CSV, drops the columns this tool does not
use, adds an ``Average Grade`` column and writes the result back to the
same file.  ``Student`` describes one student and can append itself to
the CSV.
"""

import os

import pandas as pd

DEFAULT_FILENAME = 'student-dataset.csv'

# Columns of the raw dataset that this tool does not use.
_UNUSED_COLUMNS = [
    'latitude',
    'longitude',
    'ethnic.group',
    'language.grade',
    'portfolio.rating',
    'coverletter.rating',
    'refletter.rating',
]

# Columns averaged into 'Average Grade'.
_GRADE_COLUMNS = ["math.grade", "english.grade", "sciences.grade"]

# CSV column name -> ``Student.__init__`` argument name, where they differ.
_COLUMN_TO_ARGUMENT = {
    'english.grade': 'english',
    'math.grade': 'math',
    'sciences.grade': 'science',
}

# Positional arguments accepted by ``Student.__init__``.
_STUDENT_ARGUMENTS = (
    'id',
    'name',
    'nationality',
    'city',
    'gender',
    'age',
    'english',
    'math',
    'science',
)


class StudentDataBase:
    """In-memory view of the student CSV, held as a pandas DataFrame.

    Constructing an instance reads ``filename``, removes the unused
    columns, (re)computes ``Average Grade`` and immediately writes the
    cleaned table back to ``filename``.
    """

    def __init__(self, filename=DEFAULT_FILENAME):
        self.filename = filename
        self.clean_data_base()
        self.add_avg()
        self.save()

    def clean_data_base(self):
        """Load the CSV and drop the columns this tool does not use.

        Columns are removed by header name.  ``errors="ignore"`` makes the
        call safe to repeat on a file that was already cleaned.  (Rows
        could instead be removed by position with ``drop(index=...)``;
        that alternative is not used here.)
        """
        self.students = pd.read_csv(self.filename)
        self.students.drop(
            columns=_UNUSED_COLUMNS, errors="ignore", inplace=True
        )

    def save(self):
        """Write the current table back to ``self.filename`` (no index)."""
        self.students.to_csv(self.filename, index=False)

    def add_avg(self):
        """Set ``Average Grade`` to the row-wise mean of the three grades."""
        self.students['Average Grade'] = (
            self.students[_GRADE_COLUMNS].mean(axis=1)
        )

    def remove_student(self, name):
        """Drop every row whose ``name`` equals *name* (in memory only).

        The change is not written to disk; call ``save()`` to persist it.
        """
        self.students = self.students[self.students['name'] != name]

    # Original (misspelled) name, kept so existing callers keep working.
    removre_student = remove_student

    def update_grade(self, name, english=None, math=None, science=None):
        """Overwrite the given grades for *name* and save the table.

        Only grades that are not ``None`` are written, so a grade of ``0``
        is applied rather than skipped.  ``Average Grade`` is recomputed
        before saving so it never goes stale.  If no row matches *name*, a
        message is printed and nothing is changed.
        """
        matches = self.students['name'] == name
        if not matches.any():
            print("No such student exist")
            return

        new_grades = {
            'english.grade': english,
            'math.grade': math,
            'sciences.grade': science,
        }
        for column, grade in new_grades.items():
            if grade is not None:
                self.students.loc[matches, column] = grade

        self.add_avg()
        self.save()

    def get_student_as_panda(self, id):
        """Return the rows whose ``id`` equals *id*, or ``None`` if none."""
        student_data = self.students[self.students['id'] == id]
        if student_data.empty:
            return None
        return student_data

    def get_student_as_Student_class(self, id):
        """Return the first row matching *id* as a ``Student``, or ``None``.

        Grade columns are renamed to the constructor's argument names and
        any other column the constructor does not accept (for example
        ``Average Grade``) is left out.  The ``Student`` is built with
        ``register=False`` so that looking a student up does not append a
        duplicate row to the CSV, and with this database's ``filename``.
        """
        student_data = self.get_student_as_panda(id)
        if student_data is None:
            return None

        arguments = {}
        for column, value in student_data.iloc[0].to_dict().items():
            argument = _COLUMN_TO_ARGUMENT.get(column, column)
            if argument in _STUDENT_ARGUMENTS:
                arguments[argument] = value
        return Student(**arguments, filename=self.filename, register=False)

    def get_database(self):
        """Return the underlying DataFrame (not a copy)."""
        return self.students

    def count_country(self, nationality):
        """Return how many rows have ``nationality`` equal to *nationality*."""
        return int((self.students['nationality'] == nationality).sum())


class Student(StudentDataBase):
    """One student, able to append itself to the CSV.

    Constructing a ``Student`` loads (and re-saves) the CSV through
    ``StudentDataBase.__init__``.  When *id* is not ``None`` and
    *register* is true, the student is appended to the table and the file
    is saved.  The stored row gets the next free id from ``get_next_id()``,
    not the *id* passed in.

    Keyword-only arguments:
        filename: CSV file to load and save.
        register: pass ``False`` to build the object without adding a row.
    """

    def __init__(self, id=None, name=None, nationality=None, city=None,
                 gender=None, age=None, english=None, math=None,
                 science=None, *, filename=DEFAULT_FILENAME, register=True):
        super().__init__(filename)
        self.id = id
        self.name = name
        self.nationality = nationality
        self.city = city
        self.age = age
        self.gender = gender
        self.english_grade = english
        self.math_grade = math
        self.science_grade = science
        if register and id is not None:
            self.add_student()

    def add_student(self):
        """Append this student as a new row and save the CSV."""
        new_row = pd.DataFrame([{
            "id": self.get_next_id(),
            "name": self.name,
            "nationality": self.nationality,
            "city": self.city,
            "gender": self.gender,
            "age": self.age,
            "english.grade": self.english_grade,
            "math.grade": self.math_grade,
            "sciences.grade": self.science_grade,
            "Average Grade": self.avg(),
        }])
        self.students = pd.concat([self.students, new_row], ignore_index=True)
        self.save()

    def __str__(self):
        return f"Student name {self.name} have GPA of {self.avg()}"

    def get_next_id(self):
        """Return one more than the largest ``id`` in the table (1 if empty)."""
        if self.students.empty:
            return 1
        return self.students['id'].max() + 1

    def avg(self):
        """Return the mean of the math, english and science grades."""
        return (self.math_grade + self.english_grade + self.science_grade) / 3


# Demo run.  Guarded so that importing this module does not modify the CSV.
if __name__ == "__main__":
    DB = StudentDataBase()
    Student(12504, "John Doe", "USA", "New York", "Male", 20, 85, 90, 88)
    df = DB.get_database()

    print(DB.count_country('Mexico'))
    stu = DB.get_student_as_Student_class(179)
    print(stu)