From d36df670278a7d8c2122e32c793ab2ee51ebd528 Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Wed, 7 Aug 2024 17:30:32 -0800
Subject: [PATCH 01/34] add apache cassandra astra as source

---
 examples/apachecassandra_to_pinecone.yaml | 24 +++++++++
 vector_etl/source_mods/__init__.py        |  3 ++
 vector_etl/source_mods/airtable_loader.py |  0
 .../apache_cassandra_astra_loader.py      | 49 +++++++++++++++++++
 vector_etl/source_mods/base.py            |  3 ++
 5 files changed, 79 insertions(+)
 create mode 100644 examples/apachecassandra_to_pinecone.yaml
 create mode 100644 vector_etl/source_mods/airtable_loader.py
 create mode 100644 vector_etl/source_mods/apache_cassandra_astra_loader.py

diff --git a/examples/apachecassandra_to_pinecone.yaml b/examples/apachecassandra_to_pinecone.yaml
new file mode 100644
index 0000000..6b93fd0
--- /dev/null
+++ b/examples/apachecassandra_to_pinecone.yaml
@@ -0,0 +1,24 @@
+source:
+  source_data_type: "Apache Cassandra"
+  db_type: "cassandra_astra"
+  clientId: ""
+  secret: ""
+  keyspace: "sales"
+  secure_connect_bundle: "secure-connect-contextdata.zip"
+  query: "SELECT * FROM chipotle_stores LIMIT 10"
+
+embedding:
+  embedding_model: "OpenAI"
+  api_key: ""
+  model_name: "text-embedding-ada-002"
+
+target:
+  target_database: "Pinecone"
+  pinecone_api_key: ""
+  index_name: ""
+  dimension: 1536 #[Optional] Only required if creating a new index
+  metric: "cosine" #[Optional] Only required if creating a new index
+  cloud: "aws" #[Optional] Only required if creating a new index
+  region: "us-east-1" #[Optional] Only required if creating a new index
+
diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index 035255f..6f4cf62 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -8,6 +8,7 @@ from .zendesk_loader import ZendeskSource
 from .google_drive import GoogleDriveSource
 from .google_cloud_storage import GoogleCloudStorageSource
+from .apache_cassandra_astra_loader import ApacheCassandraAstraSource
 from .local_file import LocalFileSource
 
 def get_source_class(config):
@@ -30,5 +31,7 @@ def get_source_class(config):
         return GoogleDriveSource(config)
     elif source_type == 'Google Cloud Storage':
         return GoogleCloudStorageSource(config)
+    elif source_type == 'Apache Cassandra':
+        return ApacheCassandraAstraSource(config)
     else:
         raise ValueError(f"Unsupported source type: {source_type}")
diff --git a/vector_etl/source_mods/airtable_loader.py b/vector_etl/source_mods/airtable_loader.py
new file mode 100644
index 0000000..e69de29
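Note: a minimal sketch of how the new registry branch is reached once this patch is applied. The config keys mirror examples/apachecassandra_to_pinecone.yaml above; the credential values are placeholders, not working settings.

    from vector_etl.source_mods import get_source_class

    config = {
        "source_data_type": "Apache Cassandra",   # routes to ApacheCassandraAstraSource
        "db_type": "cassandra_astra",
        "clientId": "<astra-client-id>",
        "secret": "<astra-client-secret>",
        "keyspace": "sales",
        "secure_connect_bundle": "secure-connect-contextdata.zip",
        "query": "SELECT * FROM chipotle_stores LIMIT 10",
    }
    source = get_source_class(config)  # the loader connects in __init__
    df = source.fetch_data()           # pandas DataFrame of the query results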
diff --git a/vector_etl/source_mods/apache_cassandra_astra_loader.py b/vector_etl/source_mods/apache_cassandra_astra_loader.py
new file mode 100644
index 0000000..f4c394c
--- /dev/null
+++ b/vector_etl/source_mods/apache_cassandra_astra_loader.py
@@ -0,0 +1,49 @@
+import pandas as pd
+from cassandra.cluster import Cluster
+import logging
+from .base import BaseSource
+from cassandra.auth import PlainTextAuthProvider
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class ApacheCassandraAstraSource(BaseSource):
+    def __init__(self, config):
+        self.config = config
+        self.cluster = None
+        self.auth_provider = None
+        self.session = None
+        self.cloud_config = None
+        self.keyspace = self.config['keyspace']
+        self.connect()  # Initialize connection here
+
+    def connect(self):
+        if self.config["db_type"] == 'cassandra_astra':
+            self.cloud_config = {'secure_connect_bundle': self.config['secure_connect_bundle']}
+            self.auth_provider = PlainTextAuthProvider(self.config['clientId'], self.config['secret'])
+            self.cluster = Cluster(cloud=self.cloud_config, auth_provider=self.auth_provider, protocol_version=3)
+            self.session = self.cluster.connect(self.keyspace)
+        else:
+            raise ValueError("Invalid database type")
+
+    def fetch_data(self):
+        if not self.session:
+            raise Exception("Session is not initialized. Ensure you call connect() first.")
+
+        query = self.config.get("query", "")
+        prepared_statement = self.session.prepare(query)
+
+        try:
+            db_data = self.session.execute(prepared_statement)
+            if db_data:
+                # Convert to Pandas DataFrame
+                df = pd.DataFrame(list(db_data))
+                return df
+            else:
+                logger.error("No data returned from query")
+                return None
+        except Exception as e:
+            logger.error(f"An error occurred: {e}")
+            return None
diff --git a/vector_etl/source_mods/base.py b/vector_etl/source_mods/base.py
index 1041dc3..01c7683 100644
--- a/vector_etl/source_mods/base.py
+++ b/vector_etl/source_mods/base.py
@@ -8,3 +8,6 @@ def connect(self):
     @abstractmethod
     def fetch_data(self):
         pass
+
+
+

From 5d47ca5be99e051747f983cb18946b3f4b06a79a Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Fri, 9 Aug 2024 03:06:48 -0800
Subject: [PATCH 02/34] add bigquery and airtable

---
 vector_etl/source_mods/__init__.py        |  6 ++++
 vector_etl/source_mods/airtable_loader.py | 38 ++++++++++++++++++++
 vector_etl/source_mods/google_bigquery.py | 39 +++++++++++++++++++++
 3 files changed, 83 insertions(+)
 create mode 100644 vector_etl/source_mods/google_bigquery.py

diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index 6f4cf62..0f93ad3 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -10,6 +10,8 @@ from .google_cloud_storage import GoogleCloudStorageSource
 from .apache_cassandra_astra_loader import ApacheCassandraAstraSource
 from .local_file import LocalFileSource
+from .airtable_loader import AirTableSource
+from .google_bigquery import GoogleBigQuerySource
 
 def get_source_class(config):
     source_type = config['source_data_type']
@@ -33,5 +35,9 @@ def get_source_class(config):
         return GoogleCloudStorageSource(config)
     elif source_type == 'Apache Cassandra':
         return ApacheCassandraAstraSource(config)
+    elif source_type == 'AirTable':
+        return AirTableSource(config)
+    elif source_type == 'Google BigQuery':
+        return GoogleBigQuerySource(config)
     else:
         raise ValueError(f"Unsupported source type: {source_type}")
diff --git a/vector_etl/source_mods/airtable_loader.py b/vector_etl/source_mods/airtable_loader.py
index e69de29..e9bbfe9 100644
--- a/vector_etl/source_mods/airtable_loader.py
+++ b/vector_etl/source_mods/airtable_loader.py
@@ -0,0 +1,38 @@
+import requests
+from base import BaseSource
+from pprint import pprint
+import pandas as pd
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class AirTableSource(BaseSource):
+    def __init__(self,config):
+        self.config = config
+        self.url = f"{self.config['url']}{self.config['baseId']}/{self.config['tableIdOrName']}"
+        self.auth_token = config['auth_token']
+
+    def connect(self):
+        headers = {
+            "Authorization": f"Bearer {self.auth_token}"
+        }
+        try:
+            response = requests.get(self.url,headers=headers)
+            data = response.json()['records']
+            return data
+        except Exception as e:
+            logger.error(f"An error occurred: {e}")
+            return None
+
+    def fetch_data(self):
+        records = self.connect()
+        df_data = [data['fields'] for data in records]
+        airtable_df = pd.DataFrame(df_data)
+
+        return airtable_df
+
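Note: the loader above reads a single response page. Airtable's list-records endpoint caps each response at 100 records and returns an offset token while more pages remain, so a paginated variant would loop on that token — a sketch under that assumption, not part of the patch:

    import requests

    def fetch_all_records(url, auth_token):
        # Follow Airtable's offset-based pagination until no token is returned.
        headers = {"Authorization": f"Bearer {auth_token}"}
        records, offset = [], None
        while True:
            params = {"offset": offset} if offset else {}
            payload = requests.get(url, headers=headers, params=params).json()
            records.extend(payload.get("records", []))
            offset = payload.get("offset")
            if not offset:
                return records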
diff --git a/vector_etl/source_mods/google_bigquery.py b/vector_etl/source_mods/google_bigquery.py
new file mode 100644
index 0000000..3b98e43
--- /dev/null
+++ b/vector_etl/source_mods/google_bigquery.py
@@ -0,0 +1,39 @@
+import os
+from google.cloud import bigquery
+from base import BaseSource
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class GoogleBigQuerySource(BaseSource):
+    def __init__(self,config):
+        self.config = config
+        self.google_application_credentials = config['GOOGLE_APPLICATION_CREDENTIALS']
+        self.client = None
+        self.connect()
+
+
+    def connect(self):
+        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = self.google_application_credentials
+        self.client = bigquery.Client()
+
+    def fetch_data(self):
+        if self.client:
+            try:
+                query_job = self.client.query(f"""{self.config.get("query"," ")}""")
+                if query_job:
+                    dfrows = query_job.result().to_dataframe()
+                    return dfrows
+                else:
+                    logger.error("No data returned from query")
+                    return None
+            except Exception as e:
+                logger.error(f"An error occurred: {e}")
+                return None
+
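Note: for reference, the query path fetch_data relies on, shown standalone. to_dataframe() needs the pandas extras of google-cloud-bigquery installed; the query and credentials path here are illustrative only.

    import os
    from google.cloud import bigquery

    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "service-account.json"  # illustrative path
    client = bigquery.Client()
    df = client.query("SELECT 1 AS x").result().to_dataframe()  # pandas DataFrame
    print(df)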
From a2a6663cb7d685d8f2e43b8f48770a5f71737c59 Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Sat, 10 Aug 2024 11:29:32 -0800
Subject: [PATCH 03/34] add test case for all sources

---
 examples/airtable_to_pincone.yaml         |  22 +++++
 examples/google_bigquery_to_pincone.yaml  |  20 ++++
 tests/test_source_mods.py                 | 113 ++++++++++++++++++++++
 vector_etl/source_mods/google_bigquery.py |   6 +-
 vector_etl/target_mods/__init__.py        |   1 +
 5 files changed, 159 insertions(+), 3 deletions(-)
 create mode 100644 examples/airtable_to_pincone.yaml
 create mode 100644 examples/google_bigquery_to_pincone.yaml

diff --git a/examples/airtable_to_pincone.yaml b/examples/airtable_to_pincone.yaml
new file mode 100644
index 0000000..c47fc21
--- /dev/null
+++ b/examples/airtable_to_pincone.yaml
@@ -0,0 +1,22 @@
+source:
+  source_data_type: "AirTable"
+  url: ""
+  auth_token: ""
+  baseId: "sales"
+  tableIdOrName: "secure-connect-contextdata.zip"
+
+embedding:
+  embedding_model: "OpenAI"
+  api_key: ""
+  model_name: "text-embedding-ada-002"
+
+target:
+  target_database: "Pinecone"
+  pinecone_api_key: ""
+  index_name: ""
+  dimension: 1536 #[Optional] Only required if creating a new index
+  metric: "cosine" #[Optional] Only required if creating a new index
+  cloud: "aws" #[Optional] Only required if creating a new index
+  region: "us-east-1" #[Optional] Only required if creating a new index
+
diff --git a/examples/google_bigquery_to_pincone.yaml b/examples/google_bigquery_to_pincone.yaml
new file mode 100644
index 0000000..3075154
--- /dev/null
+++ b/examples/google_bigquery_to_pincone.yaml
@@ -0,0 +1,20 @@
+source:
+  source_data_type: "Google BigQuery"
+  google_application_credentials: ""
+  query: ""
+
+embedding:
+  embedding_model: "OpenAI"
+  api_key: ""
+  model_name: "text-embedding-ada-002"
+
+target:
+  target_database: "Pinecone"
+  pinecone_api_key: ""
+  index_name: ""
+  dimension: 1536 #[Optional] Only required if creating a new index
+  metric: "cosine" #[Optional] Only required if creating a new index
+  cloud: "aws" #[Optional] Only required if creating a new index
+  region: "us-east-1" #[Optional] Only required if creating a new index
+
diff --git a/tests/test_source_mods.py b/tests/test_source_mods.py
index 103b23e..bb10990 100644
--- a/tests/test_source_mods.py
+++ b/tests/test_source_mods.py
@@ -5,6 +5,9 @@ from vector_etl.source_mods.s3_loader import S3Source
 from vector_etl.source_mods.database_loader import DatabaseSource
 from vector_etl.source_mods.local_file import LocalFileSource
+from vector_etl.source_mods.google_bigquery import GoogleBigQuerySource
+from vector_etl.source_mods.airtable_loader import AirTableSource
+from vector_etl.source_mods.apache_cassandra_astra_loader import ApacheCassandraAstraSource
 
 @pytest.fixture
 def s3_config():
@@ -16,7 +19,41 @@ def s3_config():
         'chunk_size': 1000,
         'chunk_overlap': 200
     }
+
+@pytest.fixture
+def google_bigquery_config():
+    return {
+        "source_data_type": "Google BigQuery",
+        "db_type": "google_bigquery",
+        "GOOGLE_APPLICATION_CREDENTIALS": "",
+        "query": "SELECT * FROM chipotle_stores LIMIT 10"
+    }
+
+@pytest.fixture
+def airtable_config():
+    return {
+        "url": "airtable.com/sales",
+        "baseId": "sales",
+        "auth_token": "673989fhuhefiw0903",
+        "tableIdOrName": "survey"
+    }
+
+@pytest.fixture
+def apache_cassandra_astra_config():
+    return {
+        "source_data_type": "Apache Cassandra",
+        "db_type": "cassandra_astra",
+        "clientId": "",
+        "secret": "",
+        "keyspace": "sales",
+        "secure_connect_bundle": "secure-connect-contextdata.zip",
+        "query": "SELECT * FROM chipotle_stores LIMIT 10",
+    }
+
 @pytest.fixture
 def db_config():
     return {
@@ -99,4 +136,80 @@ def test_local_file_source_read_file(local_file_config):
     source = LocalFileSource(local_file_config)
     file_content = source.read_file('/path/to/test_file.csv')
     assert isinstance(file_content, BytesIO)
+
+
+def test_google_bigquery_connect(google_bigquery_config):
+    with patch('vector_etl.source_mods.google_bigquery.bigquery.Client') as mock_client:
+        source = GoogleBigQuerySource(google_bigquery_config)
+        source.connect()
+        mock_client.assert_called_with()
+
+
+def test_google_bigquery_fetch_data(google_bigquery_config):
+    with patch('vector_etl.source_mods.google_bigquery.bigquery.Client') as mock_client:
+        mock_client.return_value.query.return_value.result.return_value.to_dataframe.return_value = pd.DataFrame()
+        source = GoogleBigQuerySource(google_bigquery_config)
+        df = source.fetch_data()
+        assert isinstance(df, pd.DataFrame)
+
+
+def test_apache_cassandra_astra_connect(apache_cassandra_astra_config):
+    with patch('vector_etl.source_mods.apache_cassandra_astra_loader.Cluster') as mock_cluster:
+        source = ApacheCassandraAstraSource(apache_cassandra_astra_config)
+        mock_cluster.assert_called_once_with(
+            cloud={'secure_connect_bundle': 'secure-connect-contextdata.zip'},
+            auth_provider=source.auth_provider,
+            protocol_version=3
+        )
+
+
+def test_apache_cassandra_astra_fetch_data(apache_cassandra_astra_config):
+    with patch('vector_etl.source_mods.apache_cassandra_astra_loader.Cluster') as mock_cluster:
+        mock_cluster.return_value.connect.return_value.execute.return_value = [{"id": "", "name": ""}]
+        source = ApacheCassandraAstraSource(apache_cassandra_astra_config)
+        df = source.fetch_data()
+        assert isinstance(df, pd.DataFrame)
+
+
+def test_airtable_connect(airtable_config):
+    with patch('vector_etl.source_mods.airtable_loader.requests.get') as mock_get:
+        source = AirTableSource(airtable_config)
+        source.connect()
+        mock_get.assert_called_once_with(
+            source.url,
+            headers={"Authorization": f"Bearer {airtable_config['auth_token']}"}
+        )
+
+
+def test_airtable_fetch_data(airtable_config):
+    with patch('vector_etl.source_mods.airtable_loader.requests.get') as mock_get:
+        mock_get.return_value.json.return_value = {
+            "records": [
+                {"fields": {"Address": "333 Post St", "Name": "Union Square", "Visited": True}}
+            ]
+        }
+        source = AirTableSource(airtable_config)
+        df = source.fetch_data()
+        assert isinstance(df, pd.DataFrame)
+
diff --git a/vector_etl/source_mods/google_bigquery.py b/vector_etl/source_mods/google_bigquery.py
index 3b98e43..fb1460a 100644
--- a/vector_etl/source_mods/google_bigquery.py
+++ b/vector_etl/source_mods/google_bigquery.py
@@ -10,14 +10,14 @@ class GoogleBigQuerySource(BaseSource):
     def __init__(self,config):
         self.config = config
-        self.google_application_credentials = config['GOOGLE_APPLICATION_CREDENTIALS']
         self.client = None
         self.connect()
 
 
     def connect(self):
-        os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = self.google_application_credentials
-        self.client = bigquery.Client()
+        if self.config["db_type"] == 'google_bigquery':
+            os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = self.config['GOOGLE_APPLICATION_CREDENTIALS']
+            self.client = bigquery.Client()
 
     def fetch_data(self):
         if self.client:
diff --git a/vector_etl/target_mods/__init__.py b/vector_etl/target_mods/__init__.py
index f0be443..f19dd9b 100644
--- a/vector_etl/target_mods/__init__.py
+++ b/vector_etl/target_mods/__init__.py
@@ -8,6 +8,7 @@ from .mongodb import MongoDBTarget
 from .neo4j import Neo4jTarget
 
+
 def get_target_database(config):
     target_type = config['target_database']
     if target_type == 'Pinecone':

From ce01cd3373e15afe9ab5e95e33e6ec1546729018 Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Tue, 20 Aug 2024 04:01:01 -0800
Subject: [PATCH 04/34] removed astra db source

---
 examples/apachecassandra_to_pinecone.yaml | 24 ---------
 tests/test_source_mods.py                 | 36 --------------
 vector_etl/source_mods/__init__.py        |  3 --
 .../apache_cassandra_astra_loader.py      | 49 -------------------
 4 files changed, 112 deletions(-)
 delete mode 100644 examples/apachecassandra_to_pinecone.yaml
 delete mode 100644 vector_etl/source_mods/apache_cassandra_astra_loader.py

diff --git a/examples/apachecassandra_to_pinecone.yaml b/examples/apachecassandra_to_pinecone.yaml
deleted file mode 100644
index 6b93fd0..0000000
--- a/examples/apachecassandra_to_pinecone.yaml
+++ /dev/null
@@ -1,24 +0,0 @@
-source:
-  source_data_type: "Apache Cassandra"
-  db_type: "cassandra_astra"
-  clientId: ""
-  secret: ""
-  keyspace: "sales"
-  secure_connect_bundle: "secure-connect-contextdata.zip"
-  query: "SELECT * FROM chipotle_stores LIMIT 10"
-
-embedding:
-  embedding_model: "OpenAI"
-  api_key: ""
-  model_name: "text-embedding-ada-002"
-
-target:
-  target_database: "Pinecone"
-  pinecone_api_key: ""
-  index_name: ""
-  dimension: 1536 #[Optional] Only required if creating a new index
-  metric: "cosine" #[Optional] Only required if creating a new index
-  cloud: "aws" #[Optional] Only required if creating a new index
-  region: "us-east-1" #[Optional] Only required if creating a new index
-
diff --git a/tests/test_source_mods.py b/tests/test_source_mods.py
index bb10990..c83dfe5 100644
--- a/tests/test_source_mods.py
+++ b/tests/test_source_mods.py
@@ -7,7 +7,6 @@ from vector_etl.source_mods.local_file import LocalFileSource
 from vector_etl.source_mods.google_bigquery import GoogleBigQuerySource
 from vector_etl.source_mods.airtable_loader import AirTableSource
-from vector_etl.source_mods.apache_cassandra_astra_loader import ApacheCassandraAstraSource
 
 @pytest.fixture
 def s3_config():
@@ -40,20 +39,6 @@ def airtable_config():
     }
 
-@pytest.fixture
-def apache_cassandra_astra_config():
-    return {
-        "source_data_type": "Apache Cassandra",
-        "db_type": "cassandra_astra",
-        "clientId": "",
-        "secret": "",
-        "keyspace": "sales",
-        "secure_connect_bundle": "secure-connect-contextdata.zip",
-        "query": "SELECT * FROM chipotle_stores LIMIT 10",
-    }
-
 @pytest.fixture
 def db_config():
     return {
@@ -159,27 +144,6 @@ def test_google_bigquery_fetch_data(google_bigquery_config):
 
 
 
-def test_apache_cassandra_astra_connect(apache_cassandra_astra_config):
-    with patch('vector_etl.source_mods.apache_cassandra_astra_loader.Cluster') as mock_cluster:
-        source = ApacheCassandraAstraSource(apache_cassandra_astra_config)
-        mock_cluster.assert_called_once_with(
-            cloud={'secure_connect_bundle': 'secure-connect-contextdata.zip'},
-            auth_provider=source.auth_provider,
-            protocol_version=3
-        )
-
-
-def test_apache_cassandra_astra_fetch_data(apache_cassandra_astra_config):
-    with patch('vector_etl.source_mods.apache_cassandra_astra_loader.Cluster') as mock_cluster:
-        mock_cluster.return_value.connect.return_value.execute.return_value = [{"id": "", "name": ""}]
-        source = ApacheCassandraAstraSource(apache_cassandra_astra_config)
-        df = source.fetch_data()
-        assert isinstance(df, pd.DataFrame)
 
diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index 0f93ad3..9dae03a 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -8,7 +8,6 @@ from .zendesk_loader import ZendeskSource
 from .google_drive import GoogleDriveSource
 from .google_cloud_storage import GoogleCloudStorageSource
-from .apache_cassandra_astra_loader import ApacheCassandraAstraSource
 from .local_file import LocalFileSource
 from .airtable_loader import AirTableSource
 from .google_bigquery import GoogleBigQuerySource
@@ -33,8 +32,6 @@ def get_source_class(config):
         return GoogleDriveSource(config)
     elif source_type == 'Google Cloud Storage':
         return GoogleCloudStorageSource(config)
-    elif source_type == 'Apache Cassandra':
-        return ApacheCassandraAstraSource(config)
     elif source_type == 'AirTable':
         return AirTableSource(config)
     elif source_type == 'Google BigQuery':
diff --git a/vector_etl/source_mods/apache_cassandra_astra_loader.py b/vector_etl/source_mods/apache_cassandra_astra_loader.py
deleted file mode 100644
index f4c394c..0000000
--- a/vector_etl/source_mods/apache_cassandra_astra_loader.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import pandas as pd
-from cassandra.cluster import Cluster
-import logging
-from .base import BaseSource
-from cassandra.auth import PlainTextAuthProvider
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-
-class ApacheCassandraAstraSource(BaseSource):
-    def __init__(self, config):
-        self.config = config
-        self.cluster = None
-        self.auth_provider = None
-        self.session = None
-        self.cloud_config = None
-        self.keyspace = self.config['keyspace']
-        self.connect()  # Initialize connection here
-
-    def connect(self):
-        if self.config["db_type"] == 'cassandra_astra':
-            self.cloud_config = {'secure_connect_bundle': self.config['secure_connect_bundle']}
-            self.auth_provider = PlainTextAuthProvider(self.config['clientId'], self.config['secret'])
-            self.cluster = Cluster(cloud=self.cloud_config, auth_provider=self.auth_provider, protocol_version=3)
-            self.session = self.cluster.connect(self.keyspace)
-        else:
-            raise ValueError("Invalid database type")
-
-    def fetch_data(self):
-        if not self.session:
-            raise Exception("Session is not initialized. Ensure you call connect() first.")
Ensure you call connect() first.") - - query = self.config.get("query", "") - prepared_statement = self.session.prepare(query) - - try: - db_data = self.session.execute(prepared_statement) - if db_data: - # Convert to Pandas DataFrame - df = pd.DataFrame(list(db_data)) - return df - else: - logger.error(f"No data returned: {e}") - return None - except Exception as e: - logger.error(f"An error occurred: {e}") - return None - From b4a8e3909630a554bcb794b45d662b8cc053f4c0 Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Tue, 20 Aug 2024 13:12:59 -0800 Subject: [PATCH 05/34] updated base module import --- vector_etl/source_mods/airtable_loader.py | 2 +- vector_etl/source_mods/google_bigquery.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vector_etl/source_mods/airtable_loader.py b/vector_etl/source_mods/airtable_loader.py index e9bbfe9..8c4566e 100644 --- a/vector_etl/source_mods/airtable_loader.py +++ b/vector_etl/source_mods/airtable_loader.py @@ -1,5 +1,5 @@ import requests -from base import BaseSource +from .base import BaseSource from pprint import pprint import pandas as pd import logging diff --git a/vector_etl/source_mods/google_bigquery.py b/vector_etl/source_mods/google_bigquery.py index fb1460a..affa5f9 100644 --- a/vector_etl/source_mods/google_bigquery.py +++ b/vector_etl/source_mods/google_bigquery.py @@ -1,6 +1,6 @@ import os from google.cloud import bigquery -from base import BaseSource +from .base import BaseSource import logging logging.basicConfig(level=logging.INFO) From f7232337bcdb3580b5f5c045f17f5d826cbae20a Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Wed, 21 Aug 2024 05:07:53 -0800 Subject: [PATCH 06/34] update airtablesource url to default https://api.airtable.com/v0/ --- examples/airtable_to_pincone.yaml | 3 +-- vector_etl/main.py | 1 + vector_etl/source_mods/airtable_loader.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/airtable_to_pincone.yaml b/examples/airtable_to_pincone.yaml index c47fc21..5e95087 100644 --- a/examples/airtable_to_pincone.yaml +++ b/examples/airtable_to_pincone.yaml @@ -1,9 +1,8 @@ source: source_data_type: "AirTable" - url: "" auth_token: "" baseId: "sales" - tableIdOrName: "secure-connect-contextdata.zip" + tableIdOrName: "" embedding: embedding_model: "OpenAI" diff --git a/vector_etl/main.py b/vector_etl/main.py index 983ec51..e201875 100644 --- a/vector_etl/main.py +++ b/vector_etl/main.py @@ -6,6 +6,7 @@ from vector_etl import __version__, run_etl_process from vector_etl.orchestrator import run_etl_process + logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) diff --git a/vector_etl/source_mods/airtable_loader.py b/vector_etl/source_mods/airtable_loader.py index 8c4566e..0e14b58 100644 --- a/vector_etl/source_mods/airtable_loader.py +++ b/vector_etl/source_mods/airtable_loader.py @@ -11,7 +11,7 @@ class AirTableSource(BaseSource): def __init__(self,config): self.config = config - self.url = f"{self.config['url']}{self.config['baseId']}/{self.config['tableIdOrName']}" + self.url = f"https://api.airtable.com/v0/{self.config['baseId']}/{self.config['tableIdOrName']}" self.auth_token = config['auth_token'] def connect(self): From 939ae2898b037fe05345c82a612a4ef11bd4136f Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Fri, 23 Aug 2024 17:48:22 -0800 Subject: [PATCH 07/34] added hubspot source --- vector_etl/source_mods/__init__.py | 3 + vector_etl/source_mods/google_bigquery.py | 13 ++- vector_etl/source_mods/hubspot_loader.py | 124 
From ff45f2f7894d731f8ad44616b3ea25247ba9fd0e Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Fri, 23 Aug 2024 17:48:22 -0800
Subject: [PATCH 07/34] added hubspot source

---
 vector_etl/source_mods/__init__.py        |   3 +
 vector_etl/source_mods/google_bigquery.py |  13 ++-
 vector_etl/source_mods/hubspot_loader.py  | 124 ++++++++++++++++++++++
 3 files changed, 138 insertions(+), 2 deletions(-)
 create mode 100644 vector_etl/source_mods/hubspot_loader.py

diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index 9dae03a..8206443 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -11,6 +11,7 @@ from .google_cloud_storage import GoogleCloudStorageSource
 from .local_file import LocalFileSource
 from .airtable_loader import AirTableSource
 from .google_bigquery import GoogleBigQuerySource
+from .hubspot_loader import HubSpotSource
 
 def get_source_class(config):
     source_type = config['source_data_type']
@@ -36,5 +37,7 @@ def get_source_class(config):
         return AirTableSource(config)
     elif source_type == 'Google BigQuery':
         return GoogleBigQuerySource(config)
+    elif source_type == 'HubSpot':
+        return HubSpotSource(config)
     else:
         raise ValueError(f"Unsupported source type: {source_type}")
diff --git a/vector_etl/source_mods/google_bigquery.py b/vector_etl/source_mods/google_bigquery.py
index affa5f9..3a93a99 100644
--- a/vector_etl/source_mods/google_bigquery.py
+++ b/vector_etl/source_mods/google_bigquery.py
@@ -35,5 +35,14 @@ def fetch_data(self):
 
 
 
-
+config = {"query":"SELECT * FROM bigquery-public-data.america_health_rankings.ahr LIMIT 100",
+          "GOOGLE_APPLICATION_CREDENTIALS":"contextData_bigquery_cred.json",
+          "db_type":'google_bigquery'
+          }
+
+
+data = GoogleBigQuerySource(config)
+print(type(data.fetch_data()))
+print(data.fetch_data())
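Note: the module-level smoke test added above executes on every import of the module. A sketch of the usual guard, reusing the same config keys (the credentials path comes from the patch and is assumed to exist locally):

    if __name__ == "__main__":
        config = {
            "db_type": "google_bigquery",
            "GOOGLE_APPLICATION_CREDENTIALS": "contextData_bigquery_cred.json",
            "query": "SELECT * FROM `bigquery-public-data.america_health_rankings.ahr` LIMIT 100",
        }
        print(GoogleBigQuerySource(config).fetch_data())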
diff --git a/vector_etl/source_mods/hubspot_loader.py b/vector_etl/source_mods/hubspot_loader.py
new file mode 100644
index 0000000..5b7d381
--- /dev/null
+++ b/vector_etl/source_mods/hubspot_loader.py
@@ -0,0 +1,124 @@
+from .base import BaseSource
+import pandas as pd
+import requests
+import logging
+
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class HubSpotSource(BaseSource):
+    def __init__(self,config):
+        self.config = config
+        self.endpoints = None
+        self.access_token = self.config["access_token"]
+
+    def connect(self,url):
+        headers = {
+            "authorization":f"Bearer {self.access_token}"
+        }
+        try:
+            response = requests.get(url=url,headers=headers)
+            logger.info(f"Status {response.status_code}")
+            return response.json()
+        except Exception as e:
+            logger.error(f"An error occurred: {e}")
+
+    def fetch_data(self):
+
+        if self.config['crm_object'] == "crm.companies":
+            self.endpoints = f"https://api.hubapi.com/crm/v3/objects/companies?limit={self.config['limit']}&archived={self.config['archive']}"
+            logger.info(f"Companies \n")
+
+        elif self.config['crm_object'] == "crm.contacts":
+            self.endpoints = f"https://api.hubapi.com/crm/v3/objects/contacts?limit={self.config['limit']}&archived={self.config['archive']}"
+            logger.info(f"Contacts \n")
+
+        elif self.config['crm_object'] == "crm.tickets":
+            self.endpoints = f"https://api.hubapi.com/crm/v3/objects/tickets?limit={self.config['limit']}&archived={self.config['archive']}"
+            logger.info(f"Tickets \n")
+
+        elif self.config['crm_object'] == "crm.deals":
+            self.endpoints = f"https://api.hubapi.com/crm/v3/objects/deals?limit={self.config['limit']}&archived={self.config['archive']}"
+            logger.info(f"Deals \n")
+
+        elif self.config['crm_object'] == "crm.products":
+            self.endpoints = f"https://api.hubapi.com/crm/v3/objects/products?limit={self.config['limit']}&archived={self.config['archive']}"
+            logger.info(f"products \n")
+
+        elif self.config['crm_object'] == "crm.invoices":
+            self.endpoints = f"https://api.hubapi.com/crm/v3/objects/invoices?limit={self.config['limit']}&archived={self.config['archive']}"
+            logger.info(f"invoices \n")
+
+        elif self.config['crm_object'] == "crm.carts":
+            self.endpoints = f"https://api.hubapi.com/crm/v3/objects/carts?limit={self.config['limit']}&archived={self.config['archive']}"
+            logger.info(f"Carts \n")
+
+        elif self.config['crm_object'] == "crm.tasks":
+            self.endpoints = f"https://api.hubapi.com/crm/v3/objects/tasks?limit={self.config['limit']}&archived={self.config['archive']}"
+            logger.info(f"Tasks \n")
+
+        elif self.config['crm_object'] == "crm.payments":
+            self.endpoints = f"https://api.hubapi.com/crm/v3/objects/commerce_payments?limit={self.config['limit']}&archived={self.config['archive']}"
+            logger.info(f"Payments \n")
+
+        elif self.config['crm_object'] == "crm.orders":
+            self.endpoints = f"https://api.hubapi.com/crm/v3/objects/orders?limit={self.config['limit']}&archived={self.config['archive']}"
+            logger.info(f"Orders \n")
+
+        else:
+            raise ValueError(f"Unsupported CRM object type: {self.config['crm_object']}")
+
+        response = self.connect(self.endpoints)['results']
+        results = [item['properties'] for item in response]
+        df = pd.DataFrame(results)
+        logger.info(f" data \n {df}")
+
+        return df
+
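Note: each branch above only swaps the object path inside the same URL template, so the chain can be collapsed into a lookup table — a sketch (paths taken from the patch; helper name illustrative):

    CRM_OBJECT_PATHS = {
        "crm.companies": "companies",
        "crm.contacts": "contacts",
        "crm.tickets": "tickets",
        "crm.deals": "deals",
        "crm.products": "products",
        "crm.invoices": "invoices",
        "crm.carts": "carts",
        "crm.tasks": "tasks",
        "crm.payments": "commerce_payments",
        "crm.orders": "orders",
    }

    def build_endpoint(crm_object, limit, archive):
        try:
            path = CRM_OBJECT_PATHS[crm_object]
        except KeyError:
            raise ValueError(f"Unsupported CRM object type: {crm_object}")
        return f"https://api.hubapi.com/crm/v3/objects/{path}?limit={limit}&archived={archive}"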
From fb47e0255df9fd495b52383b7688840b5e5e85aa Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Sat, 24 Aug 2024 13:25:42 -0800
Subject: [PATCH 08/34] added zoho crm source

---
 vector_etl/source_mods/__init__.py        |   4 +-
 vector_etl/source_mods/zoho_crm_loader.py | 153 ++++++++++++++++++++++
 2 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 vector_etl/source_mods/zoho_crm_loader.py

diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index 8206443..3014119 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -12,7 +12,7 @@ from .airtable_loader import AirTableSource
 from .google_bigquery import GoogleBigQuerySource
 from .hubspot_loader import HubSpotSource
-
+from .zoho_crm_loader import ZohoCrmSource
 def get_source_class(config):
     source_type = config['source_data_type']
     if source_type == 'Amazon S3':
@@ -39,5 +39,7 @@ def get_source_class(config):
         return GoogleBigQuerySource(config)
     elif source_type == 'HubSpot':
         return HubSpotSource(config)
+    elif source_type == 'Zoho Crm':
+        return ZohoCrmSource(config)
     else:
         raise ValueError(f"Unsupported source type: {source_type}")
diff --git a/vector_etl/source_mods/zoho_crm_loader.py b/vector_etl/source_mods/zoho_crm_loader.py
new file mode 100644
index 0000000..a768066
--- /dev/null
+++ b/vector_etl/source_mods/zoho_crm_loader.py
@@ -0,0 +1,153 @@
+from base import BaseSource
+import pandas as pd
+import requests
+import logging
+from pprint import pprint
+import os
+import json
+
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class ZohoCrmSource(BaseSource):
+    def __init__(self,config):
+        self.config = config
+        self.token = None
+        self.url = None
+        self.grant_type = self.config['grant_type']
+        self.client_id = self.config['client_id']
+        self.client_secret = self.config['client_secret']
+        self.code = self.config['code']
+        self.accounts_url = self.config['accounts_url']
+
+
+    def flatten_dict(self, d, parent_key='', sep='_'):
+
+        items = []
+        for k, v in d.items():
+            new_key = f"{parent_key}{sep}{k}" if parent_key else k
+            if isinstance(v, dict):
+                items.extend(self.flatten_dict(v, new_key, sep=sep).items())
+            else:
+                items.append((new_key, v))
+        return dict(items)
+
+
+    def connect(self):
+
+        data = {
+            "grant_type":self.grant_type,
+            "client_id": self.client_id,
+            "client_secret": self.client_secret,
+            "code": self.code
+        }
+        try:
+            if os.path.exists("token.json"):
+                with open("token.json",'r') as token_file:
+                    token_data = json.load(token_file)
+                    self.token = token_data.get("access_token")
+                    return self.token
+            else:
+                response = requests.post(url=self.accounts_url, data=data)
+                logger.info(f"Status {response.status_code}")
+                with open("token.json", 'w') as token_file:
+                    json.dump({"access_token": response.json()["access_token"]}, token_file)
+
+                logger.info("New token fetched and saved.")
+                tokens = response.json()["access_token"]
+                return tokens
+        except requests.exceptions.HTTPError as http_err:
+            logger.error(f"connection Error {http_err}")
+
+
+    def fetch_data(self):
+
+        self.token = self.connect()
+        if self.config['records'] == 'module.Contacts':
+            logger.info("Contact \n")
+            self.url = f"""https://www.zohoapis.com/crm/v5/Contacts?fields=Account_Name,
+            First_Name,Lead_Source,Home,Fax,Skype_ID,Asst_Phone,Phone,
+            Title,Department,Twitter,Last_Name,Contact_Name,Phone,Email,Reporting_To,
+            Mailing_Street,Mailing_City,Mailing_State,Mailing_Zip,Mailing_Country,
+            Description,Contact_Owner,Lead_Source,Date_of_Birth,Contact_Image
+            &converted=true&per_page={self.config['per_page']}"""
+
+        elif self.config['records'] == 'module.Accounts':
+            logger.info("Accounts \n")
+            self.url = f"""https://www.zohoapis.com/crm/v5/Accounts?fields=Account_Owner,Account_Name,Account_Site,Parent_Account,
+            Account_Number,Account_Type,Industry,Annual_Revenue,Rating,Phone,Fax,Website,Ticker_Symbol,OwnerShip,Employees,Sic_Code,
+            Billing_Street,Billing_City,Billing_State,Billing_Code,Billing_Country,Shipping_Street,Shipping_City,Shipping_State,Shipping_Code,
+            Shipping_Country,Description
+            &converted=true&per_page={self.config['per_page']}"""
+
+        elif self.config['records'] == 'module.Leads':
+            logger.info("Leads \n")
+            self.url = f"""https://www.zohoapis.com/crm/v5/Leads?fields=Lead_Owner,First_Name,Title,Mobile,Lead_Source,
+            Industry,Annual_Revenue,Company,Last_Name,Email,Fax,Website,Lead_Status,Rating,Skype_ID,
+            Description,Twitter,City,Street,State,Country,Zip_Code,No_of_Employees
+            &converted=true&per_page={self.config['per_page']}"""
+
+        elif self.config['records'] == 'module.Deals':
+            logger.info("Deals \n")
+            self.url = f"""https://www.zohoapis.com/crm/v5/Deals?fields=Deal_Owner,Deal_Name,Account_Name,
+            Type,Next_Step,Lead_Source,Contact_Name,Amount,Closing_Date,Stage,Probability,Expected_Revenue,
+            Campaign_Source,Description
+            &converted=true&per_page={self.config['per_page']}"""
+
+        elif self.config['records'] == 'module.Campaigns':
+            logger.info("Campaigns \n")
+            self.url = f"""https://www.zohoapis.com/crm/v5/Campaigns?fields=Campaign_Owner,Campaign_Name,Start_Date,
+            Expected_Revenue,Actual_Cost,Number_sent,Type,Status,End_Date,Budgeted_Cost,Expected_Response,Description
+            &converted=true&per_page={self.config['per_page']}"""
+
+        elif self.config['records'] == 'module.Tasks':
+            logger.info("Tasks \n")
+            self.url = f"""https://www.zohoapis.com/crm/v5/Tasks?fields=Task_Owner,Subject,Due_Date,Contact,Deal,Status,Priority,Reminder,
+            Repeat,Description
+            &converted=true&per_page={self.config['per_page']}"""
+
+        elif self.config['records'] == 'module.Calls':
+            logger.info("Calls \n")
+            self.url = f"""https://www.zohoapis.com/crm/v5/Calls?fields=Call_To,Related_To,Call_Type,Outgoing_Call_Status,
+            Call_Start_Time,Call_Owner,Subject,Created_By,Modified_By,Call_Purpose,Call_Agenda&converted=true&per_page={self.config['per_page']}"""
+
+        elif self.config['records'] == 'module':
+            logger.info("Modules \n")
+            self.url = "https://www.zohoapis.com/crm/v5/settings/modules"
+
+        headers = {"Authorization":f"Zoho-oauthtoken {self.token}"}
+
+        response = requests.get(url=self.url,headers=headers).json()['data']
+
+        flattened_data = [self.flatten_dict(item) for item in response]
+
+
+        df = pd.DataFrame(flattened_data )
+
+        logger.info(f" data \n {df}")
+
+        return df
+
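Note: flatten_dict is what turns Zoho's nested record payloads into flat DataFrame columns; a standalone illustration of its behavior (sample values invented):

    def flatten_dict(d, parent_key='', sep='_'):
        # Same logic as the method above, shown free-standing.
        items = []
        for k, v in d.items():
            new_key = f"{parent_key}{sep}{k}" if parent_key else k
            if isinstance(v, dict):
                items.extend(flatten_dict(v, new_key, sep=sep).items())
            else:
                items.append((new_key, v))
        return dict(items)

    row = {"Owner": {"name": "Jane", "id": "42"}, "Amount": 1200}
    assert flatten_dict(row) == {"Owner_name": "Jane", "Owner_id": "42", "Amount": 1200}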
From 939ae2898b037fe05345c82a612a4ef11bd4136f Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Sat, 24 Aug 2024 20:31:41 -0800
Subject: [PATCH 09/34] added zoho desk

---
 vector_etl/source_mods/__init__.py         |   3 +
 vector_etl/source_mods/zoho_crm_loader.py  |  11 +-
 vector_etl/source_mods/zoho_desk_loader.py | 130 +++++++++++++++++++++
 3 files changed, 143 insertions(+), 1 deletion(-)
 create mode 100644 vector_etl/source_mods/zoho_desk_loader.py

diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index 3014119..399f4f2 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -13,6 +13,7 @@ from .google_bigquery import GoogleBigQuerySource
 from .hubspot_loader import HubSpotSource
 from .zoho_crm_loader import ZohoCrmSource
+from .zoho_desk_loader import ZohoDeskSource
 def get_source_class(config):
     source_type = config['source_data_type']
     if source_type == 'Amazon S3':
@@ -41,5 +42,7 @@ def get_source_class(config):
     elif source_type == 'Zoho Crm':
         return ZohoCrmSource(config)
+    elif source_type == 'Zoho Desk':
+        return ZohoDeskSource(config)
     else:
         raise ValueError(f"Unsupported source type: {source_type}")
diff --git a/vector_etl/source_mods/zoho_crm_loader.py b/vector_etl/source_mods/zoho_crm_loader.py
index a768066..b06c4db 100644
--- a/vector_etl/source_mods/zoho_crm_loader.py
+++ b/vector_etl/source_mods/zoho_crm_loader.py
@@ -1,4 +1,4 @@
-from base import BaseSource
+from .base import BaseSource
 import pandas as pd
 import requests
 import logging
@@ -147,6 +147,15 @@ def fetch_data(self):
+
+
+
diff --git a/vector_etl/source_mods/zoho_desk_loader.py b/vector_etl/source_mods/zoho_desk_loader.py
new file mode 100644
index 0000000..f19cc54
--- /dev/null
+++ b/vector_etl/source_mods/zoho_desk_loader.py
@@ -0,0 +1,130 @@
+from .base import BaseSource
+import pandas as pd
+import requests
+import logging
+from pprint import pprint
+import os
+import json
+
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class ZohoDeskSource(BaseSource):
+    def __init__(self,config):
+        self.config = config
+        self.token = None
+        self.url = None
+        self.grant_type = self.config['grant_type']
+        self.client_id = self.config['client_id']
+        self.client_secret = self.config['client_secret']
+        self.code = self.config['code']
+        self.accounts_url = self.config['accounts_url']
+
+
+    def flatten_dict(self, d, parent_key='', sep='_'):
+
+        items = []
+        for k, v in d.items():
+            new_key = f"{parent_key}{sep}{k}" if parent_key else k
+            if isinstance(v, dict):
+                items.extend(self.flatten_dict(v, new_key, sep=sep).items())
+            else:
+                items.append((new_key, v))
+        return dict(items)
+
+
+    def connect(self):
+
+        data = {
+            "grant_type":self.grant_type,
+            "client_id": self.client_id,
+            "client_secret": self.client_secret,
+            "code": self.code
+        }
+        try:
+            if os.path.exists("token.json"):
+                with open("token.json",'r') as token_file:
+                    token_data = json.load(token_file)
+                    self.token = token_data.get("access_token")
+                    return self.token
+            else:
+                response = requests.post(url=self.accounts_url, data=data)
+                logger.info(f"Status {response.status_code}")
+                with open("token.json", 'w') as token_file:
+                    json.dump({"access_token": response.json()["access_token"]}, token_file)
+
+                logger.info("New token fetched and saved.")
+                tokens = response.json()["access_token"]
+                return tokens
+        except requests.exceptions.HTTPError as http_err:
+            logger.error(f"connection Error {http_err}")
+
+
+    def fetch_data(self):
+
+        self.token = self.connect()
+        if self.config['records'] == 'desk.agents':
+            logger.info("Agents \n")
+            self.url = f"https://desk.zoho.com/api/v1/agents?limit={self.config['limit']}"
+
+        elif self.config['records'] == 'desk.team':
+            logger.info("Teams \n")
+            self.url = f"https://desk.zoho.com/api/v1/teams"
+            headers = {"Authorization":f"Zoho-oauthtoken {self.token}"}
+
+            response = requests.get(url=self.url,headers=headers).json()['teams']
+
+            flattened_data = [self.flatten_dict(item) for item in response]
+
+            df = pd.DataFrame(flattened_data )
+
+            logger.info(f" data \n {df}")
+
+            return df
+
+        elif self.config['records'] == 'desk.ticket':
+            logger.info("Ticket \n")
+            self.url = f"""https://desk.zoho.com/api/v1/tickets?include=contacts,
+            assignee,departments,team,isRead&limit={self.config['limit']}"""
+
+        elif self.config['records'] == 'desk.contacts':
+            logger.info("Contact \n")
+            self.url = f"https://desk.zoho.com/api/v1/contacts?limit={self.config['limit']}"
+
+        try:
+            headers = {"Authorization":f"Zoho-oauthtoken {self.token}"}
+
+            response = requests.get(url=self.url,headers=headers)
+
+            flattened_data = [self.flatten_dict(item) for item in response]
+
+            df = pd.DataFrame(flattened_data )
+
+            logger.info(f" data \n {df}")
+
+            return df
+        except requests.exceptions.HTTPError as http_err:
+            logger.error(f"HTTP error occurred: {http_err}")
+
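Note: connect() above caches the OAuth token in token.json and reuses it indefinitely, but Zoho access tokens expire (roughly hourly), so a stale cache will eventually start failing. One hedged extension is to timestamp the cache — a sketch only; the fetched_at field is not written by the patch as it stands:

    import json, os, time

    def load_cached_token(path="token.json", max_age_seconds=3300):
        # Treat the cached token as stale once it nears Zoho's ~1h expiry.
        if not os.path.exists(path):
            return None
        with open(path) as f:
            data = json.load(f)
        if time.time() - data.get("fetched_at", 0) > max_age_seconds:
            return None
        return data.get("access_token")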
From 07775e62e1cbe117e8518e29f0fc959ffcc05f40 Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Wed, 28 Aug 2024 10:30:34 -0800
Subject: [PATCH 10/34] added intercom source

---
 vector_etl/source_mods/__init__.py         |   3 +
 vector_etl/source_mods/intercom_loader.py  | 131 +++++++++++++++++++++
 vector_etl/source_mods/zoho_desk_loader.py |   2 +-
 3 files changed, 135 insertions(+), 1 deletion(-)
 create mode 100644 vector_etl/source_mods/intercom_loader.py

diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index 399f4f2..731988c 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -14,6 +14,7 @@ from .hubspot_loader import HubSpotSource
 from .zoho_crm_loader import ZohoCrmSource
 from .zoho_desk_loader import ZohoDeskSource
+from . intercom_loader import InterComSource
 def get_source_class(config):
     source_type = config['source_data_type']
     if source_type == 'Amazon S3':
@@ -44,5 +45,7 @@ def get_source_class(config):
     elif source_type == 'Zoho Desk':
         return ZohoDeskSource(config)
+    elif source_type == "InterCom":
+        return InterComSource(config)
     else:
         raise ValueError(f"Unsupported source type: {source_type}")
diff --git a/vector_etl/source_mods/intercom_loader.py b/vector_etl/source_mods/intercom_loader.py
new file mode 100644
index 0000000..b771a09
--- /dev/null
+++ b/vector_etl/source_mods/intercom_loader.py
@@ -0,0 +1,131 @@
+from base import BaseSource
+import requests
+from pprint import pprint
+import logging
+import pandas as pd
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class InterComSource(BaseSource):
+    def __init__(self,config):
+        self.config = config
+        self.url = None
+        self.token = self.config['token']
+
+
+    def flatten_dict(self, d, parent_key='', sep='_'):
+        items = []
+        for k, v in d.items():
+            new_key = f"{parent_key}{sep}{k}" if parent_key else k
+            if isinstance(v, dict):
+                items.extend(self.flatten_dict(v, new_key, sep=sep).items())
+            elif isinstance(v, list):
+                for i, item in enumerate(v):
+                    if isinstance(item, dict):
+                        items.extend(self.flatten_dict(item, f"{new_key}{sep}{i}", sep=sep).items())
+                    else:
+                        items.append((f"{new_key}{sep}{i}", item))
+            else:
+                items.append((new_key, v))
+        return dict(items)
+
+
+    def connect(self,url):
+        headers = {"Authorization": f"Bearer {self.token}",
+                   "Content-type":"application/json",
+                   "Intercom-Version":"2.11"}
+        response = requests.get(url=url,headers=headers)
+
+        return response
+
+
+    def fetch_data(self):
+
+        if self.config['records'] == 'intercom.articles':
+            logger.info(" articles \n")
+            self.url = f"https://api.intercom.io/articles"
+
+            response = self.connect(self.url).json()['data']
+
+        if self.config['records'] == 'intercom.companies.scroll':
+            logger.info(" Companies \n")
+            self.url = f"https://api.intercom.io/companies/scroll"
+
+            response = self.connect(self.url).json()['data']
+
+        if self.config['records'] == 'intercom.contacts':
+            logger.info(" Contacts \n")
+            self.url = f"https://api.intercom.io/contacts"
+
+            response = self.connect(self.url).json()['data']
+
+        if self.config['records'] == 'intercom.conversations':
+            logger.info(" conversations \n")
+            self.url = f"https://api.intercom.io/conversations"
+
+            response = self.connect(self.url).json()['conversations']
+
+        if self.config['records'] == 'intercom.collections':
+            logger.info(" collection \n")
+            self.url = f"https://api.intercom.io/help_center/collections"
+
+            response = self.connect(self.url).json()['data']
+
+        if self.config['records'] == 'intercom.news_items':
+            logger.info(" news items \n")
+            self.url = f"https://api.intercom.io/news/news_items"
+
+            response = self.connect(self.url).json()['data']
+
+        if self.config['records'] == 'intercom.segments':
+            logger.info(" segments \n")
+            self.url = f"https://api.intercom.io/segments"
+
+            response = self.connect(self.url).json()['segments']
+
+        if self.config['records'] == 'intercom.subscription_types':
+            logger.info(" subscription_types \n")
+            self.url = f"https://api.intercom.io/subscription_types"
+
+            response = self.connect(self.url).json()['data']
+
+        if self.config['records'] == 'intercom.teams':
+            logger.info(" Teams \n")
+            self.url = f"https://api.intercom.io/teams"
+
+            response = self.connect(self.url).json()['teams']
+
+        if self.config['records'] == 'intercom.ticket_types':
+            logger.info(" ticket_types \n")
+            self.url = f"https://api.intercom.io/ticket_types"
+
+            response = self.connect(self.url).json()['data']
+
+        try:
+            flattened_data = [self.flatten_dict(item) for item in response]
+            pprint(flattened_data,indent=4)
+
+            df = pd.DataFrame(flattened_data )
+
+            logger.info(f" data \n {df}")
+
+            return df
+        except requests.exceptions.HTTPError as http_err:
+            logger.error(f"HTTP error occurred: {http_err}")
+
diff --git a/vector_etl/source_mods/zoho_desk_loader.py b/vector_etl/source_mods/zoho_desk_loader.py
index f19cc54..6280c42 100644
--- a/vector_etl/source_mods/zoho_desk_loader.py
+++ b/vector_etl/source_mods/zoho_desk_loader.py
@@ -103,7 +103,7 @@ def fetch_data(self):
         try:
             headers = {"Authorization":f"Zoho-oauthtoken {self.token}"}
 
-            response = requests.get(url=self.url,headers=headers)
+            response = requests.get(url=self.url,headers=headers)['data']
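Note: as with the HubSpot loader, the Intercom record-type branches differ only in URL and in which key of the JSON payload holds the rows, so they reduce to a table — a sketch (pairs taken from the branches above; helper name illustrative):

    INTERCOM_ENDPOINTS = {
        "intercom.articles": ("https://api.intercom.io/articles", "data"),
        "intercom.contacts": ("https://api.intercom.io/contacts", "data"),
        "intercom.conversations": ("https://api.intercom.io/conversations", "conversations"),
        "intercom.segments": ("https://api.intercom.io/segments", "segments"),
        "intercom.teams": ("https://api.intercom.io/teams", "teams"),
        "intercom.ticket_types": ("https://api.intercom.io/ticket_types", "data"),
        # remaining record types follow the same (url, payload_key) pattern
    }

    def resolve_endpoint(records):
        try:
            return INTERCOM_ENDPOINTS[records]
        except KeyError:
            raise ValueError(f"Unsupported Intercom record type: {records}")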
From 9005c5bdd09261d9a408d1c8481f79a4ec98f9b2 Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Wed, 28 Aug 2024 16:14:03 -0800
Subject: [PATCH 11/34] added paystack source

---
 vector_etl/source_mods/__init__.py        |   5 +-
 vector_etl/source_mods/intercom_loader.py |   2 +-
 vector_etl/source_mods/paystack_loader.py | 112 ++++++++++++++++++++++
 3 files changed, 117 insertions(+), 2 deletions(-)
 create mode 100644 vector_etl/source_mods/paystack_loader.py

diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index 731988c..c2af60e 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -14,7 +14,8 @@ from .google_bigquery import GoogleBigQuerySource
 from .hubspot_loader import HubSpotSource
 from .zoho_crm_loader import ZohoCrmSource
 from .zoho_desk_loader import ZohoDeskSource
-from . intercom_loader import InterComSource
+from .intercom_loader import InterComSource
+from .paystack_loader import PayStackSource
 def get_source_class(config):
     source_type = config['source_data_type']
     if source_type == 'Amazon S3':
@@ -47,5 +48,7 @@ def get_source_class(config):
         return ZohoDeskSource(config)
     elif source_type == "InterCom":
         return InterComSource(config)
+    elif source_type == 'PayStack':
+        return PayStackSource(config)
     else:
         raise ValueError(f"Unsupported source type: {source_type}")
diff --git a/vector_etl/source_mods/intercom_loader.py b/vector_etl/source_mods/intercom_loader.py
index b771a09..745e951 100644
--- a/vector_etl/source_mods/intercom_loader.py
+++ b/vector_etl/source_mods/intercom_loader.py
@@ -1,4 +1,4 @@
-from base import BaseSource
+from .base import BaseSource
 import requests
 from pprint import pprint
 import logging
 import pandas as pd
diff --git a/vector_etl/source_mods/paystack_loader.py b/vector_etl/source_mods/paystack_loader.py
new file mode 100644
index 0000000..3900090
--- /dev/null
+++ b/vector_etl/source_mods/paystack_loader.py
@@ -0,0 +1,112 @@
+from .base import BaseSource
+import requests
+from pprint import pprint
+import logging
+import pandas as pd
+
+from paystackapi.paystack import Paystack
+
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class PayStackSource(BaseSource):
+    def __init__(self,config):
+        self.config = config
+        self.paystack_secret_key = self.config['token']
+
+
+    def flatten_dict(self, d, parent_key='', sep='_'):
+        items = []
+        for k, v in d.items():
+            new_key = f"{parent_key}{sep}{k}" if parent_key else k
+            if isinstance(v, dict):
+                items.extend(self.flatten_dict(v, new_key, sep=sep).items())
+            elif isinstance(v, list):
+                for i, item in enumerate(v):
+                    if isinstance(item, dict):
+                        items.extend(self.flatten_dict(item, f"{new_key}{sep}{i}", sep=sep).items())
+                    else:
+                        items.append((f"{new_key}{sep}{i}", item))
+            else:
+                items.append((new_key, v))
+        return dict(items)
+
+
+    def connect(self):
+
+        paystack = Paystack(secret_key=self.paystack_secret_key)
+
+        return paystack
+
+
+    def fetch_data(self):
+
+        if self.config['records'] == 'paystack.transactions':
+            logger.info(" Transactions \n")
+            response = self.connect().transaction.list()['data']
+
+        elif self.config['records'] == 'paystack.transactions.split':
+            logger.info(" Transactions split \n")
+            response = self.connect().transactionSplit.list()['data']
+
+        elif self.config['records'] == 'paystack.invoice':
+            logger.info(" invoice \n")
+            response = self.connect().invoice.list()['data']
+
+        elif self.config['records'] == 'paystack.product':
+            logger.info(" product \n")
+            response = self.connect().product.list()['data']
+
+        elif self.config['records'] == 'paystack.customer':
+            logger.info(" customer \n")
+            response = self.connect().customer.list()['data']
+
+        elif self.config['records'] == 'paystack.plan':
+            logger.info(" plan \n")
+            response = self.connect().plan.list()['data']
+
+        elif self.config['records'] == 'paystack.subaccount':
+            logger.info(" subaccount \n")
+            response = self.connect().subaccount.list()['data']
+
+        elif self.config['records'] == 'paystack.subscription':
+            logger.info(" subscription \n")
+            response = self.connect().subscription.list()['data']
+
+        elif self.config['records'] == 'paystack.transfer':
+            logger.info(" transfer \n")
+            response = self.connect().transfer.list()['data']
+
+        elif self.config['records'] == 'paystack.bulkcharge':
+            logger.info(" bulkcharge \n")
+            response = self.connect().bulkcharge.list()['data']
+
+        elif self.config['records'] == 'paystack.refund':
+            logger.info(" refund \n")
+            response = self.connect().refund.list()['data']
+
+        try:
+            flattened_data = [self.flatten_dict(item) for item in response]
+            pprint(flattened_data,indent=4)
+
+            df = pd.DataFrame(flattened_data )
+
+            logger.info(f" data \n {df}")
+
+            return df
+        except requests.exceptions.HTTPError as http_err:
+            logger.error(f"HTTP error occurred: {http_err}")
+

From 5c8e47b0bcf44eb794b45d662b8cc053f4c0ecec Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Wed, 28 Aug 2024 18:00:28 -0800
Subject: [PATCH 12/34] added flutterwave source

---
 vector_etl/source_mods/__init__.py           |   3 +
 vector_etl/source_mods/flutterwave_loader.py | 113 +++++++++++++++++++
 vector_etl/source_mods/intercom_loader.py    |  18 +--
 vector_etl/source_mods/paystack_loader.py    |   7 +-
 4 files changed, 127 insertions(+), 14 deletions(-)
 create mode 100644 vector_etl/source_mods/flutterwave_loader.py

diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index c2af60e..285e9fb 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -16,6 +16,7 @@ from .zoho_desk_loader import ZohoDeskSource
 from .intercom_loader import InterComSource
 from .paystack_loader import PayStackSource
+from .flutterwave_loader import FlutterWaveSource
 def get_source_class(config):
     source_type = config['source_data_type']
     if source_type == 'Amazon S3':
@@ -50,5 +51,7 @@ def get_source_class(config):
     elif source_type == 'PayStack':
         return PayStackSource(config)
+    elif source_type == "FlutterWave":
+        return FlutterWaveSource(config)
     else:
         raise ValueError(f"Unsupported source type: {source_type}")
diff --git a/vector_etl/source_mods/flutterwave_loader.py b/vector_etl/source_mods/flutterwave_loader.py
new file mode 100644
index 0000000..d49f70a
--- /dev/null
+++ b/vector_etl/source_mods/flutterwave_loader.py
@@ -0,0 +1,113 @@
+from .base import BaseSource
+import requests
+from pprint import pprint
+import logging
+import pandas as pd
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class FlutterWaveSource(BaseSource):
+    def __init__(self,config):
+        self.config = config
+        self.url = None
+        self.secret_key = self.config['secret_key']
+
+
+    def flatten_dict(self, d, parent_key='', sep='_'):
+        items = []
+        for k, v in d.items():
+            new_key = f"{parent_key}{sep}{k}" if parent_key else k
+            if isinstance(v, dict):
+                items.extend(self.flatten_dict(v, new_key, sep=sep).items())
+            elif isinstance(v, list):
+                for i, item in enumerate(v):
+                    if isinstance(item, dict):
+                        items.extend(self.flatten_dict(item, f"{new_key}{sep}{i}", sep=sep).items())
+                    else:
+                        items.append((f"{new_key}{sep}{i}", item))
+            else:
+                items.append((new_key, v))
+        return dict(items)
+
+
+    def connect(self,url):
+        headers = {"Authorization": f"Bearer {self.secret_key}",
+                   "Content-type":"application/json"}
+        response = requests.get(url=url,headers=headers)
+
+        return response
+
+
+    def fetch_data(self):
+
+        if self.config['records'] == 'flutterwave.transfers':
+            logger.info(" Transfers \n")
+            self.url = f"https://api.flutterwave.com/v3/transfers"
+
+            response = self.connect(self.url).json()['data']
+
+        elif self.config['records'] == 'flutterwave.transactions':
+            logger.info(" transactions \n")
+            self.url = f"https://api.flutterwave.com/v3/transactions"
+
+            response = self.connect(self.url).json()['data']
+
+        elif self.config['records'] == 'flutterwave.beneficiaries':
+            logger.info(" beneficiaries \n")
+            self.url = f"https://api.flutterwave.com/v3/beneficiaries"
+
+            response = self.connect(self.url).json()['data']
+
+        elif self.config['records'] == 'flutterwave.subaccounts':
+            logger.info(" subaccounts \n")
+            self.url = f"https://api.flutterwave.com/v3/subaccounts"
+
+            response = self.connect(self.url).json()['data']
+
+        elif self.config['records'] == 'flutterwave.payout-subaccounts':
+            logger.info(" payout-subaccounts \n")
+            self.url = f"https://api.flutterwave.com/v3/payout-subaccounts"
+
+            response = self.connect(self.url).json()['data']
+
+        elif self.config['records'] == 'flutterwave.subscriptions':
+            logger.info(" subscriptions \n")
+            self.url = f"https://api.flutterwave.com/v3/subscriptions"
+
+            response = self.connect(self.url).json()['data']
+
+        elif self.config['records'] == 'flutterwave.payment-plans':
+            logger.info(" payment-plans \n")
+            self.url = f"https://api.flutterwave.com/v3/payment-plans"
+
+            response = self.connect(self.url).json()['data']
+
+        try:
+            flattened_data = [self.flatten_dict(item) for item in response]
+            pprint(flattened_data,indent=4)
+
+            df = pd.DataFrame(flattened_data )
+
+            logger.info(f" data \n {df}")
+
+            return df
+        except requests.exceptions.HTTPError as http_err:
+            logger.error(f"HTTP error occurred: {http_err}")
+
diff --git a/vector_etl/source_mods/intercom_loader.py b/vector_etl/source_mods/intercom_loader.py
index 745e951..b3a8581 100644
--- a/vector_etl/source_mods/intercom_loader.py
+++ b/vector_etl/source_mods/intercom_loader.py
@@ -49,14 +49,14 @@ def fetch_data(self):
             response = self.connect(self.url).json()['data']
 
-        if self.config['records'] == 'intercom.companies.scroll':
+        elif self.config['records'] == 'intercom.companies.scroll':
             logger.info(" Companies \n")
             self.url = f"https://api.intercom.io/companies/scroll"
 
             response = self.connect(self.url).json()['data']
 
-        if self.config['records'] == 'intercom.contacts':
+        elif self.config['records'] == 'intercom.contacts':
             logger.info(" Contacts \n")
             self.url = f"https://api.intercom.io/contacts"
 
@@ -64,7 +64,7 @@ def fetch_data(self):
 
-        if self.config['records'] == 'intercom.conversations':
+        elif self.config['records'] == 'intercom.conversations':
             logger.info(" conversations \n")
             self.url = f"https://api.intercom.io/conversations"
 
@@ -72,21 +72,21 @@ def fetch_data(self):
 
-        if self.config['records'] == 'intercom.collections':
+        elif self.config['records'] == 'intercom.collections':
             logger.info(" collection \n")
             self.url = f"https://api.intercom.io/help_center/collections"
 
             response = self.connect(self.url).json()['data']
 
-        if self.config['records'] == 'intercom.news_items':
+        elif self.config['records'] == 'intercom.news_items':
             logger.info(" news items \n")
             self.url = f"https://api.intercom.io/news/news_items"
 
             response = self.connect(self.url).json()['data']
 
-        if self.config['records'] == 'intercom.segments':
+        elif self.config['records'] == 'intercom.segments':
             logger.info(" segments \n")
             self.url = f"https://api.intercom.io/segments"
 
@@ -94,20 +94,20 @@ def fetch_data(self):
 
-        if self.config['records'] == 'intercom.subscription_types':
+        elif self.config['records'] == 'intercom.subscription_types':
             logger.info(" subscription_types \n")
             self.url = f"https://api.intercom.io/subscription_types"
 
             response = self.connect(self.url).json()['data']
 
-        if self.config['records'] == 'intercom.teams':
+        elif self.config['records'] == 'intercom.teams':
             logger.info(" Teams \n")
             self.url = f"https://api.intercom.io/teams"
f"https://api.intercom.io/teams" response = self.connect(self.url).json()['teams'] - if self.config['records'] == 'intercom.ticket_types': + elif self.config['records'] == 'intercom.ticket_types': logger.info(" ticket_types \n") self.url = f"https://api.intercom.io/ticket_types" diff --git a/vector_etl/source_mods/paystack_loader.py b/vector_etl/source_mods/paystack_loader.py index 3900090..0fd7b8a 100644 --- a/vector_etl/source_mods/paystack_loader.py +++ b/vector_etl/source_mods/paystack_loader.py @@ -1,6 +1,4 @@ from .base import BaseSource -import requests -from pprint import pprint import logging import pandas as pd @@ -13,7 +11,7 @@ class PayStackSource(BaseSource): def __init__(self,config): self.config = config - self.paystack_secret_key = self.config['token'] + self.paystack_secret_key = self.config['paystack_secret_key'] def flatten_dict(self, d, parent_key='', sep='_'): @@ -93,14 +91,13 @@ def fetch_data(self): try: flattened_data = [self.flatten_dict(item) for item in response] - pprint(flattened_data,indent=4) df = pd.DataFrame(flattened_data ) logger.info(f" data \n {df}") return df - except requests.exceptions.HTTPError as http_err: + except Exception as http_err: logger.error(f"HTTP error occurred: {http_err}") From cc0bf928723e94f4c53622a298590787a32ae69c Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Fri, 30 Aug 2024 06:11:21 -0800 Subject: [PATCH 13/34] added sources flutterwave,hubspot,zoho,paystack --- vector_etl/source_mods/__init__.py | 4 ++-- vector_etl/source_mods/google_bigquery.py | 5 +---- vector_etl/source_mods/hubspot_loader.py | 2 +- vector_etl/source_mods/zoho_desk_loader.py | 16 +--------------- 4 files changed, 5 insertions(+), 22 deletions(-) diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py index 285e9fb..96a4b1e 100644 --- a/vector_etl/source_mods/__init__.py +++ b/vector_etl/source_mods/__init__.py @@ -43,9 +43,9 @@ def get_source_class(config): return GoogleBigQuerySource(config) elif source_type == 'HubSpot': return HubSpotSource(config) - elif source_type == 'Zoho Crm': + elif source_type == 'ZohoCrm': return ZohoCrmSource(config) - elif source_type == 'Zoho Desk': + elif source_type == 'ZohoDesk': return ZohoDeskSource(config) elif source_type == "InterCom": return InterComSource(config) diff --git a/vector_etl/source_mods/google_bigquery.py b/vector_etl/source_mods/google_bigquery.py index 3a93a99..51e979d 100644 --- a/vector_etl/source_mods/google_bigquery.py +++ b/vector_etl/source_mods/google_bigquery.py @@ -41,8 +41,5 @@ def fetch_data(self): } -data = GoogleBigQuerySource(config) -print(type(data.fetch_data())) -print(data.fetch_data()) - + diff --git a/vector_etl/source_mods/hubspot_loader.py b/vector_etl/source_mods/hubspot_loader.py index 5b7d381..ca93421 100644 --- a/vector_etl/source_mods/hubspot_loader.py +++ b/vector_etl/source_mods/hubspot_loader.py @@ -43,7 +43,7 @@ def fetch_data(self): self.endpoints = f"https://api.hubapi.com/crm/v3/objects/deals?limit={self.config['limit']}&archived={self.config['archive']}" logger.info(f"Deals \n") - elif self.config['crm_object'] == "crm.products": + elif self.config['crm_object'] == "crm_object": self.endpoints = f"https://api.hubapi.com/crm/v3/objects/products?limit={self.config['limit']}&archived={self.config['archive']}" logger.info(f"products \n") diff --git a/vector_etl/source_mods/zoho_desk_loader.py b/vector_etl/source_mods/zoho_desk_loader.py index 6280c42..9a083b8 100644 --- a/vector_etl/source_mods/zoho_desk_loader.py +++ 
b/vector_etl/source_mods/zoho_desk_loader.py @@ -100,21 +100,7 @@ def fetch_data(self): self.url = f"https://desk.zoho.com/api/v1/contacts?limit={self.config['limit']}" - try: - headers = {"Authorization":f"Zoho-oauthtoken {self.token}"} - - response = requests.get(url=self.url,headers=headers)['data'] - - flattened_data = [self.flatten_dict(item) for item in response] - - - df = pd.DataFrame(flattened_data ) - - logger.info(f" data \n {df}") - - return df - except requests.exceptions.HTTPError as http_err: - logger.error(f"HTTP error occurred: {http_err}") + From 2d6f4ec03c9762fbab30406ca7f596a1e1d6eaac Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Fri, 30 Aug 2024 06:12:55 -0800 Subject: [PATCH 14/34] added sources flutterwave,hubspot,zoho,paystack --- .gitignore | 3 + examples/flutterwave_to_pinecone.yaml | 20 ++++ examples/hubspot_to_pinecone.yaml | 23 +++++ examples/intercom_to_pinecone.yaml | 20 ++++ examples/paystack_to_pincone.yaml | 20 ++++ examples/zohocrm_to_pinecone.yaml | 25 +++++ examples/zohodesk_to_pinecone.yaml | 25 +++++ tests/test_source_mods.py | 132 ++++++++++++++++++++++++++ 8 files changed, 268 insertions(+) create mode 100644 examples/flutterwave_to_pinecone.yaml create mode 100644 examples/hubspot_to_pinecone.yaml create mode 100644 examples/intercom_to_pinecone.yaml create mode 100644 examples/paystack_to_pincone.yaml create mode 100644 examples/zohocrm_to_pinecone.yaml create mode 100644 examples/zohodesk_to_pinecone.yaml diff --git a/.gitignore b/.gitignore index e41350c..20ebccf 100644 --- a/.gitignore +++ b/.gitignore @@ -154,6 +154,9 @@ dmypy.json # Cython debug symbols cython_debug/ +vector_etl/source_mods/google_bigquery.py +vector_etl/source_mods/paystack_loader.py + # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore diff --git a/examples/flutterwave_to_pinecone.yaml b/examples/flutterwave_to_pinecone.yaml new file mode 100644 index 0000000..8e93e83 --- /dev/null +++ b/examples/flutterwave_to_pinecone.yaml @@ -0,0 +1,20 @@ +source: + source_data_type: "FlutterWave" + secret_key: "" + records: "flutterwave.payout-subaccounts" + +embedding: + embedding_model: "OpenAI" + api_key: "" + model_name: "text-embedding-ada-002" + +target: + target_database: "Pinecone" + pinecone_api_key: "" + index_name: "" + dimension: 1536 #[Optional] Only required if creating a new index + metric: "cosine" #[Optional] Only required if creating a new index + cloud: "aws" #[Optional] Only required if creating a new index + region: "us-east-1" #[Optional] Only required if creating a new index + + diff --git a/examples/hubspot_to_pinecone.yaml b/examples/hubspot_to_pinecone.yaml new file mode 100644 index 0000000..aaaff75 --- /dev/null +++ b/examples/hubspot_to_pinecone.yaml @@ -0,0 +1,23 @@ +source: + source_data_type: "HubSpot" + archive: "" + limit: "" + client_secret: "" + access_token: "" + crm_object: "" + +embedding: + embedding_model: "OpenAI" + api_key: "" + model_name: "text-embedding-ada-002" + +target: + target_database: "Pinecone" + pinecone_api_key: "" + index_name: "" + dimension: 1536 #[Optional] Only required if creating a new index + metric: "cosine" #[Optional] Only required if creating a new index + cloud: "aws" #[Optional] Only required if creating a new index + region: "us-east-1" #[Optional] Only required if creating a new index + + diff --git a/examples/intercom_to_pinecone.yaml b/examples/intercom_to_pinecone.yaml new 
file mode 100644 index 0000000..55195d6 --- /dev/null +++ b/examples/intercom_to_pinecone.yaml @@ -0,0 +1,20 @@ +source: + source_data_type: "InterCom" + token: "FlutterWave" + records: "intercom.teams" + +embedding: + embedding_model: "OpenAI" + api_key: "" + model_name: "text-embedding-ada-002" + +target: + target_database: "Pinecone" + pinecone_api_key: "" + index_name: "" + dimension: 1536 #[Optional] Only required if creating a new index + metric: "cosine" #[Optional] Only required if creating a new index + cloud: "aws" #[Optional] Only required if creating a new index + region: "us-east-1" #[Optional] Only required if creating a new index + + diff --git a/examples/paystack_to_pincone.yaml b/examples/paystack_to_pincone.yaml new file mode 100644 index 0000000..b301a73 --- /dev/null +++ b/examples/paystack_to_pincone.yaml @@ -0,0 +1,20 @@ +source: + source_data_type: "PayStackS" + paystack_secret_key: "" + records: "paystack.transactions" + +embedding: + embedding_model: "OpenAI" + api_key: "" + model_name: "text-embedding-ada-002" + +target: + target_database: "Pinecone" + pinecone_api_key: "" + index_name: "" + dimension: 1536 #[Optional] Only required if creating a new index + metric: "cosine" #[Optional] Only required if creating a new index + cloud: "aws" #[Optional] Only required if creating a new index + region: "us-east-1" #[Optional] Only required if creating a new index + + diff --git a/examples/zohocrm_to_pinecone.yaml b/examples/zohocrm_to_pinecone.yaml new file mode 100644 index 0000000..8d55cb1 --- /dev/null +++ b/examples/zohocrm_to_pinecone.yaml @@ -0,0 +1,25 @@ +source: + source_data_type: "ZohoCrm" + grant_type: "" + client_id: "" + client_secret: "" + code: "" + per_page: "" + records: "" + accounts_url: "" + +embedding: + embedding_model: "OpenAI" + api_key: "" + model_name: "text-embedding-ada-002" + +target: + target_database: "Pinecone" + pinecone_api_key: "" + index_name: "" + dimension: 1536 #[Optional] Only required if creating a new index + metric: "cosine" #[Optional] Only required if creating a new index + cloud: "aws" #[Optional] Only required if creating a new index + region: "us-east-1" #[Optional] Only required if creating a new index + + diff --git a/examples/zohodesk_to_pinecone.yaml b/examples/zohodesk_to_pinecone.yaml new file mode 100644 index 0000000..8d408b3 --- /dev/null +++ b/examples/zohodesk_to_pinecone.yaml @@ -0,0 +1,25 @@ +source: + source_data_type: "ZohoDesk" + grant_type: "" + client_id: "" + client_secret: "" + code: "" + per_page: "" + records: "" + accounts_url: "" + +embedding: + embedding_model: "OpenAI" + api_key: "" + model_name: "text-embedding-ada-002" + +target: + target_database: "Pinecone" + pinecone_api_key: "" + index_name: "" + dimension: 1536 #[Optional] Only required if creating a new index + metric: "cosine" #[Optional] Only required if creating a new index + cloud: "aws" #[Optional] Only required if creating a new index + region: "us-east-1" #[Optional] Only required if creating a new index + + diff --git a/tests/test_source_mods.py b/tests/test_source_mods.py index c83dfe5..0b74b4c 100644 --- a/tests/test_source_mods.py +++ b/tests/test_source_mods.py @@ -7,6 +7,12 @@ from vector_etl.source_mods.local_file import LocalFileSource from vector_etl.source_mods.google_bigquery import GoogleBigQuerySource from vector_etl.source_mods.airtable_loader import AirTableSource +from vector_etl.source_mods.hubspot_loader import HubSpotSource +from vector_etl.source_mods.intercom_loader import InterComSource +from 
vector_etl.source_mods.paystack_loader import PayStackSource +from vector_etl.source_mods.zoho_crm_loader import ZohoCrmSource +from vector_etl.source_mods.zoho_desk_loader import ZohoDeskSource +from vector_etl.source_mods.flutterwave_loader import FlutterWaveSource @pytest.fixture def s3_config(): @@ -39,6 +45,66 @@ def airtable_config(): } +@pytest.fixture +def zohodesk_config(): + return{ + "grant_type":"", + "client_id": "", + "client_secret": "", + "code": "", + "limit":"", + "records":"desk.team", + "accounts_url":"" + } + + +@pytest.fixture +def zohocrm_config(): + return{ + "grant_type":"", + "client_id": "", + "client_secret": "", + "code": "", + "per_page":"10", + "records":"module.Call", + "accounts_url":"" + } + + +@pytest.fixture +def hubspot_config(): + return{ + "archive":"", + "limit": "", + "access_token": "", + "crm_object":"crm_object", + } + + +@pytest.fixture +def paystack_config(): + return{ + "paystack_secret_key":"", + "records": "paystack.transactions", + } + + + +@pytest.fixture +def flutterwave_config(): + return{ + "secret_key":"", + "records": "flutterwave.payout-subaccounts", + } + +@pytest.fixture +def intercom_config(): + return{ + "token":"", + "records": "intercom.teams", + } + + @pytest.fixture def db_config(): return { @@ -175,5 +241,71 @@ def test_airtable_fetch_data(airtable_config): df = source.fetch_data() assert isinstance(df, pd.DataFrame) + + + + +def test_zohodesk_fetch_data(zohodesk_config): + with patch('requests.get') as mock_connect: + mock_connect.return_value = [ { + "Address": "333 Post St", + "Name": "Union Square", + "Visited": True + } + ] + + source = ZohoDeskSource(db_config) + df = source.fetch_data() + + assert isinstance(df, pd.DataFrame) + + + + +def test_zohocrm_fetch_data(zohocrm_config): + with patch('requests.get') as mock_connect: + mock_connect.return_value = [ { } + ] + + source = ZohoCrmSource(db_config) + df = source.fetch_data() + + assert isinstance(df, pd.DataFrame) + + + + +def test_paystack_fetch_data(paystack_config): + with patch('Paystack') as mock_connect: + mock_connect.return_value = [{}] + + source = PayStackSource(db_config) + df = source.fetch_data() + + assert isinstance(df, pd.DataFrame) + + + + +def test_intercom_fetch_data(intercom_config): + with patch('requests.get') as mock_connect: + mock_connect.return_value = [{}] + + source = InterComSource(db_config) + df = source.fetch_data() + + assert isinstance(df, pd.DataFrame) + + + +def test_flutterwave_fetch_data(flutterwave_config): + with patch('requests.get') as mock_connect: + mock_connect.return_value = [{}] + + source = FlutterWaveSource(db_config) + df = source.fetch_data() + + assert isinstance(df, pd.DataFrame) + From 8c4f8fbb650c7e86c65674a4dbd055e3ec07d8d3 Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Sat, 31 Aug 2024 02:26:12 -0800 Subject: [PATCH 15/34] add 6 sources --- .gitignore | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 20ebccf..60993bb 100644 --- a/.gitignore +++ b/.gitignore @@ -154,8 +154,7 @@ dmypy.json # Cython debug symbols cython_debug/ -vector_etl/source_mods/google_bigquery.py -vector_etl/source_mods/paystack_loader.py + # PyCharm # JetBrains specific template is maintained in a separate JetBrains.gitignore that can From 86764ebbe929f4a83e8b4ea5173a70f12c89dfd3 Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Sun, 1 Sep 2024 06:15:15 -0800 Subject: [PATCH 16/34] added gmail source --- vector_etl/source_mods/__init__.py | 3 + vector_etl/source_mods/gmail_loader.py 
| 102 +++++++++++++++++++++++++ 2 files changed, 105 insertions(+) create mode 100644 vector_etl/source_mods/gmail_loader.py diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py index 96a4b1e..c2cbf14 100644 --- a/vector_etl/source_mods/__init__.py +++ b/vector_etl/source_mods/__init__.py @@ -17,6 +17,7 @@ from .intercom_loader import InterComSource from .paystack_loader import PayStackSource from .flutterwave_loader import FlutterWaveSource +from .gmail_loader import GmailSource def get_source_class(config): source_type = config['source_data_type'] if source_type == 'Amazon S3': @@ -53,5 +54,7 @@ def get_source_class(config): return PayStackSource(config) elif source_type == "FlutterWave": return FlutterWaveSource(config) + elif source_type == "Gmail": + return GmailSource(config) else: raise ValueError(f"Unsupported source type: {source_type}") diff --git a/vector_etl/source_mods/gmail_loader.py b/vector_etl/source_mods/gmail_loader.py new file mode 100644 index 0000000..9858bd0 --- /dev/null +++ b/vector_etl/source_mods/gmail_loader.py @@ -0,0 +1,102 @@ +import os.path +import base64 +from google.auth.transport.requests import Request +from google.oauth2.credentials import Credentials +from google_auth_oauthlib.flow import InstalledAppFlow +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError +from pprint import pprint +from .base import BaseSource +import pandas as pd + +class GmailSource(BaseSource): + def __init__(self, config): + self.config = config + self.SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"] + + def connect(self): + creds = None + if os.path.exists("token.json"): + creds = Credentials.from_authorized_user_file("token.json", self.SCOPES) + if not creds or not creds.valid: + if creds and creds.expired and creds.refresh_token: + creds.refresh(Request()) + else: + flow = InstalledAppFlow.from_client_secrets_file( + self.config['credentials'], self.SCOPES + ) + creds = flow.run_local_server(port=0) + with open("token.json", "w") as token: + token.write(creds.to_json()) + return creds + + def fetch_data(self): + creds = self.connect() + service = build("gmail", "v1", credentials=creds) + + # Extract the label from config + label = self.config.get('gmail.label').upper() + + messages = self.get_messages(service, label) + + if messages: + email_data = self.parse_messages(service, messages, label) + df = pd.DataFrame(email_data) + + return df + else: + print("No messages found.") + return None + + def get_messages(self, service, label): + try: + results = service.users().messages().list(userId="me", labelIds=[label]).execute() + return results.get("messages", []) + except HttpError as error: + print(f"An error occurred while fetching messages for label {label}: {error}") + return None + + def parse_messages(self, service, messages, label): + email_data = { + "id": [], + "threadId": [], + "label": [], + "subject": [], + "from": [], + "snippet": [], + "body": [], + } + + for message in messages: + msg = service.users().messages().get(userId="me", id=message["id"]).execute() + headers = msg["payload"]["headers"] + + subject, sender = self._get_header_info(headers) + snippet = msg.get("snippet", "") + body = self.get_body(msg) + + email_data["id"].append(message["id"]) + email_data["threadId"].append(message["threadId"]) + email_data["label"].append(label) + email_data["subject"].append(subject) + email_data["from"].append(sender) + email_data["snippet"].append(snippet) + email_data["body"].append(body) 
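+            # Note: get_body() below only reads msg["payload"]["body"]["data"], which
+            # is present for single-part messages. Multipart messages keep their text
+            # under msg["payload"]["parts"], which this loader does not traverse.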
+ + return email_data + + def get_header_info(self, headers): + subject = None + sender = None + for header in headers: + if header["name"] == "Subject": + subject = header["value"] + if header["name"] == "From": + sender = header["value"] + return subject, sender + + def get_body(self, msg): + if "data" in msg["payload"]["body"]: + return base64.urlsafe_b64decode(msg["payload"]["body"]["data"]).decode("utf-8") + else: + return "" From 5155ae521a529c79ce313ed511e0c8155753ee59 Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Sun, 1 Sep 2024 06:17:41 -0800 Subject: [PATCH 17/34] added gmail source --- examples/gmail_to_pinecone.yaml | 20 ++++++ tests/test_source_mods.py | 106 +++++++++++++++++++++++++++----- 2 files changed, 110 insertions(+), 16 deletions(-) create mode 100644 examples/gmail_to_pinecone.yaml diff --git a/examples/gmail_to_pinecone.yaml b/examples/gmail_to_pinecone.yaml new file mode 100644 index 0000000..794db28 --- /dev/null +++ b/examples/gmail_to_pinecone.yaml @@ -0,0 +1,20 @@ +source: + source_data_type: "Gmail" + credentisla: "" + gmail.label: 'IMPORTANT' + +embedding: + embedding_model: "OpenAI" + api_key: "" + model_name: "text-embedding-ada-002" + +target: + target_database: "Pinecone" + pinecone_api_key: "" + index_name: "" + dimension: 1536 #[Optional] Only required if creating a new index + metric: "cosine" #[Optional] Only required if creating a new index + cloud: "aws" #[Optional] Only required if creating a new index + region: "us-east-1" #[Optional] Only required if creating a new index + + diff --git a/tests/test_source_mods.py b/tests/test_source_mods.py index 0b74b4c..58f066d 100644 --- a/tests/test_source_mods.py +++ b/tests/test_source_mods.py @@ -13,6 +13,7 @@ from vector_etl.source_mods.zoho_crm_loader import ZohoCrmSource from vector_etl.source_mods.zoho_desk_loader import ZohoDeskSource from vector_etl.source_mods.flutterwave_loader import FlutterWaveSource +from vector_etl.source_mods.gmail_loader import GmailSource @pytest.fixture def s3_config(): @@ -44,6 +45,16 @@ def airtable_config(): "tableIdOrName":"survey" } + +@pytest.fixture +def gmail_config(): + return { + 'credentials': 'credentials.json', ## path to gmail crendtials + 'gmail.label': 'IMPORTANT' # Specify the label in the config + } + + + @pytest.fixture def zohodesk_config(): @@ -203,17 +214,11 @@ def test_google_bigquery_connect(google_bigquery_config): def test_google_bigquery_fetch_data(google_bigquery_config): with patch('bigquery.connect') as mock_connect: mock_connect.result.to_dataframe.return_value = pd.DataFrame() - source = GoogleBigQuerySource(db_config) + source = GoogleBigQuerySource(google_bigquery_config) df = source.fetch_data() assert isinstance(df, pd.DataFrame) - - - - - - def test_airtable_connect(airtable_config): with patch('requests.get') as mock_connect: @@ -237,12 +242,24 @@ def test_airtable_fetch_data(airtable_config): } ] - source = AirTableSource(db_config) + source = AirTableSource(airtable_config) df = source.fetch_data() assert isinstance(df, pd.DataFrame) - + +def test_zohodesk_connect(zohodesk_config): + + with patch('requests.get') as mock_connect: + source = ZohoDeskSource(zohodesk_config) + source.connect() + mock_connect.assert_called_once_with( + grant_type="", + client_id = "", + client_secret="", + code="", + accounts_url="" + ) def test_zohodesk_fetch_data(zohodesk_config): @@ -254,12 +271,23 @@ def test_zohodesk_fetch_data(zohodesk_config): } ] - source = ZohoDeskSource(db_config) + source = ZohoDeskSource(zohodesk_config) df = 
source.fetch_data() assert isinstance(df, pd.DataFrame) - +def test_zohocrm_connect(zohocrm_config): + + with patch('requests.get') as mock_connect: + source = ZohoCrmSource(zohocrm_config) + source.connect() + mock_connect.assert_called_once_with( + grant_type="", + client_id = "", + client_secret="", + code="", + accounts_url="" + ) def test_zohocrm_fetch_data(zohocrm_config): @@ -267,42 +295,88 @@ def test_zohocrm_fetch_data(zohocrm_config): mock_connect.return_value = [ { } ] - source = ZohoCrmSource(db_config) + source = ZohoCrmSource(zohocrm_config) df = source.fetch_data() assert isinstance(df, pd.DataFrame) - +def test_paystack_connect(paystack_config): + + with patch('requests.get') as mock_connect: + source = PayStackSource(paystack_config) + source.connect() + mock_connect.assert_called_once_with( + paystack_secret_key="", + ) def test_paystack_fetch_data(paystack_config): with patch('Paystack') as mock_connect: mock_connect.return_value = [{}] - source = PayStackSource(db_config) + source = PayStackSource(paystack_config) df = source.fetch_data() assert isinstance(df, pd.DataFrame) +def test_intercom_connect(intercom_config): + + with patch('requests.get') as mock_connect: + source = InterComSource(intercom_config) + source.connect() + mock_connect.assert_called_once_with( + secret_key="", + ) def test_intercom_fetch_data(intercom_config): with patch('requests.get') as mock_connect: mock_connect.return_value = [{}] - source = InterComSource(db_config) + source = InterComSource(intercom_config) df = source.fetch_data() assert isinstance(df, pd.DataFrame) + + +def test_flutterwave_connect(flutterwave_config): + + with patch('requests.get') as mock_connect: + source = FlutterWaveSource(flutterwave_config) + source.connect() + mock_connect.assert_called_once_with( + secret_key="", + ) def test_flutterwave_fetch_data(flutterwave_config): with patch('requests.get') as mock_connect: mock_connect.return_value = [{}] - source = FlutterWaveSource(db_config) + source = FlutterWaveSource(flutterwave_config) + df = source.fetch_data() + + assert isinstance(df, pd.DataFrame) + + + + +def test_gmail_connect(gmail_config): + + with patch('InstalledAppFlow.from_client_secrets_file') as mock_connect: + source = GmailSource(gmail_config) + source.connect() + mock_connect.assert_called_once_with( + credentials="credential.json", + ) + + +def test_gmail_fetch_data(gmail_config): + with patch('requests.get') as mock_connect: + mock_connect.return_value = [{}] + source = GmailSource(gmail_config) df = source.fetch_data() assert isinstance(df, pd.DataFrame) From 1474ab3b28b173eeb53dacdc2960e585a9f8966a Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Tue, 3 Sep 2024 07:12:42 -0800 Subject: [PATCH 18/34] added mailchipsource --- examples/mailchimp_to_pinecone.yaml | 21 +++++ tests/test_source_mods.py | 32 +++++++ vector_etl/source_mods/__init__.py | 4 + vector_etl/source_mods/mailchimp_loader.py | 100 +++++++++++++++++++++ 4 files changed, 157 insertions(+) create mode 100644 examples/mailchimp_to_pinecone.yaml create mode 100644 vector_etl/source_mods/mailchimp_loader.py diff --git a/examples/mailchimp_to_pinecone.yaml b/examples/mailchimp_to_pinecone.yaml new file mode 100644 index 0000000..8fd7087 --- /dev/null +++ b/examples/mailchimp_to_pinecone.yaml @@ -0,0 +1,21 @@ +source: + source_data_type: "MailChimp" + api_key: "" + server_prefix: "us13" + records: "ConnectedSites" + +embedding: + embedding_model: "OpenAI" + api_key: "" + model_name: "text-embedding-ada-002" + +target: + 
target_database: "Pinecone"
+  pinecone_api_key: ""
+  index_name: ""
+  dimension: 1536 #[Optional] Only required if creating a new index
+  metric: "cosine" #[Optional] Only required if creating a new index
+  cloud: "aws" #[Optional] Only required if creating a new index
+  region: "us-east-1" #[Optional] Only required if creating a new index
+
+
diff --git a/tests/test_source_mods.py b/tests/test_source_mods.py
index 58f066d..d9eb027 100644
--- a/tests/test_source_mods.py
+++ b/tests/test_source_mods.py
@@ -14,6 +14,7 @@ from vector_etl.source_mods.zoho_desk_loader import ZohoDeskSource
 from vector_etl.source_mods.flutterwave_loader import FlutterWaveSource
 from vector_etl.source_mods.gmail_loader import GmailSource
+from vector_etl.source_mods.mailchimp_loader import MailChimpMarketingSource

 @pytest.fixture
 def s3_config():
@@ -138,6 +139,15 @@ def local_file_config():
         'chunk_overlap': 200
     }

+
+@pytest.fixture
+def mailchimp_config():
+    return {
+        'api_key': 'test_key',
+        'server_prefix': 'test_secret',  # the loader reads 'server_prefix', not 'server'
+        'records': 'campaign',           # must match a branch in fetch_data()
+    }
+
 def test_s3_source_connect(s3_config):
     with patch('boto3.client') as mock_client:
         source = S3Source(s3_config)
@@ -380,6 +390,28 @@ def test_gmail_fetch_data(gmail_config):
         df = source.fetch_data()

     assert isinstance(df, pd.DataFrame)
+
+
+def test_mailchimp_connect(mailchimp_config):
+    # Patch the importable module path; 'MailchimpMarketing...' is only the
+    # local import alias and is not a valid mock target.
+    with patch('mailchimp_marketing.Client.set_config') as mock_connect:
+        source = MailChimpMarketingSource(mailchimp_config)
+        source.connect()
+        mock_connect.assert_called_once_with({
+            "api_key": "test_key",
+            "server": "test_secret"
+        })
+
+
+def test_mailchimp_fetch_data(mailchimp_config):
+    with patch('mailchimp_marketing.Client.set_config') as mock_connect:
+        mock_connect.return_value = [{}]
+        source = MailChimpMarketingSource(mailchimp_config)
+        df = source.fetch_data()
+
+        assert isinstance(df, pd.DataFrame)

diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index c2cbf14..c6d3b7f 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -18,6 +18,8 @@ from .paystack_loader import PayStackSource
 from .flutterwave_loader import FlutterWaveSource
 from .gmail_loader import GmailSource
+from .mailchimp_loader import MailChimpMarketingSource
+
 def get_source_class(config):
     source_type = config['source_data_type']
     if source_type == 'Amazon S3':
@@ -56,5 +58,7 @@ def get_source_class(config):
         return FlutterWaveSource(config)
     elif source_type == "Gmail":
         return GmailSource(config)
+    elif source_type == "MailChimp":
+        return MailChimpMarketingSource(config)
     else:
         raise ValueError(f"Unsupported source type: {source_type}")

diff --git a/vector_etl/source_mods/mailchimp_loader.py b/vector_etl/source_mods/mailchimp_loader.py
new file mode 100644
index 0000000..4277c58
--- /dev/null
+++ b/vector_etl/source_mods/mailchimp_loader.py
@@ -0,0 +1,100 @@
+from base import BaseSource
+import mailchimp_marketing as MailchimpMarketing
+from mailchimp_marketing.api_client import ApiClientError
+import pandas as pd
+import logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+from pprint import pprint
+
+
+class MailChimpMarketingSource(BaseSource):
+    def __init__(self,config):
+        self.config = config
+        self.api_key = self.config['api_key']
+        self.server_prefix = self.config['server_prefix']
+
+
+    def flatten_dict(self, d, parent_key='', sep='_'):
+        items = []
+        for k, v in d.items():
+            new_key = f"{parent_key}{sep}{k}" if parent_key else k
+            if isinstance(v, dict):
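+                # Nested dicts recurse with the parent key as a prefix. Illustrative
+                # example (not from the patch): {"stats": {"opens": 2}} flattens to
+                # {"stats_opens": 2}, and {"tags": ["a", "b"]} becomes
+                # {"tags_0": "a", "tags_1": "b"}.
+                items.extend(self.flatten_dict(v, new_key, 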
sep=sep).items())
+            elif isinstance(v, list):
+                for i, item in enumerate(v):
+                    if isinstance(item, dict):
+                        items.extend(self.flatten_dict(item, f"{new_key}{sep}{i}", sep=sep).items())
+                    else:
+                        items.append((f"{new_key}{sep}{i}", item))
+            else:
+                items.append((new_key, v))
+        return dict(items)
+
+
+    def connect(self):
+        try:
+            client = MailchimpMarketing.Client()
+            client.set_config({
+                "api_key":self.api_key,
+                "server": self.server_prefix
+            })
+            return client
+        except ApiClientError as error:
+            print("Error: {}".format(error.text))
+
+
+    def fetch_data(self):
+        client = self.connect()
+        if self.config['records'] == "campaign":
+            response = client.campaigns.list()['campaigns']
+
+        elif self.config['records'] == "campaignFolders":
+            response = client.campaignFolders.list()['folders']
+
+        elif self.config['records'] == "ConnectedSites":
+            response = client.connectedSites.list()['sites']
+
+        elif self.config['records'] == "conversations":
+            # These two branches previously had their clients crossed
+            # (conversations read from ecommerce and vice versa).
+            response = client.conversations.list()['conversations']
+
+        elif self.config['records'] == "ecommerce":
+            response = client.ecommerce.stores()['stores']
+
+        elif self.config['records'] == "facebookAds":
+            response = client.facebookAds.list()['facebook_ads']
+
+        elif self.config['records'] == "landingpages":
+            response = client.landingPages.get_all()['landing_pages']
+
+        elif self.config['records'] == "reports":
+            response = client.reports.get_all_campaign_reports()['reports']
+
+        else:
+            raise ValueError(f"Unsupported records type: {self.config['records']}")
+
+        try:
+            flattened_data = [self.flatten_dict(item) for item in response]
+            pprint(flattened_data,indent=4)
+
+            df = pd.DataFrame(flattened_data)
+
+            logger.info(f" data \n {df}")
+
+            return df
+        except ApiClientError as error:
+            logger.error(f"HTTP error occurred: {error.text}")

From d21089ad481b8f20167fcf703b6b1496f6b8ea0b Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Fri, 6 Sep 2024 17:33:02 -0800
Subject: [PATCH 19/34] update requirements.txt file

---
 .gitignore                                 |   1 +
 requirements.txt                           | 280 +++++++++++++++++----
 vector_etl/source_mods/mailchimp_loader.py |   2 +-
 3 files changed, 239 insertions(+), 44 deletions(-)

diff --git a/.gitignore b/.gitignore
index 60993bb..9d5a561 100644
--- a/.gitignore
+++ b/.gitignore
@@ -176,6 +176,7 @@ vector_etl/tempfile_downloads/
 *_bkp.py
 vector_etl/source_mods/backup/
 vector_etl/target_mods/backup/
+vector_etl/source_mods/bitbucket_loader.py

 # Additional files
 .DS_Store

diff --git a/requirements.txt b/requirements.txt
index 6baad92..4ff3cc3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,43 +1,237 @@
-boto3
-botocore
-cohere
-cffi
-openai
-psycopg2-binary
-pinecone-client
-requests
-tiktoken
-python-dotenv
-pydantic
-mysql-connector-python
-pymysql
-pandas
-qdrant-client
-singlestoredb
-weaviate-client
-azure-storage-blob
-google-cloud-storage
-snowflake-connector-python
-stripe
-vecs
-simple-salesforce
-google-generativeai
-anthropic
-pympler
-unstructured[all-docs]
-dropbox
-zenpy
-lancedb
-pyyaml
-google-auth
-google-auth-oauthlib
-google-auth-httplib2
-google-api-python-client
-unstructured-client
-box-sdk-gen
-pymongo
-neo4j
-python-magic
-pytest
-nltk
-pymilvus
+annotated-types==0.7.0
+anthropic==0.32.0
+antlr4-python3-runtime==4.9.3
+anyio==4.4.0
+asn1crypto==1.5.1
+attrs==24.2.0
+Authlib==1.3.1
+azure-core==1.30.2
+azure-storage-blob==12.22.0
+backoff==2.2.1
+backports.zoneinfo==0.2.1
+beautifulsoup4==4.12.3
+boto3==1.34.156
+botocore==1.34.156
+box-sdk-gen==1.2.0
+build==1.2.1
+cachetools==5.4.0
+cassandra-driver==3.29.1
+certifi==2024.7.4
+cffi==1.17.0
+chardet==5.2.0
+charset-normalizer==3.3.2 +click==8.1.7 +cohere==5.6.2 +colorama==0.4.6 +coloredlogs==15.0.1 +confuse==2.0.1 +contourpy==1.1.1 +cryptography==42.0.8 +cycler==0.12.1 +dataclasses-json==0.6.7 +db-dtypes==1.2.0 +decorator==5.1.1 +deepdiff==7.0.1 +Deprecated==1.2.14 +deprecation==2.1.0 +distro==1.9.0 +dnspython==2.6.1 +dropbox==12.0.2 +effdet==0.4.1 +emoji==2.12.1 +enum-compat==0.0.3 +et-xmlfile==1.1.0 +exceptiongroup==1.2.2 +fastavro==1.9.5 +filelock==3.15.4 +filetype==1.2.0 +flatbuffers==24.3.25 +flupy==1.2.0 +fonttools==4.53.1 +freshbooks-sdk==1.2.1 +fsspec==2024.6.1 +geomet==0.2.1.post1 +google-ai-generativelanguage==0.1.0 +google-api-core==2.19.1 +google-api-python-client==2.140.0 +google-auth==2.33.0 +google-auth-httplib2==0.2.0 +google-auth-oauthlib==1.2.1 +google-cloud-bigquery==3.25.0 +google-cloud-core==2.4.1 +google-cloud-storage==2.18.1 +google-crc32c==1.5.0 +google-generativeai==0.1.0rc1 +google-resumable-media==2.7.1 +googleapis-common-protos==1.63.2 +greenlet==3.0.3 +grpcio==1.65.4 +grpcio-health-checking==1.62.3 +grpcio-status==1.62.3 +grpcio-tools==1.62.3 +h11==0.14.0 +h2==4.1.0 +hpack==4.0.0 +httpcore==1.0.5 +httplib2==0.22.0 +httpx==0.27.0 +httpx-sse==0.4.0 +huggingface-hub==0.24.5 +humanfriendly==10.0 +hyperframe==6.0.1 +idna==3.7 +importlib_metadata==8.2.0 +importlib_resources==6.4.0 +iniconfig==2.0.0 +intuit-oauth==1.2.6 +iopath==0.1.10 +isodate==0.6.1 +Jinja2==3.1.4 +jiter==0.5.0 +jmespath==1.0.1 +joblib==1.4.2 +jsonpath-python==1.0.6 +kiwisolver==1.4.5 +lancedb==0.6.13 +langdetect==1.0.9 +layoutparser==0.3.4 +lxml==5.2.2 +mailchimp-marketing==3.0.80 +Markdown==3.6 +MarkupSafe==2.1.5 +marshmallow==3.21.2 +matplotlib==3.7.5 +more-itertools==10.4.0 +mpmath==1.3.0 +msg-parser==1.2.0 +mypy-extensions==1.0.0 +mysql-connector==2.2.9 +mysql-connector-python==9.0.0 +neo4j==5.23.1 +nest-asyncio==1.6.0 +networkx==3.1 +nltk==3.8.1 +numpy==1.24.4 +oauthlib==3.2.2 +olefile==0.47 +omegaconf==2.3.0 +onnx==1.16.2 +onnxruntime==1.15.1 +openai==1.40.1 +opencv-python==4.10.0.84 +openpyxl==3.1.5 +ordered-set==4.1.0 +overrides==7.7.0 +packaging==24.1 +pandas==2.0.3 +parameterized==0.9.0 +parsimonious==0.10.0 +paystackapi==2.1.3 +pdf2image==1.17.0 +pdfminer.six==20231228 +pdfplumber==0.11.3 +pgvector==0.1.8 +pikepdf==9.1.0 +pillow==10.4.0 +pinecone-client==5.0.1 +pinecone-plugin-inference==1.0.3 +pinecone-plugin-interface==0.0.7 +platformdirs==4.2.2 +pluggy==1.5.0 +ply==3.11 +portalocker==2.10.1 +proto-plus==1.24.0 +protobuf==4.25.4 +psycopg2-binary==2.9.9 +py==1.11.0 +pyarrow==15.0.0 +pyasn1==0.6.0 +pyasn1_modules==0.4.0 +pycocotools==2.0.7 +pycparser==2.22 +pycryptodome==3.20.0 +pydantic==2.8.2 +pydantic_core==2.20.1 +PyJWT==2.9.0 +pylance==0.10.12 +pymongo==4.8.0 +Pympler==1.1 +PyMySQL==1.1.1 +pyOpenSSL==24.2.1 +pypandoc==1.13 +pyparsing==3.1.2 +pypdf==4.3.1 +pypdfium2==4.30.0 +pyproject_hooks==1.1.0 +pyreadline3==3.4.1 +pytesseract==0.3.10 +pytest==8.3.2 +python-dateutil==2.9.0.post0 +python-docx==1.1.2 +python-dotenv==1.0.1 +python-iso639==2024.4.27 +python-magic==0.4.27 +python-multipart==0.0.9 +python-paypal-api==0.1.2 +python-pptx==0.6.23 +pytz==2024.1 +pywin32==306 +PyYAML==6.0.2 +qdrant-client==1.10.1 +rapidfuzz==3.9.6 +ratelimiter==1.2.0.post0 +rave-python==1.4.0 +regex==2024.7.24 +requests==2.32.3 +requests-file==2.1.0 +requests-oauthlib==2.0.0 +requests-toolbelt==1.0.0 +retry==0.9.2 +rsa==4.9 +s3transfer==0.10.2 +safetensors==0.4.4 +scipy==1.10.1 +semver==3.0.2 +simple-salesforce==1.12.6 +singlestoredb==1.6.2 +six==1.16.0 +sniffio==1.3.1 +snowflake-connector-python==3.12.0 
+sortedcontainers==2.4.0 +soupsieve==2.5 +SQLAlchemy==2.0.32 +sqlparams==6.0.1 +stone==3.3.1 +stripe==10.6.0 +sympy==1.13.1 +tabulate==0.9.0 +tiktoken==0.7.0 +timm==1.0.8 +tokenizers==0.19.1 +tomli==2.0.1 +tomlkit==0.13.0 +torch==2.4.0 +torchvision==0.19.0 +tqdm==4.66.5 +transformers==4.44.0 +types-requests==2.31.0.6 +types-urllib3==1.26.25.14 +typing-inspect==0.9.0 +typing_extensions==4.12.2 +tzdata==2024.1 +unstructured==0.11.8 +unstructured-client==0.25.4 +unstructured-inference==0.7.18 +unstructured.pytesseract==0.3.12 +uritemplate==4.1.1 +urllib3==1.26.19 +validators==0.33.0 +vecs==0.4.4 +weaviate-client==4.7.1 +wrapt==1.16.0 +xlrd==2.0.1 +XlsxWriter==3.2.0 +zeep==4.2.1 +zenpy==2.0.49 +zipp==3.19.2 +zohocrmsdk7_0==2.0.0 diff --git a/vector_etl/source_mods/mailchimp_loader.py b/vector_etl/source_mods/mailchimp_loader.py index 4277c58..d77a49d 100644 --- a/vector_etl/source_mods/mailchimp_loader.py +++ b/vector_etl/source_mods/mailchimp_loader.py @@ -1,4 +1,4 @@ -from base import BaseSource +from .base import BaseSource import mailchimp_marketing as MailchimpMarketing from mailchimp_marketing.api_client import ApiClientError import pandas as pd From bf890c2e9a45c7b2050e3a25eeddd3469bb2efba Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Fri, 6 Sep 2024 17:55:24 -0800 Subject: [PATCH 20/34] updates airtable loader --- vector_etl/source_mods/airtable_loader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vector_etl/source_mods/airtable_loader.py b/vector_etl/source_mods/airtable_loader.py index 0e14b58..35889c1 100644 --- a/vector_etl/source_mods/airtable_loader.py +++ b/vector_etl/source_mods/airtable_loader.py @@ -1,6 +1,5 @@ import requests from .base import BaseSource -from pprint import pprint import pandas as pd import logging From fb45322ba6ba167de6b6ea8f92180d5e6b02a48c Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Fri, 6 Sep 2024 18:01:21 -0800 Subject: [PATCH 21/34] updated bigquery --- vector_etl/source_mods/google_bigquery.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vector_etl/source_mods/google_bigquery.py b/vector_etl/source_mods/google_bigquery.py index 51e979d..c82a5a2 100644 --- a/vector_etl/source_mods/google_bigquery.py +++ b/vector_etl/source_mods/google_bigquery.py @@ -1,6 +1,6 @@ import os from google.cloud import bigquery -from base import BaseSource +from .base import BaseSource import logging logging.basicConfig(level=logging.INFO) From 2d4b77521d92352aca595f42acc0c9bc40f19c1e Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Sat, 7 Sep 2024 07:18:22 -0800 Subject: [PATCH 22/34] update source config file --- examples/airtable_to_pincone.yaml | 2 ++ examples/flutterwave_to_pinecone.yaml | 2 ++ examples/gmail_to_pinecone.yaml | 2 ++ examples/google_bigquery_to_pincone.yaml | 1 + examples/hubspot_to_pinecone.yaml | 3 ++- examples/intercom_to_pinecone.yaml | 3 +++ examples/mailchimp_to_pinecone.yaml | 2 ++ examples/paystack_to_pincone.yaml | 2 ++ examples/zohocrm_to_pinecone.yaml | 2 ++ examples/zohodesk_to_pinecone.yaml | 4 +++- vector_etl/source_mods/__init__.py | 2 +- vector_etl/source_mods/airtable_loader.py | 18 +++++++++++++++++- vector_etl/source_mods/paystack_loader.py | 1 - 13 files changed, 39 insertions(+), 5 deletions(-) diff --git a/examples/airtable_to_pincone.yaml b/examples/airtable_to_pincone.yaml index 5e95087..d28a5ae 100644 --- a/examples/airtable_to_pincone.yaml +++ b/examples/airtable_to_pincone.yaml @@ -18,4 +18,6 @@ target: cloud: "aws" #[Optional] Only required if creating a new index region: 
"us-east-1" #[Optional] Only required if creating a new index +embed_columns: [] + diff --git a/examples/flutterwave_to_pinecone.yaml b/examples/flutterwave_to_pinecone.yaml index 8e93e83..54fae51 100644 --- a/examples/flutterwave_to_pinecone.yaml +++ b/examples/flutterwave_to_pinecone.yaml @@ -17,4 +17,6 @@ target: cloud: "aws" #[Optional] Only required if creating a new index region: "us-east-1" #[Optional] Only required if creating a new index +embed_columns: [] + diff --git a/examples/gmail_to_pinecone.yaml b/examples/gmail_to_pinecone.yaml index 794db28..e342151 100644 --- a/examples/gmail_to_pinecone.yaml +++ b/examples/gmail_to_pinecone.yaml @@ -17,4 +17,6 @@ target: cloud: "aws" #[Optional] Only required if creating a new index region: "us-east-1" #[Optional] Only required if creating a new index +embed_columns: [] + diff --git a/examples/google_bigquery_to_pincone.yaml b/examples/google_bigquery_to_pincone.yaml index 3075154..a6df386 100644 --- a/examples/google_bigquery_to_pincone.yaml +++ b/examples/google_bigquery_to_pincone.yaml @@ -17,4 +17,5 @@ target: cloud: "aws" #[Optional] Only required if creating a new index region: "us-east-1" #[Optional] Only required if creating a new index +embed_columns: [] diff --git a/examples/hubspot_to_pinecone.yaml b/examples/hubspot_to_pinecone.yaml index aaaff75..5a0f4b1 100644 --- a/examples/hubspot_to_pinecone.yaml +++ b/examples/hubspot_to_pinecone.yaml @@ -2,7 +2,6 @@ source: source_data_type: "HubSpot" archive: "" limit: "" - client_secret: "" access_token: "" crm_object: "" @@ -20,4 +19,6 @@ target: cloud: "aws" #[Optional] Only required if creating a new index region: "us-east-1" #[Optional] Only required if creating a new index +embed_columns: [] + diff --git a/examples/intercom_to_pinecone.yaml b/examples/intercom_to_pinecone.yaml index 55195d6..bc16438 100644 --- a/examples/intercom_to_pinecone.yaml +++ b/examples/intercom_to_pinecone.yaml @@ -18,3 +18,6 @@ target: region: "us-east-1" #[Optional] Only required if creating a new index +embed_columns: [] + + diff --git a/examples/mailchimp_to_pinecone.yaml b/examples/mailchimp_to_pinecone.yaml index 8fd7087..e44d0e7 100644 --- a/examples/mailchimp_to_pinecone.yaml +++ b/examples/mailchimp_to_pinecone.yaml @@ -18,4 +18,6 @@ target: cloud: "aws" #[Optional] Only required if creating a new index region: "us-east-1" #[Optional] Only required if creating a new index +embed_columns: [] + diff --git a/examples/paystack_to_pincone.yaml b/examples/paystack_to_pincone.yaml index b301a73..99adffa 100644 --- a/examples/paystack_to_pincone.yaml +++ b/examples/paystack_to_pincone.yaml @@ -17,4 +17,6 @@ target: cloud: "aws" #[Optional] Only required if creating a new index region: "us-east-1" #[Optional] Only required if creating a new index +embed_columns: [] + diff --git a/examples/zohocrm_to_pinecone.yaml b/examples/zohocrm_to_pinecone.yaml index 8d55cb1..7753a5d 100644 --- a/examples/zohocrm_to_pinecone.yaml +++ b/examples/zohocrm_to_pinecone.yaml @@ -22,4 +22,6 @@ target: cloud: "aws" #[Optional] Only required if creating a new index region: "us-east-1" #[Optional] Only required if creating a new index +embed_columns: [] + diff --git a/examples/zohodesk_to_pinecone.yaml b/examples/zohodesk_to_pinecone.yaml index 8d408b3..7a929ed 100644 --- a/examples/zohodesk_to_pinecone.yaml +++ b/examples/zohodesk_to_pinecone.yaml @@ -4,7 +4,7 @@ source: client_id: "" client_secret: "" code: "" - per_page: "" + limit: "" records: "" accounts_url: "" @@ -22,4 +22,6 @@ target: cloud: "aws" #[Optional] Only 
required if creating a new index
   region: "us-east-1" #[Optional] Only required if creating a new index

+embed_columns: []
+

diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index c6d3b7f..63c0097 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -52,7 +52,7 @@ def get_source_class(config):
         return ZohoDeskSource(config)
     elif source_type == "InterCom":
         return InterComSource(config)
-    elif source_type == 'PayStackS':
+    elif source_type == 'PayStacks':
         return PayStackSource(config)
     elif source_type == "FlutterWave":
         return FlutterWaveSource(config)

diff --git a/vector_etl/source_mods/airtable_loader.py b/vector_etl/source_mods/airtable_loader.py
index 35889c1..951aa92 100644
--- a/vector_etl/source_mods/airtable_loader.py
+++ b/vector_etl/source_mods/airtable_loader.py
@@ -1,5 +1,5 @@
 import requests
-from .base import BaseSource
+from base import BaseSource
 import pandas as pd
 import logging
@@ -31,7 +31,23 @@ def fetch_data(self):
         airtable_df = pd.DataFrame(df_data)

         return airtable_df
+
+
+config = {
+    "auth_token": "***REDACTED***",
+    "baseId": "appjx8zUtVJcjvxys",
+    "tableIdOrName": "Sales"
+}
+
+
+table = AirTableSource(config)
+
+
+data = table.fetch_data()
+print(data)

diff --git a/vector_etl/source_mods/paystack_loader.py b/vector_etl/source_mods/paystack_loader.py
index 0fd7b8a..b27c8a4 100644
--- a/vector_etl/source_mods/paystack_loader.py
+++ b/vector_etl/source_mods/paystack_loader.py
@@ -1,7 +1,6 @@
 from .base import BaseSource
 import logging
 import pandas as pd
-
 from paystackapi.paystack import Paystack

From 2ceefa84cbe3e86907b5687ddf9d61f83dd4bba6 Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Sat, 7 Sep 2024 07:35:14 -0800
Subject: [PATCH 23/34] update source config file

---
 vector_etl/source_mods/airtable_loader.py | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/vector_etl/source_mods/airtable_loader.py b/vector_etl/source_mods/airtable_loader.py
index 951aa92..35889c1 100644
--- a/vector_etl/source_mods/airtable_loader.py
+++ b/vector_etl/source_mods/airtable_loader.py
@@ -1,5 +1,5 @@
 import requests
-from base import BaseSource
+from .base import BaseSource
 import pandas as pd
 import logging
@@ -31,23 +31,7 @@ def fetch_data(self):
         airtable_df = pd.DataFrame(df_data)

         return airtable_df
-
-
-config = {
-    "auth_token": "***REDACTED***",
-    "baseId": "appjx8zUtVJcjvxys",
-    "tableIdOrName": "Sales"
-}
-
-
-table = AirTableSource(config)
-
-
-data = table.fetch_data()
-print(data)

From a591af41691dd2a438d3218c725e10f744d9f285 Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Sat, 7 Sep 2024 08:10:33 -0800
Subject: [PATCH 24/34] update setup

---
 examples/zohodesk_to_pinecone.yaml         | 1 -
 setup.py                                   | 3 +++
 vector_etl/source_mods/zoho_desk_loader.py | 6 +++---
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/examples/zohodesk_to_pinecone.yaml b/examples/zohodesk_to_pinecone.yaml
index 7a929ed..6059df7 100644
--- a/examples/zohodesk_to_pinecone.yaml
+++ b/examples/zohodesk_to_pinecone.yaml
@@ -4,7 +4,7 @@ source:
   client_id: ""
   client_secret: ""
   code: ""
-  limit: ""
   records: ""
   accounts_url: ""

diff --git a/setup.py b/setup.py
index 32e3a6b..d34fecf 100644
--- a/setup.py
+++ b/setup.py
@@ -69,6 +69,9 @@
         "pytest",
         "nltk",
         "pymilvus",
+        "zohocrmsdk7_0",
+        "paystackapi",
+        "mailchimp-marketing"
     ],
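A possible refinement, not part of this patch: the connector SDKs added above
could ship as optional extras rather than core requirements, e.g.

    extras_require={
        "zoho": ["zohocrmsdk7_0"],
        "paystack": ["paystackapi"],
        "mailchimp": ["mailchimp-marketing"],
    },

so that `pip install <package>[mailchimp]` pulls in only the SDK a given
pipeline needs. The extras names here are illustrative.

    entry_points={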
"console_scripts": [ diff --git a/vector_etl/source_mods/zoho_desk_loader.py b/vector_etl/source_mods/zoho_desk_loader.py index 9a083b8..df073af 100644 --- a/vector_etl/source_mods/zoho_desk_loader.py +++ b/vector_etl/source_mods/zoho_desk_loader.py @@ -68,7 +68,7 @@ def fetch_data(self): self.token = self.connect() if self.config['records'] == 'desk.agents': logger.info("Agents \n") - self.url = f"https://desk.zoho.com/api/v1/agents?limit={self.config['limit']}" + self.url = f"https://desk.zoho.com/api/v1/agents" elif self.config['records'] == 'desk.team': @@ -92,12 +92,12 @@ def fetch_data(self): elif self.config['records'] == 'desk.ticket': logger.info("Ticket \n") self.url = f"""https://desk.zoho.com/api/v1/tickets?include=contacts, - assignee,departments,team,isRead&limit={self.config['limit']}""" + assignee,departments,team,isRead""" elif self.config['records'] == 'desk.contacts': logger.info("Contact \n") - self.url = f"https://desk.zoho.com/api/v1/contacts?limit={self.config['limit']}" + self.url = f"https://desk.zoho.com/api/v1/contacts" From 2ecdd07e7f60f074d360bb8364d0cb6067ff68be Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Sun, 8 Sep 2024 02:02:31 -0800 Subject: [PATCH 25/34] update hubspot source to raise valueerror is limit value is greater than 100 --- vector_etl/source_mods/hubspot_loader.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/vector_etl/source_mods/hubspot_loader.py b/vector_etl/source_mods/hubspot_loader.py index ca93421..d396fba 100644 --- a/vector_etl/source_mods/hubspot_loader.py +++ b/vector_etl/source_mods/hubspot_loader.py @@ -72,12 +72,17 @@ def fetch_data(self): raise ValueError(f"Unsupported Crm object type: check the object name {self.config['crm_object']}") - response = self.connect(self.endpoints)['results'] - results = [results['properties'] for results in response] - df = pd.DataFrame(results) - logger.info(f" data \n {df}") + response = self.connect(self.endpoints) + + if 'results' in response: + print(response) + results = [results['properties'] for results in response['results']] + df = pd.DataFrame(results) + logger.info(f" data \n {df}") + return df + else: + raise ValueError(response['message']) - return df From 7104583154eac1015d4812b8f070535805ef0a7b Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Sun, 8 Sep 2024 17:44:18 -0800 Subject: [PATCH 26/34] added DigitalOcean Source --- examples/digital_ocean_space_to_pincone.yaml | 25 +++++++ .../digital_ocean_spaces_loader.py | 72 +++++++++++++++++++ vector_etl/source_mods/gmail_loader.py | 2 +- 3 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 examples/digital_ocean_space_to_pincone.yaml create mode 100644 vector_etl/source_mods/digital_ocean_spaces_loader.py diff --git a/examples/digital_ocean_space_to_pincone.yaml b/examples/digital_ocean_space_to_pincone.yaml new file mode 100644 index 0000000..fd2c771 --- /dev/null +++ b/examples/digital_ocean_space_to_pincone.yaml @@ -0,0 +1,25 @@ +source: + source_data_type: "DigitalOcean" + bucket_name: "scrap-data" + prefix: "latestArticles_Monday-26-August-2024" + region_name: 'https://nyc3.digitaloceanspaces.com' + endpoint_url: 'nyc3' + file_type: "csv" #required if prefix is a directory: Will retrieve all files with filetype + aws_access_key_id: "your-access-key" + aws_secret_access_key: "your-secret-access-key" + +embedding: + embedding_model: "OpenAI" + api_key: "your-openai-api-key" + model_name: "text-embedding-ada-002" + +target: + target_database: "Pinecone" + 
+  pinecone_api_key: "your-pinecone-api-key"
+  index_name: "my-index"
+  dimension: 1536 #[Optional] Only required if creating a new index
+  metric: "cosine" #[Optional] Only required if creating a new index
+  cloud: "aws" #[Optional] Only required if creating a new index
+  region: "us-east-1" #[Optional] Only required if creating a new index
+
+embed_columns: [] #Empty Array: File based sources do not require embedding columns
\ No newline at end of file

diff --git a/vector_etl/source_mods/digital_ocean_spaces_loader.py b/vector_etl/source_mods/digital_ocean_spaces_loader.py
new file mode 100644
index 0000000..0b62efb
--- /dev/null
+++ b/vector_etl/source_mods/digital_ocean_spaces_loader.py
@@ -0,0 +1,72 @@
+import boto3
+import logging
+import os
+from .file_loader import FileBaseSource
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+class DigitalOceanSpaceSource(FileBaseSource):
+    def __init__(self, config):
+        super().__init__(config)
+        self.s3_client = None
+        self.bucket_name = config['bucket_name']
+        self.prefix = config.get('prefix', '')
+        self.file_type = config.get('file_type', '')
+
+    def connect(self):
+        logger.info("Connecting to DigitalOcean Space...")
+        self.s3_client = boto3.client(
+            's3',
+            region_name=self.config['region_name'],
+            endpoint_url=self.config['endpoint_url'],
+            aws_access_key_id=self.config['aws_access_key_id'],
+            aws_secret_access_key=self.config['aws_secret_access_key']
+        )
+        logger.info("Connected to DigitalOcean Space successfully.")
+
+    def list_files(self):
+        if not self.s3_client:
+            self.connect()
+
+        paginator = self.s3_client.get_paginator('list_objects_v2')
+        files = []
+        for page in paginator.paginate(Bucket=self.bucket_name, Prefix=self.prefix):
+            for obj in page.get('Contents', []):
+                if obj['Key'].endswith(self.file_type):
+                    files.append(obj['Key'])
+
+        return files
+
+    def read_file(self, file_path):
+        downloaded_files = []
+
+        local_file_path = os.path.join(os.getcwd(), file_path.split('/')[-1])
+        self.s3_client.download_file(self.bucket_name, file_path, local_file_path)
+        downloaded_files.append(file_path)
+        logger.info(f"Downloaded {file_path} to {os.getcwd()}")
+
+        return downloaded_files
+
+    def download_file(self, file_path):
+        if not self.s3_client:
+            self.connect()
+
+        download_folder = 'tempfile_downloads'
+        if not os.path.exists(download_folder):
+            os.makedirs(download_folder)
+
+        logger.info("Downloading files from DigitalOcean Space...")
+
+        local_file_path = os.path.join(download_folder, file_path.split('/')[-1])
+        self.s3_client.download_file(self.bucket_name, file_path, local_file_path)
+        logger.info(f"Downloaded {file_path} to {os.getcwd()}")
+
+    def delete_directory(self, path):
+        for root, dirs, files in os.walk(path, topdown=False):
+            for file in files:
+                os.remove(os.path.join(root, file))
+            for dir in dirs:
+                os.rmdir(os.path.join(root, dir))
+        os.rmdir(path)

diff --git a/vector_etl/source_mods/gmail_loader.py b/vector_etl/source_mods/gmail_loader.py
index 9858bd0..1eaedbe 100644
--- a/vector_etl/source_mods/gmail_loader.py
+++ b/vector_etl/source_mods/gmail_loader.py
@@ -71,7 +71,7 @@ def parse_messages(self, service, messages, label):
             msg = service.users().messages().get(userId="me", id=message["id"]).execute()
             headers = msg["payload"]["headers"]

-            subject, sender = self._get_header_info(headers)
+            subject, sender = self.get_header_info(headers)
             snippet = msg.get("snippet", "")
             body = self.get_body(msg)
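A minimal usage sketch for the new source, assuming it is instantiated directly
(bucket, prefix, and credentials below are placeholders):

    from vector_etl.source_mods.digital_ocean_spaces_loader import DigitalOceanSpaceSource

    config = {
        "source_data_type": "DigitalOcean",
        "bucket_name": "my-space",
        "prefix": "exports/",
        "file_type": "csv",
        "region_name": "nyc3",                                 # Spaces region slug
        "endpoint_url": "https://nyc3.digitaloceanspaces.com", # full endpoint URL
        "aws_access_key_id": "<spaces-access-key>",
        "aws_secret_access_key": "<spaces-secret-key>",
    }

    source = DigitalOceanSpaceSource(config)
    source.connect()
    for key in source.list_files():    # keys under the prefix ending in "csv"
        source.download_file(key)      # saved into ./tempfile_downloads/

The next patch wires "DigitalOcean" into get_source_class so the pipeline can
construct this source from YAML.

From 8354c17f30a32ad2f6d962616b588c04edbf565a Mon Sep 17 00:00:00 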
2001 From: owolabi-develop Date: Sun, 8 Sep 2024 18:01:29 -0800 Subject: [PATCH 27/34] added digitalOcean Source Type --- vector_etl/source_mods/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py index 63c0097..9b808d0 100644 --- a/vector_etl/source_mods/__init__.py +++ b/vector_etl/source_mods/__init__.py @@ -19,7 +19,7 @@ from .flutterwave_loader import FlutterWaveSource from .gmail_loader import GmailSource from .mailchimp_loader import MailChimpMarketingSource - +from .digital_ocean_spaces_loader import DigitalOceanSpaceSource def get_source_class(config): source_type = config['source_data_type'] if source_type == 'Amazon S3': @@ -60,5 +60,7 @@ def get_source_class(config): return GmailSource(config) elif source_type == "MailChimp": return MailChimpMarketingSource(config) + elif source_type == "DigitalOcean": + DigitalOceanSpaceSource(config) else: raise ValueError(f"Unsupported source type: {source_type}") From 81728af63a5b6d14de290288f711f38babf40041 Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Sun, 8 Sep 2024 18:52:59 -0800 Subject: [PATCH 28/34] updated digitalOcean source --- vector_etl/source_mods/digital_ocean_spaces_loader.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vector_etl/source_mods/digital_ocean_spaces_loader.py b/vector_etl/source_mods/digital_ocean_spaces_loader.py index 0b62efb..4c5a0af 100644 --- a/vector_etl/source_mods/digital_ocean_spaces_loader.py +++ b/vector_etl/source_mods/digital_ocean_spaces_loader.py @@ -49,6 +49,7 @@ def read_file(self, file_path): return downloaded_files + def download_file(self, file_path): if not self.s3_client: self.connect() @@ -63,10 +64,12 @@ def download_file(self, file_path): self.s3_client.download_file(self.bucket_name, file_path, local_file_path) logger.info(f"Downloaded {file_path} to {os.getcwd()}") + def delete_directory(self, path): + for root, dirs, files in os.walk(path, topdown=False): for file in files: os.remove(os.path.join(root, file)) for dir in dirs: os.rmdir(os.path.join(root, dir)) - os.rmdir(path) + os.rmdir(path) \ No newline at end of file From 978ade356a3071a92819f02a9fb7fdf09ab5ad10 Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Sun, 8 Sep 2024 18:58:03 -0800 Subject: [PATCH 29/34] updated digitalOcean source --- vector_etl/source_mods/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py index 9b808d0..c4687c4 100644 --- a/vector_etl/source_mods/__init__.py +++ b/vector_etl/source_mods/__init__.py @@ -20,6 +20,7 @@ from .gmail_loader import GmailSource from .mailchimp_loader import MailChimpMarketingSource from .digital_ocean_spaces_loader import DigitalOceanSpaceSource + def get_source_class(config): source_type = config['source_data_type'] if source_type == 'Amazon S3': @@ -61,6 +62,6 @@ def get_source_class(config): elif source_type == "MailChimp": return MailChimpMarketingSource(config) elif source_type == "DigitalOcean": - DigitalOceanSpaceSource(config) + return DigitalOceanSpaceSource(config) else: raise ValueError(f"Unsupported source type: {source_type}") From 4e754e9d5aad7449e54495ba53e75f39cac86efa Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Fri, 13 Sep 2024 14:45:46 -0800 Subject: [PATCH 30/34] remove secrete --- examples/airtable_to_pincone.yaml | 8 +- examples/digital_ocean_space_to_pincone.yaml | 19 +-- examples/flutterwave_to_pinecone.yaml | 22 
---
 examples/gmail_to_pinecone.yaml              |  22 ---
 examples/hubspot_to_pinecone.yaml            |   8 +-
 examples/intercom_to_pinecone.yaml           |   2 +-
 examples/mailchimp_to_pinecone.yaml          |  23 ---
 examples/paystack_to_pincone.yaml            |  22 ---
 examples/zohocrm_to_pinecone.yaml            |  27 ----
 examples/zohodesk_to_pinecone.yaml           |  26 ---
 setup.py                                     |   3 -
 vector_etl/source_mods/flutterwave_loader.py | 113 -------------
 vector_etl/source_mods/gmail_loader.py       | 102 ------------
 vector_etl/source_mods/google_bigquery.py    |  45 ------
 vector_etl/source_mods/mailchimp_loader.py   | 100 ------------
 vector_etl/source_mods/paystack_loader.py    | 108 -------------
 vector_etl/source_mods/zoho_crm_loader.py    | 162 -------------------
 vector_etl/source_mods/zoho_desk_loader.py   | 116 -------------
 18 files changed, 20 insertions(+), 908 deletions(-)
 delete mode 100644 examples/flutterwave_to_pinecone.yaml
 delete mode 100644 examples/gmail_to_pinecone.yaml
 delete mode 100644 examples/mailchimp_to_pinecone.yaml
 delete mode 100644 examples/paystack_to_pincone.yaml
 delete mode 100644 examples/zohocrm_to_pinecone.yaml
 delete mode 100644 examples/zohodesk_to_pinecone.yaml
 delete mode 100644 vector_etl/source_mods/flutterwave_loader.py
 delete mode 100644 vector_etl/source_mods/gmail_loader.py
 delete mode 100644 vector_etl/source_mods/google_bigquery.py
 delete mode 100644 vector_etl/source_mods/mailchimp_loader.py
 delete mode 100644 vector_etl/source_mods/paystack_loader.py
 delete mode 100644 vector_etl/source_mods/zoho_crm_loader.py
 delete mode 100644 vector_etl/source_mods/zoho_desk_loader.py

diff --git a/examples/airtable_to_pincone.yaml b/examples/airtable_to_pincone.yaml
index d28a5ae..32b0d38 100644
--- a/examples/airtable_to_pincone.yaml
+++ b/examples/airtable_to_pincone.yaml
@@ -1,7 +1,7 @@
 source:
   source_data_type: "AirTable"
   auth_token: ""
-  baseId: "sales"
+  baseId: ""
   tableIdOrName: ""

 embedding:
@@ -11,13 +11,13 @@ embedding:
 target:
   target_database: "Pinecone"
-  pinecone_api_key: ""
-  index_name: ""
+  pinecone_api_key: "***REDACTED***"
+  index_name: "context-data-etl-test"
   dimension: 1536 #[Optional] Only required if creating a new index
   metric: "cosine" #[Optional] Only required if creating a new index
   cloud: "aws" #[Optional] Only required if creating a new index
   region: "us-east-1" #[Optional] Only required if creating a new index

-embed_columns: []
+embed_columns: []

diff --git a/examples/digital_ocean_space_to_pincone.yaml b/examples/digital_ocean_space_to_pincone.yaml
index fd2c771..5efb031 100644
--- a/examples/digital_ocean_space_to_pincone.yaml
+++ b/examples/digital_ocean_space_to_pincone.yaml
@@ -1,12 +1,12 @@
 source:
   source_data_type: "DigitalOcean"
-  bucket_name: "scrap-data"
+  bucket_name: ""
-  prefix: "latestArticles_Monday-26-August-2024"
+  prefix: ""
-  region_name: 'nyc3'
+  region_name: ''
-  endpoint_url: 'https://nyc3.digitaloceanspaces.com'
+  endpoint_url: ''
   file_type: "csv" #required if prefix is a directory: Will retrieve all files with filetype
-  aws_access_key_id: "your-access-key"
+  aws_access_key_id: ""
-  aws_secret_access_key: "your-secret-access-key"
+  aws_secret_access_key: ""

 embedding:
   embedding_model: "OpenAI"
   api_key: "your-openai-api-key"
   model_name: "text-embedding-ada-002"

 target:
   target_database: "Pinecone"
-  pinecone_api_key: "your-pinecone-api-key"
+  pinecone_api_key: ""
-  index_name: "my-index"
+  index_name: ""
   dimension: 1536 #[Optional] Only required if creating a new index
   metric: "cosine" #[Optional] Only required if creating a new index
   cloud: "aws" #[Optional] Only required if creating a new index
   region: 
"us-east-1" #[Optional] Only required if creating a new index -embed_columns: [] #Empty Array: File based sources do not require embedding columns \ No newline at end of file + +embed_columns: [] \ No newline at end of file diff --git a/examples/flutterwave_to_pinecone.yaml b/examples/flutterwave_to_pinecone.yaml deleted file mode 100644 index 54fae51..0000000 --- a/examples/flutterwave_to_pinecone.yaml +++ /dev/null @@ -1,22 +0,0 @@ -source: - source_data_type: "FlutterWave" - secret_key: "" - records: "flutterwave.payout-subaccounts" - -embedding: - embedding_model: "OpenAI" - api_key: "" - model_name: "text-embedding-ada-002" - -target: - target_database: "Pinecone" - pinecone_api_key: "" - index_name: "" - dimension: 1536 #[Optional] Only required if creating a new index - metric: "cosine" #[Optional] Only required if creating a new index - cloud: "aws" #[Optional] Only required if creating a new index - region: "us-east-1" #[Optional] Only required if creating a new index - -embed_columns: [] - - diff --git a/examples/gmail_to_pinecone.yaml b/examples/gmail_to_pinecone.yaml deleted file mode 100644 index e342151..0000000 --- a/examples/gmail_to_pinecone.yaml +++ /dev/null @@ -1,22 +0,0 @@ -source: - source_data_type: "Gmail" - credentisla: "" - gmail.label: 'IMPORTANT' - -embedding: - embedding_model: "OpenAI" - api_key: "" - model_name: "text-embedding-ada-002" - -target: - target_database: "Pinecone" - pinecone_api_key: "" - index_name: "" - dimension: 1536 #[Optional] Only required if creating a new index - metric: "cosine" #[Optional] Only required if creating a new index - cloud: "aws" #[Optional] Only required if creating a new index - region: "us-east-1" #[Optional] Only required if creating a new index - -embed_columns: [] - - diff --git a/examples/hubspot_to_pinecone.yaml b/examples/hubspot_to_pinecone.yaml index 5a0f4b1..05f6836 100644 --- a/examples/hubspot_to_pinecone.yaml +++ b/examples/hubspot_to_pinecone.yaml @@ -1,9 +1,9 @@ source: source_data_type: "HubSpot" - archive: "" - limit: "" + archive: "false" + limit: "100" access_token: "" - crm_object: "" + crm_object: "crm.contacts" embedding: embedding_model: "OpenAI" @@ -19,6 +19,8 @@ target: cloud: "aws" #[Optional] Only required if creating a new index region: "us-east-1" #[Optional] Only required if creating a new index + embed_columns: [] + diff --git a/examples/intercom_to_pinecone.yaml b/examples/intercom_to_pinecone.yaml index bc16438..c7ed855 100644 --- a/examples/intercom_to_pinecone.yaml +++ b/examples/intercom_to_pinecone.yaml @@ -1,6 +1,6 @@ source: source_data_type: "InterCom" - token: "FlutterWave" + token: "" records: "intercom.teams" embedding: diff --git a/examples/mailchimp_to_pinecone.yaml b/examples/mailchimp_to_pinecone.yaml deleted file mode 100644 index e44d0e7..0000000 --- a/examples/mailchimp_to_pinecone.yaml +++ /dev/null @@ -1,23 +0,0 @@ -source: - source_data_type: "MailChimp" - api_key: "" - server_prefix: "us13" - records: "ConnectedSites" - -embedding: - embedding_model: "OpenAI" - api_key: "" - model_name: "text-embedding-ada-002" - -target: - target_database: "Pinecone" - pinecone_api_key: "" - index_name: "" - dimension: 1536 #[Optional] Only required if creating a new index - metric: "cosine" #[Optional] Only required if creating a new index - cloud: "aws" #[Optional] Only required if creating a new index - region: "us-east-1" #[Optional] Only required if creating a new index - -embed_columns: [] - - diff --git a/examples/paystack_to_pincone.yaml b/examples/paystack_to_pincone.yaml 
deleted file mode 100644 index 99adffa..0000000 --- a/examples/paystack_to_pincone.yaml +++ /dev/null @@ -1,22 +0,0 @@ -source: - source_data_type: "PayStackS" - paystack_secret_key: "" - records: "paystack.transactions" - -embedding: - embedding_model: "OpenAI" - api_key: "" - model_name: "text-embedding-ada-002" - -target: - target_database: "Pinecone" - pinecone_api_key: "" - index_name: "" - dimension: 1536 #[Optional] Only required if creating a new index - metric: "cosine" #[Optional] Only required if creating a new index - cloud: "aws" #[Optional] Only required if creating a new index - region: "us-east-1" #[Optional] Only required if creating a new index - -embed_columns: [] - - diff --git a/examples/zohocrm_to_pinecone.yaml b/examples/zohocrm_to_pinecone.yaml deleted file mode 100644 index 7753a5d..0000000 --- a/examples/zohocrm_to_pinecone.yaml +++ /dev/null @@ -1,27 +0,0 @@ -source: - source_data_type: "ZohoCrm" - grant_type: "" - client_id: "" - client_secret: "" - code: "" - per_page: "" - records: "" - accounts_url: "" - -embedding: - embedding_model: "OpenAI" - api_key: "" - model_name: "text-embedding-ada-002" - -target: - target_database: "Pinecone" - pinecone_api_key: "" - index_name: "" - dimension: 1536 #[Optional] Only required if creating a new index - metric: "cosine" #[Optional] Only required if creating a new index - cloud: "aws" #[Optional] Only required if creating a new index - region: "us-east-1" #[Optional] Only required if creating a new index - -embed_columns: [] - - diff --git a/examples/zohodesk_to_pinecone.yaml b/examples/zohodesk_to_pinecone.yaml deleted file mode 100644 index 6059df7..0000000 --- a/examples/zohodesk_to_pinecone.yaml +++ /dev/null @@ -1,26 +0,0 @@ -source: - source_data_type: "ZohoDesk" - grant_type: "" - client_id: "" - client_secret: "" - code: "" - records: "" - accounts_url: "" - -embedding: - embedding_model: "OpenAI" - api_key: "" - model_name: "text-embedding-ada-002" - -target: - target_database: "Pinecone" - pinecone_api_key: "" - index_name: "" - dimension: 1536 #[Optional] Only required if creating a new index - metric: "cosine" #[Optional] Only required if creating a new index - cloud: "aws" #[Optional] Only required if creating a new index - region: "us-east-1" #[Optional] Only required if creating a new index - -embed_columns: [] - - diff --git a/setup.py b/setup.py index d34fecf..32e3a6b 100644 --- a/setup.py +++ b/setup.py @@ -69,9 +69,6 @@ "pytest", "nltk", "pymilvus", - "zohocrmsdk7_0", - "paystackapi", - "mailchimp-marketing" ], entry_points={ "console_scripts": [ diff --git a/vector_etl/source_mods/flutterwave_loader.py b/vector_etl/source_mods/flutterwave_loader.py deleted file mode 100644 index d49f70a..0000000 --- a/vector_etl/source_mods/flutterwave_loader.py +++ /dev/null @@ -1,113 +0,0 @@ -from .base import BaseSource -import requests -from pprint import pprint -import logging -import pandas as pd - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -class FlutterWaveSource(BaseSource): - def __init__(self,config): - self.config = config - self.url = None - self.secret_key = self.config['secret_key'] - - - def flatten_dict(self, d, parent_key='', sep='_'): - items = [] - for k, v in d.items(): - new_key = f"{parent_key}{sep}{k}" if parent_key else k - if isinstance(v, dict): - items.extend(self.flatten_dict(v, new_key, sep=sep).items()) - elif isinstance(v, list): - for i, item in enumerate(v): - if isinstance(item, dict): - items.extend(self.flatten_dict(item, f"{new_key}{sep}{i}", 
sep=sep).items()) - else: - items.append((f"{new_key}{sep}{i}", item)) - else: - items.append((new_key, v)) - return dict(items) - - - def connect(self,url): - headers = {"Authorization": f"Bearer {self.secret_key}", - "Content-type":"application/json", - "Intercom-Version":"2.11"} - response = requests.get(url=url,headers=headers) - - return response - - - def fetch_data(self): - - if self.config['records'] == 'flutterwave.transfers': - logger.info(" Transfers \n") - self.url = f"https://api.flutterwave.com/v3/transfers" - - response = self.connect(self.url).json()['data'] - - elif self.config['records'] == 'flutterwave.transactions': - logger.info(" transactions \n") - self.url = f"https://api.flutterwave.com/v3/transactions" - - response = self.connect(self.url).json()['data'] - - - elif self.config['records'] == 'flutterwave.beneficiaries': - logger.info(" Transfers \n") - self.url = f"https://api.flutterwave.com/v3/beneficiaries" - - response = self.connect(self.url).json()['data'] - - - elif self.config['records'] == 'flutterwave.subaccounts': - logger.info(" subaccounts \n") - self.url = f"https://api.flutterwave.com/v3/subaccounts" - - response = self.connect(self.url).json()['data'] - - elif self.config['records'] == 'flutterwave.payout-subaccounts': - logger.info(" payout-subaccounts \n") - self.url = f"https://api.flutterwave.com/v3/payout-subaccounts" - - response = self.connect(self.url).json()['data'] - - elif self.config['records'] == 'flutterwave.subscriptions': - logger.info(" subscriptions \n") - self.url = f"https://api.flutterwave.com/v3/subscriptions" - - response = self.connect(self.url).json()['data'] - - - elif self.config['records'] == 'flutterwave.payment-plans': - logger.info(" payment-plans \n") - self.url = f"https://api.flutterwave.com/v3/payment-plans" - - response = self.connect(self.url).json()['data'] - - - - - - try: - flattened_data = [self.flatten_dict(item) for item in response] - pprint(flattened_data,indent=4) - - df = pd.DataFrame(flattened_data ) - - logger.info(f" data \n {df}") - - return df - except requests.exceptions.HTTPError as http_err: - logger.error(f"HTTP error occurred: {http_err}") - - - - - - - - - diff --git a/vector_etl/source_mods/gmail_loader.py b/vector_etl/source_mods/gmail_loader.py deleted file mode 100644 index 1eaedbe..0000000 --- a/vector_etl/source_mods/gmail_loader.py +++ /dev/null @@ -1,102 +0,0 @@ -import os.path -import base64 -from google.auth.transport.requests import Request -from google.oauth2.credentials import Credentials -from google_auth_oauthlib.flow import InstalledAppFlow -from googleapiclient.discovery import build -from googleapiclient.errors import HttpError -from pprint import pprint -from .base import BaseSource -import pandas as pd - -class GmailSource(BaseSource): - def __init__(self, config): - self.config = config - self.SCOPES = ["https://www.googleapis.com/auth/gmail.readonly"] - - def connect(self): - creds = None - if os.path.exists("token.json"): - creds = Credentials.from_authorized_user_file("token.json", self.SCOPES) - if not creds or not creds.valid: - if creds and creds.expired and creds.refresh_token: - creds.refresh(Request()) - else: - flow = InstalledAppFlow.from_client_secrets_file( - self.config['credentials'], self.SCOPES - ) - creds = flow.run_local_server(port=0) - with open("token.json", "w") as token: - token.write(creds.to_json()) - return creds - - def fetch_data(self): - creds = self.connect() - service = build("gmail", "v1", credentials=creds) - - # Extract the label from 
config - label = self.config.get('gmail.label').upper() - - messages = self.get_messages(service, label) - - if messages: - email_data = self.parse_messages(service, messages, label) - df = pd.DataFrame(email_data) - - return df - else: - print("No messages found.") - return None - - def get_messages(self, service, label): - try: - results = service.users().messages().list(userId="me", labelIds=[label]).execute() - return results.get("messages", []) - except HttpError as error: - print(f"An error occurred while fetching messages for label {label}: {error}") - return None - - def parse_messages(self, service, messages, label): - email_data = { - "id": [], - "threadId": [], - "label": [], - "subject": [], - "from": [], - "snippet": [], - "body": [], - } - - for message in messages: - msg = service.users().messages().get(userId="me", id=message["id"]).execute() - headers = msg["payload"]["headers"] - - subject, sender = self.get_header_info(headers) - snippet = msg.get("snippet", "") - body = self.get_body(msg) - - email_data["id"].append(message["id"]) - email_data["threadId"].append(message["threadId"]) - email_data["label"].append(label) - email_data["subject"].append(subject) - email_data["from"].append(sender) - email_data["snippet"].append(snippet) - email_data["body"].append(body) - - return email_data - - def get_header_info(self, headers): - subject = None - sender = None - for header in headers: - if header["name"] == "Subject": - subject = header["value"] - if header["name"] == "From": - sender = header["value"] - return subject, sender - - def get_body(self, msg): - if "data" in msg["payload"]["body"]: - return base64.urlsafe_b64decode(msg["payload"]["body"]["data"]).decode("utf-8") - else: - return "" diff --git a/vector_etl/source_mods/google_bigquery.py b/vector_etl/source_mods/google_bigquery.py deleted file mode 100644 index c82a5a2..0000000 --- a/vector_etl/source_mods/google_bigquery.py +++ /dev/null @@ -1,45 +0,0 @@ -import os -from google.cloud import bigquery -from .base import BaseSource -import logging - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class GoogleBigQuerySource(BaseSource): - def __init__(self,config): - self.config = config - self.client = None - self.connect() - - - def connect(self): - if self.config["db_type"] == 'google_bigquery': - os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = self.config['GOOGLE_APPLICATION_CREDENTIALS'] - self.client = bigquery.Client() - - def fetch_data(self): - if self.client: - try: - query_job = self.client.query(f"""{self.config.get("query"," ")}""") - if query_job: - dfrows = query_job.result().to_dataframe() - return dfrows - else: - logger.error(f"No data returned: {e}") - return None - except Exception as e: - logger.error(f"An error occurred: {e}") - return None - - - -config = {"query":"SELECT * FROM bigquery-public-data.america_health_rankings.ahr LIMIT 100", - "GOOGLE_APPLICATION_CREDENTIALS":"contextData_bigquery_cred.json", - "db_type":'google_bigquery' - } - - - - diff --git a/vector_etl/source_mods/mailchimp_loader.py b/vector_etl/source_mods/mailchimp_loader.py deleted file mode 100644 index d77a49d..0000000 --- a/vector_etl/source_mods/mailchimp_loader.py +++ /dev/null @@ -1,100 +0,0 @@ -from .base import BaseSource -import mailchimp_marketing as MailchimpMarketing -from mailchimp_marketing.api_client import ApiClientError -import pandas as pd -import logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) -from pprint import pprint - - - 
-class MailChimpMarketingSource(BaseSource): - def __init__(self,config): - self.config = config - self.api_key = self.config['api_key'] - self.server_prefix = self.config['server_prefix'] - - - def flatten_dict(self, d, parent_key='', sep='_'): - items = [] - for k, v in d.items(): - new_key = f"{parent_key}{sep}{k}" if parent_key else k - if isinstance(v, dict): - items.extend(self.flatten_dict(v, new_key, sep=sep).items()) - elif isinstance(v, list): - for i, item in enumerate(v): - if isinstance(item, dict): - items.extend(self.flatten_dict(item, f"{new_key}{sep}{i}", sep=sep).items()) - else: - items.append((f"{new_key}{sep}{i}", item)) - else: - items.append((new_key, v)) - return dict(items) - - - def connect(self): - try: - client = MailchimpMarketing.Client() - client.set_config({ - "api_key":self.api_key, - "server": self.server_prefix - }) - return client - except ApiClientError as error: - print("Error: {}".format(error.text)) - - - - - - def fetch_data(self): - client = self.connect() - if self.config['records'] == "campaign": - response = client.campaigns.list()['campaigns'] - - elif self.config['records'] == "campaignFolders": - response = client.campaignFolders.list()['folders'] - - - elif self.config['records'] == "ConnectedSites": - response = client.connectedSites.list()['sites'] - - - elif self.config['records'] == "conversations": - response = client.ecommerce.stores()['conversations'] - - - elif self.config['records'] == "ecommerce": - response = client.conversations.list()['stores'] - - elif self.config['records'] == "facebookAds": - response = client.facebookAds.list()['facebook_ads'] - - elif self.config['records'] == "landingpages": - response = client.landingPages.get_all()['landing_pages'] - - - elif self.config['records'] == "reports": - response = client.reports.get_all_campaign_reports()['reports'] - - - - try: - flattened_data = [self.flatten_dict(item) for item in response] - pprint(flattened_data,indent=4) - - df = pd.DataFrame(flattened_data ) - - logger.info(f" data \n {df}") - - return df - except ApiClientError as error: - logger.error(f"HTTP error occurred: {error.text}") - - - - - - - diff --git a/vector_etl/source_mods/paystack_loader.py b/vector_etl/source_mods/paystack_loader.py deleted file mode 100644 index b27c8a4..0000000 --- a/vector_etl/source_mods/paystack_loader.py +++ /dev/null @@ -1,108 +0,0 @@ -from .base import BaseSource -import logging -import pandas as pd -from paystackapi.paystack import Paystack - - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -class PayStackSource(BaseSource): - def __init__(self,config): - self.config = config - self.paystack_secret_key = self.config['paystack_secret_key'] - - - def flatten_dict(self, d, parent_key='', sep='_'): - items = [] - for k, v in d.items(): - new_key = f"{parent_key}{sep}{k}" if parent_key else k - if isinstance(v, dict): - items.extend(self.flatten_dict(v, new_key, sep=sep).items()) - elif isinstance(v, list): - for i, item in enumerate(v): - if isinstance(item, dict): - items.extend(self.flatten_dict(item, f"{new_key}{sep}{i}", sep=sep).items()) - else: - items.append((f"{new_key}{sep}{i}", item)) - else: - items.append((new_key, v)) - return dict(items) - - - def connect(self): - - paystack = Paystack(secret_key=self.paystack_secret_key) - - return paystack - - - def fetch_data(self): - - if self.config['records'] == 'paystack.transactions': - logger.info(" Transactions \n") - response = self.connect().transaction.list()['data'] - - elif 
self.config['records'] == 'paystack.transactions.split': - logger.info(" Transactions split \n") - response = self.connect().transactionSplit.list()['data'] - - elif self.config['records'] == 'paystack.invoice': - logger.info(" invoice \n") - response = self.connect().invoice.list()['data'] - - elif self.config['records'] == 'paystack.product': - logger.info(" product \n") - response = self.connect().product.list()['data'] - - elif self.config['records'] == 'paystack.customer': - logger.info(" customer \n") - response = self.connect().customer.list()['data'] - - elif self.config['records'] == 'paystack.plan': - logger.info(" plan \n") - response = self.connect().plan.list()['data'] - - elif self.config['records'] == 'paystack.subaccount': - logger.info(" subaccount \n") - response = self.connect().subaccount().list()['data'] - - - elif self.config['records'] == 'paystack.subscription': - logger.info(" subaccount \n") - response = self.connect().subscription.list()['data'] - - - elif self.config['records'] == 'paystack.transfer': - logger.info(" transfer \n") - response = self.connect().transfer.list()['data'] - - - elif self.config['records'] == 'paystack.bulkcharge': - logger.info(" bulkcharge \n") - response = self.connect().bulkcharge.list()['data'] - - - elif self.config['records'] == 'paystack.refund': - logger.info(" refund \n") - response = self.connect().refund.list()['data'] - - - try: - flattened_data = [self.flatten_dict(item) for item in response] - - df = pd.DataFrame(flattened_data ) - - logger.info(f" data \n {df}") - - return df - except Exception as http_err: - logger.error(f"HTTP error occurred: {http_err}") - - - - - - - - diff --git a/vector_etl/source_mods/zoho_crm_loader.py b/vector_etl/source_mods/zoho_crm_loader.py deleted file mode 100644 index b06c4db..0000000 --- a/vector_etl/source_mods/zoho_crm_loader.py +++ /dev/null @@ -1,162 +0,0 @@ -from .base import BaseSource -import pandas as pd -import requests -import logging -from pprint import pprint -import os -import json - - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - - - - -class ZohoCrmSource(BaseSource): - def __init__(self,config): - self.config = config - self.token = None - self.url = None - self.grant_type = self.config['grant_type'] - self.client_id = self.config['client_id'] - self.client_secret = self.config['client_secret'] - self.code = self.config['code'] - self.accounts_url = self.config['accounts_url'] - - - def flatten_dict(self, d, parent_key='', sep='_'): - - items = [] - for k, v in d.items(): - new_key = f"{parent_key}{sep}{k}" if parent_key else k - if isinstance(v, dict): - items.extend(self.flatten_dict(v, new_key, sep=sep).items()) - else: - items.append((new_key, v)) - return dict(items) - - - def connect(self): - - data = { - "grant_type":self.grant_type, - "client_id": self.client_id, - "client_secret": self.client_secret, - "code": self.code - } - try: - if os.path.exists("token.json"): - with open("token.json",'r') as token_file: - token_data = json.load(token_file) - self.token = token_data.get("access_token") - return self.token - else: - response = requests.post(url=self.accounts_url, data=data) - logger.info(f"Status {response.status_code}") - with open("token.json", 'w') as token_file: - json.dump({"access_token": response.json()["access_token"]}, token_file) - - logger.info("New token fetched and saved.") - tokens = response.json()["access_token"] - return tokens - except requests.exceptions.HTTPError as http_err: - logger.error(f"connection 
Error {http_err}") - - - - def fetch_data(self): - - self.token = self.connect() - if self.config['records'] == 'module.Contacts': - logger.info("Contact \n") - self.url = f"""https://www.zohoapis.com/crm/v5/Contacts?fields=Acount_Name, - First_Name,Lead_Source,Home,Fax,Skype_ID,Asst_Phone,Phone, - Title,Department,Twitter,Last_Name,Contact_Name,Phone,Email,Reporting_To, - Mailing_Street,Mailing_City,Mailing_State,Mailing_Zip,Mailing_Country, - Description,Contact_Owner,Lead_Source,Date_of_Birth,Contact_Image - &converted=true&per_page={self.config['per_page']}""" - - elif self.config['records'] == 'module.Accounts': - logger.info("Accounts \n") - self.url = f"""https://www.zohoapis.com/crm/v5/Accounts?fields=Account_Owner,Account_Name,Account_Site,Parent_Account, - Account_Number,Account_Type,Industry,Annual_Revenue,Rating,Phone,Fax,Website,Ticker_Symbol,OwnerShip,Employees,Sic_Code, - Billing_Street,Billing_City,Billing_State,Billing_Code,Billing_Country,Shipping_Street,Shipping_City,Shipping_State,Shipping_Code, - Shipping_Country,Description - &converted=true&per_page={self.config['per_page']}""" - - - - - elif self.config['records'] == 'module.Leads': - logger.info("Leads \n") - self.url = f"""https://www.zohoapis.com/crm/v5/Leads?fields=Lead_Owner,First_Name,Title,Mobile,Lead_Source, - Industry,Annual_Revenue,Company,Last_Name,Email,Fax,Website,Lead_Status,Rating,Skype_ID, - Description,Twitter,City,Street,State,Country,Zip_Code,No_of_Employees - &converted=true&per_page={self.config['per_page']}""" - - elif self.config['records'] == 'module.Deals': - logger.info("Deals \n") - self.url = f"""https://www.zohoapis.com/crm/v5/Deals?fields=Deal_Owner,Deal_Name,Account_Name, - Type,Next_Step,Lead_Source,Contact_Name,Amount,Closing_Date,Stage,Probability,Expected_Revenue, - Campaign_Source,Description - &converted=true&per_page={self.config['per_page']}""" - - - - elif self.config['records'] == 'module.Campaigns': - logger.info("Campaigns \n") - self.url = f"""https://www.zohoapis.com/crm/v5/Campaigns?fields=Campaign_Owner,Campaign_Name,Start_Date, - Expected_Revenue,Actual_Cost,Number_sent,Type,Status,End_Date,Budgeted_Cost,Expected_Response,Description - &converted=true&per_page={self.config['per_page']}""" - - - - - elif self.config['records'] == 'module.Tasks': - logger.info("Tasks \n") - self.url = f"""https://www.zohoapis.com/crm/v5/Tasks?fields=Task_Owner,Subject,Due_Date,Contact,Deal,Status,Priority,Reminder, - Repeat,Description - &converted=true&per_page={self.config['per_page']}""" - - - - elif self.config['records'] == 'module.Calls': - logger.info("Calls \n") - self.url = f"""https://www.zohoapis.com/crm/v5/Calls?fields=Call_To,Related_To,Call_Type,Outgoing_Call_Status, - Call_Start_Time,Call_Owner,Subject,Created_By,Modified_By,Call_Purpose,Call_Agenda&converted=true&per_page={self.config['per_page']}""" - - - elif self.config['records'] == 'module': - logger.info("Calls \n") - self.url = "https://www.zohoapis.com/crm/v5/settings/modules," - - headers = {"Authorization":f"Zoho-oauthtoken {self.token}"} - - response = requests.get(url=self.url,headers=headers).json()['data'] - - flattened_data = [self.flatten_dict(item) for item in response] - - - df = pd.DataFrame(flattened_data ) - - logger.info(f" data \n {df}") - - return df - - - - - - - - - - - - - - - - diff --git a/vector_etl/source_mods/zoho_desk_loader.py b/vector_etl/source_mods/zoho_desk_loader.py deleted file mode 100644 index df073af..0000000 --- a/vector_etl/source_mods/zoho_desk_loader.py +++ /dev/null @@ 
-1,116 +0,0 @@ -from .base import BaseSource -import pandas as pd -import requests -import logging -from pprint import pprint -import os -import json - - -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - - -class ZohoDeskSource(BaseSource): - def __init__(self,config): - self.config = config - self.token = None - self.url = None - self.grant_type = self.config['grant_type'] - self.client_id = self.config['client_id'] - self.client_secret = self.config['client_secret'] - self.code = self.config['code'] - self.accounts_url = self.config['accounts_url'] - - - def flatten_dict(self, d, parent_key='', sep='_'): - - items = [] - for k, v in d.items(): - new_key = f"{parent_key}{sep}{k}" if parent_key else k - if isinstance(v, dict): - items.extend(self.flatten_dict(v, new_key, sep=sep).items()) - else: - items.append((new_key, v)) - return dict(items) - - - def connect(self): - - data = { - "grant_type":self.grant_type, - "client_id": self.client_id, - "client_secret": self.client_secret, - "code": self.code - } - try: - if os.path.exists("token.json"): - with open("token.json",'r') as token_file: - token_data = json.load(token_file) - self.token = token_data.get("access_token") - return self.token - else: - response = requests.post(url=self.accounts_url, data=data) - logger.info(f"Status {response.status_code}") - with open("token.json", 'w') as token_file: - json.dump({"access_token": response.json()["access_token"]}, token_file) - - logger.info("New token fetched and saved.") - tokens = response.json()["access_token"] - return tokens - except requests.exceptions.HTTPError as http_err: - logger.error(f"connection Error {http_err}") - - - - def fetch_data(self): - - self.token = self.connect() - if self.config['records'] == 'desk.agents': - logger.info("Agents \n") - self.url = f"https://desk.zoho.com/api/v1/agents" - - - elif self.config['records'] == 'desk.team': - logger.info("Teams \n") - self.url = f"https://desk.zoho.com/api/v1/teams" - headers = {"Authorization":f"Zoho-oauthtoken {self.token}"} - - response = requests.get(url=self.url,headers=headers).json()['teams'] - - flattened_data = [self.flatten_dict(item) for item in response] - - - df = pd.DataFrame(flattened_data ) - - logger.info(f" data \n {df}") - - return df - - - - elif self.config['records'] == 'desk.ticket': - logger.info("Ticket \n") - self.url = f"""https://desk.zoho.com/api/v1/tickets?include=contacts, - assignee,departments,team,isRead""" - - - elif self.config['records'] == 'desk.contacts': - logger.info("Contact \n") - self.url = f"https://desk.zoho.com/api/v1/contacts" - - - - - - - - - - - - - - - - From d59de562bd7a3f16389e9d67b0aaadb59b0dae2e Mon Sep 17 00:00:00 2001 From: owolabi-develop Date: Tue, 20 Aug 2024 13:11:04 -0800 Subject: [PATCH 31/34] updated base module import --- vector_etl/source_mods/airtable_loader.py | 4 +++ vector_etl/source_mods/google_bigquery.py | 39 +++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 vector_etl/source_mods/google_bigquery.py diff --git a/vector_etl/source_mods/airtable_loader.py b/vector_etl/source_mods/airtable_loader.py index 35889c1..4eaf933 100644 --- a/vector_etl/source_mods/airtable_loader.py +++ b/vector_etl/source_mods/airtable_loader.py @@ -1,5 +1,9 @@ import requests from .base import BaseSource +<<<<<<< HEAD +======= +from pprint import pprint +>>>>>>> f842a37 (updated base module import) import pandas as pd import logging diff --git a/vector_etl/source_mods/google_bigquery.py 
b/vector_etl/source_mods/google_bigquery.py
new file mode 100644
index 0000000..affa5f9
--- /dev/null
+++ b/vector_etl/source_mods/google_bigquery.py
@@ -0,0 +1,39 @@
+import os
+from google.cloud import bigquery
+from .base import BaseSource
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+class GoogleBigQuerySource(BaseSource):
+    def __init__(self,config):
+        self.config = config
+        self.client = None
+        self.connect()
+
+
+    def connect(self):
+        if self.config["db_type"] == 'google_bigquery':
+            os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = self.config['GOOGLE_APPLICATION_CREDENTIALS']
+            self.client = bigquery.Client()
+
+    def fetch_data(self):
+        if self.client:
+            try:
+                query_job = self.client.query(f"""{self.config.get("query"," ")}""")
+                if query_job:
+                    dfrows = query_job.result().to_dataframe()
+                    return dfrows
+                else:
+                    logger.error("No data returned.")
+                    return None
+            except Exception as e:
+                logger.error(f"An error occurred: {e}")
+                return None
+
+
+
+

From 8e5dda327fde7dd7fbed2700e8fd2f9caf4b22c6 Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Fri, 13 Sep 2024 15:52:33 -0800
Subject: [PATCH 32/34] remove secret key

---
 tests/test_source_mods.py          | 240 ++++++-----------------------
 vector_etl/source_mods/__init__.py |  21 ---
 2 files changed, 46 insertions(+), 215 deletions(-)

diff --git a/tests/test_source_mods.py b/tests/test_source_mods.py
index d9eb027..e81f594 100644
--- a/tests/test_source_mods.py
+++ b/tests/test_source_mods.py
@@ -5,16 +5,10 @@
 from vector_etl.source_mods.s3_loader import S3Source
 from vector_etl.source_mods.database_loader import DatabaseSource
 from vector_etl.source_mods.local_file import LocalFileSource
-from vector_etl.source_mods.google_bigquery import GoogleBigQuerySource
 from vector_etl.source_mods.airtable_loader import AirTableSource
 from vector_etl.source_mods.hubspot_loader import HubSpotSource
 from vector_etl.source_mods.intercom_loader import InterComSource
-from vector_etl.source_mods.paystack_loader import PayStackSource
-from vector_etl.source_mods.zoho_crm_loader import ZohoCrmSource
-from vector_etl.source_mods.zoho_desk_loader import ZohoDeskSource
-from vector_etl.source_mods.flutterwave_loader import FlutterWaveSource
-from vector_etl.source_mods.gmail_loader import GmailSource
-from vector_etl.source_mods.mailchimp_loader import MailChimpMarketingSource
+from vector_etl.source_mods.digital_ocean_spaces_loader import DigitalOceanSpaceSource
 
 @pytest.fixture
 def s3_config():
@@ -27,14 +21,21 @@ def s3_config():
         'chunk_overlap': 200
     }
 
+
 @pytest.fixture
-def google_bigquery_config():
+def digital_ocean_config():
     return {
-        "source_data_type": "Google BigQuery",
-        "google_application_credentials": "",
-        "query": "SELECT * FROM chipotle_stores LIMIT 10"
-
+        'aws_access_key_id': 'test_key',
+        'endpoint_url':'test_endpoint',
+        'region_name':'test_region',
+        'aws_secret_access_key': 'test_secret',
+        'bucket_name': 'test_bucket',
+        'prefix': 'test_prefix/',
+        'chunk_size': 1000,
+        'chunk_overlap': 200
     }
+
+
 
 @pytest.fixture
@@ -47,40 +48,7 @@ def airtable_config():
     return {
         'auth_token': '',
         'baseId': '',
         'tableIdOrName': ''
     }
 
-@pytest.fixture
-def gmail_config():
-    return {
-        'credentials': 'credentials.json', ## path to gmail crendtials
-        'gmail.label': 'IMPORTANT' # Specify the label in the config
-    }
-
-
-
-
-@pytest.fixture
-def zohodesk_config():
-    return{
-        "grant_type":"",
-        "client_id": "",
-        "client_secret": "",
-        "code": "",
-        "limit":"",
-        "records":"desk.team",
-        "accounts_url":""
-    }
-
-@pytest.fixture
-def zohocrm_config():
-    return{
- "grant_type":"", - "client_id": "", - "client_secret": "", - "code": "", - "per_page":"10", - "records":"module.Call", - "accounts_url":"" - } @pytest.fixture @@ -93,21 +61,6 @@ def hubspot_config(): } -@pytest.fixture -def paystack_config(): - return{ - "paystack_secret_key":"", - "records": "paystack.transactions", - } - - - -@pytest.fixture -def flutterwave_config(): - return{ - "secret_key":"", - "records": "flutterwave.payout-subaccounts", - } @pytest.fixture def intercom_config(): @@ -140,13 +93,8 @@ def local_file_config(): } -@pytest.fixture -def mailchimp_config(): - return { - 'api_key': 'test_key', - 'server': 'test_secret', - 'records': 'test_bucket', - } + + def test_s3_source_connect(s3_config): with patch('boto3.client') as mock_client: @@ -210,23 +158,6 @@ def test_local_file_source_read_file(local_file_config): assert isinstance(file_content, BytesIO) -def test_google_bigquery_connect(google_bigquery_config): - with patch('bigquery.connect') as mock_connect: - source = GoogleBigQuerySource(google_bigquery_config) - source.connect() - mock_connect.assert_called_once_with( - source_data_type="Google BigQuery", - google_application_credentials="", - query="SELECT * FROM chipotle_stores LIMIT 10" - ) - - -def test_google_bigquery_fetch_data(google_bigquery_config): - with patch('bigquery.connect') as mock_connect: - mock_connect.result.to_dataframe.return_value = pd.DataFrame() - source = GoogleBigQuerySource(google_bigquery_config) - df = source.fetch_data() - assert isinstance(df, pd.DataFrame) def test_airtable_connect(airtable_config): @@ -257,77 +188,7 @@ def test_airtable_fetch_data(airtable_config): assert isinstance(df, pd.DataFrame) - -def test_zohodesk_connect(zohodesk_config): - - with patch('requests.get') as mock_connect: - source = ZohoDeskSource(zohodesk_config) - source.connect() - mock_connect.assert_called_once_with( - grant_type="", - client_id = "", - client_secret="", - code="", - accounts_url="" - ) - -def test_zohodesk_fetch_data(zohodesk_config): - with patch('requests.get') as mock_connect: - mock_connect.return_value = [ { - "Address": "333 Post St", - "Name": "Union Square", - "Visited": True - } - ] - - source = ZohoDeskSource(zohodesk_config) - df = source.fetch_data() - - assert isinstance(df, pd.DataFrame) - -def test_zohocrm_connect(zohocrm_config): - - with patch('requests.get') as mock_connect: - source = ZohoCrmSource(zohocrm_config) - source.connect() - mock_connect.assert_called_once_with( - grant_type="", - client_id = "", - client_secret="", - code="", - accounts_url="" - ) - - -def test_zohocrm_fetch_data(zohocrm_config): - with patch('requests.get') as mock_connect: - mock_connect.return_value = [ { } - ] - - source = ZohoCrmSource(zohocrm_config) - df = source.fetch_data() - - assert isinstance(df, pd.DataFrame) - -def test_paystack_connect(paystack_config): - - with patch('requests.get') as mock_connect: - source = PayStackSource(paystack_config) - source.connect() - mock_connect.assert_called_once_with( - paystack_secret_key="", - ) - - -def test_paystack_fetch_data(paystack_config): - with patch('Paystack') as mock_connect: - mock_connect.return_value = [{}] - - source = PayStackSource(paystack_config) - df = source.fetch_data() - - assert isinstance(df, pd.DataFrame) def test_intercom_connect(intercom_config): @@ -340,78 +201,69 @@ def test_intercom_connect(intercom_config): ) -def test_intercom_fetch_data(intercom_config): +def test_hubspot_fetch_data(hubspot_config): with patch('requests.get') as mock_connect: 
         mock_connect.return_value = [{}]
-        source = InterComSource(intercom_config)
+        source = HubSpotSource(hubspot_config)
         df = source.fetch_data()
 
         assert isinstance(df, pd.DataFrame)
-
-def test_flutterwave_connect(flutterwave_config):
+
+
+def test_hubspot_connect(hubspot_config):
     with patch('requests.get') as mock_connect:
-        source = FlutterWaveSource(flutterwave_config)
+        source = HubSpotSource(hubspot_config)
         source.connect()
         mock_connect.assert_called_once_with(
             secret_key="",
         )
 
-def test_flutterwave_fetch_data(flutterwave_config):
+def test_intercom_fetch_data(intercom_config):
     with patch('requests.get') as mock_connect:
         mock_connect.return_value = [{}]
-        source = FlutterWaveSource(flutterwave_config)
+        source = InterComSource(intercom_config)
         df = source.fetch_data()
 
         assert isinstance(df, pd.DataFrame)
+
-
-def test_gmail_connect(gmail_config):
-
-    with patch('InstalledAppFlow.from_client_secrets_file') as mock_connect:
-        source = GmailSource(gmail_config)
+def test_digital_ocean_source_connect(digital_ocean_config):
+    with patch('boto3.client') as mock_client:
+        source = DigitalOceanSpaceSource(digital_ocean_config)
         source.connect()
-        mock_connect.assert_called_once_with(
-            credentials="credential.json",
-        )
+        mock_client.assert_called_once_with(
+            's3',
+            aws_access_key_id='test_key',
+            aws_secret_access_key='test_secret',
+            endpoint_url='test_endpoint',
+            region_name='test_region',
+        )
 
+def test_digital_ocean_list_files(digital_ocean_config):
+    with patch('boto3.client') as mock_client:
+        mock_paginator = Mock()
+        mock_paginator.paginate.return_value = [
+            {'Contents': [{'Key': 'test_prefix/file1.csv'}, {'Key': 'test_prefix/file2.csv'}]}
+        ]
+        mock_client.return_value.get_paginator.return_value = mock_paginator
 
-def test_gmail_fetch_data(gmail_config):
-    with patch('requests.get') as mock_connect:
-        mock_connect.return_value = [{}]
-        source = GmailSource(gmail_config)
-        df = source.fetch_data()
+        source = DigitalOceanSpaceSource(digital_ocean_config)
+        source.connect()
+        files = source.list_files()
 
-        assert isinstance(df, pd.DataFrame)
+        assert files == ['test_prefix/file1.csv', 'test_prefix/file2.csv']
+
 
-def test_mailchimp_connect(mailchimp_config):
-
-    with patch('MailchimpMarketing.Client.set_config') as mock_connect:
-        source = MailChimpMarketingSource(mailchimp_config)
-        source.connect()
-        mock_connect.assert_called_once_with(
-            api_key="",
-            server=""
-        )
-
-
-def test_mailchimp_fetch_data(mailchimp_config):
-    with patch('MailchimpMarketing.Client.set_config') as mock_connect:
-        mock_connect.return_value = [{}]
-        source = MailChimpMarketingSource(mailchimp_config)
-        df = source.fetch_data()
-
-        assert isinstance(df, pd.DataFrame)
-
diff --git a/vector_etl/source_mods/__init__.py b/vector_etl/source_mods/__init__.py
index c4687c4..5274afe 100644
--- a/vector_etl/source_mods/__init__.py
+++ b/vector_etl/source_mods/__init__.py
@@ -10,15 +10,8 @@
 from .google_cloud_storage import GoogleCloudStorageSource
 from .local_file import LocalFileSource
 from .airtable_loader import AirTableSource
-from .google_bigquery import GoogleBigQuerySource
 from .hubspot_loader import HubSpotSource
-from .zoho_crm_loader import ZohoCrmSource
-from .zoho_desk_loader import ZohoDeskSource
 from .intercom_loader import InterComSource
-from .paystack_loader import PayStackSource
-from .flutterwave_loader import FlutterWaveSource
-from .gmail_loader import GmailSource
-from .mailchimp_loader import MailChimpMarketingSource
 from .digital_ocean_spaces_loader import DigitalOceanSpaceSource
 
 def get_source_class(config):
@@ -43,24 +36,10 @@ def get_source_class(config):
         return GoogleCloudStorageSource(config)
     elif source_type == 'AirTable':
         return AirTableSource(config)
-    elif source_type == 'Google BigQuery':
-        return GoogleBigQuerySource(config)
     elif source_type == 'HubSpot':
         return HubSpotSource(config)
-    elif source_type == 'ZohoCrm':
-        return ZohoCrmSource(config)
-    elif source_type == 'ZohoDesk':
-        return ZohoDeskSource(config)
     elif source_type == "InterCom":
         return InterComSource(config)
-    elif source_type == 'PayStacks':
-        return PayStackSource(config)
-    elif source_type == "FlutterWave":
-        return FlutterWaveSource(config)
-    elif source_type == "Gmail":
-        return GmailSource(config)
-    elif source_type == "MailChimp":
-        return MailChimpMarketingSource(config)
     elif source_type == "DigitalOcean":
         return DigitalOceanSpaceSource(config)
     else:
         raise ValueError(f"Unsupported source type: {source_type}")

From a3ff626f3a6517ceeebebb5bf8db6e232aa24cbd Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Sun, 15 Sep 2024 12:51:03 -0800
Subject: [PATCH 33/34] accept incoming merged changes

---
 vector_etl/source_mods/airtable_loader.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/vector_etl/source_mods/airtable_loader.py b/vector_etl/source_mods/airtable_loader.py
index 4eaf933..0e14b58 100644
--- a/vector_etl/source_mods/airtable_loader.py
+++ b/vector_etl/source_mods/airtable_loader.py
@@ -1,9 +1,6 @@
 import requests
 from .base import BaseSource
-<<<<<<< HEAD
-=======
 from pprint import pprint
->>>>>>> f842a37 (updated base module import)
 import pandas as pd
 import logging

From 9c048719972cca60c56c0ad5f9129a20fe508983 Mon Sep 17 00:00:00 2001
From: owolabi-develop
Date: Sun, 15 Sep 2024 13:08:16 -0800
Subject: [PATCH 34/34] added airtable, hubspot, digitalocean and intercom
 source sample yaml configs

---
 README.md | 40 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/README.md b/README.md
index ed2588f..1f2f76f 100644
--- a/README.md
+++ b/README.md
@@ -307,6 +307,46 @@ source:
   chunk_overlap: 0
 ```
 
+##### DigitalOcean Source
+```yaml
+source:
+  source_data_type: "DigitalOcean"
+  bucket_name: "my-bucket"
+  prefix: "path/to/files/"
+  file_type: ".csv"
+  region_name: 'your-region'
+  endpoint_url: 'your-endpoint-url'
+  aws_access_key_id: "your-access-key"
+  aws_secret_access_key: "your-secret-key"
+```
+
+##### Airtable Source
+```yaml
+source:
+  source_data_type: "AirTable"
+  auth_token: ""
+  baseId: ""
+  tableIdOrName: ""
+```
+
+##### HubSpot Source
+```yaml
+source:
+  source_data_type: "HubSpot"
+  archive: "false"
+  limit: "100"
+  access_token: ""
+  crm_object: "crm.contacts"
+```
+
+##### Intercom Source
+```yaml
+source:
+  source_data_type: "InterCom"
+  token: ""
+  records: "intercom.teams"
+```
+
 #### Using Unstructured to process source files
 
 Starting from version 0.1.6.3, you can now add Unstructured as a file processing API. Users can utilize [Unstructured's Serverless API](https://unstructured.io/api-key-hosted) to efficiently extract data from a multitude of file-based sources.
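Below is a rough sketch of how an Unstructured-backed file source might be wired into a pipeline config, following the file-based source examples above. The `use_unstructured` and `unstructured_api_key` keys are illustrative assumptions rather than confirmed option names — check the configuration reference for the exact schema:

```yaml
source:
  source_data_type: "Amazon S3"
  bucket_name: "my-bucket"
  prefix: "path/to/files/"
  file_type: ".pdf"
  aws_access_key_id: "your-access-key"
  aws_secret_access_key: "your-secret-key"
  use_unstructured: true                              # assumed flag for routing files through Unstructured
  unstructured_api_key: "your-unstructured-api-key"   # assumed key name for the Serverless API credential
```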