From d80965bc02d8dccf6f123f9c94502fa45292fa69 Mon Sep 17 00:00:00 2001 From: Ashish Acharya Date: Fri, 1 Dec 2023 14:12:18 -0600 Subject: [PATCH 1/5] WIP add url counts page --- ...0040_collection_url_count_prod_and_more.py | 32 +++++ ...ollection_url_count_production_and_more.py | 22 ++++ sde_collections/models/collection.py | 113 ++++++++++++++++++ sde_collections/sinequa_api.py | 7 +- sde_collections/tasks.py | 22 ++++ sde_collections/templatetags/__init__.py | 0 .../templatetags/get_server_url.py | 8 ++ sde_collections/urls.py | 5 + sde_collections/views.py | 55 +++++++++ sde_indexing_helper/static/js/url_counts.js | 4 + .../url_counts_by_environment.html | 53 ++++++++ 11 files changed, 319 insertions(+), 2 deletions(-) create mode 100644 sde_collections/migrations/0040_collection_url_count_prod_and_more.py create mode 100644 sde_collections/migrations/0041_rename_url_count_prod_collection_url_count_production_and_more.py create mode 100644 sde_collections/templatetags/__init__.py create mode 100644 sde_collections/templatetags/get_server_url.py create mode 100644 sde_indexing_helper/static/js/url_counts.js create mode 100644 sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html diff --git a/sde_collections/migrations/0040_collection_url_count_prod_and_more.py b/sde_collections/migrations/0040_collection_url_count_prod_and_more.py new file mode 100644 index 00000000..c106f522 --- /dev/null +++ b/sde_collections/migrations/0040_collection_url_count_prod_and_more.py @@ -0,0 +1,32 @@ +# Generated by Django 4.2.6 on 2023-12-01 16:29 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("sde_collections", "0039_includepattern"), + ] + + operations = [ + migrations.AddField( + model_name="collection", + name="url_count_prod", + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name="collection", + name="url_count_secret_prod", + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name="collection", + name="url_count_secret_test", + field=models.IntegerField(default=0), + ), + migrations.AddField( + model_name="collection", + name="url_count_test", + field=models.IntegerField(default=0), + ), + ] diff --git a/sde_collections/migrations/0041_rename_url_count_prod_collection_url_count_production_and_more.py b/sde_collections/migrations/0041_rename_url_count_prod_collection_url_count_production_and_more.py new file mode 100644 index 00000000..719b04ac --- /dev/null +++ b/sde_collections/migrations/0041_rename_url_count_prod_collection_url_count_production_and_more.py @@ -0,0 +1,22 @@ +# Generated by Django 4.2.6 on 2023-12-01 17:40 + +from django.db import migrations + + +class Migration(migrations.Migration): + dependencies = [ + ("sde_collections", "0040_collection_url_count_prod_and_more"), + ] + + operations = [ + migrations.RenameField( + model_name="collection", + old_name="url_count_prod", + new_name="url_count_production", + ), + migrations.RenameField( + model_name="collection", + old_name="url_count_secret_prod", + new_name="url_count_secret_production", + ), + ] diff --git a/sde_collections/models/collection.py b/sde_collections/models/collection.py index f930f5cb..7b147b7a 100644 --- a/sde_collections/models/collection.py +++ b/sde_collections/models/collection.py @@ -8,6 +8,7 @@ from config_generation.db_to_xml import XmlEditor +from ..sinequa_api import server_configs from ..utils.github_helper import GitHubHandler from .collection_choice_fields import ( ConnectorChoices, @@ -19,6 +20,10 @@ WorkflowStatusChoices, ) +# from ..tasks import import_candidate_urls_counts_from_api +# from sde_collections.tasks import import_candidate_urls_counts_from_api + + User = get_user_model() @@ -94,6 +99,11 @@ class Collection(models.Model): ) curation_started = models.DateTimeField("Curation Started", null=True, blank=True) + url_count_test = models.IntegerField(default=0) + url_count_secret_test = models.IntegerField(default=0) + url_count_production = models.IntegerField(default=0) + url_count_secret_production = models.IntegerField(default=0) + class Meta: """Meta definition for Collection.""" @@ -154,6 +164,109 @@ def workflow_status_button_color(self) -> str: } return color_choices[self.workflow_status] + def get_server_url(self, server_name) -> str: + base_url = server_configs[server_name]["base_url"] + app_name = server_configs[server_name]["app_name"] + query_name = server_configs[server_name]["query_name"] + + if "secret" in server_name: + folder = "SDE" + else: + folder = "SMD" + + payload = { + "name": query_name, + "scope": "All", + "text": "", + "advanced": { + "collection": f"/{folder}/{self.config_folder}/", + }, + } + encoded_payload = urllib.parse.quote(json.dumps(payload)) + return f"{base_url}/app/{app_name}/#/search?query={encoded_payload}" + + # def fetch_url_counts(self): + # for collection in Collection.objects.all(): + # if not ( + # collection.url_count_test == 0 + # and collection.url_count_secret_test == 0 + # and collection.url_count_production == 0 + # and collection.url_count_secret_production == 0 + # ): + # continue + # collection_ids = [collection.id] + # server_names = [ + # "test", + # "secret_test", + # "production", + # "secret_production", + # ] + # for server_name in server_names: + # count = import_candidate_urls_counts_from_api( + # server_name, collection_ids + # ) + # setattr(collection, f"url_count_{server_name}", count) + # collection.save() + + # def fetch_url_counts(self): + # """Fetch the URL counts from the production webapp.""" + # ENVIRONMENTS = { + # "test": { + # "url": "https://sciencediscoveryengine.test.nasa.gov", + # "query": "query-smd-primary", + # "app": "nasa-sba-smd", + # "variable": "url_count_test", + # "folder": "SMD", + # }, + # "secret_test": { + # "url": "https://sciencediscoveryengine.test.nasa.gov", + # "query": "query-sde-primary", + # "app": "nasa-sba-sde", + # "variable": "url_count_secret_test", + # "folder": "SDE", + # }, + # "prod": { + # "url": "https://sciencediscoveryengine.nasa.gov", + # "query": "query-smd-primary", + # "app": "nasa-sba-smd", + # "variable": "url_count_prod", + # "folder": "SMD", + # }, + # "secret_prod": { + # "url": "https://sciencediscoveryengine.nasa.gov", + # "query": "query-sde-primary", + # "app": "nasa-sba-sde", + # "variable": "url_count_secret_prod", + # "folder": "SDE", + # }, + # } + + # totals = [] + + # for environment_name, environment_config in ENVIRONMENTS.items(): + # count = import_candidate_urls_counts_from_api( + # server_name=environment_name, collection_ids=[self.id] + # ) + + # # setattr(self, environment_config["variable"], response_json["total"]) + # totals.append(count) + # # self.save() + # return totals + + # from sde_collections.tasks import import_candidate_urls_counts_from_api + # for collection in Collection.objects.all(): + # collection_ids = [collection.id] + # server_names = [ + # "test", + # "secret_test", + # "production", + # "secret_production", + # ] + # for server_name in server_names: + # count = import_candidate_urls_counts_from_api(server_name, collection_ids) + # setattr(collection, f"url_count_{server_name}", count) + # collection.save() + def _process_exclude_list(self): """Process the exclude list.""" return [ diff --git a/sde_collections/sinequa_api.py b/sde_collections/sinequa_api.py index 00c59929..9a9d37e1 100644 --- a/sde_collections/sinequa_api.py +++ b/sde_collections/sinequa_api.py @@ -39,6 +39,7 @@ def __init__(self, server_name: str) -> None: self.app_name: str = server_configs[server_name]["app_name"] self.query_name: str = server_configs[server_name]["query_name"] self.base_url: str = server_configs[server_name]["base_url"] + self.folder: str = "SDE" if self.app_name == "nasa-sba-sde" else "SMD" def process_response(self, url: str, payload: dict[str, Any]) -> Any: response = requests.post(url, headers={}, json=payload, verify=False) @@ -63,8 +64,10 @@ def query(self, page: int, collection_config_folder: str = "") -> Any: }, } - if collection_config_folder: - payload["query"]["advanced"]["collection"] = f"/SDE/{collection_config_folder}/" + # if collection_config_folder: + payload["query"]["advanced"][ + "collection" + ] = f"/{self.folder}/{collection_config_folder}/" response = self.process_response(url, payload) diff --git a/sde_collections/tasks.py b/sde_collections/tasks.py index 761d92a1..b9c550a1 100644 --- a/sde_collections/tasks.py +++ b/sde_collections/tasks.py @@ -13,6 +13,10 @@ from .sinequa_api import Api from .utils.github_helper import GitHubHandler +# from django.apps import apps + +# Collection = apps.get_model("Collection") + def _get_data_to_import(collection, server_name): # ignore these because they are API collections and don't have URLs @@ -24,6 +28,13 @@ def _get_data_to_import(collection, server_name): "/SMD/CASEI_Platform/", "/SMD/CMR_API/", "/SMD/PDS_API_Legacy_All/", + "/SDE/ASTRO_NAVO_HEASARC/", + "/SDE/CASEI_Campaign/", + "/SDE/CASEI_Deployment/", + "/SDE/CASEI_Instrument/", + "/SDE/CASEI_Platform/", + "/SDE/CMR_API/", + "/SDE/PDS_API_Legacy_All/", ] data_to_import = [] @@ -97,6 +108,17 @@ def import_candidate_urls_from_api(server_name="test", collection_ids=[]): shutil.rmtree(TEMP_FOLDER_NAME) +@celery_app.task(soft_time_limit=10000) +def import_candidate_urls_counts_from_api(server_name, collection_ids=[]): + collections = Collection.objects.filter(id__in=collection_ids) + + for collection in collections: + data_to_import = _get_data_to_import( + server_name=server_name, collection=collection + ) + return len(data_to_import) + + @celery_app.task() def push_to_github_task(collection_ids): collections = Collection.objects.filter(id__in=collection_ids) diff --git a/sde_collections/templatetags/__init__.py b/sde_collections/templatetags/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/sde_collections/templatetags/get_server_url.py b/sde_collections/templatetags/get_server_url.py new file mode 100644 index 00000000..edc3c763 --- /dev/null +++ b/sde_collections/templatetags/get_server_url.py @@ -0,0 +1,8 @@ +from django import template + +register = template.Library() + + +@register.simple_tag +def get_server_url(collection, server_name): + return collection.get_server_url(server_name) diff --git a/sde_collections/urls.py b/sde_collections/urls.py index 3b709f58..61b71315 100644 --- a/sde_collections/urls.py +++ b/sde_collections/urls.py @@ -37,6 +37,11 @@ view=views.WebappGitHubConsolidationView.as_view(), name="consolidate_db_and_github_configs", ), + path( + "url-counts/", + view=views.URLCountView.as_view(), + name="consolidate_db_and_github_configs", + ), # List all CandidateURL instances: /candidate-urls/ # Retrieve a specific CandidateURL instance: /candidate-urls/{id}/ # Create a new CandidateURL instance: /candidate-urls/ diff --git a/sde_collections/views.py b/sde_collections/views.py index b73aa9a3..cc990643 100644 --- a/sde_collections/views.py +++ b/sde_collections/views.py @@ -393,3 +393,58 @@ def get_context_data(self, **kwargs): context["differences"] = self.data return context + + +class URLCountView(LoginRequiredMixin, ListView): + """ + Show the count of URLs on various systems + """ + + template_name = "sde_collections/url_counts_by_environment.html" + model = Collection + context_object_name = "collections" + + # def get(self, request, *args, **kwargs): + # if not request.GET.get("reindex") == "true": + # self.data = generate_db_github_metadata_differences() + # else: + # # this needs to be a celery task eventually + # self.data = generate_db_github_metadata_differences( + # reindex_configs_from_github=True + # ) + + # return super().get(request, *args, **kwargs) + + # def post(self, request, *args, **kwargs): + # config_folder = self.request.POST.get("config_folder") + # field = self.request.POST.get("field") + # new_value = self.request.POST.get("github_value") + + # if new_value and new_value != "None": + # new_value = new_value.strip() + # if field == "division": + # new_value = Divisions.lookup_by_text(new_value) + # elif field == "document_type": + # new_value = DocumentTypes.lookup_by_text(new_value) + # elif field == "connector": + # new_value = ConnectorChoices.lookup_by_text(new_value) + + # Collection.objects.filter(config_folder=config_folder).update( + # **{field: new_value} + # ) + # messages.success( + # request, f"Successfully updated {field} of {config_folder}." + # ) + # else: + # messages.error( + # request, + # f"Can't update empty value from GitHub: {field} of {config_folder}.", + # ) + + # return redirect("sde_collections:consolidate_db_and_github_configs") + + # def get_context_data(self, **kwargs): + # context = super().get_context_data(**kwargs) + # context["differences"] = self.data + + # return context diff --git a/sde_indexing_helper/static/js/url_counts.js b/sde_indexing_helper/static/js/url_counts.js new file mode 100644 index 00000000..6fdbba0d --- /dev/null +++ b/sde_indexing_helper/static/js/url_counts.js @@ -0,0 +1,4 @@ +let table = $('#url_counts_table').DataTable({ + "paging": false, + "stateSave": true, +}); diff --git a/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html b/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html new file mode 100644 index 00000000..09a19627 --- /dev/null +++ b/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html @@ -0,0 +1,53 @@ +{% extends "layouts/base.html" %} +{% load static %} +{% load i18n %} +{% load get_server_url %} +{% block title %}URL counts by environment{% endblock %} +{% block stylesheets %} + {{ block.super }} + +{% endblock stylesheets %} +{% block content %} + {% csrf_token %} +
+

URL counts by environment

+ + + + + + + + + + + + + + {% for collection in collections %} + + + + + + + + + {% endfor %} + +
FolderNameTestSecret TestProductionSecret Production
{{ collection.config_folder }}{{ collection.name }}{{ collection.url_count_test }}{{ collection.url_count_secret_test }}{{ collection.url_count_production }}{{ collection.url_count_secret_production }}
+{% endblock content %} +{% block javascripts %} + + + + + +{% endblock javascripts %} From ef0ce1570dab440e1e6eff09f143c239d5b8cbf0 Mon Sep 17 00:00:00 2001 From: Ashish Acharya Date: Fri, 1 Dec 2023 14:13:47 -0600 Subject: [PATCH 2/5] Make the header sticky --- sde_indexing_helper/static/js/url_counts.js | 1 + 1 file changed, 1 insertion(+) diff --git a/sde_indexing_helper/static/js/url_counts.js b/sde_indexing_helper/static/js/url_counts.js index 6fdbba0d..b609c7fa 100644 --- a/sde_indexing_helper/static/js/url_counts.js +++ b/sde_indexing_helper/static/js/url_counts.js @@ -1,4 +1,5 @@ let table = $('#url_counts_table').DataTable({ "paging": false, "stateSave": true, + "fixedHeader": true, }); From efab5fa871f861af363a91e36c0ec88465e27ef1 Mon Sep 17 00:00:00 2001 From: Ashish Acharya Date: Mon, 4 Dec 2023 21:26:53 -0600 Subject: [PATCH 3/5] WIP: url counts page in progress --- sde_indexing_helper/static/js/url_counts.js | 102 +++++++++++++++++- .../url_counts_by_environment.html | 10 ++ 2 files changed, 108 insertions(+), 4 deletions(-) diff --git a/sde_indexing_helper/static/js/url_counts.js b/sde_indexing_helper/static/js/url_counts.js index b609c7fa..e4b15b90 100644 --- a/sde_indexing_helper/static/js/url_counts.js +++ b/sde_indexing_helper/static/js/url_counts.js @@ -1,5 +1,99 @@ -let table = $('#url_counts_table').DataTable({ - "paging": false, - "stateSave": true, - "fixedHeader": true, +$(document).ready(function () { + let table = $('#url_counts_table').DataTable({ + initComplete: function (settings, json) { + // calculate the sum when table is first created: + doSum(); + }, + "paging": false, + "stateSave": true, + "fixedHeader": true, + }); + + $('#url_counts_table').on('draw.dt', function () { + // re-calculate the sum whenever the table is re-displayed: + doSum(); + }); + + // This provides the sum of all records: + function doSum() { + // get the DataTables API object: + var table = $('#url_counts_table').DataTable(); + // set up the initial (unsummed) data array for the footer row: + var totals = ['Totals', '', 0, 0, 0, 0]; + // iterate all rows - use table.rows( {search: 'applied'} ).data() + // if you want to sum only filtered (visible) rows: + totals = table.rows().data() + // sum the amounts: + .reduce(function (sum, record) { + for (let i = 2; i <= 8; i++) { + sum[i] = sum[i] + numberFromString(record[i]); + } + return sum; + }, totals); + // place the sum in the relevant footer cell: + for (let i = 1; i <= 8; i++) { + var column = table.column(i); + $(column.footer()).html(formatNumber(totals[i])); + } + } + + function numberFromString(s) { + // Check if the input is a string + if (typeof s === 'string') { + // Create a temporary div element + var tempDiv = document.createElement('div'); + // Set the inner HTML of the div to the input string + tempDiv.innerHTML = s; + // Extract the text content from the div + var text = tempDiv.textContent || tempDiv.innerText || ""; + + // Remove any non-numeric characters (except for the decimal point) + return text.replace(/[^\d.-]/g, '') * 1; + } else if (typeof s === 'number') { + // If it's already a number, return it as is + return s; + } else { + // If the input is neither a string nor a number, return 0 + return 0; + } + } + + function formatNumber(n) { + return n.toLocaleString(); // or whatever you prefer here + } + }); + +// let table = $('#url_counts_table').DataTable({ +// "paging": false, +// "stateSave": true, +// "fixedHeader": true, +// initComplete: function (settings, json) { +// // calculate the sum when table is first created: +// doSum(); +// } + +// $('#url_counts_table').on('draw.dt', function () { +// // re-calculate the sum whenever the table is re-displayed: +// doSum(); +// }) + + +// // "footerCallback": function (row, data, start, end, display) { +// // var api = this.api(); + +// // // Calculate the total for the first column in the displayed data +// // var total = api +// // .column(2, { page: 'current' }) +// // .data() +// // .reduce(function (a, b) { +// // return a + b; +// // }, 0); + +// // // Update the footer +// // $(api.column(2).footer()).html(total); +// // $(api.column(3).footer()).html(total); +// // $(api.column(4).footer()).html(total); +// // $(api.column(5).footer()).html(total); +// // } +// }); diff --git a/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html b/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html index 09a19627..8b2c7b48 100644 --- a/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html +++ b/sde_indexing_helper/templates/sde_collections/url_counts_by_environment.html @@ -36,6 +36,16 @@

URL counts by environment