diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py
index 7726aed..d8d33c5 100644
--- a/bigquery_magics/bigquery.py
+++ b/bigquery_magics/bigquery.py
@@ -53,6 +53,8 @@
     amount of time for the query to complete will not be cleared after the
     query is finished. By default, this information will be displayed but
     will be cleared after the query is finished.
+    * ``--graph`` (Optional[line argument]):
+        Visualizes the query result as a graph. The result must contain a single column whose rows hold valid JSON.
     * ``--use_geodataframe`` (Optional[line argument]):
         Return the query result as a geopandas.GeoDataFrame. If present, the
         argument that follows the ``--use_geodataframe`` flag
@@ -61,7 +63,6 @@
         See geopandas.GeoDataFrame for details.
         The Coordinate Reference System will be set to “EPSG:4326”.
-
     * ``--params`` (Optional[line argument]):
         If present, the argument following the ``--params`` flag
         must be either:
@@ -97,14 +98,15 @@
 import ast
 from concurrent import futures
 import copy
+import json
 import re
 import sys
+import threading
 import time
 from typing import Any, List, Tuple
 import warnings
 
 import IPython  # type: ignore
-from IPython import display  # type: ignore
 from IPython.core import magic_arguments  # type: ignore
 from IPython.core.getipython import get_ipython
 from google.api_core import client_info
@@ -114,10 +116,12 @@
 from google.cloud.bigquery.dataset import DatasetReference
 from google.cloud.bigquery.dbapi import _helpers
 from google.cloud.bigquery.job import QueryJobConfig
+import pandas
 
 from bigquery_magics import line_arg_parser as lap
 import bigquery_magics._versions_helpers
 import bigquery_magics.config
+import bigquery_magics.graph_server as graph_server
 import bigquery_magics.line_arg_parser.exceptions
 import bigquery_magics.version
@@ -391,6 +395,12 @@ def _create_dataset_if_necessary(client, dataset_id):
         "Defaults to engine set in the query setting in console."
     ),
 )
+@magic_arguments.argument(
+    "--graph",
+    action="store_true",
+    default=False,
+    help=("Visualizes the query result as a graph"),
+)
 def _cell_magic(line, query):
     """Underlying function for bigquery cell magic
@@ -425,7 +435,7 @@
 def _parse_magic_args(line: str) -> Tuple[List[Any], Any]:
     # The built-in parser does not recognize Python structures such as dicts, thus
-    # we extract the "--params" option and inteprpret it separately.
+    # we extract the "--params" option and interpret it separately.
     try:
         params_option_value, rest_of_args = _split_args_line(line)
@@ -586,6 +596,72 @@
     return result
 
 
+def _is_colab() -> bool:
+    """Check if code is running in Google Colab."""
+    try:
+        import google.colab  # noqa: F401
+
+        return True
+    except ImportError:
+        return False
+
+
+def _colab_callback(query: str, params: str):
+    return IPython.core.display.JSON(
+        graph_server.convert_graph_data(query_results=json.loads(params))
+    )
+
+
+singleton_server_thread: threading.Thread = None
+
+
+def _add_graph_widget(query_result):
+    try:
+        from spanner_graphs.graph_visualization import generate_visualization_html
+    except ImportError as err:
+        customized_error = ImportError(
+            "Use of --graph requires the spanner-graph-notebook package to be installed. Install it with `pip install 'bigquery-magics[spanner-graph-notebook]'`."
+        )
+        raise customized_error from err
+
+    # In Jupyter, create an HTTP server to be invoked from the JavaScript to
+    # populate the visualizer widget. In Colab, we are not able to create an
+    # HTTP server on a background thread, so we use a special Colab-specific
+    # API to register a callback, to be invoked from JavaScript.
+    if _is_colab():
+        from google.colab import output
+
+        output.register_callback("graph_visualization.Query", _colab_callback)
+    else:
+        global singleton_server_thread
+        alive = singleton_server_thread and singleton_server_thread.is_alive()
+        if not alive:
+            singleton_server_thread = graph_server.graph_server.init()
+
+    # Create HTML to invoke the graph server.
+    html_content = generate_visualization_html(
+        query="placeholder query",
+        port=graph_server.graph_server.port,
+        params=query_result.to_json().replace("\\", "\\\\").replace('"', '\\"'),
+    )
+    IPython.display.display(IPython.core.display.HTML(html_content))
+
+
+def _is_valid_json(s: str) -> bool:
+    try:
+        json.loads(s)
+        return True
+    except (json.JSONDecodeError, TypeError):
+        return False
+
+
+def _supports_graph_widget(query_result: pandas.DataFrame) -> bool:
+    _, num_columns = query_result.shape
+    if num_columns != 1:
+        return False
+    return query_result[query_result.columns[0]].apply(_is_valid_json).all()
+
+
 def _make_bq_query(
     query: str,
     args: Any,
@@ -634,7 +710,7 @@
         return
 
     if not args.verbose:
-        display.clear_output()
+        IPython.display.clear_output()
 
     if args.dry_run:
         # TODO(tswast): Use _handle_result() here, too, but perhaps change the
@@ -671,6 +747,8 @@
     else:
         result = result.to_dataframe(**dataframe_kwargs)
 
+    if args.graph and _supports_graph_widget(result):
+        _add_graph_widget(result)
     return _handle_result(result, args)
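The gating above is easy to check by hand. A minimal sketch with hypothetical data (not part of the change itself): `_supports_graph_widget` accepts only a single-column result in which every row parses as JSON, and `DataFrame.to_json()` produces the column-oriented payload that is handed to the visualizer.

```python
import json

import pandas

# Hypothetical single-column result: every row is a JSON string, so the
# --graph path would render it (one column, all rows valid JSON).
eligible = pandas.DataFrame(
    [json.dumps([{"kind": "node", "labels": ["Person"]}])],
    columns=["graph_json"],
)

# DataFrame.to_json() yields the column-oriented dict-of-dicts that the
# visualizer receives, e.g. '{"graph_json":{"0":"[...]"}}'.
print(eligible.to_json())

# A multi-column result is ineligible and falls back to the plain DataFrame.
ineligible = pandas.DataFrame({"a": [1], "b": [2]})
```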
+ """ + # Delay spanner imports until this function is called to avoid making + # spanner-graph-notebook (and its dependencies) hard requirements for bigquery + # magics users, who don't need graph visualization. + # + # Note that these imports do not need to be in a try/except, as this function + # does not even get called unless spanner_graphs has already been confirmed + # to exist upstream. + from google.cloud.spanner_v1.types import StructType, Type, TypeCode + import networkx + from spanner_graphs.conversion import ( + columns_to_native_numpy, + prepare_data_for_graphing, + ) + + try: + column_name = None + column_value = None + for key, value in query_results.items(): + if column_name is None: + if not isinstance(key, str): + raise ValueError(f"Expected outer key to be str, got {type(key)}") + if not isinstance(value, dict): + raise ValueError( + f"Expected outer value to be dict, got {type(value)}" + ) + column_name = key + column_value = value + else: + # TODO: Implement multi-column support. + raise ValueError( + "Query has multiple columns - graph visualization not supported" + ) + if column_name is None or column_value is None: + raise ValueError( + "query result with no columns is not supported for graph visualization" + ) + + fields: List[StructType.Field] = [ + StructType.Field(name=column_name, type=Type(code=TypeCode.JSON)) + ] + data = {column_name: []} + rows = [] + for value_key, value_value in column_value.items(): + if not isinstance(value_key, str): + raise ValueError(f"Expected inner key to be str, got {type(value_key)}") + if not isinstance(value_value, str): + raise ValueError( + f"Expected inner value to be str, got {type(value_value)}" + ) + row_json = json.loads(value_value) + + if row_json is not None: + data[column_name].append(row_json) + rows.append([row_json]) + + d, ignored_columns = columns_to_native_numpy(data, fields) + + graph: networkx.classes.DiGraph = prepare_data_for_graphing( + incoming=d, schema_json=None + ) + + nodes = [] + for node_id, node in graph.nodes(data=True): + nodes.append(node) + + edges = [] + for from_id, to_id, edge in graph.edges(data=True): + edges.append(edge) + + return { + "response": { + "nodes": nodes, + "edges": edges, + "schema": None, + "rows": rows, + "query_result": data, + } + } + except Exception as e: + return {"error": getattr(e, "message", str(e))} + + +class GraphServer: + """ + Http server invoked by Javascript to obtain the query results for visualization. + + The server is invoked by Javascript, generated as part of + spanner_graphs.graph_visualization.generate_visualization_html(). + + This server is used only in Jupyter; in colab, google.colab.output.register_callback() + is used instead. + """ + + host = "http://localhost" + endpoints = { + "get_ping": "/get_ping", + "post_ping": "/post_ping", + "post_query": "/post_query", + } + + def __init__(self): + self.port = None + self.url = None + self._server = None + + def build_route(self, endpoint): + """ + Returns a url for connecting to the given endpoint. + Supported values include: + - "get_ping": sends a GET request to ping the server. + - "post_ping": sends a POST request to ping the server. + - "post_query": sends a POST request to obtain query results. 
+ """ + return f"{self.url}{endpoint}" + + def _start_server(self): + class ThreadedTCPServer(socketserver.TCPServer): + # Allow socket reuse to avoid "Address already in use" errors + allow_reuse_address = True + # Daemon threads automatically terminate when the main program exits + daemon_threads = True + + with ThreadedTCPServer(("", self.port), GraphServerHandler) as httpd: + self._server = httpd + self._server.serve_forever() + + def init(self): + """ + Starts the HTTP server. The server runs forever, until stop_server() is called. + """ + import portpicker + + self.port = portpicker.pick_unused_port() + self.url = f"{GraphServer.host}:{self.port}" + + server_thread = threading.Thread(target=self._start_server) + server_thread.start() + return server_thread + + def stop_server(self): + """ + Starts the HTTP server, if it is currently running. + """ + if self._server: + self._server.shutdown() + print("BigQuery-magics graph server shutting down...") + self._server = None + + +global graph_server +graph_server = GraphServer() + + +class GraphServerHandler(http.server.SimpleHTTPRequestHandler): + """ + Handles HTTP requests send to the graph server. + """ + + def log_message(self, format, *args): + pass + + def do_json_response(self, data): + self.send_response(200) + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header("Content-type", "application/json") + self.send_header("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS") + self.end_headers() + self.wfile.write(json.dumps(data).encode()) + + def do_message_response(self, message): + self.do_json_response({"message": message}) + + def do_data_response(self, data): + self.do_json_response(data) + + def parse_post_data(self): + content_length = int(self.headers["Content-Length"]) + post_data = self.rfile.read(content_length).decode("utf-8") + return json.loads(post_data) + + def handle_get_ping(self): + self.do_message_response("pong") + + def handle_post_ping(self): + data = self.parse_post_data() + self.do_data_response({"your_request": data}) + + def handle_post_query(self): + data = self.parse_post_data() + response = convert_graph_data(query_results=json.loads(data["params"])) + self.do_data_response(response) + + def do_GET(self): + assert self.path == GraphServer.endpoints["get_ping"] + self.handle_get_ping() + + def do_POST(self): + if self.path == GraphServer.endpoints["post_ping"]: + self.handle_post_ping() + else: + assert self.path == GraphServer.endpoints["post_query"] + self.handle_post_query() + + +atexit.register(graph_server.stop_server) diff --git a/noxfile.py b/noxfile.py index 7c02269..562dd6b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -66,7 +66,10 @@ "geopandas", ], "3.11": [], - "3.12": [], + "3.12": [ + "bqstorage", + "spanner-graph-notebook", + ], "3.13": [ "bqstorage", "bigframes", @@ -102,7 +105,10 @@ "geopandas", ], "3.11": [], - "3.12": [], + "3.12": [ + "bqstorage", + "spanner-graph-notebook", + ], "3.13": [ "bqstorage", "bigframes", diff --git a/owlbot.py b/owlbot.py index bbbc590..4347527 100644 --- a/owlbot.py +++ b/owlbot.py @@ -30,6 +30,7 @@ extras_storage = ["bqstorage"] extras_bf = ["bqstorage", "bigframes", "geopandas"] +extras_spanner = ["spanner-graph-notebook"] extras_by_python = { "3.7": extras_storage, "3.8": extras_storage, @@ -37,7 +38,7 @@ "3.10": extras_bf, # Use a middle version of Python to test when no extras are installed. 
"3.11": [], - "3.12": [], + "3.12": extras_storage + extras_spanner, "3.13": extras_bf, } templated_files = common.py_library( diff --git a/setup.py b/setup.py index d62b227..8311429 100644 --- a/setup.py +++ b/setup.py @@ -56,6 +56,11 @@ ], "bigframes": ["bigframes >= 1.17.0"], "geopandas": ["geopandas >= 1.0.1"], + "spanner-graph-notebook": [ + "spanner-graph-notebook >= 1.1.1, <=1.1.1", + "networkx", + "portpicker", + ], } all_extras = [] diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 70047ff..0ab9685 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -15,7 +15,9 @@ from concurrent import futures import contextlib import copy +import json import re +import sys from unittest import mock import warnings @@ -48,6 +50,11 @@ except ImportError: bpd = None +try: + import spanner_graphs.graph_visualization as graph_visualization +except ImportError: + graph_visualization = None + try: import geopandas as gpd except ImportError: @@ -463,6 +470,492 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): assert list(return_value) == list(result) # verify column names +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + graph_visualization is not None or bigquery_storage is None, + reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present", +) +def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("bigquery_magics") + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials) + + # Mock out the BigQuery Storage API. + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + bqstorage_instance_mock._transport = mock.Mock() + bqstorage_mock.return_value = bqstorage_instance_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock + ) + display_patch = mock.patch("IPython.display.display", autospec=True) + + sql = "SELECT 3 AS result" + result = pandas.DataFrame(["abc"], columns=["s"]) + run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), display_patch as display_mock: + run_query_mock.return_value = query_job_mock + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + + # Since the query result is not valid JSON, the visualizer should not be displayed. 
diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py
index 70047ff..0ab9685 100644
--- a/tests/unit/test_bigquery.py
+++ b/tests/unit/test_bigquery.py
@@ -15,7 +15,9 @@
 from concurrent import futures
 import contextlib
 import copy
+import json
 import re
+import sys
 from unittest import mock
 import warnings
@@ -48,6 +50,11 @@
 except ImportError:
     bpd = None
 
+try:
+    import spanner_graphs.graph_visualization as graph_visualization
+except ImportError:
+    graph_visualization = None
+
 try:
     import geopandas as gpd
 except ImportError:
@@ -463,6 +470,492 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch):
     assert list(return_value) == list(result)  # verify column names
 
 
+@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+    graph_visualization is not None or bigquery_storage is None,
+    reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present",
+)
+def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch):
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    mock_credentials = mock.create_autospec(
+        google.auth.credentials.Credentials, instance=True
+    )
+
+    # Set up the context with monkeypatch so that it's reset for subsequent
+    # tests.
+    monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials)
+
+    # Mock out the BigQuery Storage API.
+    bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+    bqstorage_instance_mock = mock.create_autospec(
+        bigquery_storage.BigQueryReadClient, instance=True
+    )
+    bqstorage_instance_mock._transport = mock.Mock()
+    bqstorage_mock.return_value = bqstorage_instance_mock
+    bqstorage_client_patch = mock.patch(
+        "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock
+    )
+    display_patch = mock.patch("IPython.display.display", autospec=True)
+
+    sql = "SELECT 'abc' AS s"
+    result = pandas.DataFrame(["abc"], columns=["s"])
+    run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
+    query_job_mock = mock.create_autospec(
+        google.cloud.bigquery.job.QueryJob, instance=True
+    )
+    query_job_mock.to_dataframe.return_value = result
+
+    with run_query_patch as run_query_mock, (
+        bqstorage_client_patch
+    ), display_patch as display_mock:
+        run_query_mock.return_value = query_job_mock
+        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+
+    # Since the query result is not valid JSON, the visualizer should not be
+    # displayed, and the missing spanner-graph-notebook package is never imported.
+    display_mock.assert_not_called()
+
+    assert bqstorage_mock.called  # BQ storage client was used
+    assert isinstance(return_value, pandas.DataFrame)
+    assert len(return_value) == len(result)  # verify row count
+    assert list(return_value) == list(result)  # verify column names
+
+
+@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+    graph_visualization is None or bigquery_storage is None,
+    reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
+)
+def test_bigquery_graph_int_result(monkeypatch):
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    mock_credentials = mock.create_autospec(
+        google.auth.credentials.Credentials, instance=True
+    )
+
+    # Set up the context with monkeypatch so that it's reset for subsequent
+    # tests.
+    monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials)
+
+    # Mock out the BigQuery Storage API.
+    bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+    bqstorage_instance_mock = mock.create_autospec(
+        bigquery_storage.BigQueryReadClient, instance=True
+    )
+    bqstorage_instance_mock._transport = mock.Mock()
+    bqstorage_mock.return_value = bqstorage_instance_mock
+    bqstorage_client_patch = mock.patch(
+        "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock
+    )
+    display_patch = mock.patch("IPython.display.display", autospec=True)
+
+    sql = "SELECT 3 AS result"
+    result = pandas.DataFrame([3], columns=["result"])
+    run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
+    query_job_mock = mock.create_autospec(
+        google.cloud.bigquery.job.QueryJob, instance=True
+    )
+    query_job_mock.to_dataframe.return_value = result
+
+    with run_query_patch as run_query_mock, (
+        bqstorage_client_patch
+    ), display_patch as display_mock:
+        run_query_mock.return_value = query_job_mock
+        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+
+    # Since an integer query result is not valid JSON, the visualizer should not
+    # be displayed.
+    display_mock.assert_not_called()
+
+    assert bqstorage_mock.called  # BQ storage client was used
+    assert isinstance(return_value, pandas.DataFrame)
+    assert len(return_value) == len(result)  # verify row count
+    assert list(return_value) == list(result)  # verify column names
+
+
+@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+    graph_visualization is None or bigquery_storage is None,
+    reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
+)
+def test_bigquery_graph_str_result(monkeypatch):
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    mock_credentials = mock.create_autospec(
+        google.auth.credentials.Credentials, instance=True
+    )
+
+    # Set up the context with monkeypatch so that it's reset for subsequent
+    # tests.
+    monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials)
+
+    # Mock out the BigQuery Storage API.
+    bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+    bqstorage_instance_mock = mock.create_autospec(
+        bigquery_storage.BigQueryReadClient, instance=True
+    )
+    bqstorage_instance_mock._transport = mock.Mock()
+    bqstorage_mock.return_value = bqstorage_instance_mock
+    bqstorage_client_patch = mock.patch(
+        "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock
+    )
+    display_patch = mock.patch("IPython.display.display", autospec=True)
+
+    sql = "SELECT 'abc' AS s"
+    result = pandas.DataFrame(["abc"], columns=["s"])
+    run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
+    query_job_mock = mock.create_autospec(
+        google.cloud.bigquery.job.QueryJob, instance=True
+    )
+    query_job_mock.to_dataframe.return_value = result
+
+    with run_query_patch as run_query_mock, (
+        bqstorage_client_patch
+    ), display_patch as display_mock:
+        run_query_mock.return_value = query_job_mock
+        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+
+    # Since the query result is not valid JSON, the visualizer should not be
+    # displayed.
+    display_mock.assert_not_called()
+
+    assert bqstorage_mock.called  # BQ storage client was used
+    assert isinstance(return_value, pandas.DataFrame)
+    assert len(return_value) == len(result)  # verify row count
+    assert list(return_value) == list(result)  # verify column names
+
+
+@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+    graph_visualization is None or bigquery_storage is None,
+    reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
+)
+def test_bigquery_graph_json_json_result(monkeypatch):
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    mock_credentials = mock.create_autospec(
+        google.auth.credentials.Credentials, instance=True
+    )
+
+    # Set up the context with monkeypatch so that it's reset for subsequent
+    # tests.
+    monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials)
+
+    # Mock out the BigQuery Storage API.
+    bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+    bqstorage_instance_mock = mock.create_autospec(
+        bigquery_storage.BigQueryReadClient, instance=True
+    )
+    bqstorage_instance_mock._transport = mock.Mock()
+    bqstorage_mock.return_value = bqstorage_instance_mock
+    bqstorage_client_patch = mock.patch(
+        "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock
+    )
+    display_patch = mock.patch("IPython.display.display", autospec=True)
+
+    sql = "SELECT graph_json, graph_json AS graph_json2 FROM t"
+    graph_json_rows = [
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI=","kind":"node","labels":["Person"],"properties":{"birthday":"1991-12-21T08:00:00Z","city":"Adelaide","country":"Australia","id":1,"name":"Alex"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==","kind":"edge","labels":["Owns"],"properties":{"account_id":7,"create_time":"2020-01-10T14:22:20.222Z","id":1},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-10T14:22:20.222Z","id":7,"is_blocked":false,"nick_name":"Vacation Fund"}}]
+        """,
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY=","kind":"node","labels":["Person"],"properties":{"birthday":"1986-12-07T08:00:00Z","city":"Kollam","country":"India","id":3,"name":"Lee"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==","kind":"edge","labels":["Owns"],"properties":{"account_id":16,"create_time":"2020-02-18T13:44:20.655Z","id":3},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-28T01:55:09.206Z","id":16,"is_blocked":true,"nick_name":"Vacation Fund"}}]
+        """,
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ=","kind":"node","labels":["Person"],"properties":{"birthday":"1980-10-31T08:00:00Z","city":"Moravia","country":"Czech_Republic","id":2,"name":"Dana"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEEkSiZRmluR3JhcGguUGVyc29uAHiRBJlGaW5HcmFwaC5BY2NvdW50AHiRKA==","kind":"edge","labels":["Owns"],"properties":{"account_id":20,"create_time":"2020-01-28T01:55:09.206Z","id":2},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","kind":"node","labels":["Account"],"properties":{"create_time":"2020-02-18T13:44:20.655Z","id":20,"is_blocked":false,"nick_name":"Rainy Day Fund"}}]
+        """,
+    ]
+    result = pandas.DataFrame(
+        {"graph_json": graph_json_rows, "graph_json2": graph_json_rows},
+        columns=["graph_json", "graph_json2"],
+    )
+    run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
+    query_job_mock = mock.create_autospec(
+        google.cloud.bigquery.job.QueryJob, instance=True
+    )
+    query_job_mock.to_dataframe.return_value = result
+
+    with run_query_patch as run_query_mock, (
+        bqstorage_client_patch
+    ), display_patch as display_mock:
+        run_query_mock.return_value = query_job_mock
+        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+
+    # As we only support visualization of single-column queries, the visualizer
+    # should not be launched.
+    display_mock.assert_not_called()
+
+    assert bqstorage_mock.called  # BQ storage client was used
+    assert isinstance(return_value, pandas.DataFrame)
+    assert len(return_value) == len(result)  # verify row count
+    assert list(return_value) == list(result)  # verify column names
+
+
+@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+    graph_visualization is None or bigquery_storage is None,
+    reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
+)
+def test_bigquery_graph_json_result(monkeypatch):
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    mock_credentials = mock.create_autospec(
+        google.auth.credentials.Credentials, instance=True
+    )
+
+    # Set up the context with monkeypatch so that it's reset for subsequent
+    # tests.
+    monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials)
+
+    # Mock out the BigQuery Storage API.
+    bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+    bqstorage_instance_mock = mock.create_autospec(
+        bigquery_storage.BigQueryReadClient, instance=True
+    )
+    bqstorage_instance_mock._transport = mock.Mock()
+    bqstorage_mock.return_value = bqstorage_instance_mock
+    bqstorage_client_patch = mock.patch(
+        "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock
+    )
+
+    sql = "SELECT graph_json FROM t"
+    graph_json_rows = [
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI=","kind":"node","labels":["Person"],"properties":{"birthday":"1991-12-21T08:00:00Z","city":"Adelaide","country":"Australia","id":1,"name":"Alex"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==","kind":"edge","labels":["Owns"],"properties":{"account_id":7,"create_time":"2020-01-10T14:22:20.222Z","id":1},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-10T14:22:20.222Z","id":7,"is_blocked":false,"nick_name":"Vacation Fund"}}]
+        """,
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY=","kind":"node","labels":["Person"],"properties":{"birthday":"1986-12-07T08:00:00Z","city":"Kollam","country":"India","id":3,"name":"Lee"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==","kind":"edge","labels":["Owns"],"properties":{"account_id":16,"create_time":"2020-02-18T13:44:20.655Z","id":3},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-28T01:55:09.206Z","id":16,"is_blocked":true,"nick_name":"Vacation Fund"}}]
+        """,
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ=","kind":"node","labels":["Person"],"properties":{"birthday":"1980-10-31T08:00:00Z","city":"Moravia","country":"Czech_Republic","id":2,"name":"Dana"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEEkSiZRmluR3JhcGguUGVyc29uAHiRBJlGaW5HcmFwaC5BY2NvdW50AHiRKA==","kind":"edge","labels":["Owns"],"properties":{"account_id":20,"create_time":"2020-01-28T01:55:09.206Z","id":2},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","kind":"node","labels":["Account"],"properties":{"create_time":"2020-02-18T13:44:20.655Z","id":20,"is_blocked":false,"nick_name":"Rainy Day Fund"}}]
+        """,
+    ]
+    result = pandas.DataFrame(graph_json_rows, columns=["graph_json"])
+    run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
+    graph_server_init_patch = mock.patch(
+        "bigquery_magics.graph_server.GraphServer.init", autospec=True
+    )
+    display_patch = mock.patch("IPython.display.display", autospec=True)
+    query_job_mock = mock.create_autospec(
+        google.cloud.bigquery.job.QueryJob, instance=True
+    )
+    query_job_mock.to_dataframe.return_value = result
+
+    with run_query_patch as run_query_mock, (
+        bqstorage_client_patch
+    ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
+        graph_server_init_mock.return_value = mock.Mock()
+        graph_server_init_mock.return_value.is_alive = mock.Mock()
+        graph_server_init_mock.return_value.is_alive.return_value = True
+        run_query_mock.return_value = query_job_mock
+
+        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+
+        assert len(display_mock.call_args_list) == 1
+        assert len(display_mock.call_args_list[0]) == 2
+
+        # Sanity check that the HTML content looks like graph visualization.
+        # Minimal check to allow Spanner to change its implementation without
+        # breaking this test.
+        html_content = display_mock.call_args_list[0][0][0].data
+        assert "<script>" in html_content
+        # Verify that the query results are embedded into the HTML, allowing them
+        # to be visualized. Due to escaping, it is not possible to check for
+        # graph_json_rows exactly, so we check for a few sentinel strings within
+        # the query results, instead.
+        assert (
+            "mUZpbkdyYXBoLlBlcnNvbgB4kQI=" in html_content
+        )  # identifier in 1st row of query result
+        assert (
+            "mUZpbkdyYXBoLlBlcnNvbgB4kQY=" in html_content
+        )  # identifier in 2nd row of query result
+        assert (
+            "mUZpbkdyYXBoLlBlcnNvbgB4kQQ=" in html_content
+        )  # identifier in 3rd row of query result
+
+        # Make sure we can run a second graph query, after the graph server is
+        # already running.
+        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+
+        # Apply the same sanity checks to the HTML produced by the second
+        # invocation.
+        html_content = display_mock.call_args_list[1][0][0].data
+        assert "<script>" in html_content
+        assert (
+            "mUZpbkdyYXBoLlBlcnNvbgB4kQI=" in html_content
+        )  # identifier in 1st row of query result
+        assert (
+            "mUZpbkdyYXBoLlBlcnNvbgB4kQY=" in html_content
+        )  # identifier in 2nd row of query result
+        assert (
+            "mUZpbkdyYXBoLlBlcnNvbgB4kQQ=" in html_content
+        )  # identifier in 3rd row of query result
+
+    assert bqstorage_mock.called  # BQ storage client was used
+    assert isinstance(return_value, pandas.DataFrame)
+    assert len(return_value) == len(result)  # verify row count
+    assert list(return_value) == list(result)  # verify column names
+
+
+@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+    graph_visualization is None or bigquery_storage is None,
+    reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
+)
+def test_bigquery_graph_colab(monkeypatch):
+    # Mock the colab module so the code under test uses colab.register_callback(),
+    # rather than GraphServer. monkeypatch restores sys.modules after the test.
+    monkeypatch.setitem(sys.modules, "google.colab", mock.Mock())
+
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    mock_credentials = mock.create_autospec(
+        google.auth.credentials.Credentials, instance=True
+    )
+
+    # Set up the context with monkeypatch so that it's reset for subsequent
+    # tests.
+    monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials)
+
+    # Mock out the BigQuery Storage API.
+    bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+    bqstorage_instance_mock = mock.create_autospec(
+        bigquery_storage.BigQueryReadClient, instance=True
+    )
+    bqstorage_instance_mock._transport = mock.Mock()
+    bqstorage_mock.return_value = bqstorage_instance_mock
+    bqstorage_client_patch = mock.patch(
+        "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock
+    )
+
+    sql = "SELECT graph_json FROM t"
+    graph_json_rows = [
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI=","kind":"node","labels":["Person"],"properties":{"birthday":"1991-12-21T08:00:00Z","city":"Adelaide","country":"Australia","id":1,"name":"Alex"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==","kind":"edge","labels":["Owns"],"properties":{"account_id":7,"create_time":"2020-01-10T14:22:20.222Z","id":1},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-10T14:22:20.222Z","id":7,"is_blocked":false,"nick_name":"Vacation Fund"}}]
+        """,
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY=","kind":"node","labels":["Person"],"properties":{"birthday":"1986-12-07T08:00:00Z","city":"Kollam","country":"India","id":3,"name":"Lee"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==","kind":"edge","labels":["Owns"],"properties":{"account_id":16,"create_time":"2020-02-18T13:44:20.655Z","id":3},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-28T01:55:09.206Z","id":16,"is_blocked":true,"nick_name":"Vacation Fund"}}]
+        """,
+        """
+        [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ=","kind":"node","labels":["Person"],"properties":{"birthday":"1980-10-31T08:00:00Z","city":"Moravia","country":"Czech_Republic","id":2,"name":"Dana"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEEkSiZRmluR3JhcGguUGVyc29uAHiRBJlGaW5HcmFwaC5BY2NvdW50AHiRKA==","kind":"edge","labels":["Owns"],"properties":{"account_id":20,"create_time":"2020-01-28T01:55:09.206Z","id":2},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","kind":"node","labels":["Account"],"properties":{"create_time":"2020-02-18T13:44:20.655Z","id":20,"is_blocked":false,"nick_name":"Rainy Day Fund"}}]
+        """,
+    ]
+    result = pandas.DataFrame(graph_json_rows, columns=["graph_json"])
+    run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
+    graph_server_init_patch = mock.patch(
+        "bigquery_magics.graph_server.GraphServer.init", autospec=True
+    )
+    display_patch = mock.patch("IPython.display.display", autospec=True)
+    query_job_mock = mock.create_autospec(
+        google.cloud.bigquery.job.QueryJob, instance=True
+    )
+    query_job_mock.to_dataframe.return_value = result
+
+    with run_query_patch as run_query_mock, (
+        bqstorage_client_patch
+    ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
+        run_query_mock.return_value = query_job_mock
+        graph_server_init_mock.return_value = None
+        return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+
+        assert len(display_mock.call_args_list) == 1
+        assert len(display_mock.call_args_list[0]) == 2
+
+        # Sanity check that the HTML content looks like graph visualization.
+        # Minimal check to allow Spanner to change its implementation without
+        # breaking this test.
+        html_content = display_mock.call_args_list[0][0][0].data
+        assert "<script>" in html_content
+        # Verify that the query results are embedded into the HTML, allowing them
+        # to be visualized. Due to escaping, it is not possible to check for
+        # graph_json_rows exactly, so we check for a few sentinel strings within
+        # the query results, instead.
+        assert (
+            "mUZpbkdyYXBoLlBlcnNvbgB4kQI=" in html_content
+        )  # identifier in 1st row of query result
+        assert (
+            "mUZpbkdyYXBoLlBlcnNvbgB4kQY=" in html_content
+        )  # identifier in 2nd row of query result
+        assert (
+            "mUZpbkdyYXBoLlBlcnNvbgB4kQQ=" in html_content
+        )  # identifier in 3rd row of query result
+
+    # Make sure we actually used the colab path, not the GraphServer path.
+    assert sys.modules["google.colab"].output.register_callback.called
+    assert not graph_server_init_mock.called
+
+    assert bqstorage_mock.called  # BQ storage client was used
+    assert isinstance(return_value, pandas.DataFrame)
+    assert len(return_value) == len(result)  # verify row count
+    assert list(return_value) == list(result)  # verify column names
+
+
+@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+    graph_visualization is None or bigquery_storage is None,
+    reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
+)
+def test_colab_callback():
+    result = bigquery_magics.bigquery._colab_callback(
+        "query", json.dumps({"result": {}})
+    )
+    assert result.data == {
+        "response": {
+            "edges": [],
+            "nodes": [],
+            "query_result": {"result": []},
+            "rows": [],
+            "schema": None,
+        }
+    }
+
+
+@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+    graph_visualization is not None or bigquery_storage is None,
+    reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present",
+)
+def test_bigquery_graph_missing_spanner_deps(monkeypatch):
+    ip = IPython.get_ipython()
+    ip.extension_manager.load_extension("bigquery_magics")
+    mock_credentials = mock.create_autospec(
+        google.auth.credentials.Credentials, instance=True
+    )
+
+    # Set up the context with monkeypatch so that it's reset for subsequent
+    # tests.
+    monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials)
+
+    # Mock out the BigQuery Storage API.
+    bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+    bqstorage_instance_mock = mock.create_autospec(
+        bigquery_storage.BigQueryReadClient, instance=True
+    )
+    bqstorage_instance_mock._transport = mock.Mock()
+    bqstorage_mock.return_value = bqstorage_instance_mock
+    bqstorage_client_patch = mock.patch(
+        "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock
+    )
+    sql = "SELECT graph_json FROM t"
+    result = pandas.DataFrame([], columns=["graph_json"])
+    run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
+    graph_server_init_patch = mock.patch(
+        "bigquery_magics.graph_server.GraphServer.init", autospec=True
+    )
+    display_patch = mock.patch("IPython.display.display", autospec=True)
+    query_job_mock = mock.create_autospec(
+        google.cloud.bigquery.job.QueryJob, instance=True
+    )
+    query_job_mock.to_dataframe.return_value = result
+
+    with run_query_patch as run_query_mock, (
+        bqstorage_client_patch
+    ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
+        run_query_mock.return_value = query_job_mock
+        graph_server_init_mock.return_value = None
+        with pytest.raises(ImportError):
+            ip.run_cell_magic("bigquery", "--graph", sql)
+        display_mock.assert_not_called()
+
+
 @pytest.mark.usefixtures("ipython_interactive")
 def test_bigquery_magic_default_connection_user_agent():
     ip = IPython.get_ipython()
@@ -545,7 +1038,7 @@ def test_bigquery_magic_does_not_clear_display_in_verbose_mode():
     )
 
     clear_patch = mock.patch(
-        "bigquery_magics.bigquery.display.clear_output",
+        "bigquery_magics.bigquery.IPython.display.clear_output",
         autospec=True,
     )
     run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
@@ -564,7 +1057,7 @@ def test_bigquery_magic_clears_display_in_non_verbose_mode():
     )
 
     clear_patch = mock.patch(
-        "bigquery_magics.bigquery.display.clear_output",
+        "bigquery_magics.bigquery.IPython.display.clear_output",
         autospec=True,
     )
    run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py
new file mode 100644
index 0000000..d4100c4
--- /dev/null
+++ b/tests/unit/test_graph_server.py
@@ -0,0 +1,419 @@
+# Copyright 2018 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import json
+import unittest
+
+import pytest
+import requests
+
+try:
+    import spanner_graphs.graph_visualization as graph_visualization
+except ImportError:
+    graph_visualization = None
+
+import bigquery_magics.graph_server as graph_server
+
+alex_properties = {
+    "birthday": "1991-12-21T08:00:00Z",
+    "id": 1,
+    "city": "Adelaide",
+    "country": "Australia",
+    "name": "Alex",
+}
+
+alex_account_properties = {
+    "create_time": "2020-01-10T14:22:20.222Z",
+    "id": 7,
+    "is_blocked": False,
+    "nick_name": "Vacation Fund",
+}
+
+alex_owns_account_edge_properties = {
+    "account_id": 7,
+    "create_time": "2020-01-10T14:22:20.222Z",
+    "id": 1,
+}
+
+row_alex_owns_account = [
+    {
+        "identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQI=",
+        "kind": "node",
+        "labels": ["Person"],
+        "properties": alex_properties,
+    },
+    {
+        "destination_node_identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEO",
+        "identifier": "mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==",
+        "kind": "edge",
+        "labels": ["Owns"],
+        "properties": alex_owns_account_edge_properties,
+        "source_node_identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQI=",
+    },
+    {
+        "identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEO",
+        "kind": "node",
+        "labels": ["Account"],
+        "properties": alex_account_properties,
+    },
+]
+
+lee_properties = {
+    "birthday": "1986-12-07T08:00:00Z",
+    "city": "Kollam",
+    "country": "India",
+    "id": 3,
+    "name": "Lee",
+}
+
+lee_account_properties = {
+    "create_time": "2020-01-28T01:55:09.206Z",
+    "id": 16,
+    "is_blocked": True,
+    "nick_name": "Vacation Fund",
+}
+
+lee_owns_account_edge_properties = {
+    "account_id": 16,
+    "create_time": "2020-02-18T13:44:20.655Z",
+    "id": 3,
+}
+
+row_lee_owns_account = [
+    {
+        "identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQY=",
+        "kind": "node",
+        "labels": ["Person"],
+        "properties": lee_properties,
+    },
+    {
+        "destination_node_identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEg",
+        "identifier": "mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==",
+        "kind": "edge",
+        "labels": ["Owns"],
+        "properties": lee_owns_account_edge_properties,
+        "source_node_identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQY=",
+    },
+    {
+        "identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEg",
+        "kind": "node",
+        "labels": ["Account"],
+        "properties": lee_account_properties,
+    },
+]
+
+
+def _validate_nodes_and_edges(result):
+    for edge in result["response"]["edges"]:
+        assert "id" in edge
+        assert edge["label"] == "Owns"
+        assert "source" in edge
+        assert "target" in edge
+        assert "properties" in edge
+
+    for node in result["response"]["nodes"]:
+        assert "id" in node
+        assert "key_property_names" in node
+        assert node["label"] in ("Account", "Person")
+        assert "properties" in node
+        assert "value" in node
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_one_column_no_rows():
+    result = graph_server.convert_graph_data({"result": {}})
+    assert result == {
+        "response": {
+            "edges": [],
+            "nodes": [],
+            "query_result": {"result": []},
+            "rows": [],
+            "schema": None,
+        }
+    }
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_one_column_one_row_one_column():
+    result = graph_server.convert_graph_data(
+        {
+            "result": {
+                "0": json.dumps(row_alex_owns_account),
+            }
+        }
+    )
+
+    assert len(result["response"]["nodes"]) == 2
+    assert len(result["response"]["edges"]) == 1
+
+    _validate_nodes_and_edges(result)
+
+    assert result["response"]["query_result"] == {"result": [row_alex_owns_account]}
+    assert result["response"]["rows"] == [[row_alex_owns_account]]
+    assert result["response"]["schema"] is None
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_one_column_one_row_one_column_null_json():
+    result = graph_server.convert_graph_data(
+        {
+            "result": {
+                "0": json.dumps(None),
+            }
+        }
+    )
+
+    assert result == {
+        "response": {
+            "edges": [],
+            "nodes": [],
+            "query_result": {"result": []},
+            "rows": [[None]],
+            "schema": None,
+        },
+    }
+
+    _validate_nodes_and_edges(result)
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_one_column_two_rows():
+    result = graph_server.convert_graph_data(
+        {
+            "result": {
+                "0": json.dumps(row_alex_owns_account),
+                "1": json.dumps(row_lee_owns_account),
+            }
+        }
+    )
+
+    assert len(result["response"]["nodes"]) == 4
+    assert len(result["response"]["edges"]) == 2
+
+    _validate_nodes_and_edges(result)
+
+    assert result["response"]["query_result"] == {
+        "result": [row_alex_owns_account, row_lee_owns_account]
+    }
+    assert result["response"]["rows"] == [
+        [row_alex_owns_account],
+        [row_lee_owns_account],
+    ]
+    assert result["response"]["schema"] is None
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_nongraph_json():
+    # If we have valid JSON that doesn't represent a graph, we don't expect to
+    # get nodes and edges, but we should at least have row data, allowing the
+    # tabular view to work.
+    result = graph_server.convert_graph_data(
+        {
+            "result": {
+                "0": json.dumps({"foo": 1, "bar": 2}),
+            }
+        }
+    )
+
+    assert len(result["response"]["nodes"]) == 0
+    assert len(result["response"]["edges"]) == 0
+
+    assert result["response"]["query_result"] == {"result": [{"foo": 1, "bar": 2}]}
+    assert result["response"]["rows"] == [[{"foo": 1, "bar": 2}]]
+    assert result["response"]["schema"] is None
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_outer_key_not_string():
+    result = graph_server.convert_graph_data(
+        {
+            0: {
+                "0": json.dumps({"foo": 1, "bar": 2}),
+            }
+        }
+    )
+    assert result == {"error": "Expected outer key to be str, got <class 'int'>"}
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_outer_value_not_dict():
+    result = graph_server.convert_graph_data({"result": 0})
+    assert result == {"error": "Expected outer value to be dict, got <class 'int'>"}
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_inner_key_not_string():
+    result = graph_server.convert_graph_data(
+        {
+            "result": {
+                0: json.dumps({"foo": 1, "bar": 2}),
+            }
+        }
+    )
+    assert result == {"error": "Expected inner key to be str, got <class 'int'>"}
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_inner_value_not_string():
+    result = graph_server.convert_graph_data(
+        {
+            "result": {
+                "0": 1,
+            }
+        }
+    )
+    assert result == {"error": "Expected inner value to be str, got <class 'int'>"}
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_one_column_one_row_two_columns():
+    result = graph_server.convert_graph_data(
+        {
+            "result1": {
+                "0": json.dumps(row_alex_owns_account),
+            },
+            "result2": {
+                "0": json.dumps(row_alex_owns_account),
+            },
+        }
+    )
+    assert result == {
+        "error": "Query has multiple columns - graph visualization not supported"
+    }
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_empty_dict():
+    result = graph_server.convert_graph_data({})
+    assert result == {
+        "error": "query result with no columns is not supported for graph visualization"
+    }
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_wrong_row_index():
+    result0 = graph_server.convert_graph_data(
+        {
+            "result": {
+                "0": json.dumps(row_alex_owns_account),
+            }
+        }
+    )
+
+    # Changing the row index should not impact the result.
+    result1 = graph_server.convert_graph_data(
+        {
+            "result": {
+                "1": json.dumps(row_alex_owns_account),
+            }
+        }
+    )
+
+    assert result1 == result0
+
+
+class TestGraphServer(unittest.TestCase):
+    def setUp(self):
+        if graph_visualization is not None:  # pragma: NO COVER
+            self.server_thread = graph_server.graph_server.init()
+
+    def tearDown(self):
+        if graph_visualization is not None:  # pragma: NO COVER
+            graph_server.graph_server.stop_server()  # Stop the server after each test
+            self.server_thread.join()  # Wait for the thread to finish
+
+    @pytest.mark.skipif(
+        graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+    )
+    def test_get_ping(self):
+        self.assertTrue(self.server_thread.is_alive())
+
+        route = graph_server.graph_server.build_route(
+            graph_server.GraphServer.endpoints["get_ping"]
+        )
+        response = requests.get(route)
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json(), {"message": "pong"})
+
+    @pytest.mark.skipif(
+        graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+    )
+    def test_post_ping(self):
+        self.assertTrue(self.server_thread.is_alive())
+        route = graph_server.graph_server.build_route(
+            graph_server.GraphServer.endpoints["post_ping"]
+        )
+        response = requests.post(route, json={"data": "ping"})
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(response.json(), {"your_request": {"data": "ping"}})
+
+    @pytest.mark.skipif(
+        graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+    )
+    def test_post_query(self):
+        self.assertTrue(self.server_thread.is_alive())
+        route = graph_server.graph_server.build_route(
+            graph_server.GraphServer.endpoints["post_query"]
+        )
+
+        data = {
+            "result": {
+                "0": json.dumps(row_alex_owns_account),
+            }
+        }
+        response = requests.post(route, json={"params": json.dumps(data)})
+        self.assertEqual(response.status_code, 200)
+        response_data = response.json()["response"]
+
+        self.assertEqual(len(response_data["nodes"]), 2)
+        self.assertEqual(len(response_data["edges"]), 1)
+
+        _validate_nodes_and_edges(response.json())
+
+        self.assertEqual(
+            response_data["query_result"], {"result": [row_alex_owns_account]}
+        )
+        self.assertEqual(response_data["rows"], [[row_alex_owns_account]])
+        self.assertIsNone(response_data["schema"])
+
+
+def test_stop_server_never_started():
+    graph_server.graph_server.stop_server()
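For context, a sketch of how the new flag is exercised end to end from a notebook, mirroring how the tests above drive it (hypothetical dataset and table names; assumes `pip install 'bigquery-magics[spanner-graph-notebook]'` has been run):

```python
from IPython import get_ipython

ip = get_ipython()
ip.extension_manager.load_extension("bigquery_magics")

# Equivalent to running a `%%bigquery --graph` cell. Each row of the
# hypothetical `graph_json` column must hold a JSON array of node/edge
# elements, as in the test fixtures above; otherwise the plain DataFrame
# is returned without launching the visualizer.
ip.run_cell_magic(
    "bigquery", "--graph", "SELECT graph_json FROM my_dataset.my_graph_table"
)
```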