From 6e79cd7460440e0cc007870d99173953b48e9a91 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Fri, 7 Feb 2025 15:08:42 -0800 Subject: [PATCH 01/44] Implement graph visualization with the --graph argument. --- bigquery_magics/bigquery.py | 77 ++++++++++- bigquery_magics/graph_server.py | 230 ++++++++++++++++++++++++++++++++ setup.py | 2 + tests/unit/test_bigquery.py | 226 +++++++++++++++++++++++++++++++ tests/unit/test_graph_server.py | 211 +++++++++++++++++++++++++++++ 5 files changed, 745 insertions(+), 1 deletion(-) create mode 100644 bigquery_magics/graph_server.py create mode 100644 tests/unit/test_graph_server.py diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index 5e99883..4a7339c 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -53,6 +53,8 @@ amount of time for the query to complete will not be cleared after the query is finished. By default, this information will be displayed but will be cleared after the query is finished. + * ``--graph`` (Optional[line argument]): + Visualizes the query result as a graph. * ``--params `` (Optional[line argument]): If present, the argument following the ``--params`` flag must be either: @@ -87,6 +89,8 @@ import ast from concurrent import futures import copy +import json +import pandas import re import sys import time @@ -95,6 +99,7 @@ import IPython # type: ignore from IPython import display # type: ignore +from IPython.core.display import HTML, JSON from IPython.core import magic_arguments # type: ignore from IPython.core.getipython import get_ipython from google.api_core import client_info @@ -110,6 +115,9 @@ import bigquery_magics.config import bigquery_magics.line_arg_parser.exceptions import bigquery_magics.version +from bigquery_magics.graph_server import GraphServer, convert_graph_data + +from threading import Thread try: from google.cloud import bigquery_storage # type: ignore @@ -371,6 +379,13 @@ def _create_dataset_if_necessary(client, dataset_id): "Defaults to engine set in the query setting in console." ), ) +@magic_arguments.argument( + "--graph", + action="store_true", + default=False, + help=("Visualizes the query results as a graph"), + +) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -405,7 +420,7 @@ def _cell_magic(line, query): def _parse_magic_args(line: str) -> Tuple[List[Any], Any]: # The built-in parser does not recognize Python structures such as dicts, thus - # we extract the "--params" option and inteprpret it separately. + # we extract the "--params" option and interpret it separately. try: params_option_value, rest_of_args = _split_args_line(line) @@ -566,6 +581,64 @@ def _handle_result(result, args): return result +def _is_colab() -> bool: + """Check if code is running in Google Colab""" + try: + import google.colab + return True + except ImportError: + return False + +def _colab_callback(query: str, params: str): + return JSON(convert_graph_data(query_results=json.loads(params))) + + +singleton_server_thread: Thread = None + +def _add_graph_widget(query_result): + try: + from spanner_graphs.graph_visualization import generate_visualization_html + except ImportError as err: + customized_error = ImportError( + "Use of --graph requires the spanner-graph package to be installed." + ) + raise customized_error from err + + # In Jupyter, create an http server to be invoked from the Javascript to populate the + # visualizer widget. 
In colab, we are not able to create an http server on a + # background thread, so we use a special colab-specific api to register a callback, + # to be invoked from Javascript. + if _is_colab(): + from google.colab import output + output.register_callback('graph_visualization.Query', _colab_callback) + else: + global singleton_server_thread + alive = singleton_server_thread and singleton_server_thread.is_alive() + if not alive: + singleton_server_thread = GraphServer.init() + + # Create html to invoke the graph server + html_content = generate_visualization_html( + query='dummy query', + port=GraphServer.port, + params=query_result.to_json().replace('\\', '\\\\').replace('"', '\\"') + ) + display.display(HTML(html_content)) + +def _is_valid_json(s: str): + try: + json.loads(s) + return True + except (json.JSONDecodeError, TypeError): + return False + +def _supports_graph_widget(query_result: pandas.DataFrame): + num_rows, num_columns = query_result.shape + if num_columns != 1: + return False + return query_result[query_result.columns[0]].apply(_is_valid_json).all() + + def _make_bq_query( query: str, args: Any, @@ -645,6 +718,8 @@ def _make_bq_query( progress_bar_type=progress_bar, ) + if args.graph and _supports_graph_widget(result): + _add_graph_widget(result) return _handle_result(result, args) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py new file mode 100644 index 0000000..b8d7ec9 --- /dev/null +++ b/bigquery_magics/graph_server.py @@ -0,0 +1,230 @@ +# Copyright 2024 Google LLC + +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# https://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import http.server +import socketserver +import json +import threading +import requests +import portpicker +from networkx.classes import DiGraph +from typing import List +import atexit + + +def convert_graph_data(query_results: dict[str, dict[str, str]]): + """ + Converts graph data to the form expected by the visualization framework. + + Receives graph data as a dictionary, produced by converting the underlying + DataFrame representing the query results into JSON, then into a + python dictionary. Converts it into a form expected by the visualization + framework. + + Args: + query_results: + A dictionary with one key/value pair per column. For each column: + - The key is the name of the column (str) + - The value is another dictionary with one key/value pair per row. + Row each row: + - The key is a string that specifies the integer index of the row + (e.g. '0', '1', '2') + - The value is a JSON string containing the result of the query + for the current row/column. (Note: We only support graph + visualization for columns of type JSON). + """ + # Delay spanner imports until this function is called to avoid making + # # spanner_graphs (and its dependencies) hard requirements for bigquery + # magics users, who don't need graph visualization. + # + # Note that these imports do not need to be in a try/except, as this function + # does not even get called unless spanner_graphs has already been confirmed + # to exist upstream. 
+ from google.cloud.spanner_v1.types import StructType, TypeCode, Type + from spanner_graphs.conversion import prepare_data_for_graphing, columns_to_native_numpy + + try: + column_name = None + column_value = None + for key, value in query_results.items(): + if column_name == None: + if not isinstance(key, str): + raise ValueError(f'Expected key to be str, got {type(key)}') + if not isinstance(value, dict): + raise ValueError(f'Expected value to be dict, got {type(value)}') + column_name = key + column_value = value + else: + raise ValueError('Query has multiple columns - graph visualization not supported') + if column_name is None or column_value is None: + raise ValueError('Unable to get column name or value - how is this possible???') + + fields: List[StructType.Field] = [StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))] + data = {column_name : []} + rows = [] + for value_key, value_value in column_value.items(): + if not isinstance(value_key, str): + raise ValueError(f'Expected key to be str, got {type(key)}') + if not isinstance(value_value, str): + raise ValueError(f'Expected value to be str, got {type(value)}') + row_index = int(value_key) + row_json = json.loads(value_value) + + if row_index != len(data[column_name]): + raise ValueError(f'Unexpected row index; expected {len(data[column_name])}, got {row_index}') + data[column_name].append(row_json) + rows.append([row_json]) + + d, ignored_columns = columns_to_native_numpy(data, fields) + + graph: DiGraph = prepare_data_for_graphing( + incoming=d, + schema_json=None) + + nodes = [] + for (node_id, node) in graph.nodes(data=True): + nodes.append(node) + + edges = [] + for (from_id, to_id, edge) in graph.edges(data=True): + edges.append(edge) + + return { + "response": { + "nodes": nodes, + "edges": edges, + "schema": None, + "rows": rows, + "query_result": data + } + } + except Exception as e: + return { + "error": getattr(e, "message", str(e)) + } + +class GraphServer: + port = portpicker.pick_unused_port() + host = 'http://localhost' + url = f"{host}:{port}" + + endpoints = { + "get_ping": "/get_ping", + "post_ping": "/post_ping", + "post_query": "/post_query", + } + + _server = None + + @staticmethod + def build_route(endpoint): + return f"{GraphServer.url}{endpoint}" + + @staticmethod + def start_server(): + class ThreadedTCPServer(socketserver.TCPServer): + # Allow socket reuse to avoid "Address already in use" errors + allow_reuse_address = True + # Daemon threads automatically terminate when the main program exits + daemon_threads = True + + with ThreadedTCPServer(("", GraphServer.port), GraphServerHandler) as httpd: + GraphServer._server = httpd + GraphServer._server.serve_forever() + + @staticmethod + def init(): + server_thread = threading.Thread(target=GraphServer.start_server) + server_thread.start() + return server_thread + + @staticmethod + def stop_server(): + if GraphServer._server: + GraphServer._server.shutdown() + print("Spanner Graph Notebook shutting down...") + + @staticmethod + def get_ping(): + route = GraphServer.build_route(GraphServer.endpoints["get_ping"]) + response = requests.get(route) + + if response.status_code == 200: + return response.json() + else: + print(f"Request failed with status code {response.status_code}") + return False + + @staticmethod + def post_ping(data): + route = GraphServer.build_route(GraphServer.endpoints["post_ping"]) + response = requests.post(route, json=data) + + if response.status_code == 200: + return response.json() + else: + print(f"Request failed with 
status code {response.status_code}") + return False + +class GraphServerHandler(http.server.SimpleHTTPRequestHandler): + def log_message(self, format, *args): + pass + + def do_json_response(self, data): + self.send_response(200) + self.send_header("Access-Control-Allow-Origin", "*") + self.send_header('Content-type', 'application/json') + self.send_header("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS") + self.end_headers() + self.wfile.write(json.dumps(data).encode()) + + def do_message_response(self, message): + self.do_json_response({'message': message}) + + def do_data_response(self, data): + self.do_json_response(data) + + def parse_post_data(self): + content_length = int(self.headers['Content-Length']) + post_data = self.rfile.read(content_length).decode('utf-8') + return json.loads(post_data) + + def handle_get_ping(self): + self.do_message_response('pong') + + def handle_post_ping(self): + data = self.parse_post_data() + self.do_data_response({'your_request': data}) + + def handle_post_query(self): + data = self.parse_post_data() + response = convert_graph_data( + query_results=json.loads(data['params']) + ) + self.do_data_response(response) + + def do_GET(self): + if self.path == GraphServer.endpoints["get_ping"]: + self.handle_get_ping() + else: + super().do_GET() + + def do_POST(self): + if self.path == GraphServer.endpoints["post_ping"]: + self.handle_post_ping() + elif self.path == GraphServer.endpoints["post_query"]: + self.handle_post_query() + + +atexit.register(GraphServer.stop_server) \ No newline at end of file diff --git a/setup.py b/setup.py index 2b67aa6..f278b8d 100644 --- a/setup.py +++ b/setup.py @@ -33,8 +33,10 @@ "ipywidgets>=7.7.1", "ipython>=7.23.1", "ipykernel>=5.5.6", + "networkx", "packaging >= 20.0.0", "pandas>=1.1.0", + "portpicker", "pyarrow >= 3.0.0", "pydata-google-auth >=1.5.0", "tqdm >= 4.7.4, <5.0.0dev", diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 61ad744..254aee1 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -48,6 +48,11 @@ except ImportError: bpd = None +try: + import spanner_graphs.graph_visualization as graph_visualization +except ImportError: + spanner_graphs = None + def make_connection(*args): # TODO(tswast): Remove this in favor of a mock google.cloud.bigquery.Client @@ -456,6 +461,227 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): assert list(return_value) == list(result) # verify column names +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + graph_visualization is None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`" +) +def test_bigquery_graph_int_result(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("bigquery_magics") + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials) + + # Mock out the BigQuery Storage API. 
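+    # The patched client below keeps the test hermetic: no real API calls are
+    # made, and the magic only ever sees the canned DataFrame returned by the
+    # mocked QueryJob.to_dataframe().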
+ bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + bqstorage_instance_mock._transport = mock.Mock() + bqstorage_mock.return_value = bqstorage_instance_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock + ) + display_patch = mock.patch("IPython.display.display", autospec=True) + + sql = "SELECT 3 AS result" + result = pandas.DataFrame(['abc'], columns=["s"]) + run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + + with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: + run_query_mock.return_value = query_job_mock + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + + # Since the query result is not valid JSON, the visualizer should not be displayed. + display_mock.assert_not_called() + + assert bqstorage_mock.called # BQ storage client was used + assert isinstance(return_value, pandas.DataFrame) + assert len(return_value) == len(result) # verify row count + assert list(return_value) == list(result) # verify column names + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + graph_visualization is None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`" +) +def test_bigquery_graph_str_result(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("bigquery_magics") + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials) + + # Mock out the BigQuery Storage API. + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + bqstorage_instance_mock._transport = mock.Mock() + bqstorage_mock.return_value = bqstorage_instance_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock + ) + display_patch = mock.patch("IPython.display.display", autospec=True) + + sql = "SELECT 'abc' AS s" + result = pandas.DataFrame(['abc'], columns=["s"]) + run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + + with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: + run_query_mock.return_value = query_job_mock + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + + # Since the query result is not valid JSON, the visualizer should not be displayed. 
+ display_mock.assert_not_called() + + assert bqstorage_mock.called # BQ storage client was used + assert isinstance(return_value, pandas.DataFrame) + assert len(return_value) == len(result) # verify row count + assert list(return_value) == list(result) # verify column names + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + graph_visualization is None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`" +) +def test_bigquery_graph_json_json_result(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("bigquery_magics") + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials) + + # Mock out the BigQuery Storage API. + bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + bqstorage_instance_mock._transport = mock.Mock() + bqstorage_mock.return_value = bqstorage_instance_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock + ) + display_patch = mock.patch("IPython.display.display", autospec=True) + + sql = "SELECT graph_json, graph_json AS graph_json2 FROM t" + graph_json_rows = [""" + [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI=","kind":"node","labels":["Person"],"properties":{"birthday":"1991-12-21T08:00:00Z","city":"Adelaide","country":"Australia","id":1,"name":"Alex"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==","kind":"edge","labels":["Owns"],"properties":{"account_id":7,"create_time":"2020-01-10T14:22:20.222Z","id":1},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-10T14:22:20.222Z","id":7,"is_blocked":false,"nick_name":"Vacation Fund"}}] + """,""" + [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY=","kind":"node","labels":["Person"],"properties":{"birthday":"1986-12-07T08:00:00Z","city":"Kollam","country":"India","id":3,"name":"Lee"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==","kind":"edge","labels":["Owns"],"properties":{"account_id":16,"create_time":"2020-02-18T13:44:20.655Z","id":3},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-28T01:55:09.206Z","id":16,"is_blocked":true,"nick_name":"Vacation Fund"}}] + """, """ + 
[{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ=","kind":"node","labels":["Person"],"properties":{"birthday":"1980-10-31T08:00:00Z","city":"Moravia","country":"Czech_Republic","id":2,"name":"Dana"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEEkSiZRmluR3JhcGguUGVyc29uAHiRBJlGaW5HcmFwaC5BY2NvdW50AHiRKA==","kind":"edge","labels":["Owns"],"properties":{"account_id":20,"create_time":"2020-01-28T01:55:09.206Z","id":2},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","kind":"node","labels":["Account"],"properties":{"create_time":"2020-02-18T13:44:20.655Z","id":20,"is_blocked":false,"nick_name":"Rainy Day Fund"}}] + """] + result = pandas.DataFrame({'graph_json': graph_json_rows, + 'graph_json2': graph_json_rows}, columns=["graph_json", "graph_json2"]) + run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + + with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: + run_query_mock.return_value = query_job_mock + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + + # As we only support visualization with single-column queries, the visualizer should not be launched. + display_mock.assert_not_called() + + assert bqstorage_mock.called # BQ storage client was used + assert isinstance(return_value, pandas.DataFrame) + assert len(return_value) == len(result) # verify row count + assert list(return_value) == list(result) # verify column names + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + graph_visualization is None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`" +) +def test_bigquery_graph_json_result(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("bigquery_magics") + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials) + + # Mock out the BigQuery Storage API. 
+ bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + bqstorage_instance_mock._transport = mock.Mock() + bqstorage_mock.return_value = bqstorage_instance_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock + ) + + sql = "SELECT graph_json FROM t" + graph_json_rows = [""" + [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI=","kind":"node","labels":["Person"],"properties":{"birthday":"1991-12-21T08:00:00Z","city":"Adelaide","country":"Australia","id":1,"name":"Alex"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==","kind":"edge","labels":["Owns"],"properties":{"account_id":7,"create_time":"2020-01-10T14:22:20.222Z","id":1},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-10T14:22:20.222Z","id":7,"is_blocked":false,"nick_name":"Vacation Fund"}}] + """,""" + [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY=","kind":"node","labels":["Person"],"properties":{"birthday":"1986-12-07T08:00:00Z","city":"Kollam","country":"India","id":3,"name":"Lee"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==","kind":"edge","labels":["Owns"],"properties":{"account_id":16,"create_time":"2020-02-18T13:44:20.655Z","id":3},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-28T01:55:09.206Z","id":16,"is_blocked":true,"nick_name":"Vacation Fund"}}] + """, """ + [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ=","kind":"node","labels":["Person"],"properties":{"birthday":"1980-10-31T08:00:00Z","city":"Moravia","country":"Czech_Republic","id":2,"name":"Dana"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEEkSiZRmluR3JhcGguUGVyc29uAHiRBJlGaW5HcmFwaC5BY2NvdW50AHiRKA==","kind":"edge","labels":["Owns"],"properties":{"account_id":20,"create_time":"2020-01-28T01:55:09.206Z","id":2},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","kind":"node","labels":["Account"],"properties":{"create_time":"2020-02-18T13:44:20.655Z","id":20,"is_blocked":false,"nick_name":"Rainy Day Fund"}}] + """] + result = pandas.DataFrame(graph_json_rows, columns=["graph_json"]) + run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) + graph_server_init_patch = mock.patch("bigquery_magics.graph_server.GraphServer.init", autospec=True) + display_patch = mock.patch("IPython.display.display", autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + + with run_query_patch as run_query_mock, bqstorage_client_patch, graph_server_init_patch as graph_server_init_mock, display_patch as display_mock: + run_query_mock.return_value = query_job_mock + graph_server_init_mock.return_value = None + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + + assert len(display_mock.call_args_list) == 1 + assert 
len(display_mock.call_args_list[0]) == 2 + + # Sanity check that the HTML content looks like graph visualization. Minimal check + # to allow Spanner to change its implementation without breaking this test. + html_content = display_mock.call_args_list[0][0][0].data + assert '' in html_content + # Verify that the query results are embedded into the HTML, allowing them to be visualized. + # Due to escaping, it is not possible check for graph_json_rows exactly, so we check for a few + # sentinel strings within the query results, instead. + assert 'mUZpbkdyYXBoLlBlcnNvbgB4kQI=' in html_content # identifier in 1st row of query result + assert 'mUZpbkdyYXBoLlBlcnNvbgB4kQY=' in html_content # identifier in 2nd row of query result + assert 'mUZpbkdyYXBoLlBlcnNvbgB4kQQ=' in html_content # identifier in 3rd row of query result + + assert bqstorage_mock.called # BQ storage client was used + assert isinstance(return_value, pandas.DataFrame) + assert len(return_value) == len(result) # verify row count + assert list(return_value) == list(result) # verify column names + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_default_connection_user_agent(): ip = IPython.get_ipython() diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py new file mode 100644 index 0000000..aaa7971 --- /dev/null +++ b/tests/unit/test_graph_server.py @@ -0,0 +1,211 @@ +# Copyright 2018 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
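+"""Unit tests for bigquery_magics.graph_server."""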
+ +from concurrent import futures +import contextlib +import copy +import json +import re +from unittest import mock +import warnings + +import IPython +import IPython.terminal.interactiveshell as interactiveshell +import IPython.testing.tools as tools +import IPython.utils.io as io +from google.api_core import exceptions +import google.auth.credentials +from google.cloud import bigquery +from google.cloud.bigquery import exceptions as bq_exceptions +from google.cloud.bigquery import job, table +import google.cloud.bigquery._http +import google.cloud.bigquery.exceptions +from google.cloud.bigquery.retry import DEFAULT_TIMEOUT +import pandas +import pytest +import test_utils.imports # google-cloud-testutils + +import bigquery_magics +import bigquery_magics.bigquery as magics + +try: + import spanner_graphs.graph_visualization as graph_visualization +except ImportError: + spanner_graphs = None + +from bigquery_magics.graph_server import convert_graph_data + +alex_properties = { + 'birthday': '1991-12-21T08:00:00Z', + 'id': 1, + 'city': 'Adelaide', + 'country': 'Australia', + 'name': 'Alex', +} + +alex_account_properties = { + 'create_time': '2020-01-10T14:22:20.222Z', + 'id': 7, + 'is_blocked': False, + 'nick_name': 'Vacation Fund' +} + +alex_owns_account_edge_properites = { + "account_id": 7, + "create_time": "2020-01-10T14:22:20.222Z", + "id": 1, +} + +row_alex_owns_account = [ + { + "identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQI=", + "kind": "node", + "labels": ["Person"], + "properties": alex_properties + }, + { + "destination_node_identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEO", + "identifier": "mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==", + "kind": "edge", + "labels": ["Owns"], + "properties": alex_owns_account_edge_properites, + "source_node_identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQI=", + }, + { + "identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEO", + "kind": "node", + "labels": ["Account"], + "properties": alex_account_properties, + }, +] + +lee_properties = { + "birthday": "1986-12-07T08:00:00Z", + "city": "Kollam", + "country": "India", + "id": 3, + "name": "Lee" +} + +lee_account_properties = { + "create_time": "2020-01-28T01:55:09.206Z", + "id": 16, + "is_blocked": True, + "nick_name": "Vacation Fund" +} + +lee_owns_account_edge_properties = { + "account_id": 16, + "create_time": "2020-02-18T13:44:20.655Z", + "id": 3 +} + +row_lee_owns_account = [ + { + "identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQY=", + "kind": "node", + "labels": [ + "Person" + ], + "properties": lee_properties + }, + { + "destination_node_identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEg", + "identifier": "mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==", + "kind": "edge", + "labels": [ + "Owns" + ], + "properties": lee_owns_account_edge_properties, + "source_node_identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQY=" + }, + { + "identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEg", + "kind": "node", + "labels": [ + "Account" + ], + "properties": lee_account_properties + } +] + +def _validate_nodes_and_edges(result): + for edge in result['response']['edges']: + assert 'id' in edge + assert edge['label'] == 'Owns' + assert 'source' in edge + assert 'target' in edge + assert 'properties' in edge + + for node in result['response']['nodes']: + assert 'id' in node + assert 'key_property_names' in node + assert node['label'] in ('Account', 'Person') + assert 'properties' in node + assert 'value' in node + +def test_convert_one_column_no_rows(): + result 
= convert_graph_data({'result': {}}) + assert result == { + 'response': { + 'edges': [], + 'nodes': [], + 'query_result': {'result': []}, + 'rows': [], + 'schema': None + } + } + +def test_convert_one_column_one_row(): + result = convert_graph_data({'result': { + '0': json.dumps(row_alex_owns_account), + }}) + + assert len(result['response']['nodes']) == 2 + assert len(result['response']['edges']) == 1 + + _validate_nodes_and_edges(result) + + assert result['response']['query_result'] == {'result': [row_alex_owns_account]} + assert result['response']['rows'] == [[row_alex_owns_account]] + assert result['response']['schema'] is None + +def test_convert_one_column_two_rows(): + result = convert_graph_data({'result': { + '0': json.dumps(row_alex_owns_account), + '1': json.dumps(row_lee_owns_account) + }}) + + assert len(result['response']['nodes']) == 4 + assert len(result['response']['edges']) == 2 + + _validate_nodes_and_edges(result) + + assert result['response']['query_result'] == {'result': [row_alex_owns_account, row_lee_owns_account]} + assert result['response']['rows'] == [[row_alex_owns_account], [row_lee_owns_account]] + assert result['response']['schema'] is None + +def test_convert_nongraph_json(): + # If we have valid json that doesn't represent a graph, we don't expect to get nodes and edges, + # but we should at least have row data, allowing the tabular view to work. + result = convert_graph_data({'result': { + '0': json.dumps({'foo': 1, 'bar': 2}), + }}) + + assert len(result['response']['nodes']) == 0 + assert len(result['response']['edges']) == 0 + + assert result['response']['query_result'] == {'result': [{'foo': 1, 'bar': 2}]} + assert result['response']['rows'] == [[{'foo': 1, 'bar': 2}]] + assert result['response']['schema'] is None From a3b71a4a1808343dcc3176309d853bbcbcd97a5d Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 19 Feb 2025 16:16:23 -0800 Subject: [PATCH 02/44] Skip graph server tests when spanner-graph-notebook is missing. Also, add a test to bigquery magic for when spanner-graph-notebook is missing. --- tests/unit/test_bigquery.py | 50 ++++++++++++++++++++++++++++++++- tests/unit/test_graph_server.py | 6 +++- 2 files changed, 54 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 254aee1..9d94207 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -51,7 +51,7 @@ try: import spanner_graphs.graph_visualization as graph_visualization except ImportError: - spanner_graphs = None + graph_visualization = None def make_connection(*args): @@ -461,6 +461,54 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): assert list(return_value) == list(result) # verify column names +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + graph_visualization is not None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present" +) +def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("bigquery_magics") + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials) + + # Mock out the BigQuery Storage API. 
+ bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + bqstorage_instance_mock._transport = mock.Mock() + bqstorage_mock.return_value = bqstorage_instance_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock + ) + display_patch = mock.patch("IPython.display.display", autospec=True) + + sql = "SELECT 3 AS result" + result = pandas.DataFrame(['abc'], columns=["s"]) + run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + + with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: + run_query_mock.return_value = query_job_mock + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + + # Since the query result is not valid JSON, the visualizer should not be displayed. + display_mock.assert_not_called() + + assert bqstorage_mock.called # BQ storage client was used + assert isinstance(return_value, pandas.DataFrame) + assert len(return_value) == len(result) # verify row count + assert list(return_value) == list(result) # verify column names + + @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( graph_visualization is None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`" diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index aaa7971..d0ed213 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -42,7 +42,7 @@ try: import spanner_graphs.graph_visualization as graph_visualization except ImportError: - spanner_graphs = None + graph_visualization = None from bigquery_magics.graph_server import convert_graph_data @@ -155,6 +155,7 @@ def _validate_nodes_and_edges(result): assert 'properties' in node assert 'value' in node +@pytest.mark.skipif(graph_visualization is None, reason="Requires `spanner-graph-notebook`") def test_convert_one_column_no_rows(): result = convert_graph_data({'result': {}}) assert result == { @@ -167,6 +168,7 @@ def test_convert_one_column_no_rows(): } } +@pytest.mark.skipif(graph_visualization is None, reason="Requires `spanner-graph-notebook`") def test_convert_one_column_one_row(): result = convert_graph_data({'result': { '0': json.dumps(row_alex_owns_account), @@ -181,6 +183,7 @@ def test_convert_one_column_one_row(): assert result['response']['rows'] == [[row_alex_owns_account]] assert result['response']['schema'] is None +@pytest.mark.skipif(graph_visualization is None, reason="Requires `spanner-graph-notebook`") def test_convert_one_column_two_rows(): result = convert_graph_data({'result': { '0': json.dumps(row_alex_owns_account), @@ -196,6 +199,7 @@ def test_convert_one_column_two_rows(): assert result['response']['rows'] == [[row_alex_owns_account], [row_lee_owns_account]] assert result['response']['schema'] is None +@pytest.mark.skipif(graph_visualization is None, reason="Requires `spanner-graph-notebook`") def test_convert_nongraph_json(): # If we have valid json that doesn't represent a graph, we don't expect to get nodes and edges, # but we should at least have row data, allowing the tabular view to work. 
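Note on usage: the conversion entry point exercised by these tests can also be called
directly, without the cell magic or the HTTP server. Below is a minimal sketch, assuming
`spanner-graph-notebook` is installed; the column name "result" and the node payload are
made up for illustration and mirror the shapes used in the tests above:

    import json

    from bigquery_magics.graph_server import convert_graph_data

    # One column ("result") with a single row (string index "0") whose value is a
    # JSON-encoded array of graph elements: the same layout that
    # DataFrame.to_json() produces for a single JSON column.
    row = [
        {
            "identifier": "node-1",  # hypothetical identifier, for illustration only
            "kind": "node",
            "labels": ["Person"],
            "properties": {"id": 1, "name": "Alex"},
        }
    ]
    result = convert_graph_data({"result": {"0": json.dumps(row)}})

    # On success the payload carries nodes/edges for the widget plus the raw rows
    # for the tabular view; on any failure it carries only an "error" message.
    print(result)

This is also the payload that the Jupyter path receives over HTTP: the widget POSTs
{"params": "<json string>"} to the /post_query endpoint, and
GraphServerHandler.handle_post_query feeds it through convert_graph_data before writing
the JSON response.
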
From 86505aa388bd1ace36a6d0fa4eb0d7dbae662977 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Fri, 21 Feb 2025 18:39:38 +0000 Subject: [PATCH 03/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigquery_magics/bigquery.py | 22 +-- bigquery_magics/graph_server.py | 82 +++++----- tests/unit/test_bigquery.py | 91 +++++++---- tests/unit/test_graph_server.py | 272 +++++++++++++++++--------------- 4 files changed, 268 insertions(+), 199 deletions(-) diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index 4a7339c..20136ac 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -90,17 +90,17 @@ from concurrent import futures import copy import json -import pandas import re import sys +from threading import Thread import time from typing import Any, List, Tuple import warnings import IPython # type: ignore from IPython import display # type: ignore -from IPython.core.display import HTML, JSON from IPython.core import magic_arguments # type: ignore +from IPython.core.display import HTML, JSON from IPython.core.getipython import get_ipython from google.api_core import client_info from google.api_core.exceptions import NotFound @@ -109,15 +109,14 @@ from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery.dbapi import _helpers from google.cloud.bigquery.job import QueryJobConfig +import pandas from bigquery_magics import line_arg_parser as lap import bigquery_magics._versions_helpers import bigquery_magics.config +from bigquery_magics.graph_server import GraphServer, convert_graph_data import bigquery_magics.line_arg_parser.exceptions import bigquery_magics.version -from bigquery_magics.graph_server import GraphServer, convert_graph_data - -from threading import Thread try: from google.cloud import bigquery_storage # type: ignore @@ -384,7 +383,6 @@ def _create_dataset_if_necessary(client, dataset_id): action="store_true", default=False, help=("Visualizes the query results as a graph"), - ) def _cell_magic(line, query): """Underlying function for bigquery cell magic @@ -585,16 +583,19 @@ def _is_colab() -> bool: """Check if code is running in Google Colab""" try: import google.colab + return True except ImportError: return False + def _colab_callback(query: str, params: str): return JSON(convert_graph_data(query_results=json.loads(params))) singleton_server_thread: Thread = None + def _add_graph_widget(query_result): try: from spanner_graphs.graph_visualization import generate_visualization_html @@ -610,7 +611,8 @@ def _add_graph_widget(query_result): # to be invoked from Javascript. 
if _is_colab(): from google.colab import output - output.register_callback('graph_visualization.Query', _colab_callback) + + output.register_callback("graph_visualization.Query", _colab_callback) else: global singleton_server_thread alive = singleton_server_thread and singleton_server_thread.is_alive() @@ -619,12 +621,13 @@ def _add_graph_widget(query_result): # Create html to invoke the graph server html_content = generate_visualization_html( - query='dummy query', + query="dummy query", port=GraphServer.port, - params=query_result.to_json().replace('\\', '\\\\').replace('"', '\\"') + params=query_result.to_json().replace("\\", "\\\\").replace('"', '\\"'), ) display.display(HTML(html_content)) + def _is_valid_json(s: str): try: json.loads(s) @@ -632,6 +635,7 @@ def _is_valid_json(s: str): except (json.JSONDecodeError, TypeError): return False + def _supports_graph_widget(query_result: pandas.DataFrame): num_rows, num_columns = query_result.shape if num_columns != 1: diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index b8d7ec9..7cd9501 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -12,15 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import atexit import http.server -import socketserver import json +import socketserver import threading -import requests -import portpicker -from networkx.classes import DiGraph from typing import List -import atexit + +from networkx.classes import DiGraph +import portpicker +import requests def convert_graph_data(query_results: dict[str, dict[str, str]]): @@ -51,8 +52,11 @@ def convert_graph_data(query_results: dict[str, dict[str, str]]): # Note that these imports do not need to be in a try/except, as this function # does not even get called unless spanner_graphs has already been confirmed # to exist upstream. - from google.cloud.spanner_v1.types import StructType, TypeCode, Type - from spanner_graphs.conversion import prepare_data_for_graphing, columns_to_native_numpy + from google.cloud.spanner_v1.types import StructType, Type, TypeCode + from spanner_graphs.conversion import ( + columns_to_native_numpy, + prepare_data_for_graphing, + ) try: column_name = None @@ -60,44 +64,50 @@ def convert_graph_data(query_results: dict[str, dict[str, str]]): for key, value in query_results.items(): if column_name == None: if not isinstance(key, str): - raise ValueError(f'Expected key to be str, got {type(key)}') + raise ValueError(f"Expected key to be str, got {type(key)}") if not isinstance(value, dict): - raise ValueError(f'Expected value to be dict, got {type(value)}') + raise ValueError(f"Expected value to be dict, got {type(value)}") column_name = key column_value = value else: - raise ValueError('Query has multiple columns - graph visualization not supported') + raise ValueError( + "Query has multiple columns - graph visualization not supported" + ) if column_name is None or column_value is None: - raise ValueError('Unable to get column name or value - how is this possible???') - - fields: List[StructType.Field] = [StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))] - data = {column_name : []} + raise ValueError( + "Unable to get column name or value - how is this possible???" 
+ ) + + fields: List[StructType.Field] = [ + StructType.Field(name=column_name, type=Type(code=TypeCode.JSON)) + ] + data = {column_name: []} rows = [] for value_key, value_value in column_value.items(): if not isinstance(value_key, str): - raise ValueError(f'Expected key to be str, got {type(key)}') + raise ValueError(f"Expected key to be str, got {type(key)}") if not isinstance(value_value, str): - raise ValueError(f'Expected value to be str, got {type(value)}') + raise ValueError(f"Expected value to be str, got {type(value)}") row_index = int(value_key) row_json = json.loads(value_value) if row_index != len(data[column_name]): - raise ValueError(f'Unexpected row index; expected {len(data[column_name])}, got {row_index}') + raise ValueError( + f"Unexpected row index; expected {len(data[column_name])}, got {row_index}" + ) data[column_name].append(row_json) rows.append([row_json]) d, ignored_columns = columns_to_native_numpy(data, fields) - graph: DiGraph = prepare_data_for_graphing( - incoming=d, - schema_json=None) + graph: DiGraph = prepare_data_for_graphing(incoming=d, schema_json=None) nodes = [] - for (node_id, node) in graph.nodes(data=True): + for node_id, node in graph.nodes(data=True): nodes.append(node) edges = [] - for (from_id, to_id, edge) in graph.edges(data=True): + for from_id, to_id, edge in graph.edges(data=True): edges.append(edge) return { @@ -106,17 +116,16 @@ def convert_graph_data(query_results: dict[str, dict[str, str]]): "edges": edges, "schema": None, "rows": rows, - "query_result": data + "query_result": data, } } except Exception as e: - return { - "error": getattr(e, "message", str(e)) - } + return {"error": getattr(e, "message", str(e))} + class GraphServer: port = portpicker.pick_unused_port() - host = 'http://localhost' + host = "http://localhost" url = f"{host}:{port}" endpoints = { @@ -177,6 +186,7 @@ def post_ping(data): print(f"Request failed with status code {response.status_code}") return False + class GraphServerHandler(http.server.SimpleHTTPRequestHandler): def log_message(self, format, *args): pass @@ -184,34 +194,32 @@ def log_message(self, format, *args): def do_json_response(self, data): self.send_response(200) self.send_header("Access-Control-Allow-Origin", "*") - self.send_header('Content-type', 'application/json') + self.send_header("Content-type", "application/json") self.send_header("Access-Control-Allow-Methods", "GET,PUT,POST,DELETE,OPTIONS") self.end_headers() self.wfile.write(json.dumps(data).encode()) def do_message_response(self, message): - self.do_json_response({'message': message}) + self.do_json_response({"message": message}) def do_data_response(self, data): self.do_json_response(data) def parse_post_data(self): - content_length = int(self.headers['Content-Length']) - post_data = self.rfile.read(content_length).decode('utf-8') + content_length = int(self.headers["Content-Length"]) + post_data = self.rfile.read(content_length).decode("utf-8") return json.loads(post_data) def handle_get_ping(self): - self.do_message_response('pong') + self.do_message_response("pong") def handle_post_ping(self): data = self.parse_post_data() - self.do_data_response({'your_request': data}) + self.do_data_response({"your_request": data}) def handle_post_query(self): data = self.parse_post_data() - response = convert_graph_data( - query_results=json.loads(data['params']) - ) + response = convert_graph_data(query_results=json.loads(data["params"])) self.do_data_response(response) def do_GET(self): @@ -227,4 +235,4 @@ def do_POST(self): 
self.handle_post_query() -atexit.register(GraphServer.stop_server) \ No newline at end of file +atexit.register(GraphServer.stop_server) diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 9d94207..6f78f14 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -463,7 +463,8 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - graph_visualization is not None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present" + graph_visualization is not None or bigquery_storage is None, + reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present", ) def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch): ip = IPython.get_ipython() @@ -489,14 +490,16 @@ def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch): display_patch = mock.patch("IPython.display.display", autospec=True) sql = "SELECT 3 AS result" - result = pandas.DataFrame(['abc'], columns=["s"]) + result = pandas.DataFrame(["abc"], columns=["s"]) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), display_patch as display_mock: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "--graph", sql) @@ -511,7 +514,8 @@ def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - graph_visualization is None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`" + graph_visualization is None or bigquery_storage is None, + reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_int_result(monkeypatch): ip = IPython.get_ipython() @@ -537,14 +541,16 @@ def test_bigquery_graph_int_result(monkeypatch): display_patch = mock.patch("IPython.display.display", autospec=True) sql = "SELECT 3 AS result" - result = pandas.DataFrame(['abc'], columns=["s"]) + result = pandas.DataFrame(["abc"], columns=["s"]) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), display_patch as display_mock: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "--graph", sql) @@ -559,7 +565,8 @@ def test_bigquery_graph_int_result(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - graph_visualization is None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`" + graph_visualization is None or bigquery_storage is None, + reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_str_result(monkeypatch): ip = IPython.get_ipython() @@ -585,14 
+592,16 @@ def test_bigquery_graph_str_result(monkeypatch): display_patch = mock.patch("IPython.display.display", autospec=True) sql = "SELECT 'abc' AS s" - result = pandas.DataFrame(['abc'], columns=["s"]) + result = pandas.DataFrame(["abc"], columns=["s"]) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), display_patch as display_mock: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "--graph", sql) @@ -607,7 +616,8 @@ def test_bigquery_graph_str_result(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - graph_visualization is None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`" + graph_visualization is None or bigquery_storage is None, + reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_json_json_result(monkeypatch): ip = IPython.get_ipython() @@ -633,22 +643,30 @@ def test_bigquery_graph_json_json_result(monkeypatch): display_patch = mock.patch("IPython.display.display", autospec=True) sql = "SELECT graph_json, graph_json AS graph_json2 FROM t" - graph_json_rows = [""" + graph_json_rows = [ + """ [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI=","kind":"node","labels":["Person"],"properties":{"birthday":"1991-12-21T08:00:00Z","city":"Adelaide","country":"Australia","id":1,"name":"Alex"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==","kind":"edge","labels":["Owns"],"properties":{"account_id":7,"create_time":"2020-01-10T14:22:20.222Z","id":1},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-10T14:22:20.222Z","id":7,"is_blocked":false,"nick_name":"Vacation Fund"}}] - """,""" + """, + """ [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY=","kind":"node","labels":["Person"],"properties":{"birthday":"1986-12-07T08:00:00Z","city":"Kollam","country":"India","id":3,"name":"Lee"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==","kind":"edge","labels":["Owns"],"properties":{"account_id":16,"create_time":"2020-02-18T13:44:20.655Z","id":3},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-28T01:55:09.206Z","id":16,"is_blocked":true,"nick_name":"Vacation Fund"}}] - """, """ + """, + """ 
[{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ=","kind":"node","labels":["Person"],"properties":{"birthday":"1980-10-31T08:00:00Z","city":"Moravia","country":"Czech_Republic","id":2,"name":"Dana"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEEkSiZRmluR3JhcGguUGVyc29uAHiRBJlGaW5HcmFwaC5BY2NvdW50AHiRKA==","kind":"edge","labels":["Owns"],"properties":{"account_id":20,"create_time":"2020-01-28T01:55:09.206Z","id":2},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","kind":"node","labels":["Account"],"properties":{"create_time":"2020-02-18T13:44:20.655Z","id":20,"is_blocked":false,"nick_name":"Rainy Day Fund"}}] - """] - result = pandas.DataFrame({'graph_json': graph_json_rows, - 'graph_json2': graph_json_rows}, columns=["graph_json", "graph_json2"]) + """, + ] + result = pandas.DataFrame( + {"graph_json": graph_json_rows, "graph_json2": graph_json_rows}, + columns=["graph_json", "graph_json2"], + ) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch, display_patch as display_mock: + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), display_patch as display_mock: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "--graph", sql) @@ -663,7 +681,8 @@ def test_bigquery_graph_json_json_result(monkeypatch): @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( - graph_visualization is None or bigquery_storage is None, reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`" + graph_visualization is None or bigquery_storage is None, + reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_json_result(monkeypatch): ip = IPython.get_ipython() @@ -688,23 +707,31 @@ def test_bigquery_graph_json_result(monkeypatch): ) sql = "SELECT graph_json FROM t" - graph_json_rows = [""" + graph_json_rows = [ + """ [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI=","kind":"node","labels":["Person"],"properties":{"birthday":"1991-12-21T08:00:00Z","city":"Adelaide","country":"Australia","id":1,"name":"Alex"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==","kind":"edge","labels":["Owns"],"properties":{"account_id":7,"create_time":"2020-01-10T14:22:20.222Z","id":1},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-10T14:22:20.222Z","id":7,"is_blocked":false,"nick_name":"Vacation Fund"}}] - """,""" + """, + """ 
[{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY=","kind":"node","labels":["Person"],"properties":{"birthday":"1986-12-07T08:00:00Z","city":"Kollam","country":"India","id":3,"name":"Lee"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==","kind":"edge","labels":["Owns"],"properties":{"account_id":16,"create_time":"2020-02-18T13:44:20.655Z","id":3},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-28T01:55:09.206Z","id":16,"is_blocked":true,"nick_name":"Vacation Fund"}}] - """, """ + """, + """ [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ=","kind":"node","labels":["Person"],"properties":{"birthday":"1980-10-31T08:00:00Z","city":"Moravia","country":"Czech_Republic","id":2,"name":"Dana"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEEkSiZRmluR3JhcGguUGVyc29uAHiRBJlGaW5HcmFwaC5BY2NvdW50AHiRKA==","kind":"edge","labels":["Owns"],"properties":{"account_id":20,"create_time":"2020-01-28T01:55:09.206Z","id":2},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","kind":"node","labels":["Account"],"properties":{"create_time":"2020-02-18T13:44:20.655Z","id":20,"is_blocked":false,"nick_name":"Rainy Day Fund"}}] - """] + """, + ] result = pandas.DataFrame(graph_json_rows, columns=["graph_json"]) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) - graph_server_init_patch = mock.patch("bigquery_magics.graph_server.GraphServer.init", autospec=True) + graph_server_init_patch = mock.patch( + "bigquery_magics.graph_server.GraphServer.init", autospec=True + ) display_patch = mock.patch("IPython.display.display", autospec=True) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True ) query_job_mock.to_dataframe.return_value = result - with run_query_patch as run_query_mock, bqstorage_client_patch, graph_server_init_patch as graph_server_init_mock, display_patch as display_mock: + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock: run_query_mock.return_value = query_job_mock graph_server_init_mock.return_value = None return_value = ip.run_cell_magic("bigquery", "--graph", sql) @@ -715,14 +742,20 @@ def test_bigquery_graph_json_result(monkeypatch): # Sanity check that the HTML content looks like graph visualization. Minimal check # to allow Spanner to change its implementation without breaking this test. html_content = display_mock.call_args_list[0][0][0].data - assert '' in html_content + assert "" in html_content # Verify that the query results are embedded into the HTML, allowing them to be visualized. # Due to escaping, it is not possible check for graph_json_rows exactly, so we check for a few # sentinel strings within the query results, instead. 
- assert 'mUZpbkdyYXBoLlBlcnNvbgB4kQI=' in html_content # identifier in 1st row of query result - assert 'mUZpbkdyYXBoLlBlcnNvbgB4kQY=' in html_content # identifier in 2nd row of query result - assert 'mUZpbkdyYXBoLlBlcnNvbgB4kQQ=' in html_content # identifier in 3rd row of query result + assert ( + "mUZpbkdyYXBoLlBlcnNvbgB4kQI=" in html_content + ) # identifier in 1st row of query result + assert ( + "mUZpbkdyYXBoLlBlcnNvbgB4kQY=" in html_content + ) # identifier in 2nd row of query result + assert ( + "mUZpbkdyYXBoLlBlcnNvbgB4kQQ=" in html_content + ) # identifier in 3rd row of query result assert bqstorage_mock.called # BQ storage client was used assert isinstance(return_value, pandas.DataFrame) diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index d0ed213..c5a7fc9 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -47,169 +47,193 @@ from bigquery_magics.graph_server import convert_graph_data alex_properties = { - 'birthday': '1991-12-21T08:00:00Z', - 'id': 1, - 'city': 'Adelaide', - 'country': 'Australia', - 'name': 'Alex', + "birthday": "1991-12-21T08:00:00Z", + "id": 1, + "city": "Adelaide", + "country": "Australia", + "name": "Alex", } alex_account_properties = { - 'create_time': '2020-01-10T14:22:20.222Z', - 'id': 7, - 'is_blocked': False, - 'nick_name': 'Vacation Fund' + "create_time": "2020-01-10T14:22:20.222Z", + "id": 7, + "is_blocked": False, + "nick_name": "Vacation Fund", } alex_owns_account_edge_properites = { - "account_id": 7, - "create_time": "2020-01-10T14:22:20.222Z", - "id": 1, + "account_id": 7, + "create_time": "2020-01-10T14:22:20.222Z", + "id": 1, } row_alex_owns_account = [ - { - "identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQI=", - "kind": "node", - "labels": ["Person"], - "properties": alex_properties - }, - { - "destination_node_identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEO", - "identifier": "mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==", - "kind": "edge", - "labels": ["Owns"], - "properties": alex_owns_account_edge_properites, - "source_node_identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQI=", - }, - { - "identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEO", - "kind": "node", - "labels": ["Account"], - "properties": alex_account_properties, - }, + { + "identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQI=", + "kind": "node", + "labels": ["Person"], + "properties": alex_properties, + }, + { + "destination_node_identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEO", + "identifier": "mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==", + "kind": "edge", + "labels": ["Owns"], + "properties": alex_owns_account_edge_properites, + "source_node_identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQI=", + }, + { + "identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEO", + "kind": "node", + "labels": ["Account"], + "properties": alex_account_properties, + }, ] lee_properties = { - "birthday": "1986-12-07T08:00:00Z", - "city": "Kollam", - "country": "India", - "id": 3, - "name": "Lee" + "birthday": "1986-12-07T08:00:00Z", + "city": "Kollam", + "country": "India", + "id": 3, + "name": "Lee", } lee_account_properties = { - "create_time": "2020-01-28T01:55:09.206Z", - "id": 16, - "is_blocked": True, - "nick_name": "Vacation Fund" + "create_time": "2020-01-28T01:55:09.206Z", + "id": 16, + "is_blocked": True, + "nick_name": "Vacation Fund", } lee_owns_account_edge_properties = { - "account_id": 16, - "create_time": "2020-02-18T13:44:20.655Z", - "id": 3 + "account_id": 16, + 
"create_time": "2020-02-18T13:44:20.655Z", + "id": 3, } row_lee_owns_account = [ - { - "identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQY=", - "kind": "node", - "labels": [ - "Person" - ], - "properties": lee_properties - }, - { - "destination_node_identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEg", - "identifier": "mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==", - "kind": "edge", - "labels": [ - "Owns" - ], - "properties": lee_owns_account_edge_properties, - "source_node_identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQY=" - }, - { - "identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEg", - "kind": "node", - "labels": [ - "Account" - ], - "properties": lee_account_properties - } -] + { + "identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQY=", + "kind": "node", + "labels": ["Person"], + "properties": lee_properties, + }, + { + "destination_node_identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEg", + "identifier": "mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==", + "kind": "edge", + "labels": ["Owns"], + "properties": lee_owns_account_edge_properties, + "source_node_identifier": "mUZpbkdyYXBoLlBlcnNvbgB4kQY=", + }, + { + "identifier": "mUZpbkdyYXBoLkFjY291bnQAeJEg", + "kind": "node", + "labels": ["Account"], + "properties": lee_account_properties, + }, +] + def _validate_nodes_and_edges(result): - for edge in result['response']['edges']: - assert 'id' in edge - assert edge['label'] == 'Owns' - assert 'source' in edge - assert 'target' in edge - assert 'properties' in edge - - for node in result['response']['nodes']: - assert 'id' in node - assert 'key_property_names' in node - assert node['label'] in ('Account', 'Person') - assert 'properties' in node - assert 'value' in node - -@pytest.mark.skipif(graph_visualization is None, reason="Requires `spanner-graph-notebook`") + for edge in result["response"]["edges"]: + assert "id" in edge + assert edge["label"] == "Owns" + assert "source" in edge + assert "target" in edge + assert "properties" in edge + + for node in result["response"]["nodes"]: + assert "id" in node + assert "key_property_names" in node + assert node["label"] in ("Account", "Person") + assert "properties" in node + assert "value" in node + + +@pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" +) def test_convert_one_column_no_rows(): - result = convert_graph_data({'result': {}}) + result = convert_graph_data({"result": {}}) assert result == { - 'response': { - 'edges': [], - 'nodes': [], - 'query_result': {'result': []}, - 'rows': [], - 'schema': None + "response": { + "edges": [], + "nodes": [], + "query_result": {"result": []}, + "rows": [], + "schema": None, } } -@pytest.mark.skipif(graph_visualization is None, reason="Requires `spanner-graph-notebook`") + +@pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" +) def test_convert_one_column_one_row(): - result = convert_graph_data({'result': { - '0': json.dumps(row_alex_owns_account), - }}) + result = convert_graph_data( + { + "result": { + "0": json.dumps(row_alex_owns_account), + } + } + ) - assert len(result['response']['nodes']) == 2 - assert len(result['response']['edges']) == 1 + assert len(result["response"]["nodes"]) == 2 + assert len(result["response"]["edges"]) == 1 _validate_nodes_and_edges(result) - assert result['response']['query_result'] == {'result': [row_alex_owns_account]} - assert result['response']['rows'] == [[row_alex_owns_account]] - assert 
result['response']['schema'] is None + assert result["response"]["query_result"] == {"result": [row_alex_owns_account]} + assert result["response"]["rows"] == [[row_alex_owns_account]] + assert result["response"]["schema"] is None + -@pytest.mark.skipif(graph_visualization is None, reason="Requires `spanner-graph-notebook`") +@pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" +) def test_convert_one_column_two_rows(): - result = convert_graph_data({'result': { - '0': json.dumps(row_alex_owns_account), - '1': json.dumps(row_lee_owns_account) - }}) - - assert len(result['response']['nodes']) == 4 - assert len(result['response']['edges']) == 2 + result = convert_graph_data( + { + "result": { + "0": json.dumps(row_alex_owns_account), + "1": json.dumps(row_lee_owns_account), + } + } + ) + + assert len(result["response"]["nodes"]) == 4 + assert len(result["response"]["edges"]) == 2 _validate_nodes_and_edges(result) - assert result['response']['query_result'] == {'result': [row_alex_owns_account, row_lee_owns_account]} - assert result['response']['rows'] == [[row_alex_owns_account], [row_lee_owns_account]] - assert result['response']['schema'] is None + assert result["response"]["query_result"] == { + "result": [row_alex_owns_account, row_lee_owns_account] + } + assert result["response"]["rows"] == [ + [row_alex_owns_account], + [row_lee_owns_account], + ] + assert result["response"]["schema"] is None + -@pytest.mark.skipif(graph_visualization is None, reason="Requires `spanner-graph-notebook`") +@pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" +) def test_convert_nongraph_json(): # If we have valid json that doesn't represent a graph, we don't expect to get nodes and edges, # but we should at least have row data, allowing the tabular view to work. - result = convert_graph_data({'result': { - '0': json.dumps({'foo': 1, 'bar': 2}), - }}) - - assert len(result['response']['nodes']) == 0 - assert len(result['response']['edges']) == 0 - - assert result['response']['query_result'] == {'result': [{'foo': 1, 'bar': 2}]} - assert result['response']['rows'] == [[{'foo': 1, 'bar': 2}]] - assert result['response']['schema'] is None + result = convert_graph_data( + { + "result": { + "0": json.dumps({"foo": 1, "bar": 2}), + } + } + ) + + assert len(result["response"]["nodes"]) == 0 + assert len(result["response"]["edges"]) == 0 + + assert result["response"]["query_result"] == {"result": [{"foo": 1, "bar": 2}]} + assert result["response"]["rows"] == [[{"foo": 1, "bar": 2}]] + assert result["response"]["schema"] is None From cfc1badafd2b1a9a80652fb27bbe363a2aa38476 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 25 Feb 2025 14:14:58 -0800 Subject: [PATCH 04/44] Fix lint errors and unit tests under nox. 
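
Context for one of the lint fixes below: flake8 flags `column_name == None`
(E711) because `==` can be overridden by the left-hand operand, while `is`
always tests identity. A minimal illustration (hypothetical class, not from
this repo):

    class AlwaysEqual:
        def __eq__(self, other):
            return True  # equality can be redefined arbitrarily...

    x = AlwaysEqual()
    assert x == None  # noqa: E711 ...so this comparison is True
    assert x is not None  # ...but the identity check is not fooled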
--- bigquery_magics/bigquery.py | 2 +- bigquery_magics/graph_server.py | 6 +++--- tests/unit/test_graph_server.py | 24 ------------------------ 3 files changed, 4 insertions(+), 28 deletions(-) diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index b689af9..d50b7e3 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -601,7 +601,7 @@ def _handle_result(result, args): def _is_colab() -> bool: """Check if code is running in Google Colab""" try: - import google.colab + import google.colab # noqa: F401 return True except ImportError: diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 7cd9501..af2521f 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -17,14 +17,14 @@ import json import socketserver import threading -from typing import List +from typing import Dict, List from networkx.classes import DiGraph import portpicker import requests -def convert_graph_data(query_results: dict[str, dict[str, str]]): +def convert_graph_data(query_results: Dict[str, Dict[str, str]]): """ Converts graph data to the form expected by the visualization framework. @@ -62,7 +62,7 @@ def convert_graph_data(query_results: dict[str, dict[str, str]]): column_name = None column_value = None for key, value in query_results.items(): - if column_name == None: + if column_name is None: if not isinstance(key, str): raise ValueError(f"Expected key to be str, got {type(key)}") if not isinstance(value, dict): diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index c5a7fc9..cd25b2f 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -12,32 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -from concurrent import futures -import contextlib -import copy import json -import re -from unittest import mock -import warnings - -import IPython -import IPython.terminal.interactiveshell as interactiveshell -import IPython.testing.tools as tools -import IPython.utils.io as io -from google.api_core import exceptions -import google.auth.credentials -from google.cloud import bigquery -from google.cloud.bigquery import exceptions as bq_exceptions -from google.cloud.bigquery import job, table -import google.cloud.bigquery._http -import google.cloud.bigquery.exceptions -from google.cloud.bigquery.retry import DEFAULT_TIMEOUT -import pandas import pytest -import test_utils.imports # google-cloud-testutils - -import bigquery_magics -import bigquery_magics.bigquery as magics try: import spanner_graphs.graph_visualization as graph_visualization From bb05c5c8f497a53ef8fab51ae17a141b4a5d151d Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Tue, 25 Feb 2025 22:18:14 +0000 Subject: [PATCH 05/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- tests/unit/test_graph_server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index cd25b2f..32ef52d 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -13,6 +13,7 @@ # limitations under the License. 
import json + import pytest try: From f37593dd512647c70081e291b50f6365d8be5766 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 25 Feb 2025 15:40:12 -0800 Subject: [PATCH 06/44] Add spanner_graphs to owlbot. Also, fix typo in package name in error message. --- bigquery_magics/bigquery.py | 2 +- owlbot.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index d50b7e3..189cf76 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -620,7 +620,7 @@ def _add_graph_widget(query_result): from spanner_graphs.graph_visualization import generate_visualization_html except ImportError as err: customized_error = ImportError( - "Use of --graph requires the spanner-graph package to be installed." + "Use of --graph requires the spanner_graphs package to be installed." ) raise customized_error from err diff --git a/owlbot.py b/owlbot.py index bbbc590..72a1407 100644 --- a/owlbot.py +++ b/owlbot.py @@ -30,9 +30,10 @@ extras_storage = ["bqstorage"] extras_bf = ["bqstorage", "bigframes", "geopandas"] +extras_spanner = ["spanner_graphs"] extras_by_python = { "3.7": extras_storage, - "3.8": extras_storage, + "3.8": [extras_storage, extras_spanner], "3.9": extras_bf, "3.10": extras_bf, # Use a middle version of Python to test when no extras are installed. From 258e894e11587d8399d4f49549f1b3cfed933e9e Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Tue, 25 Feb 2025 23:56:52 +0000 Subject: [PATCH 07/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- noxfile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 7c02269..9afd6ac 100644 --- a/noxfile.py +++ b/noxfile.py @@ -53,7 +53,8 @@ "bqstorage", ], "3.8": [ - "bqstorage", + "['bqstorage']", + "['spanner_graphs']", ], "3.9": [ "bqstorage", @@ -89,7 +90,8 @@ "bqstorage", ], "3.8": [ - "bqstorage", + "['bqstorage']", + "['spanner_graphs']", ], "3.9": [ "bqstorage", From 8d18cf476b2524b64f44f146fb0a7c9afabf4f85 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 25 Feb 2025 15:58:53 -0800 Subject: [PATCH 08/44] Fix owlbot entry --- owlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/owlbot.py b/owlbot.py index 72a1407..acf6e97 100644 --- a/owlbot.py +++ b/owlbot.py @@ -33,7 +33,7 @@ extras_spanner = ["spanner_graphs"] extras_by_python = { "3.7": extras_storage, - "3.8": [extras_storage, extras_spanner], + "3.8": extras_storage + extras_spanner, "3.9": extras_bf, "3.10": extras_bf, # Use a middle version of Python to test when no extras are installed. 
From 92f668980cc8dbc0b1497f2f7d17d77bdadcfa7e Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 26 Feb 2025 00:01:11 +0000 Subject: [PATCH 09/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- noxfile.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/noxfile.py b/noxfile.py index 9afd6ac..a535a03 100644 --- a/noxfile.py +++ b/noxfile.py @@ -53,8 +53,8 @@ "bqstorage", ], "3.8": [ - "['bqstorage']", - "['spanner_graphs']", + "bqstorage", + "spanner_graphs", ], "3.9": [ "bqstorage", @@ -90,8 +90,8 @@ "bqstorage", ], "3.8": [ - "['bqstorage']", - "['spanner_graphs']", + "bqstorage", + "spanner_graphs", ], "3.9": [ "bqstorage", From 75386d56b3256d397df9d4b42e3c74a4d30441a3 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 25 Feb 2025 16:52:48 -0800 Subject: [PATCH 10/44] Add spanner_graphs as optional dependency with minimum version 1.1.1. This is required for graph visualization. --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 2fdcd8b..582d442 100644 --- a/setup.py +++ b/setup.py @@ -58,6 +58,7 @@ ], "bigframes": ["bigframes >= 1.17.0"], "geopandas": ["geopandas >= 1.0.1"], + "spanner_graphs": ["spanner_graphs >= 1.1.1"], } all_extras = [] From 9f7f1c727c2149a89219ca9be4b9e7cc61f14a0d Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Thu, 27 Feb 2025 14:26:43 -0800 Subject: [PATCH 11/44] Change owlbot so spanner-graph-notebook is added to the config using python runtime 3.12 instead of 3.8, as spanner-graph-notebook does not support runtime version 3.8. --- owlbot.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/owlbot.py b/owlbot.py index acf6e97..618ed67 100644 --- a/owlbot.py +++ b/owlbot.py @@ -33,12 +33,12 @@ extras_spanner = ["spanner_graphs"] extras_by_python = { "3.7": extras_storage, - "3.8": extras_storage + extras_spanner, + "3.8": extras_storage, "3.9": extras_bf, "3.10": extras_bf, # Use a middle version of Python to test when no extras are installed. 
"3.11": [], - "3.12": [], + "3.12": [extras_storage + extras_spanner], "3.13": extras_bf, } templated_files = common.py_library( From 5f27b29e3947c5c855a7d03b95551cfebf139c8a Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 27 Feb 2025 22:29:33 +0000 Subject: [PATCH 12/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- noxfile.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/noxfile.py b/noxfile.py index a535a03..682c55f 100644 --- a/noxfile.py +++ b/noxfile.py @@ -54,7 +54,6 @@ ], "3.8": [ "bqstorage", - "spanner_graphs", ], "3.9": [ "bqstorage", @@ -67,7 +66,9 @@ "geopandas", ], "3.11": [], - "3.12": [], + "3.12": [ + "['bqstorage', 'spanner_graphs']", + ], "3.13": [ "bqstorage", "bigframes", @@ -91,7 +92,6 @@ ], "3.8": [ "bqstorage", - "spanner_graphs", ], "3.9": [ "bqstorage", @@ -104,7 +104,9 @@ "geopandas", ], "3.11": [], - "3.12": [], + "3.12": [ + "['bqstorage', 'spanner_graphs']", + ], "3.13": [ "bqstorage", "bigframes", From f05be37ab4b7875f03bb5dccb6fe2707cc996698 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Thu, 27 Feb 2025 14:31:15 -0800 Subject: [PATCH 13/44] Fix typo --- owlbot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/owlbot.py b/owlbot.py index 618ed67..0585a09 100644 --- a/owlbot.py +++ b/owlbot.py @@ -38,7 +38,7 @@ "3.10": extras_bf, # Use a middle version of Python to test when no extras are installed. "3.11": [], - "3.12": [extras_storage + extras_spanner], + "3.12": extras_storage + extras_spanner, "3.13": extras_bf, } templated_files = common.py_library( From f6058edb2c55cc958b48e9d49bb0cabd32a8ec0a Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 27 Feb 2025 22:33:23 +0000 Subject: [PATCH 14/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- noxfile.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 682c55f..31e0ddc 100644 --- a/noxfile.py +++ b/noxfile.py @@ -67,7 +67,8 @@ ], "3.11": [], "3.12": [ - "['bqstorage', 'spanner_graphs']", + "bqstorage", + "spanner_graphs", ], "3.13": [ "bqstorage", @@ -105,7 +106,8 @@ ], "3.11": [], "3.12": [ - "['bqstorage', 'spanner_graphs']", + "bqstorage", + "spanner_graphs", ], "3.13": [ "bqstorage", From b6b98c04334cc8d8b1a6aeae5aa2043cdf19f8d0 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Thu, 27 Feb 2025 14:44:43 -0800 Subject: [PATCH 15/44] Fix dependent package name: spanner_graphs -> spanner-graph-notebook --- owlbot.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/owlbot.py b/owlbot.py index 0585a09..4347527 100644 --- a/owlbot.py +++ b/owlbot.py @@ -30,7 +30,7 @@ extras_storage = ["bqstorage"] extras_bf = ["bqstorage", "bigframes", "geopandas"] -extras_spanner = ["spanner_graphs"] +extras_spanner = ["spanner-graph-notebook"] extras_by_python = { "3.7": extras_storage, "3.8": extras_storage, diff --git a/setup.py b/setup.py index 582d442..16c2297 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,7 @@ ], "bigframes": ["bigframes >= 1.17.0"], "geopandas": ["geopandas >= 1.0.1"], - "spanner_graphs": ["spanner_graphs >= 1.1.1"], + 
"spanner-graph-notebook": ["spanner-graph-notebook >= 1.1.1"], } all_extras = [] From bf82c6094aa33d0529979303407556d0c27ba150 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 27 Feb 2025 22:47:33 +0000 Subject: [PATCH 16/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- noxfile.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/noxfile.py b/noxfile.py index 31e0ddc..562dd6b 100644 --- a/noxfile.py +++ b/noxfile.py @@ -68,7 +68,7 @@ "3.11": [], "3.12": [ "bqstorage", - "spanner_graphs", + "spanner-graph-notebook", ], "3.13": [ "bqstorage", @@ -107,7 +107,7 @@ "3.11": [], "3.12": [ "bqstorage", - "spanner_graphs", + "spanner-graph-notebook", ], "3.13": [ "bqstorage", From 858e72b668da5a5b9ebce91c6e514101925568fc Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Thu, 27 Feb 2025 15:06:57 -0800 Subject: [PATCH 17/44] Add unit test coverage for the GraphServer object. --- bigquery_magics/graph_server.py | 2 +- tests/unit/test_graph_server.py | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index af2521f..184eafb 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -162,7 +162,7 @@ def init(): def stop_server(): if GraphServer._server: GraphServer._server.shutdown() - print("Spanner Graph Notebook shutting down...") + print("BigQuery-magics graph server shutting down...") @staticmethod def get_ping(): diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 32ef52d..c1b97ee 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -13,15 +13,15 @@ # limitations under the License. 
import json - import pytest +import unittest try: import spanner_graphs.graph_visualization as graph_visualization except ImportError: graph_visualization = None -from bigquery_magics.graph_server import convert_graph_data +from bigquery_magics.graph_server import convert_graph_data, GraphServer alex_properties = { "birthday": "1991-12-21T08:00:00Z", @@ -214,3 +214,22 @@ def test_convert_nongraph_json(): assert result["response"]["query_result"] == {"result": [{"foo": 1, "bar": 2}]} assert result["response"]["rows"] == [[{"foo": 1, "bar": 2}]] assert result["response"]["schema"] is None + + +class TestGraphServer(unittest.TestCase): + def setUp(self): + self.server_thread = GraphServer.init() + + def tearDown(self): + GraphServer.stop_server() # Stop the server after each test + self.server_thread.join() # Wait for the thread to finish + + def test_ping(self): + self.assertTrue(self.server_thread.is_alive()) + + response = GraphServer.get_ping() + self.assertEqual(response, {"message": "pong"}) + + request = {"data": "ping"} + response = GraphServer.post_ping(request) + self.assertEqual(response, {"your_request": request}) From 942f7c149cc9b537d95dcb029fd36c6cfb2798b7 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Thu, 27 Feb 2025 23:08:54 +0000 Subject: [PATCH 18/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- tests/unit/test_graph_server.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index c1b97ee..ae33082 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -13,15 +13,16 @@ # limitations under the License. import json -import pytest import unittest +import pytest + try: import spanner_graphs.graph_visualization as graph_visualization except ImportError: graph_visualization = None -from bigquery_magics.graph_server import convert_graph_data, GraphServer +from bigquery_magics.graph_server import GraphServer, convert_graph_data alex_properties = { "birthday": "1991-12-21T08:00:00Z", From 6f9ddceec451639a3c826c2183ec7622f4e1f533 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Fri, 28 Feb 2025 10:49:44 -0800 Subject: [PATCH 19/44] Add more unit tests for convert_graph_data() to boost code coverage. --- bigquery_magics/graph_server.py | 11 ++--- tests/unit/test_graph_server.py | 78 ++++++++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 6 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 184eafb..d9cb1e8 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -64,18 +64,19 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): for key, value in query_results.items(): if column_name is None: if not isinstance(key, str): - raise ValueError(f"Expected key to be str, got {type(key)}") + raise ValueError(f"Expected outer key to be str, got {type(key)}") if not isinstance(value, dict): - raise ValueError(f"Expected value to be dict, got {type(value)}") + raise ValueError(f"Expected outer value to be dict, got {type(value)}") column_name = key column_value = value else: + # TODO: Implement multi-column support. 
             raise ValueError(
                 "Query has multiple columns - graph visualization not supported"
             )
     if column_name is None or column_value is None:
         raise ValueError(
-            "Unable to get column name or value - how is this possible???"
+            "query result with no columns is not supported for graph visualization"
         )
 
     fields: List[StructType.Field] = [
@@ -85,9 +86,9 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
     rows = []
     for value_key, value_value in column_value.items():
         if not isinstance(value_key, str):
-            raise ValueError(f"Expected key to be str, got {type(key)}")
+            raise ValueError(f"Expected inner key to be str, got {type(value_key)}")
         if not isinstance(value_value, str):
-            raise ValueError(f"Expected value to be str, got {type(value)}")
+            raise ValueError(f"Expected inner value to be str, got {type(value_value)}")
         row_index = int(value_key)
         row_json = json.loads(value_value)
diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py
index ae33082..0eab646 100644
--- a/tests/unit/test_graph_server.py
+++ b/tests/unit/test_graph_server.py
@@ -148,7 +148,7 @@ def test_convert_one_column_no_rows():
 @pytest.mark.skipif(
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
-def test_convert_one_column_one_row():
+def test_convert_one_column_one_row_one_column():
     result = convert_graph_data(
         {
             "result": {
@@ -217,6 +217,82 @@ def test_convert_nongraph_json():
     assert result["response"]["schema"] is None
 
 
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_outer_key_not_string():
+    result = convert_graph_data(
+        {
+            0: {
+                '0': json.dumps({"foo": 1, "bar": 2}),
+            }
+        })
+    assert result == {"error": "Expected outer key to be str, got <class 'int'>"}
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_outer_value_not_dict():
+    result = convert_graph_data(
+        {
+            'result': 0
+        })
+    assert result == {"error": "Expected outer value to be dict, got <class 'int'>"}
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_inner_key_not_string():
+    result = convert_graph_data(
+        {
+            'result': {
+                0: json.dumps({"foo": 1, "bar": 2}),
+            }
+        })
+    assert result == {"error": "Expected inner key to be str, got <class 'int'>"}
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_inner_value_not_string():
+    result = convert_graph_data(
+        {
+            'result': {
+                '0': 1,
+            }
+        })
+    assert result == {"error": "Expected inner value to be str, got <class 'int'>"}
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_one_column_one_row_two_columns():
+    result = convert_graph_data(
+        {
+            "result1": {
+                "0": json.dumps(row_alex_owns_account),
+            },
+            "result2": {
+                "0": json.dumps(row_alex_owns_account),
+            }
+
+        }
+    )
+    assert result == {"error": "Query has multiple columns - graph visualization not supported"}
+
+
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_empty_dict():
+    result = convert_graph_data({})
+    assert result == {"error": "query result with no columns is not supported for graph visualization"}
+
+
 class TestGraphServer(unittest.TestCase):
     def setUp(self):
         self.server_thread = GraphServer.init()

From b112cff5b09fbb3c9fd98a94c7137b3ee6b200fb Mon Sep 17 00:00:00 2001
From: Owl Bot
Date: Fri, 28 Feb 2025 18:53:32 +0000
Subject: [PATCH 20/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?=
 =?UTF-8?q?post-processor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---
 bigquery_magics/graph_server.py |  8 ++++--
 tests/unit/test_graph_server.py | 49 +++++++++++++++++----------------
 2 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py
index d9cb1e8..e152c15 100644
--- a/bigquery_magics/graph_server.py
+++ b/bigquery_magics/graph_server.py
@@ -66,7 +66,9 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
         if not isinstance(key, str):
             raise ValueError(f"Expected outer key to be str, got {type(key)}")
         if not isinstance(value, dict):
-            raise ValueError(f"Expected outer value to be dict, got {type(value)}")
+            raise ValueError(
+                f"Expected outer value to be dict, got {type(value)}"
+            )
             column_name = key
             column_value = value
         else:
@@ -88,7 +90,9 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
         if not isinstance(value_key, str):
             raise ValueError(f"Expected inner key to be str, got {type(value_key)}")
         if not isinstance(value_value, str):
-            raise ValueError(f"Expected inner value to be str, got {type(value_value)}")
+            raise ValueError(
+                f"Expected inner value to be str, got {type(value_value)}"
+            )
         row_index = int(value_key)
         row_json = json.loads(value_value)
diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py
index 0eab646..64f401f 100644
--- a/tests/unit/test_graph_server.py
+++ b/tests/unit/test_graph_server.py
@@ -222,11 +222,12 @@ def test_convert_nongraph_json():
 )
 def test_convert_outer_key_not_string():
     result = convert_graph_data(
-        {
-            0: {
-                '0': json.dumps({"foo": 1, "bar": 2}),
-            }
-        })
+        {
+            0: {
+                "0": json.dumps({"foo": 1, "bar": 2}),
+            }
+        }
+    )
     assert result == {"error": "Expected outer key to be str, got <class 'int'>"}
 
 
@@ -234,10 +235,7 @@ def test_convert_outer_key_not_string():
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
 def test_convert_outer_value_not_dict():
-    result = convert_graph_data(
-        {
-            'result': 0
-        })
+    result = convert_graph_data({"result": 0})
     assert result == {"error": "Expected outer value to be dict, got <class 'int'>"}
 
 
@@ -246,11 +244,12 @@ def test_convert_outer_value_not_dict():
 )
 def test_convert_inner_key_not_string():
     result = convert_graph_data(
-        {
-            'result': {
-                0: json.dumps({"foo": 1, "bar": 2}),
-            }
-        })
+        {
+            "result": {
+                0: json.dumps({"foo": 1, "bar": 2}),
+            }
+        }
+    )
     assert result == {"error": "Expected inner key to be str, got <class 'int'>"}
 
 
@@ -259,11 +258,12 @@ def test_convert_inner_key_not_string():
 )
 def test_convert_inner_value_not_string():
     result = convert_graph_data(
-        {
-            'result': {
-                '0': 1,
-            }
-        })
+        {
+            "result": {
+                "0": 1,
+            }
+        }
+    )
     assert result == {"error": "Expected inner value to be str, got <class 'int'>"}
 
 
@@ -278,11 +278,12 @@ def test_convert_one_column_one_row_two_columns():
             },
             "result2": {
                 "0": json.dumps(row_alex_owns_account),
-            }
-
+            },
         }
     )
-    assert result == {"error": "Query has multiple columns - graph visualization not supported"}
+    assert result == {
+        "error": "Query has multiple columns - graph visualization not supported"
+    }
 
 
 @pytest.mark.skipif(
@@ -290,7 +291,9 @@ def test_convert_one_column_one_row_two_columns():
 )
 def test_convert_empty_dict():
     result = convert_graph_data({})
-    assert result == {"error": "query result with no columns is not supported for graph
visualization"} + assert result == { + "error": "query result with no columns is not supported for graph visualization" + } class TestGraphServer(unittest.TestCase): From 8f3100fc325365f719a535ecb41f29cc77dd9acf Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Fri, 28 Feb 2025 12:50:04 -0800 Subject: [PATCH 21/44] Add more tests. --- bigquery_magics/graph_server.py | 9 ++++----- tests/unit/test_graph_server.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index e152c15..6b15d21 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -174,11 +174,10 @@ def get_ping(): route = GraphServer.build_route(GraphServer.endpoints["get_ping"]) response = requests.get(route) - if response.status_code == 200: - return response.json() - else: - print(f"Request failed with status code {response.status_code}") - return False + assert ( + response.status_code == 200 + ) # Guaranteed by GraphServerHandler implementation + return response.json() @staticmethod def post_ping(data): diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 64f401f..c025785 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -296,6 +296,22 @@ def test_convert_empty_dict(): } +@pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" +) +def test_convert_wrong_row_index(): + result = convert_graph_data( + { + "result": { + # Missing "0" key + "1": json.dumps(row_alex_owns_account), + } + } + ) + + assert result == {"error": "Unexpected row index; expected 0, got 1"} + + class TestGraphServer(unittest.TestCase): def setUp(self): self.server_thread = GraphServer.init() @@ -313,3 +329,7 @@ def test_ping(self): request = {"data": "ping"} response = GraphServer.post_ping(request) self.assertEqual(response, {"your_request": request}) + + +def test_stop_server_never_started(): + GraphServer.stop_server() From cddb18c57183f57e04d8462b167a17c86cf392db Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Mon, 3 Mar 2025 14:02:11 -0800 Subject: [PATCH 22/44] Move get_ping() and post_ping() out of the GraphServer class, into the unit test --- bigquery_magics/graph_server.py | 22 +--------------------- tests/unit/test_graph_server.py | 18 ++++++++++++------ 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 6b15d21..f7aa81e 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -168,27 +168,7 @@ def stop_server(): if GraphServer._server: GraphServer._server.shutdown() print("BigQuery-magics graph server shutting down...") - - @staticmethod - def get_ping(): - route = GraphServer.build_route(GraphServer.endpoints["get_ping"]) - response = requests.get(route) - - assert ( - response.status_code == 200 - ) # Guaranteed by GraphServerHandler implementation - return response.json() - - @staticmethod - def post_ping(data): - route = GraphServer.build_route(GraphServer.endpoints["post_ping"]) - response = requests.post(route, json=data) - - if response.status_code == 200: - return response.json() - else: - print(f"Request failed with status code {response.status_code}") - return False + GraphServer._server = None class GraphServerHandler(http.server.SimpleHTTPRequestHandler): diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index c025785..f1a6b8e 100644 --- 
a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -13,6 +13,7 @@ # limitations under the License. import json +import requests import unittest import pytest @@ -320,15 +321,20 @@ def tearDown(self): GraphServer.stop_server() # Stop the server after each test self.server_thread.join() # Wait for the thread to finish - def test_ping(self): + def test_get_ping(self): self.assertTrue(self.server_thread.is_alive()) - response = GraphServer.get_ping() - self.assertEqual(response, {"message": "pong"}) + route = GraphServer.build_route(GraphServer.endpoints["get_ping"]) + response = requests.get(route) + self.assertEqual(response.status_code, 200) + self.assertEqual(response.json(), {"message": "pong"}) - request = {"data": "ping"} - response = GraphServer.post_ping(request) - self.assertEqual(response, {"your_request": request}) + def test_post_ping(self): + self.assertTrue(self.server_thread.is_alive()) + route = GraphServer.build_route(GraphServer.endpoints["post_ping"]) + response = requests.post(route, json={"data": "ping"}) + self.assertEqual(response.status_code, 200) + self.assertEqual(response.json(), {"your_request": {"data": "ping"}}) def test_stop_server_never_started(): From abd718afaf84f0074cb6404358e960c8a5fcbd23 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Mon, 3 Mar 2025 22:04:21 +0000 Subject: [PATCH 23/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- tests/unit/test_graph_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index f1a6b8e..00a5c28 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -13,10 +13,10 @@ # limitations under the License. import json -import requests import unittest import pytest +import requests try: import spanner_graphs.graph_visualization as graph_visualization From b470c717b9862411e7edd13b1253ff284628d4c9 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Mon, 3 Mar 2025 14:28:11 -0800 Subject: [PATCH 24/44] Add unit test for handle_post_query() in GraphServer. 
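
For reference, a rough sketch of the round trip the new test exercises,
outside the test harness (assumes the optional spanner-graph-notebook
dependency is installed; the empty row payload is only a stand-in):

    import json

    import requests

    from bigquery_magics.graph_server import GraphServer

    server_thread = GraphServer.init()
    route = GraphServer.build_route(GraphServer.endpoints["post_query"])
    # One JSON column ("result") with a single row keyed by row index,
    # mirroring the DataFrame.to_json() shape convert_graph_data() expects.
    data = {"result": {"0": json.dumps([])}}
    response = requests.post(route, json={"params": json.dumps(data)})
    print(sorted(response.json()["response"].keys()))
    # ['edges', 'nodes', 'query_result', 'rows', 'schema']
    GraphServer.stop_server()
    server_thread.join()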
--- tests/unit/test_graph_server.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 00a5c28..33c601c 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -336,6 +336,28 @@ def test_post_ping(self): self.assertEqual(response.status_code, 200) self.assertEqual(response.json(), {"your_request": {"data": "ping"}}) + def test_post_query(self): + self.assertTrue(self.server_thread.is_alive()) + route = GraphServer.build_route(GraphServer.endpoints["post_query"]) + + data = { + "result": { + "0": json.dumps(row_alex_owns_account), + } + } + response = requests.post(route, json={"params": json.dumps(data)}) + self.assertEqual(response.status_code, 200) + response_data = response.json()["response"] + + self.assertEqual(len(response_data["nodes"]), 2) + self.assertEqual(len(response_data["edges"]), 1) + + _validate_nodes_and_edges(response.json()) + + self.assertEqual(response_data["query_result"], {"result": [row_alex_owns_account]}) + self.assertEqual(response_data["rows"], [[row_alex_owns_account]]) + self.assertIsNone(response_data["schema"]) + def test_stop_server_never_started(): GraphServer.stop_server() From f4266c37282b33ac992cb51b1537776eb49e3b12 Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Mon, 3 Mar 2025 22:30:35 +0000 Subject: [PATCH 25/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- tests/unit/test_graph_server.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 33c601c..46add5e 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -343,7 +343,7 @@ def test_post_query(self): data = { "result": { "0": json.dumps(row_alex_owns_account), - } + } } response = requests.post(route, json={"params": json.dumps(data)}) self.assertEqual(response.status_code, 200) @@ -354,7 +354,9 @@ def test_post_query(self): _validate_nodes_and_edges(response.json()) - self.assertEqual(response_data["query_result"], {"result": [row_alex_owns_account]}) + self.assertEqual( + response_data["query_result"], {"result": [row_alex_owns_account]} + ) self.assertEqual(response_data["rows"], [[row_alex_owns_account]]) self.assertIsNone(response_data["schema"]) From 02f022383af38610170f1f9a0c81cb0ee49b03e0 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Mon, 3 Mar 2025 14:38:49 -0800 Subject: [PATCH 26/44] Fix lint, remove a line of dead code, mark test_post_query for skipping if spanner_graphs is not present. 
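
Note on the new skip marker: `pytest.mark.skipif` is one of the few pytest
marks that also works on `unittest.TestCase` methods, which is what
test_post_query relies on here. The pattern, sketched with a hypothetical
test class:

    import unittest

    import pytest

    try:
        import spanner_graphs.graph_visualization  # noqa: F401

        _have_spanner_graphs = True
    except ImportError:
        _have_spanner_graphs = False

    class ExampleTest(unittest.TestCase):
        @pytest.mark.skipif(
            not _have_spanner_graphs, reason="Requires `spanner-graph-notebook`"
        )
        def test_needs_spanner_graphs(self):
            self.assertTrue(_have_spanner_graphs)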
--- bigquery_magics/graph_server.py | 3 --- tests/unit/test_graph_server.py | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index f7aa81e..30e964e 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -21,7 +21,6 @@ from networkx.classes import DiGraph import portpicker -import requests def convert_graph_data(query_results: Dict[str, Dict[str, str]]): @@ -209,8 +208,6 @@ def handle_post_query(self): def do_GET(self): if self.path == GraphServer.endpoints["get_ping"]: self.handle_get_ping() - else: - super().do_GET() def do_POST(self): if self.path == GraphServer.endpoints["post_ping"]: diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 46add5e..6317e0a 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -336,6 +336,9 @@ def test_post_ping(self): self.assertEqual(response.status_code, 200) self.assertEqual(response.json(), {"your_request": {"data": "ping"}}) + @pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" + ) def test_post_query(self): self.assertTrue(self.server_thread.is_alive()) route = GraphServer.build_route(GraphServer.endpoints["post_query"]) From 6f06250bd3b54a3376ef10b6598d6c86a22d61d5 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Mon, 3 Mar 2025 14:57:52 -0800 Subject: [PATCH 27/44] Remove more dead code in graph server, add test for --graph without spanner-graph-notebook present. --- bigquery_magics/graph_server.py | 7 +++-- tests/unit/test_bigquery.py | 50 +++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 30e964e..8ac37e0 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -206,13 +206,14 @@ def handle_post_query(self): self.do_data_response(response) def do_GET(self): - if self.path == GraphServer.endpoints["get_ping"]: - self.handle_get_ping() + assert self.path == GraphServer.endpoints["get_ping"] + self.handle_get_ping() def do_POST(self): if self.path == GraphServer.endpoints["post_ping"]: self.handle_post_ping() - elif self.path == GraphServer.endpoints["post_query"]: + else: + assert self.path == GraphServer.endpoints["post_query"] self.handle_post_query() diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index dbd7d61..223efb5 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -770,6 +770,56 @@ def test_bigquery_graph_json_result(monkeypatch): assert list(return_value) == list(result) # verify column names +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + graph_visualization is not None or bigquery_storage is None, + reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present", +) +def test_bigquery_graph_missing_spanner_deps(monkeypatch): + ip = IPython.get_ipython() + ip.extension_manager.load_extension("bigquery_magics") + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials) + + # Mock out the BigQuery Storage API. 
+ bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + bqstorage_instance_mock._transport = mock.Mock() + bqstorage_mock.return_value = bqstorage_instance_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock + ) + sql = "SELECT graph_json FROM t" + result = pandas.DataFrame([], columns=["graph_json"]) + run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) + graph_server_init_patch = mock.patch( + "bigquery_magics.graph_server.GraphServer.init", autospec=True + ) + display_patch = mock.patch("IPython.display.display", autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock: + run_query_mock.return_value = query_job_mock + graph_server_init_mock.return_value = None + try: + ip.run_cell_magic("bigquery", "--graph", sql) + assert False, "Should have failed" + except ImportError: + pass + + @pytest.mark.usefixtures("ipython_interactive") def test_bigquery_magic_default_connection_user_agent(): ip = IPython.get_ipython() From 19e53fb5300b7c19115cc0b7aaca9bc3d28dda5d Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Mon, 3 Mar 2025 15:29:21 -0800 Subject: [PATCH 28/44] Add unit tests for colab paths. --- tests/unit/test_bigquery.py | 113 ++++++++++++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 223efb5..2c11118 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -15,7 +15,9 @@ from concurrent import futures import contextlib import copy +import json import re +import sys from unittest import mock import warnings @@ -770,6 +772,116 @@ def test_bigquery_graph_json_result(monkeypatch): assert list(return_value) == list(result) # verify column names +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + graph_visualization is None or bigquery_storage is None, + reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", +) +def test_bigquery_graph_colab(monkeypatch): + # Mock the colab module so the code under test uses colab.register_callback(), rather than + # GraphServer. + sys.modules['google.colab'] = mock.Mock() + + ip = IPython.get_ipython() + ip.extension_manager.load_extension("bigquery_magics") + mock_credentials = mock.create_autospec( + google.auth.credentials.Credentials, instance=True + ) + + # Set up the context with monkeypatch so that it's reset for subsequent + # tests. + monkeypatch.setattr(bigquery_magics.context, "_credentials", mock_credentials) + + # Mock out the BigQuery Storage API. 
+ bqstorage_mock = mock.create_autospec(bigquery_storage.BigQueryReadClient) + bqstorage_instance_mock = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + bqstorage_instance_mock._transport = mock.Mock() + bqstorage_mock.return_value = bqstorage_instance_mock + bqstorage_client_patch = mock.patch( + "google.cloud.bigquery_storage.BigQueryReadClient", bqstorage_mock + ) + + sql = "SELECT graph_json FROM t" + graph_json_rows = [ + """ + [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI=","kind":"node","labels":["Person"],"properties":{"birthday":"1991-12-21T08:00:00Z","city":"Adelaide","country":"Australia","id":1,"name":"Alex"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJECkQ6ZRmluR3JhcGguUGVyc29uAHiRAplGaW5HcmFwaC5BY2NvdW50AHiRDg==","kind":"edge","labels":["Owns"],"properties":{"account_id":7,"create_time":"2020-01-10T14:22:20.222Z","id":1},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQI="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEO","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-10T14:22:20.222Z","id":7,"is_blocked":false,"nick_name":"Vacation Fund"}}] + """, + """ + [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY=","kind":"node","labels":["Person"],"properties":{"birthday":"1986-12-07T08:00:00Z","city":"Kollam","country":"India","id":3,"name":"Lee"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEGkSCZRmluR3JhcGguUGVyc29uAHiRBplGaW5HcmFwaC5BY2NvdW50AHiRIA==","kind":"edge","labels":["Owns"],"properties":{"account_id":16,"create_time":"2020-02-18T13:44:20.655Z","id":3},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQY="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEg","kind":"node","labels":["Account"],"properties":{"create_time":"2020-01-28T01:55:09.206Z","id":16,"is_blocked":true,"nick_name":"Vacation Fund"}}] + """, + """ + [{"identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ=","kind":"node","labels":["Person"],"properties":{"birthday":"1980-10-31T08:00:00Z","city":"Moravia","country":"Czech_Republic","id":2,"name":"Dana"}},{"destination_node_identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","identifier":"mUZpbkdyYXBoLlBlcnNvbk93bkFjY291bnQAeJEEkSiZRmluR3JhcGguUGVyc29uAHiRBJlGaW5HcmFwaC5BY2NvdW50AHiRKA==","kind":"edge","labels":["Owns"],"properties":{"account_id":20,"create_time":"2020-01-28T01:55:09.206Z","id":2},"source_node_identifier":"mUZpbkdyYXBoLlBlcnNvbgB4kQQ="},{"identifier":"mUZpbkdyYXBoLkFjY291bnQAeJEo","kind":"node","labels":["Account"],"properties":{"create_time":"2020-02-18T13:44:20.655Z","id":20,"is_blocked":false,"nick_name":"Rainy Day Fund"}}] + """, + ] + result = pandas.DataFrame(graph_json_rows, columns=["graph_json"]) + run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) + graph_server_init_patch = mock.patch( + "bigquery_magics.graph_server.GraphServer.init", autospec=True + ) + display_patch = mock.patch("IPython.display.display", autospec=True) + query_job_mock = mock.create_autospec( + google.cloud.bigquery.job.QueryJob, instance=True + ) + query_job_mock.to_dataframe.return_value = result + + with run_query_patch as run_query_mock, ( + bqstorage_client_patch + ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock: + run_query_mock.return_value = query_job_mock + graph_server_init_mock.return_value = None + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + + assert len(display_mock.call_args_list) == 1 + assert 
len(display_mock.call_args_list[0]) == 2 + + # Sanity check that the HTML content looks like graph visualization. Minimal check + # to allow Spanner to change its implementation without breaking this test. + html_content = display_mock.call_args_list[0][0][0].data + assert "" in html_content + # Verify that the query results are embedded into the HTML, allowing them to be visualized. + # Due to escaping, it is not possible check for graph_json_rows exactly, so we check for a few + # sentinel strings within the query results, instead. + assert ( + "mUZpbkdyYXBoLlBlcnNvbgB4kQI=" in html_content + ) # identifier in 1st row of query result + assert ( + "mUZpbkdyYXBoLlBlcnNvbgB4kQY=" in html_content + ) # identifier in 2nd row of query result + assert ( + "mUZpbkdyYXBoLlBlcnNvbgB4kQQ=" in html_content + ) # identifier in 3rd row of query result + + # Make sure we actually used colab path, not GraphServer path. + assert sys.modules['google.colab'].output.register_callback.called + assert not graph_server_init_mock.called + + assert bqstorage_mock.called # BQ storage client was used + assert isinstance(return_value, pandas.DataFrame) + assert len(return_value) == len(result) # verify row count + assert list(return_value) == list(result) # verify column names + + +@pytest.mark.usefixtures("ipython_interactive") +@pytest.mark.skipif( + graph_visualization is None or bigquery_storage is None, + reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", +) +def test_colab_callback(): + result = bigquery_magics.bigquery._colab_callback('query', json.dumps({"result": {}})) + assert result.data == { + "response": { + "edges": [], + "nodes": [], + "query_result": {"result": []}, + "rows": [], + "schema": None, + } + } + + @pytest.mark.usefixtures("ipython_interactive") @pytest.mark.skipif( graph_visualization is not None or bigquery_storage is None, @@ -818,6 +930,7 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch): assert False, "Should have failed" except ImportError: pass + display_mock.assert_not_called() @pytest.mark.usefixtures("ipython_interactive") From ec1e188f29df9a258b1223a4adf73e42e183adcd Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Mon, 3 Mar 2025 23:31:23 +0000 Subject: [PATCH 29/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- tests/unit/test_bigquery.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 2c11118..dda843d 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -780,7 +780,7 @@ def test_bigquery_graph_json_result(monkeypatch): def test_bigquery_graph_colab(monkeypatch): # Mock the colab module so the code under test uses colab.register_callback(), rather than # GraphServer. - sys.modules['google.colab'] = mock.Mock() + sys.modules["google.colab"] = mock.Mock() ip = IPython.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -855,7 +855,7 @@ def test_bigquery_graph_colab(monkeypatch): ) # identifier in 3rd row of query result # Make sure we actually used colab path, not GraphServer path. 
-    assert sys.modules['google.colab'].output.register_callback.called
+    assert sys.modules["google.colab"].output.register_callback.called
     assert not graph_server_init_mock.called
 
     assert bqstorage_mock.called  # BQ storage client was used
@@ -870,7 +870,9 @@ def test_bigquery_graph_colab(monkeypatch):
     reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
 )
 def test_colab_callback():
-    result = bigquery_magics.bigquery._colab_callback('query', json.dumps({"result": {}}))
+    result = bigquery_magics.bigquery._colab_callback(
+        "query", json.dumps({"result": {}})
+    )
     assert result.data == {
         "response": {
             "edges": [],

From 1df745437ec67d2654fbc287689ad938b0ca0b9e Mon Sep 17 00:00:00 2001
From: Eric Feiveson
Date: Mon, 3 Mar 2025 15:40:23 -0800
Subject: [PATCH 30/44] Use pytest.raises() instead of try/except

---
 tests/unit/test_bigquery.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py
index dda843d..97e8635 100644
--- a/tests/unit/test_bigquery.py
+++ b/tests/unit/test_bigquery.py
@@ -927,11 +927,8 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch):
     ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
         run_query_mock.return_value = query_job_mock
         graph_server_init_mock.return_value = None
-        try:
+        with pytest.raises(ImportError):
             ip.run_cell_magic("bigquery", "--graph", sql)
-            assert False, "Should have failed"
-        except ImportError:
-            pass
         display_mock.assert_not_called()

From 388593580993511ed85d782e095f415893473db3 Mon Sep 17 00:00:00 2001
From: Eric Feiveson
Date: Mon, 3 Mar 2025 16:03:29 -0800
Subject: [PATCH 31/44] Add test coverage for the case where a graph query is
 run after the graph server is already running, due to another graph query
 having run previously.

---
 tests/unit/test_bigquery.py | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py
index 97e8635..37e20e3 100644
--- a/tests/unit/test_bigquery.py
+++ b/tests/unit/test_bigquery.py
@@ -741,8 +741,11 @@ def test_bigquery_graph_json_result(monkeypatch):
     with run_query_patch as run_query_mock, (
         bqstorage_client_patch
     ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
+        graph_server_init_mock.return_value = mock.Mock()
+        graph_server_init_mock.return_value.is_alive = mock.Mock()
+        graph_server_init_mock.return_value.is_alive.return_value = True
         run_query_mock.return_value = query_job_mock
-        graph_server_init_mock.return_value = None
+
         return_value = ip.run_cell_magic("bigquery", "--graph", sql)
 
     assert len(display_mock.call_args_list) == 1
@@ -766,6 +769,27 @@ def test_bigquery_graph_json_result(monkeypatch):
         "mUZpbkdyYXBoLlBlcnNvbgB4kQQ=" in html_content
     )  # identifier in 3rd row of query result
 
+    # Make sure we can run a second graph query, after the graph server is already running.
+    return_value = ip.run_cell_magic("bigquery", "--graph", sql)
+
+    # Sanity check that the HTML content looks like graph visualization. Minimal check
+    # to allow Spanner to change its implementation without breaking this test.
+    html_content = display_mock.call_args_list[0][0][0].data
+    assert "" in html_content
+    # Verify that the query results are embedded into the HTML, allowing them to be visualized.
+    # Due to escaping, it is not possible to check for graph_json_rows exactly, so we check for a few
+    # sentinel strings within the query results, instead.
+    assert (
+        "mUZpbkdyYXBoLlBlcnNvbgB4kQI=" in html_content
+    )  # identifier in 1st row of query result
+    assert (
+        "mUZpbkdyYXBoLlBlcnNvbgB4kQY=" in html_content
+    )  # identifier in 2nd row of query result
+    assert (
+        "mUZpbkdyYXBoLlBlcnNvbgB4kQQ=" in html_content
+    )  # identifier in 3rd row of query result
+
     assert bqstorage_mock.called  # BQ storage client was used
     assert isinstance(return_value, pandas.DataFrame)
     assert len(return_value) == len(result)  # verify row count

From 4b27ebb975e0b4bf7effb568d381f5ef593dd0c1 Mon Sep 17 00:00:00 2001
From: Eric Feiveson
Date: Tue, 4 Mar 2025 14:29:23 -0800
Subject: [PATCH 32/44] Add docstrings

---
 bigquery_magics/graph_server.py | 37 +++++++++++++++++++++++++++------
 1 file changed, 31 insertions(+), 6 deletions(-)

diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py
index 8ac37e0..5970645 100644
--- a/bigquery_magics/graph_server.py
+++ b/bigquery_magics/graph_server.py
@@ -15,11 +15,11 @@
 import atexit
 import http.server
 import json
+import networkx
 import socketserver
 import threading
 from typing import Dict, List
 
-from networkx.classes import DiGraph
 import portpicker
 
 
@@ -104,7 +104,7 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
 
     d, ignored_columns = columns_to_native_numpy(data, fields)
 
-    graph: DiGraph = prepare_data_for_graphing(incoming=d, schema_json=None)
+    graph: networkx.classes.DiGraph = prepare_data_for_graphing(incoming=d, schema_json=None)
 
     nodes = []
     for node_id, node in graph.nodes(data=True):
@@ -128,10 +128,19 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
 
 
 class GraphServer:
+    """
+    HTTP server invoked by Javascript to obtain the query results for visualization.
+
+    The server is invoked by Javascript, generated as part of
+    spanner_graphs.graph_visualization.generate_visualization_html().
+
+    This server is used only in Jupyter; in colab, google.colab.output.register_callback()
+    is used instead.
+    """
     port = portpicker.pick_unused_port()
     host = "http://localhost"
     url = f"{host}:{port}"
-
+    
     endpoints = {
         "get_ping": "/get_ping",
         "post_ping": "/post_ping",
@@ -139,13 +148,20 @@ class GraphServer:
     }
 
     _server = None
-
+    
    @staticmethod
     def build_route(endpoint):
+        """
+        Returns a url for connecting to the given endpoint.
+        Supported values include:
+        - "get_ping": sends a GET request to ping the server.
+        - "post_ping": sends a POST request to ping the server.
+        - "post_query": sends a POST request to obtain query results.
+        """
         return f"{GraphServer.url}{endpoint}"
 
     @staticmethod
-    def start_server():
+    def _start_server():
         class ThreadedTCPServer(socketserver.TCPServer):
             # Allow socket reuse to avoid "Address already in use" errors
             allow_reuse_address = True
@@ -158,12 +174,18 @@ class ThreadedTCPServer(socketserver.TCPServer):
 
     @staticmethod
     def init():
-        server_thread = threading.Thread(target=GraphServer.start_server)
+        """
+        Starts the HTTP server. The server runs forever, until stop_server() is called.
+        """
+        server_thread = threading.Thread(target=GraphServer._start_server)
         server_thread.start()
         return server_thread
 
     @staticmethod
     def stop_server():
+        """
+        Stops the HTTP server, if it is currently running.
+        """
         if GraphServer._server:
             GraphServer._server.shutdown()
             print("BigQuery-magics graph server shutting down...")
@@ -171,6 +193,9 @@ def stop_server():
 
 
 class GraphServerHandler(http.server.SimpleHTTPRequestHandler):
+    """
+    Handles HTTP requests sent to the graph server.
+    """
     def log_message(self, format, *args):
         pass

From 10c5d75d63da25b46b707e99d37e73b9da566d36 Mon Sep 17 00:00:00 2001
From: Owl Bot
Date: Tue, 4 Mar 2025 22:32:21 +0000
Subject: [PATCH 33/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?=
 =?UTF-8?q?post-processor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---
 bigquery_magics/graph_server.py | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py
index 5970645..5281811 100644
--- a/bigquery_magics/graph_server.py
+++ b/bigquery_magics/graph_server.py
@@ -15,11 +15,11 @@
 import atexit
 import http.server
 import json
-import networkx
 import socketserver
 import threading
 from typing import Dict, List
 
+import networkx
 import portpicker
 
 
@@ -104,7 +104,9 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
 
     d, ignored_columns = columns_to_native_numpy(data, fields)
 
-    graph: networkx.classes.DiGraph = prepare_data_for_graphing(incoming=d, schema_json=None)
+    graph: networkx.classes.DiGraph = prepare_data_for_graphing(
+        incoming=d, schema_json=None
+    )
 
     nodes = []
     for node_id, node in graph.nodes(data=True):
@@ -137,10 +139,11 @@ class GraphServer:
     This server is used only in Jupyter; in colab, google.colab.output.register_callback()
     is used instead.
     """
+
     port = portpicker.pick_unused_port()
     host = "http://localhost"
     url = f"{host}:{port}"
-    
+
     endpoints = {
         "get_ping": "/get_ping",
         "post_ping": "/post_ping",
@@ -148,7 +151,7 @@ class GraphServer:
     }
 
     _server = None
-    
+
     @staticmethod
     def build_route(endpoint):
         """
@@ -175,7 +178,7 @@ class ThreadedTCPServer(socketserver.TCPServer):
     @staticmethod
     def init():
         """
-        Starts the HTTP server. The server runs forever, until stop_server() is called. 
+        Starts the HTTP server. The server runs forever, until stop_server() is called.
         """
         server_thread = threading.Thread(target=GraphServer._start_server)
         server_thread.start()
@@ -196,6 +199,7 @@ class GraphServerHandler(http.server.SimpleHTTPRequestHandler):
     """
     Handles HTTP requests sent to the graph server.
     """
+
     def log_message(self, format, *args):
         pass

From 52576181c296058bcbdb0a4b767943a9c64ce1d0 Mon Sep 17 00:00:00 2001
From: Eric Feiveson
Date: Tue, 4 Mar 2025 16:07:25 -0800
Subject: [PATCH 34/44] Move networkx and portpicker to extras under
 "spanner-graph-notebook".
---
 setup.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/setup.py b/setup.py
index 16c2297..7ed10e8 100644
--- a/setup.py
+++ b/setup.py
@@ -33,10 +33,8 @@
     "ipywidgets>=7.7.1",
     "ipython>=7.23.1",
     "ipykernel>=5.5.6",
-    "networkx",
     "packaging >= 20.0.0",
-    "pandas>=1.1.0",
-    "portpicker",
+    "pandas>=1.1.0", 
     "pyarrow >= 3.0.0",
     "pydata-google-auth >=1.5.0",
     "tqdm >= 4.7.4, <5.0.0dev",
@@ -58,7 +56,7 @@
     ],
     "bigframes": ["bigframes >= 1.17.0"],
     "geopandas": ["geopandas >= 1.0.1"],
-    "spanner-graph-notebook": ["spanner-graph-notebook >= 1.1.1"],
+    "spanner-graph-notebook": ["spanner-graph-notebook >= 1.1.1", "networkx", "portpicker",],
 }
 
 all_extras = []

From 4356d39ae7fa5614ee4c699c0d2a1d9d6b5e3945 Mon Sep 17 00:00:00 2001
From: Owl Bot
Date: Wed, 5 Mar 2025 00:09:55 +0000
Subject: [PATCH 35/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?=
 =?UTF-8?q?post-processor?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---
 setup.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 7ed10e8..b2e3fba 100644
--- a/setup.py
+++ b/setup.py
@@ -34,7 +34,7 @@
     "ipython>=7.23.1",
     "ipykernel>=5.5.6",
     "packaging >= 20.0.0",
-    "pandas>=1.1.0", 
+    "pandas>=1.1.0",
     "pyarrow >= 3.0.0",
     "pydata-google-auth >=1.5.0",
     "tqdm >= 4.7.4, <5.0.0dev",
@@ -56,7 +56,11 @@
     ],
     "bigframes": ["bigframes >= 1.17.0"],
     "geopandas": ["geopandas >= 1.0.1"],
-    "spanner-graph-notebook": ["spanner-graph-notebook >= 1.1.1", "networkx", "portpicker",],
+    "spanner-graph-notebook": [
+        "spanner-graph-notebook >= 1.1.1",
+        "networkx",
+        "portpicker",
+    ],
 }
 
 all_extras = []

From 0e04303d7d17fb289ff3415d78832abe7b49ca30 Mon Sep 17 00:00:00 2001
From: Eric Feiveson
Date: Wed, 5 Mar 2025 10:58:10 -0800
Subject: [PATCH 36/44] Finish making graph dependencies optional

---
 bigquery_magics/graph_server.py | 10 +++++-----
 tests/unit/test_graph_server.py | 14 +++++++++++---
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py
index 5281811..1e7a08e 100644
--- a/bigquery_magics/graph_server.py
+++ b/bigquery_magics/graph_server.py
@@ -19,9 +19,6 @@
 import threading
 from typing import Dict, List
 
-import networkx
-import portpicker
-
 
 def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
     """
@@ -52,6 +49,7 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
     # does not even get called unless spanner_graphs has already been confirmed
     # to exist upstream.
     from google.cloud.spanner_v1.types import StructType, Type, TypeCode
+    import networkx
     from spanner_graphs.conversion import (
         columns_to_native_numpy,
         prepare_data_for_graphing,
@@ -139,8 +137,7 @@ class GraphServer:
     This server is used only in Jupyter; in colab, google.colab.output.register_callback()
     is used instead.
""" - - port = portpicker.pick_unused_port() + port = None host = "http://localhost" url = f"{host}:{port}" @@ -165,6 +162,9 @@ def build_route(endpoint): @staticmethod def _start_server(): + import portpicker + port = portpicker.pick_unused_port() + class ThreadedTCPServer(socketserver.TCPServer): # Allow socket reuse to avoid "Address already in use" errors allow_reuse_address = True diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 6317e0a..604ed50 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -315,12 +315,17 @@ def test_convert_wrong_row_index(): class TestGraphServer(unittest.TestCase): def setUp(self): - self.server_thread = GraphServer.init() + if graph_visualization is not None: + self.server_thread = GraphServer.init() def tearDown(self): - GraphServer.stop_server() # Stop the server after each test - self.server_thread.join() # Wait for the thread to finish + if graph_visualization is not None: + GraphServer.stop_server() # Stop the server after each test + self.server_thread.join() # Wait for the thread to finish + @pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" + ) def test_get_ping(self): self.assertTrue(self.server_thread.is_alive()) @@ -329,6 +334,9 @@ def test_get_ping(self): self.assertEqual(response.status_code, 200) self.assertEqual(response.json(), {"message": "pong"}) + @pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" + ) def test_post_ping(self): self.assertTrue(self.server_thread.is_alive()) route = GraphServer.build_route(GraphServer.endpoints["post_ping"]) From fedca818d6ead2b7a451f20b011782715721be4b Mon Sep 17 00:00:00 2001 From: Owl Bot Date: Wed, 5 Mar 2025 19:00:23 +0000 Subject: [PATCH 37/44] =?UTF-8?q?=F0=9F=A6=89=20Updates=20from=20OwlBot=20?= =?UTF-8?q?post-processor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md --- bigquery_magics/graph_server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 1e7a08e..3914d6b 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -137,6 +137,7 @@ class GraphServer: This server is used only in Jupyter; in colab, google.colab.output.register_callback() is used instead. 
""" + port = None host = "http://localhost" url = f"{host}:{port}" @@ -163,6 +164,7 @@ def build_route(endpoint): @staticmethod def _start_server(): import portpicker + port = portpicker.pick_unused_port() class ThreadedTCPServer(socketserver.TCPServer): From 1824c7f2332bf4a6683c0d14e41ea2c279e1a7fc Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 5 Mar 2025 11:05:33 -0800 Subject: [PATCH 38/44] fix port --- bigquery_magics/graph_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 3914d6b..45c4397 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -165,7 +165,7 @@ def build_route(endpoint): def _start_server(): import portpicker - port = portpicker.pick_unused_port() + GraphServer.port = portpicker.pick_unused_port() class ThreadedTCPServer(socketserver.TCPServer): # Allow socket reuse to avoid "Address already in use" errors From a7061822850826c324890de1a67005d2927c9796 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 5 Mar 2025 12:46:45 -0800 Subject: [PATCH 39/44] Convert graph server to singleton object. --- bigquery_magics/bigquery.py | 18 ++++++------ bigquery_magics/graph_server.py | 49 ++++++++++++++++----------------- tests/unit/test_bigquery.py | 6 ++-- tests/unit/test_graph_server.py | 40 +++++++++++++-------------- 4 files changed, 55 insertions(+), 58 deletions(-) diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index 189cf76..4fd730f 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -101,15 +101,13 @@ import json import re import sys -from threading import Thread +import threading import time from typing import Any, List, Tuple import warnings import IPython # type: ignore -from IPython import display # type: ignore from IPython.core import magic_arguments # type: ignore -from IPython.core.display import HTML, JSON from IPython.core.getipython import get_ipython from google.api_core import client_info from google.api_core.exceptions import NotFound @@ -123,7 +121,7 @@ from bigquery_magics import line_arg_parser as lap import bigquery_magics._versions_helpers import bigquery_magics.config -from bigquery_magics.graph_server import GraphServer, convert_graph_data +import bigquery_magics.graph_server as graph_server import bigquery_magics.line_arg_parser.exceptions import bigquery_magics.version @@ -609,10 +607,10 @@ def _is_colab() -> bool: def _colab_callback(query: str, params: str): - return JSON(convert_graph_data(query_results=json.loads(params))) + return IPython.core.display.JSON(graph_server.convert_graph_data(query_results=json.loads(params))) -singleton_server_thread: Thread = None +singleton_server_thread: threading.Thread = None def _add_graph_widget(query_result): @@ -636,15 +634,15 @@ def _add_graph_widget(query_result): global singleton_server_thread alive = singleton_server_thread and singleton_server_thread.is_alive() if not alive: - singleton_server_thread = GraphServer.init() + singleton_server_thread = graph_server.graph_server.init() # Create html to invoke the graph server html_content = generate_visualization_html( query="dummy query", - port=GraphServer.port, + port=graph_server.graph_server.port, params=query_result.to_json().replace("\\", "\\\\").replace('"', '\\"'), ) - display.display(HTML(html_content)) + IPython.display.display(IPython.core.display.HTML(html_content)) def _is_valid_json(s: str): @@ -710,7 +708,7 @@ def _make_bq_query( return if not 
args.verbose: - display.clear_output() + IPython.display.clear_output() if args.dry_run: # TODO(tswast): Use _handle_result() here, too, but perhaps change the diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 45c4397..bf0dc3f 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -138,20 +138,19 @@ class GraphServer: is used instead. """ - port = None host = "http://localhost" - url = f"{host}:{port}" - endpoints = { "get_ping": "/get_ping", "post_ping": "/post_ping", "post_query": "/post_query", } - _server = None + def __init__(self): + self.port = None + self.url = None + self._server = None - @staticmethod - def build_route(endpoint): + def build_route(self, endpoint): """ Returns a url for connecting to the given endpoint. Supported values include: @@ -159,43 +158,43 @@ def build_route(endpoint): - "post_ping": sends a POST request to ping the server. - "post_query": sends a POST request to obtain query results. """ - return f"{GraphServer.url}{endpoint}" - - @staticmethod - def _start_server(): - import portpicker - - GraphServer.port = portpicker.pick_unused_port() + return f"{self.url}{endpoint}" + def _start_server(self): class ThreadedTCPServer(socketserver.TCPServer): # Allow socket reuse to avoid "Address already in use" errors allow_reuse_address = True # Daemon threads automatically terminate when the main program exits daemon_threads = True - with ThreadedTCPServer(("", GraphServer.port), GraphServerHandler) as httpd: - GraphServer._server = httpd - GraphServer._server.serve_forever() + with ThreadedTCPServer(("", self.port), GraphServerHandler) as httpd: + self._server = httpd + self._server.serve_forever() - @staticmethod - def init(): + def init(self): """ Starts the HTTP server. The server runs forever, until stop_server() is called. """ - server_thread = threading.Thread(target=GraphServer._start_server) + import portpicker + self.port = portpicker.pick_unused_port() + self.url = f"{GraphServer.host}:{self.port}" + + server_thread = threading.Thread(target=self._start_server) server_thread.start() return server_thread - @staticmethod - def stop_server(): + def stop_server(self): """ Starts the HTTP server, if it is currently running. 
""" - if GraphServer._server: - GraphServer._server.shutdown() + if self._server: + self._server.shutdown() print("BigQuery-magics graph server shutting down...") - GraphServer._server = None + self._server = None + +global graph_server +graph_server = GraphServer() class GraphServerHandler(http.server.SimpleHTTPRequestHandler): """ @@ -248,4 +247,4 @@ def do_POST(self): self.handle_post_query() -atexit.register(GraphServer.stop_server) +atexit.register(graph_server.stop_server) diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 37e20e3..acecc57 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -746,7 +746,9 @@ def test_bigquery_graph_json_result(monkeypatch): graph_server_init_mock.return_value.is_alive.return_value = True run_query_mock.return_value = query_job_mock + print('Got here #500') return_value = ip.run_cell_magic("bigquery", "--graph", sql) + print('Got here #501') assert len(display_mock.call_args_list) == 1 assert len(display_mock.call_args_list[0]) == 2 @@ -1038,7 +1040,7 @@ def test_bigquery_magic_does_not_clear_display_in_verbose_mode(): ) clear_patch = mock.patch( - "bigquery_magics.bigquery.display.clear_output", + "bigquery_magics.bigquery.IPython.display.clear_output", autospec=True, ) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) @@ -1057,7 +1059,7 @@ def test_bigquery_magic_clears_display_in_non_verbose_mode(): ) clear_patch = mock.patch( - "bigquery_magics.bigquery.display.clear_output", + "bigquery_magics.bigquery.IPython.display.clear_output", autospec=True, ) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index 604ed50..0146ba6 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -23,7 +23,7 @@ except ImportError: graph_visualization = None -from bigquery_magics.graph_server import GraphServer, convert_graph_data +import bigquery_magics.graph_server as graph_server alex_properties = { "birthday": "1991-12-21T08:00:00Z", @@ -134,7 +134,7 @@ def _validate_nodes_and_edges(result): graph_visualization is None, reason="Requires `spanner-graph-notebook`" ) def test_convert_one_column_no_rows(): - result = convert_graph_data({"result": {}}) + result = graph_server.convert_graph_data({"result": {}}) assert result == { "response": { "edges": [], @@ -150,7 +150,7 @@ def test_convert_one_column_no_rows(): graph_visualization is None, reason="Requires `spanner-graph-notebook`" ) def test_convert_one_column_one_row_one_column(): - result = convert_graph_data( + result = graph_server.convert_graph_data( { "result": { "0": json.dumps(row_alex_owns_account), @@ -172,7 +172,7 @@ def test_convert_one_column_one_row_one_column(): graph_visualization is None, reason="Requires `spanner-graph-notebook`" ) def test_convert_one_column_two_rows(): - result = convert_graph_data( + result = graph_server.convert_graph_data( { "result": { "0": json.dumps(row_alex_owns_account), @@ -202,7 +202,7 @@ def test_convert_one_column_two_rows(): def test_convert_nongraph_json(): # If we have valid json that doesn't represent a graph, we don't expect to get nodes and edges, # but we should at least have row data, allowing the tabular view to work. 
-    result = convert_graph_data(
+    result = graph_server.convert_graph_data(
         {
             "result": {
                 "0": json.dumps({"foo": 1, "bar": 2}),
@@ -222,7 +222,7 @@ def test_convert_outer_key_not_string():
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
 def test_convert_outer_key_not_string():
-    result = convert_graph_data(
+    result = graph_server.convert_graph_data(
         {
             0: {
                 "0": json.dumps({"foo": 1, "bar": 2}),
@@ -236,7 +236,7 @@ def test_convert_outer_key_not_string():
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
 def test_convert_outer_value_not_dict():
-    result = convert_graph_data({"result": 0})
+    result = graph_server.convert_graph_data({"result": 0})
     assert result == {"error": "Expected outer value to be dict, got <class 'int'>"}
 
@@ -244,7 +244,7 @@ def test_convert_outer_value_not_dict():
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
 def test_convert_inner_key_not_string():
-    result = convert_graph_data(
+    result = graph_server.convert_graph_data(
         {
             "result": {
                 0: json.dumps({"foo": 1, "bar": 2}),
@@ -258,7 +258,7 @@ def test_convert_inner_key_not_string():
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
 def test_convert_inner_value_not_string():
-    result = convert_graph_data(
+    result = graph_server.convert_graph_data(
         {
             "result": {
                 "0": 1,
@@ -272,7 +272,7 @@ def test_convert_inner_value_not_string():
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
 def test_convert_one_column_one_row_two_columns():
-    result = convert_graph_data(
+    result = graph_server.convert_graph_data(
         {
             "result1": {
                 "0": json.dumps(row_alex_owns_account),
@@ -291,7 +291,7 @@ def test_convert_one_column_one_row_two_columns():
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
 def test_convert_empty_dict():
-    result = convert_graph_data({})
+    result = graph_server.convert_graph_data({})
     assert result == {
         "error": "query result with no columns is not supported for graph visualization"
     }
@@ -301,7 +301,7 @@ def test_convert_empty_dict():
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
 def test_convert_wrong_row_index():
-    result = convert_graph_data(
+    result = graph_server.convert_graph_data(
         {
             "result": {
                 # Missing "0" key
@@ -315,13 +315,11 @@ def test_convert_wrong_row_index():
 
 class TestGraphServer(unittest.TestCase):
     def setUp(self):
-        if graph_visualization is not None:
-            self.server_thread = GraphServer.init()
+        self.server_thread = graph_server.graph_server.init()
 
     def tearDown(self):
-        if graph_visualization is not None:
-            GraphServer.stop_server()  # Stop the server after each test
-            self.server_thread.join()  # Wait for the thread to finish
+        graph_server.graph_server.stop_server()  # Stop the server after each test
+        self.server_thread.join()  # Wait for the thread to finish
 
@@ -329,7 +327,7 @@ def tearDown(self):
     def test_get_ping(self):
         self.assertTrue(self.server_thread.is_alive())
 
-        route = GraphServer.build_route(GraphServer.endpoints["get_ping"])
+        route = graph_server.graph_server.build_route(graph_server.GraphServer.endpoints["get_ping"])
         response = requests.get(route)
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.json(), {"message": "pong"})
@@ -339,7 +337,7 @@ def test_get_ping(self):
     )
     def test_post_ping(self):
         self.assertTrue(self.server_thread.is_alive())
-        route = GraphServer.build_route(GraphServer.endpoints["post_ping"])
+        route = graph_server.graph_server.build_route(graph_server.GraphServer.endpoints["post_ping"])
         response = requests.post(route, json={"data": "ping"})
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.json(), {"your_request": {"data": "ping"}})
@@ -349,7 +347,7 @@ def test_post_ping(self):
     )
     def test_post_query(self):
         self.assertTrue(self.server_thread.is_alive())
-        route = GraphServer.build_route(GraphServer.endpoints["post_query"])
+        route = graph_server.graph_server.build_route(graph_server.GraphServer.endpoints["post_query"])
 
         data = {
             "result": {
@@ -373,4 +371,4 @@ def test_post_query(self):
 
 
 def test_stop_server_never_started():
-    GraphServer.stop_server()
+    graph_server.graph_server.stop_server()

From e4767e7bf1a33cb7494ec42cd096d6e62de8105e Mon Sep 17 00:00:00 2001
From: Eric Feiveson
Date: Wed, 5 Mar 2025 13:15:44 -0800
Subject: [PATCH 40/44] reformat

---
 bigquery_magics/bigquery.py     |  4 +++-
 bigquery_magics/graph_server.py |  2 ++
 tests/unit/test_bigquery.py     |  2 --
 tests/unit/test_graph_server.py | 12 +++++++++---
 4 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py
index 4fd730f..44d6d51 100644
--- a/bigquery_magics/bigquery.py
+++ b/bigquery_magics/bigquery.py
@@ -607,7 +607,9 @@ def _is_colab() -> bool:
 
 
 def _colab_callback(query: str, params: str):
-    return IPython.core.display.JSON(graph_server.convert_graph_data(query_results=json.loads(params)))
+    return IPython.core.display.JSON(
+        graph_server.convert_graph_data(query_results=json.loads(params))
+    )
 
 
 singleton_server_thread: threading.Thread = None
diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py
index bf0dc3f..6294bce 100644
--- a/bigquery_magics/graph_server.py
+++ b/bigquery_magics/graph_server.py
@@ -176,6 +176,7 @@ def init(self):
         Starts the HTTP server. The server runs forever, until stop_server() is called.
         """
         import portpicker
+
         self.port = portpicker.pick_unused_port()
         self.url = f"{GraphServer.host}:{self.port}"
 
@@ -196,6 +197,7 @@ def stop_server(self):
 
 global graph_server
 graph_server = GraphServer()
+
 
 class GraphServerHandler(http.server.SimpleHTTPRequestHandler):
     """
     Handles HTTP requests sent to the graph server.
diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py
index acecc57..0ab9685 100644
--- a/tests/unit/test_bigquery.py
+++ b/tests/unit/test_bigquery.py
@@ -746,9 +746,7 @@ def test_bigquery_graph_json_result(monkeypatch):
         graph_server_init_mock.return_value.is_alive.return_value = True
         run_query_mock.return_value = query_job_mock
 
-        print('Got here #500')
         return_value = ip.run_cell_magic("bigquery", "--graph", sql)
-        print('Got here #501')
 
     assert len(display_mock.call_args_list) == 1
     assert len(display_mock.call_args_list[0]) == 2
diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py
index 0146ba6..df83654 100644
--- a/tests/unit/test_graph_server.py
+++ b/tests/unit/test_graph_server.py
@@ -327,7 +327,9 @@ def tearDown(self):
     def test_get_ping(self):
         self.assertTrue(self.server_thread.is_alive())
 
-        route = graph_server.graph_server.build_route(graph_server.GraphServer.endpoints["get_ping"])
+        route = graph_server.graph_server.build_route(
+            graph_server.GraphServer.endpoints["get_ping"]
+        )
         response = requests.get(route)
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.json(), {"message": "pong"})
@@ -337,7 +339,9 @@ def test_get_ping(self):
     )
     def test_post_ping(self):
         self.assertTrue(self.server_thread.is_alive())
-        route = graph_server.graph_server.build_route(graph_server.GraphServer.endpoints["post_ping"])
+        route = graph_server.graph_server.build_route(
+            graph_server.GraphServer.endpoints["post_ping"]
+        )
         response = requests.post(route, json={"data": "ping"})
         self.assertEqual(response.status_code, 200)
         self.assertEqual(response.json(), {"your_request": {"data": "ping"}})
@@ -347,7 +351,9 @@ def test_post_ping(self):
     )
     def test_post_query(self):
         self.assertTrue(self.server_thread.is_alive())
-        route = graph_server.graph_server.build_route(graph_server.GraphServer.endpoints["post_query"])
+        route = graph_server.graph_server.build_route(
+            graph_server.GraphServer.endpoints["post_query"]
+        )
 
         data = {
             "result": {

From c36506a6c2e064e187dac804ca5258db17fdb6d7 Mon Sep 17 00:00:00 2001
From: Eric Feiveson
Date: Thu, 6 Mar 2025 12:48:50 -0800
Subject: [PATCH 41/44] Fix handling of null json elements.
---
 bigquery_magics/graph_server.py |  8 ++----
 tests/unit/test_graph_server.py | 49 +++++++++++++++++++++++++++++----
 2 files changed, 45 insertions(+), 12 deletions(-)

diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py
index 6294bce..4bd2e49 100644
--- a/bigquery_magics/graph_server.py
+++ b/bigquery_magics/graph_server.py
@@ -90,14 +90,10 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
                 raise ValueError(
                     f"Expected inner value to be str, got {type(value_value)}"
                 )
-            row_index = int(value_key)
             row_json = json.loads(value_value)
-            if row_index != len(data[column_name]):
-                raise ValueError(
-                    f"Unexpected row index; expected {len(data[column_name])}, got {row_index}"
-                )
-            data[column_name].append(row_json)
+            if row_json is not None:
+                data[column_name].append(row_json)
             rows.append([row_json])
 
     d, ignored_columns = columns_to_native_numpy(data, fields)
diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py
index df83654..3f35bba 100644
--- a/tests/unit/test_graph_server.py
+++ b/tests/unit/test_graph_server.py
@@ -168,6 +168,33 @@ def test_convert_one_column_one_row_one_column():
     assert result["response"]["schema"] is None
 
 
+@pytest.mark.skipif(
+    graph_visualization is None, reason="Requires `spanner-graph-notebook`"
+)
+def test_convert_one_column_one_row_one_column_null_json():
+    result = graph_server.convert_graph_data(
+        {
+            "result": {
+                "0": json.dumps(None),
+            }
+        }
+    )
+
+    assert result == {
+        'response': {
+            'edges': [],
+            'nodes': [],
+            'query_result': {
+                'result': []
+            },
+            'rows': [ [ None, ]],
+            'schema': None,
+        },
+    }
+
+    _validate_nodes_and_edges(result)
+
+
 @pytest.mark.skipif(
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
@@ -301,25 +328,35 @@ def test_convert_empty_dict():
     graph_visualization is None, reason="Requires `spanner-graph-notebook`"
 )
 def test_convert_wrong_row_index():
-    result = graph_server.convert_graph_data(
+    result0 = graph_server.convert_graph_data(
+        {
+            "result": {
+                "0": json.dumps(row_alex_owns_account),
+            }
+        }
+    )
+
+    # Changing the index should not impact the result.
+    result1 = graph_server.convert_graph_data(
         {
             "result": {
-                # Missing "0" key
                 "1": json.dumps(row_alex_owns_account),
             }
         }
     )
-    assert result == {"error": "Unexpected row index; expected 0, got 1"}
+    assert result1 == result0
 
 
 class TestGraphServer(unittest.TestCase):
     def setUp(self):
-        self.server_thread = graph_server.graph_server.init()
+        if graph_visualization is not None:
+            self.server_thread = graph_server.graph_server.init()
 
     def tearDown(self):
-        graph_server.graph_server.stop_server()  # Stop the server after each test
-        self.server_thread.join()  # Wait for the thread to finish
+        if graph_visualization is not None:
+            graph_server.graph_server.stop_server()  # Stop the server after each test
+            self.server_thread.join()  # Wait for the thread to finish
 
     @pytest.mark.skipif(
         graph_visualization is None, reason="Requires `spanner-graph-notebook`"

From 3a06e699f78ed6d308ad9df19ba158edee6265e1 Mon Sep 17 00:00:00 2001
From: Eric Feiveson
Date: Thu, 6 Mar 2025 14:43:54 -0800
Subject: [PATCH 42/44] reformat

---
 tests/unit/test_graph_server.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py
index 3f35bba..fdeffac 100644
--- a/tests/unit/test_graph_server.py
+++ b/tests/unit/test_graph_server.py
@@ -181,15 +181,17 @@ def test_convert_one_column_one_row_one_column_null_json():
     )
 
     assert result == {
-        'response': {
-            'edges': [],
-            'nodes': [],
-            'query_result': {
-                'result': []
-            },
-            'rows': [ [ None, ]],
-            'schema': None,
-        },
+        "response": {
+            "edges": [],
+            "nodes": [],
+            "query_result": {"result": []},
+            "rows": [
+                [
+                    None,
+                ]
+            ],
+            "schema": None,
+        },
     }
 
     _validate_nodes_and_edges(result)

From cdbdb3f14a2d5c21f2b0af1da4b03dfe7d84ebc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?=
Date: Mon, 10 Mar 2025 14:58:36 -0500
Subject: [PATCH 43/44] Apply suggestions from code review

---
 bigquery_magics/bigquery.py     | 4 ++--
 bigquery_magics/graph_server.py | 2 +-
 tests/unit/test_graph_server.py | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py
index 44d6d51..d8d33c5 100644
--- a/bigquery_magics/bigquery.py
+++ b/bigquery_magics/bigquery.py
@@ -620,7 +620,7 @@ def _add_graph_widget(query_result):
         from spanner_graphs.graph_visualization import generate_visualization_html
     except ImportError as err:
         customized_error = ImportError(
-            "Use of --graph requires the spanner_graphs package to be installed."
+            "Use of --graph requires the spanner-graph-notebook package to be installed. Install it with `pip install 'bigquery-magics[spanner-graph-notebook]'`."
         )
         raise customized_error from err
 
@@ -640,7 +640,7 @@ def _add_graph_widget(query_result):
 
     # Create html to invoke the graph server
     html_content = generate_visualization_html(
-        query="dummy query",
+        query="placeholder query",
         port=graph_server.graph_server.port,
         params=query_result.to_json().replace("\\", "\\\\").replace('"', '\\"'),
     )
diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py
index 4bd2e49..7c55279 100644
--- a/bigquery_magics/graph_server.py
+++ b/bigquery_magics/graph_server.py
@@ -42,7 +42,7 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
         visualization for columns of type JSON).
""" # Delay spanner imports until this function is called to avoid making - # # spanner_graphs (and its dependencies) hard requirements for bigquery + # spanner-graph-notebook (and its dependencies) hard requirements for bigquery # magics users, who don't need graph visualization. # # Note that these imports do not need to be in a try/except, as this function diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index fdeffac..d4100c4 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -352,11 +352,11 @@ def test_convert_wrong_row_index(): class TestGraphServer(unittest.TestCase): def setUp(self): - if graph_visualization is not None: + if graph_visualization is not None: # pragma: NO COVER self.server_thread = graph_server.graph_server.init() def tearDown(self): - if graph_visualization is not None: + if graph_visualization is not None: # pragma: NO COVER graph_server.graph_server.stop_server() # Stop the server after each test self.server_thread.join() # Wait for the thread to finish From ccd7f30f049835cf4015e7c1e3180f4f04584842 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Mon, 10 Mar 2025 15:49:51 -0700 Subject: [PATCH 44/44] Pin spanner-graph-notebook to exactly version 1.1.1, as subsequent changes to that repository broke our use of is. --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b2e3fba..efe01f6 100644 --- a/setup.py +++ b/setup.py @@ -57,7 +57,7 @@ "bigframes": ["bigframes >= 1.17.0"], "geopandas": ["geopandas >= 1.0.1"], "spanner-graph-notebook": [ - "spanner-graph-notebook >= 1.1.1", + "spanner-graph-notebook >= 1.1.1, <=1.1.1", "networkx", "portpicker", ],