diff --git a/bigquery_magics/bigquery.py b/bigquery_magics/bigquery.py index d8d33c5..33a01d0 100644 --- a/bigquery_magics/bigquery.py +++ b/bigquery_magics/bigquery.py @@ -657,9 +657,10 @@ def _is_valid_json(s: str): def _supports_graph_widget(query_result: pandas.DataFrame): num_rows, num_columns = query_result.shape - if num_columns != 1: - return False - return query_result[query_result.columns[0]].apply(_is_valid_json).all() + for column in query_result.columns: + if not query_result[column].apply(_is_valid_json).all(): + return False + return True def _make_bq_query( diff --git a/bigquery_magics/graph_server.py b/bigquery_magics/graph_server.py index 7c55279..6104328 100644 --- a/bigquery_magics/graph_server.py +++ b/bigquery_magics/graph_server.py @@ -56,45 +56,37 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): ) try: - column_name = None - column_value = None + fields: List[StructType.Field] = [] + data = {} + rows = [] for key, value in query_results.items(): - if column_name is None: - if not isinstance(key, str): - raise ValueError(f"Expected outer key to be str, got {type(key)}") - if not isinstance(value, dict): + column_name = None + column_value = None + if not isinstance(key, str): + raise ValueError(f"Expected outer key to be str, got {type(key)}") + if not isinstance(value, dict): + raise ValueError(f"Expected outer value to be dict, got {type(value)}") + column_name = key + column_value = value + + fields.append( + StructType.Field(name=column_name, type=Type(code=TypeCode.JSON)) + ) + data[column_name] = [] + for value_key, value_value in column_value.items(): + if not isinstance(value_key, str): raise ValueError( - f"Expected outer value to be dict, got {type(value)}" + f"Expected inner key to be str, got {type(value_key)}" ) - column_name = key - column_value = value - else: - # TODO: Implement multi-column support. - raise ValueError( - "Query has multiple columns - graph visualization not supported" - ) - if column_name is None or column_value is None: - raise ValueError( - "query result with no columns is not supported for graph visualization" - ) + if not isinstance(value_value, str): + raise ValueError( + f"Expected inner value to be str, got {type(value_value)}" + ) + row_json = json.loads(value_value) - fields: List[StructType.Field] = [ - StructType.Field(name=column_name, type=Type(code=TypeCode.JSON)) - ] - data = {column_name: []} - rows = [] - for value_key, value_value in column_value.items(): - if not isinstance(value_key, str): - raise ValueError(f"Expected inner key to be str, got {type(value_key)}") - if not isinstance(value_value, str): - raise ValueError( - f"Expected inner value to be str, got {type(value_value)}" - ) - row_json = json.loads(value_value) - - if row_json is not None: - data[column_name].append(row_json) - rows.append([row_json]) + if row_json is not None: + data[column_name].append(row_json) + rows.append([row_json]) d, ignored_columns = columns_to_native_numpy(data, fields) @@ -112,10 +104,13 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]): return { "response": { + # These fields populate the graph result view. "nodes": nodes, "edges": edges, + # This populates the visualizer's schema view, but not yet implemented on the + # BigQuery side. "schema": None, - "rows": rows, + # This field is used to populate the visualizer's tabular view. "query_result": data, } } diff --git a/tests/unit/test_bigquery.py b/tests/unit/test_bigquery.py index 0ab9685..33db7bc 100644 --- a/tests/unit/test_bigquery.py +++ b/tests/unit/test_bigquery.py @@ -39,6 +39,7 @@ import bigquery_magics import bigquery_magics.bigquery as magics +import bigquery_magics.graph_server as graph_server try: import google.cloud.bigquery_storage as bigquery_storage @@ -677,10 +678,12 @@ def test_bigquery_graph_json_json_result(monkeypatch): bqstorage_client_patch ), display_patch as display_mock: run_query_mock.return_value = query_job_mock - return_value = ip.run_cell_magic("bigquery", "--graph", sql) + try: + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + finally: + graph_server.graph_server.stop_server() - # As we only support visualization with single-column queries, the visualizer should not be launched. - display_mock.assert_not_called() + display_mock.assert_called() assert bqstorage_mock.called # BQ storage client was used assert isinstance(return_value, pandas.DataFrame) @@ -729,9 +732,6 @@ def test_bigquery_graph_json_result(monkeypatch): ] result = pandas.DataFrame(graph_json_rows, columns=["graph_json"]) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) - graph_server_init_patch = mock.patch( - "bigquery_magics.graph_server.GraphServer.init", autospec=True - ) display_patch = mock.patch("IPython.display.display", autospec=True) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True @@ -740,10 +740,7 @@ def test_bigquery_graph_json_result(monkeypatch): with run_query_patch as run_query_mock, ( bqstorage_client_patch - ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock: - graph_server_init_mock.return_value = mock.Mock() - graph_server_init_mock.return_value.is_alive = mock.Mock() - graph_server_init_mock.return_value.is_alive.return_value = True + ), display_patch as display_mock: run_query_mock.return_value = query_job_mock return_value = ip.run_cell_magic("bigquery", "--graph", sql) @@ -770,7 +767,10 @@ def test_bigquery_graph_json_result(monkeypatch): ) # identifier in 3rd row of query result # Make sure we can run a second graph query, after the graph server is already running. - return_value = ip.run_cell_magic("bigquery", "--graph", sql) + try: + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + finally: + graph_server.graph_server.stop_server() # Sanity check that the HTML content looks like graph visualization. Minimal check # to allow Spanner to change its implementation without breaking this test. @@ -841,9 +841,6 @@ def test_bigquery_graph_colab(monkeypatch): ] result = pandas.DataFrame(graph_json_rows, columns=["graph_json"]) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) - graph_server_init_patch = mock.patch( - "bigquery_magics.graph_server.GraphServer.init", autospec=True - ) display_patch = mock.patch("IPython.display.display", autospec=True) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True @@ -852,10 +849,12 @@ def test_bigquery_graph_colab(monkeypatch): with run_query_patch as run_query_mock, ( bqstorage_client_patch - ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock: + ), display_patch as display_mock: run_query_mock.return_value = query_job_mock - graph_server_init_mock.return_value = None - return_value = ip.run_cell_magic("bigquery", "--graph", sql) + try: + return_value = ip.run_cell_magic("bigquery", "--graph", sql) + finally: + graph_server.graph_server.stop_server() assert len(display_mock.call_args_list) == 1 assert len(display_mock.call_args_list[0]) == 2 @@ -880,7 +879,6 @@ def test_bigquery_graph_colab(monkeypatch): # Make sure we actually used colab path, not GraphServer path. assert sys.modules["google.colab"].output.register_callback.called - assert not graph_server_init_mock.called assert bqstorage_mock.called # BQ storage client was used assert isinstance(return_value, pandas.DataFrame) @@ -902,7 +900,6 @@ def test_colab_callback(): "edges": [], "nodes": [], "query_result": {"result": []}, - "rows": [], "schema": None, } } @@ -937,9 +934,6 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch): sql = "SELECT graph_json FROM t" result = pandas.DataFrame([], columns=["graph_json"]) run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True) - graph_server_init_patch = mock.patch( - "bigquery_magics.graph_server.GraphServer.init", autospec=True - ) display_patch = mock.patch("IPython.display.display", autospec=True) query_job_mock = mock.create_autospec( google.cloud.bigquery.job.QueryJob, instance=True @@ -948,11 +942,13 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch): with run_query_patch as run_query_mock, ( bqstorage_client_patch - ), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock: + ), display_patch as display_mock: run_query_mock.return_value = query_job_mock - graph_server_init_mock.return_value = None with pytest.raises(ImportError): - ip.run_cell_magic("bigquery", "--graph", sql) + try: + ip.run_cell_magic("bigquery", "--graph", sql) + finally: + graph_server.graph_server.stop_server() display_mock.assert_not_called() diff --git a/tests/unit/test_graph_server.py b/tests/unit/test_graph_server.py index d4100c4..434b722 100644 --- a/tests/unit/test_graph_server.py +++ b/tests/unit/test_graph_server.py @@ -140,7 +140,6 @@ def test_convert_one_column_no_rows(): "edges": [], "nodes": [], "query_result": {"result": []}, - "rows": [], "schema": None, } } @@ -164,7 +163,6 @@ def test_convert_one_column_one_row_one_column(): _validate_nodes_and_edges(result) assert result["response"]["query_result"] == {"result": [row_alex_owns_account]} - assert result["response"]["rows"] == [[row_alex_owns_account]] assert result["response"]["schema"] is None @@ -185,11 +183,6 @@ def test_convert_one_column_one_row_one_column_null_json(): "edges": [], "nodes": [], "query_result": {"result": []}, - "rows": [ - [ - None, - ] - ], "schema": None, }, } @@ -218,10 +211,34 @@ def test_convert_one_column_two_rows(): assert result["response"]["query_result"] == { "result": [row_alex_owns_account, row_lee_owns_account] } - assert result["response"]["rows"] == [ - [row_alex_owns_account], - [row_lee_owns_account], - ] + assert result["response"]["schema"] is None + + +@pytest.mark.skipif( + graph_visualization is None, reason="Requires `spanner-graph-notebook`" +) +def test_convert_one_row_two_columns(): + result = graph_server.convert_graph_data( + { + "col1": { + "0": json.dumps(row_alex_owns_account), + }, + "col2": { + "0": json.dumps(row_lee_owns_account), + }, + } + ) + print(json.dumps(result)) + + assert len(result["response"]["nodes"]) == 4 + assert len(result["response"]["edges"]) == 2 + + _validate_nodes_and_edges(result) + + assert result["response"]["query_result"] == { + "col1": [row_alex_owns_account], + "col2": [row_lee_owns_account], + } assert result["response"]["schema"] is None @@ -243,7 +260,6 @@ def test_convert_nongraph_json(): assert len(result["response"]["edges"]) == 0 assert result["response"]["query_result"] == {"result": [{"foo": 1, "bar": 2}]} - assert result["response"]["rows"] == [[{"foo": 1, "bar": 2}]] assert result["response"]["schema"] is None @@ -297,32 +313,18 @@ def test_convert_inner_value_not_string(): assert result == {"error": "Expected inner value to be str, got "} -@pytest.mark.skipif( - graph_visualization is None, reason="Requires `spanner-graph-notebook`" -) -def test_convert_one_column_one_row_two_columns(): - result = graph_server.convert_graph_data( - { - "result1": { - "0": json.dumps(row_alex_owns_account), - }, - "result2": { - "0": json.dumps(row_alex_owns_account), - }, - } - ) - assert result == { - "error": "Query has multiple columns - graph visualization not supported" - } - - @pytest.mark.skipif( graph_visualization is None, reason="Requires `spanner-graph-notebook`" ) def test_convert_empty_dict(): result = graph_server.convert_graph_data({}) assert result == { - "error": "query result with no columns is not supported for graph visualization" + "response": { + "nodes": [], + "edges": [], + "schema": None, + "query_result": {}, + } } @@ -411,7 +413,6 @@ def test_post_query(self): self.assertEqual( response_data["query_result"], {"result": [row_alex_owns_account]} ) - self.assertEqual(response_data["rows"], [[row_alex_owns_account]]) self.assertIsNone(response_data["schema"])