Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions bigquery_magics/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,9 +657,10 @@ def _is_valid_json(s: str):

def _supports_graph_widget(query_result: pandas.DataFrame):
num_rows, num_columns = query_result.shape
if num_columns != 1:
return False
return query_result[query_result.columns[0]].apply(_is_valid_json).all()
for column in query_result.columns:
if not query_result[column].apply(_is_valid_json).all():
return False
Comment on lines +661 to +662
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting, so the widget only works if all columns are valid JSON? This will return False if there's even one column with one cell that returns false for _is_valid_json.

Please add a docstring explaining in more detail what shape the data is expected to have for the widget to be supported.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If some columns are not JSON, the widget actually does work — it simply ignores the non-JSON columns for purposes of populating the widget, but shows them in the widget's tabular view. I have separate work in progress to handle this, but it's not included in this branch.

Is it ok to check in what we have, and do a follow-up PR to handle the "some columns not json" scenario?

return True


def _make_bq_query(
Expand Down
67 changes: 31 additions & 36 deletions bigquery_magics/graph_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,45 +56,37 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):
)

try:
column_name = None
column_value = None
fields: List[StructType.Field] = []
data = {}
rows = []
for key, value in query_results.items():
if column_name is None:
if not isinstance(key, str):
raise ValueError(f"Expected outer key to be str, got {type(key)}")
if not isinstance(value, dict):
column_name = None
column_value = None
if not isinstance(key, str):
raise ValueError(f"Expected outer key to be str, got {type(key)}")
if not isinstance(value, dict):
raise ValueError(f"Expected outer value to be dict, got {type(value)}")
column_name = key
column_value = value

fields.append(
StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))
)
data[column_name] = []
for value_key, value_value in column_value.items():
if not isinstance(value_key, str):
raise ValueError(
f"Expected outer value to be dict, got {type(value)}"
f"Expected inner key to be str, got {type(value_key)}"
)
column_name = key
column_value = value
else:
# TODO: Implement multi-column support.
raise ValueError(
"Query has multiple columns - graph visualization not supported"
)
if column_name is None or column_value is None:
raise ValueError(
"query result with no columns is not supported for graph visualization"
)
if not isinstance(value_value, str):
raise ValueError(
f"Expected inner value to be str, got {type(value_value)}"
)
row_json = json.loads(value_value)

fields: List[StructType.Field] = [
StructType.Field(name=column_name, type=Type(code=TypeCode.JSON))
]
data = {column_name: []}
rows = []
for value_key, value_value in column_value.items():
if not isinstance(value_key, str):
raise ValueError(f"Expected inner key to be str, got {type(value_key)}")
if not isinstance(value_value, str):
raise ValueError(
f"Expected inner value to be str, got {type(value_value)}"
)
row_json = json.loads(value_value)

if row_json is not None:
data[column_name].append(row_json)
rows.append([row_json])
if row_json is not None:
data[column_name].append(row_json)
rows.append([row_json])

d, ignored_columns = columns_to_native_numpy(data, fields)

Expand All @@ -112,10 +104,13 @@ def convert_graph_data(query_results: Dict[str, Dict[str, str]]):

return {
"response": {
# These fields populate the graph result view.
"nodes": nodes,
"edges": edges,
# This populates the visualizer's schema view, but not yet implemented on the
# BigQuery side.
"schema": None,
"rows": rows,
# This field is used to populate the visualizer's tabular view.
"query_result": data,
}
}
Expand Down
46 changes: 21 additions & 25 deletions tests/unit/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@

import bigquery_magics
import bigquery_magics.bigquery as magics
import bigquery_magics.graph_server as graph_server

try:
import google.cloud.bigquery_storage as bigquery_storage
Expand Down Expand Up @@ -677,10 +678,12 @@ def test_bigquery_graph_json_json_result(monkeypatch):
bqstorage_client_patch
), display_patch as display_mock:
run_query_mock.return_value = query_job_mock
return_value = ip.run_cell_magic("bigquery", "--graph", sql)
try:
return_value = ip.run_cell_magic("bigquery", "--graph", sql)
finally:
graph_server.graph_server.stop_server()

# As we only support visualization with single-column queries, the visualizer should not be launched.
display_mock.assert_not_called()
display_mock.assert_called()

assert bqstorage_mock.called # BQ storage client was used
assert isinstance(return_value, pandas.DataFrame)
Expand Down Expand Up @@ -729,9 +732,6 @@ def test_bigquery_graph_json_result(monkeypatch):
]
result = pandas.DataFrame(graph_json_rows, columns=["graph_json"])
run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
graph_server_init_patch = mock.patch(
"bigquery_magics.graph_server.GraphServer.init", autospec=True
)
display_patch = mock.patch("IPython.display.display", autospec=True)
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
Expand All @@ -740,10 +740,7 @@ def test_bigquery_graph_json_result(monkeypatch):

with run_query_patch as run_query_mock, (
bqstorage_client_patch
), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
graph_server_init_mock.return_value = mock.Mock()
graph_server_init_mock.return_value.is_alive = mock.Mock()
graph_server_init_mock.return_value.is_alive.return_value = True
), display_patch as display_mock:
run_query_mock.return_value = query_job_mock

return_value = ip.run_cell_magic("bigquery", "--graph", sql)
Expand All @@ -770,7 +767,10 @@ def test_bigquery_graph_json_result(monkeypatch):
) # identifier in 3rd row of query result

# Make sure we can run a second graph query, after the graph server is already running.
return_value = ip.run_cell_magic("bigquery", "--graph", sql)
try:
return_value = ip.run_cell_magic("bigquery", "--graph", sql)
finally:
graph_server.graph_server.stop_server()

# Sanity check that the HTML content looks like graph visualization. Minimal check
# to allow Spanner to change its implementation without breaking this test.
Expand Down Expand Up @@ -841,9 +841,6 @@ def test_bigquery_graph_colab(monkeypatch):
]
result = pandas.DataFrame(graph_json_rows, columns=["graph_json"])
run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
graph_server_init_patch = mock.patch(
"bigquery_magics.graph_server.GraphServer.init", autospec=True
)
display_patch = mock.patch("IPython.display.display", autospec=True)
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
Expand All @@ -852,10 +849,12 @@ def test_bigquery_graph_colab(monkeypatch):

with run_query_patch as run_query_mock, (
bqstorage_client_patch
), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
), display_patch as display_mock:
run_query_mock.return_value = query_job_mock
graph_server_init_mock.return_value = None
return_value = ip.run_cell_magic("bigquery", "--graph", sql)
try:
return_value = ip.run_cell_magic("bigquery", "--graph", sql)
finally:
graph_server.graph_server.stop_server()

assert len(display_mock.call_args_list) == 1
assert len(display_mock.call_args_list[0]) == 2
Expand All @@ -880,7 +879,6 @@ def test_bigquery_graph_colab(monkeypatch):

# Make sure we actually used colab path, not GraphServer path.
assert sys.modules["google.colab"].output.register_callback.called
assert not graph_server_init_mock.called

assert bqstorage_mock.called # BQ storage client was used
assert isinstance(return_value, pandas.DataFrame)
Expand All @@ -902,7 +900,6 @@ def test_colab_callback():
"edges": [],
"nodes": [],
"query_result": {"result": []},
"rows": [],
"schema": None,
}
}
Expand Down Expand Up @@ -937,9 +934,6 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch):
sql = "SELECT graph_json FROM t"
result = pandas.DataFrame([], columns=["graph_json"])
run_query_patch = mock.patch("bigquery_magics.bigquery._run_query", autospec=True)
graph_server_init_patch = mock.patch(
"bigquery_magics.graph_server.GraphServer.init", autospec=True
)
display_patch = mock.patch("IPython.display.display", autospec=True)
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
Expand All @@ -948,11 +942,13 @@ def test_bigquery_graph_missing_spanner_deps(monkeypatch):

with run_query_patch as run_query_mock, (
bqstorage_client_patch
), graph_server_init_patch as graph_server_init_mock, display_patch as display_mock:
), display_patch as display_mock:
run_query_mock.return_value = query_job_mock
graph_server_init_mock.return_value = None
with pytest.raises(ImportError):
ip.run_cell_magic("bigquery", "--graph", sql)
try:
ip.run_cell_magic("bigquery", "--graph", sql)
finally:
graph_server.graph_server.stop_server()
display_mock.assert_not_called()


Expand Down
67 changes: 34 additions & 33 deletions tests/unit/test_graph_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,6 @@ def test_convert_one_column_no_rows():
"edges": [],
"nodes": [],
"query_result": {"result": []},
"rows": [],
"schema": None,
}
}
Expand All @@ -164,7 +163,6 @@ def test_convert_one_column_one_row_one_column():
_validate_nodes_and_edges(result)

assert result["response"]["query_result"] == {"result": [row_alex_owns_account]}
assert result["response"]["rows"] == [[row_alex_owns_account]]
assert result["response"]["schema"] is None


Expand All @@ -185,11 +183,6 @@ def test_convert_one_column_one_row_one_column_null_json():
"edges": [],
"nodes": [],
"query_result": {"result": []},
"rows": [
[
None,
]
],
"schema": None,
},
}
Expand Down Expand Up @@ -218,10 +211,34 @@ def test_convert_one_column_two_rows():
assert result["response"]["query_result"] == {
"result": [row_alex_owns_account, row_lee_owns_account]
}
assert result["response"]["rows"] == [
[row_alex_owns_account],
[row_lee_owns_account],
]
assert result["response"]["schema"] is None


@pytest.mark.skipif(
graph_visualization is None, reason="Requires `spanner-graph-notebook`"
)
def test_convert_one_row_two_columns():
result = graph_server.convert_graph_data(
{
"col1": {
"0": json.dumps(row_alex_owns_account),
},
"col2": {
"0": json.dumps(row_lee_owns_account),
},
}
)
print(json.dumps(result))

assert len(result["response"]["nodes"]) == 4
assert len(result["response"]["edges"]) == 2

_validate_nodes_and_edges(result)

assert result["response"]["query_result"] == {
"col1": [row_alex_owns_account],
"col2": [row_lee_owns_account],
}
assert result["response"]["schema"] is None


Expand All @@ -243,7 +260,6 @@ def test_convert_nongraph_json():
assert len(result["response"]["edges"]) == 0

assert result["response"]["query_result"] == {"result": [{"foo": 1, "bar": 2}]}
assert result["response"]["rows"] == [[{"foo": 1, "bar": 2}]]
assert result["response"]["schema"] is None


Expand Down Expand Up @@ -297,32 +313,18 @@ def test_convert_inner_value_not_string():
assert result == {"error": "Expected inner value to be str, got <class 'int'>"}


@pytest.mark.skipif(
graph_visualization is None, reason="Requires `spanner-graph-notebook`"
)
def test_convert_one_column_one_row_two_columns():
result = graph_server.convert_graph_data(
{
"result1": {
"0": json.dumps(row_alex_owns_account),
},
"result2": {
"0": json.dumps(row_alex_owns_account),
},
}
)
assert result == {
"error": "Query has multiple columns - graph visualization not supported"
}


@pytest.mark.skipif(
graph_visualization is None, reason="Requires `spanner-graph-notebook`"
)
def test_convert_empty_dict():
result = graph_server.convert_graph_data({})
assert result == {
"error": "query result with no columns is not supported for graph visualization"
"response": {
"nodes": [],
"edges": [],
"schema": None,
"query_result": {},
}
}


Expand Down Expand Up @@ -411,7 +413,6 @@ def test_post_query(self):
self.assertEqual(
response_data["query_result"], {"result": [row_alex_owns_account]}
)
self.assertEqual(response_data["rows"], [[row_alex_owns_account]])
self.assertIsNone(response_data["schema"])


Expand Down