|
33 | 33 | "name": "stderr", |
34 | 34 | "output_type": "stream", |
35 | 35 | "text": [ |
36 | | - "/usr/local/google/home/chelsealin/src/bigframes3/bigframes/_config/experiment_options.py:33: UserWarning: Semantic operators are still under experiments, and are subject to change in the future.\n", |
| 36 | + "/usr/local/google/home/chelsealin/src/bigframes/bigframes/_config/experiment_options.py:33: UserWarning: Semantic operators are still under experiments, and are subject to change in the future.\n", |
37 | 37 | " warnings.warn(\n" |
38 | 38 | ] |
39 | 39 | } |
|
51 | 51 | }, |
52 | 52 | { |
53 | 53 | "cell_type": "code", |
54 | | - "execution_count": 3, |
| 54 | + "execution_count": 4, |
55 | 55 | "metadata": {}, |
56 | 56 | "outputs": [ |
57 | 57 | { |
58 | | - "name": "stderr", |
59 | | - "output_type": "stream", |
60 | | - "text": [ |
61 | | - "/usr/local/google/home/chelsealin/src/bigframes3/bigframes/pandas/__init__.py:559: DefaultLocationWarning: No explicit location is set, so using location US for the session.\n", |
62 | | - " return global_session.get_global_session()\n" |
63 | | - ] |
| 58 | + "data": { |
| 59 | + "text/html": [ |
| 60 | + "Query job 13e4b10e-70cf-4b93-8c59-5f6f5fb10aeb is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:13e4b10e-70cf-4b93-8c59-5f6f5fb10aeb&page=queryresults\">Open Job</a>" |
| 61 | + ], |
| 62 | + "text/plain": [ |
| 63 | + "<IPython.core.display.HTML object>" |
| 64 | + ] |
| 65 | + }, |
| 66 | + "metadata": {}, |
| 67 | + "output_type": "display_data" |
64 | 68 | }, |
65 | 69 | { |
66 | 70 | "data": { |
67 | 71 | "text/html": [ |
68 | | - "Query job aef2dd7b-bdad-4dda-91be-867e8dac2613 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:aef2dd7b-bdad-4dda-91be-867e8dac2613&page=queryresults\">Open Job</a>" |
| 72 | + "Query job 559dd42c-573d-4b00-8fe9-b7061afdd672 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:559dd42c-573d-4b00-8fe9-b7061afdd672&page=queryresults\">Open Job</a>" |
69 | 73 | ], |
70 | 74 | "text/plain": [ |
71 | 75 | "<IPython.core.display.HTML object>" |
|
77 | 81 | ], |
78 | 82 | "source": [ |
79 | 83 | "import bigframes.ml.llm as llm\n", |
80 | | - "gemini_model = llm.GeminiTextGenerator(model_name=llm._GEMINI_1P5_FLASH_001_ENDPOINT)" |
| 84 | + "gemini_model = llm.GeminiTextGenerator(model_name=llm._GEMINI_1P5_FLASH_001_ENDPOINT)\n", |
| 85 | + "text_embedding_model = llm.TextEmbeddingGenerator(model_name=\"text-embedding-004\")" |
81 | 86 | ] |
82 | 87 | }, |
83 | 88 | { |
|
657 | 662 | "## Semantic Search" |
658 | 663 | ] |
659 | 664 | }, |
660 | | - { |
661 | | - "cell_type": "code", |
662 | | - "execution_count": 11, |
663 | | - "metadata": {}, |
664 | | - "outputs": [ |
665 | | - { |
666 | | - "data": { |
667 | | - "text/html": [ |
668 | | - "Query job 48aafee2-4948-4677-ab02-a94a71b9f6e2 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:48aafee2-4948-4677-ab02-a94a71b9f6e2&page=queryresults\">Open Job</a>" |
669 | | - ], |
670 | | - "text/plain": [ |
671 | | - "<IPython.core.display.HTML object>" |
672 | | - ] |
673 | | - }, |
674 | | - "metadata": {}, |
675 | | - "output_type": "display_data" |
676 | | - } |
677 | | - ], |
678 | | - "source": [ |
679 | | - "text_embedding_model = llm.TextEmbeddingGenerator(model_name=\"text-embedding-004\")" |
680 | | - ] |
681 | | - }, |
682 | 665 | { |
683 | 666 | "cell_type": "code", |
684 | 667 | "execution_count": 12, |
|
1156 | 1139 | "agg_df = df.semantics.agg(\"Find the shared first name of actors in {Movies}. One word answer.\", model=gemini_model)\n", |
1157 | 1140 | "agg_df" |
1158 | 1141 | ] |
| 1142 | + }, |
| 1143 | + { |
| 1144 | + "cell_type": "markdown", |
| 1145 | + "metadata": {}, |
| 1146 | + "source": [ |
| 1147 | + "## Semantic Cluster" |
| 1148 | + ] |
| 1149 | + }, |
| 1150 | + { |
| 1151 | + "cell_type": "code", |
| 1152 | + "execution_count": 5, |
| 1153 | + "metadata": {}, |
| 1154 | + "outputs": [ |
| 1155 | + { |
| 1156 | + "data": { |
| 1157 | + "text/html": [ |
| 1158 | + "Query job 92ce82b9-c521-42af-a2b7-6114b27a9ce4 is DONE. 0 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:92ce82b9-c521-42af-a2b7-6114b27a9ce4&page=queryresults\">Open Job</a>" |
| 1159 | + ], |
| 1160 | + "text/plain": [ |
| 1161 | + "<IPython.core.display.HTML object>" |
| 1162 | + ] |
| 1163 | + }, |
| 1164 | + "metadata": {}, |
| 1165 | + "output_type": "display_data" |
| 1166 | + }, |
| 1167 | + { |
| 1168 | + "name": "stderr", |
| 1169 | + "output_type": "stream", |
| 1170 | + "text": [ |
| 1171 | + "/usr/local/google/home/chelsealin/src/bigframes/bigframes/core/__init__.py:112: PreviewWarning: Interpreting JSON column(s) as StringDtype. This behavior may change in future versions.\n", |
| 1172 | + " warnings.warn(\n" |
| 1173 | + ] |
| 1174 | + }, |
| 1175 | + { |
| 1176 | + "data": { |
| 1177 | + "text/html": [ |
| 1178 | + "Query job 8c4c7391-2889-4cf1-bbfa-5cbf6b144db5 is DONE. 10 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:8c4c7391-2889-4cf1-bbfa-5cbf6b144db5&page=queryresults\">Open Job</a>" |
| 1179 | + ], |
| 1180 | + "text/plain": [ |
| 1181 | + "<IPython.core.display.HTML object>" |
| 1182 | + ] |
| 1183 | + }, |
| 1184 | + "metadata": {}, |
| 1185 | + "output_type": "display_data" |
| 1186 | + }, |
| 1187 | + { |
| 1188 | + "data": { |
| 1189 | + "text/html": [ |
| 1190 | + "Query job 19ae7cc6-3d61-4c69-9148-1956fafb577a is DONE. 30.8 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:19ae7cc6-3d61-4c69-9148-1956fafb577a&page=queryresults\">Open Job</a>" |
| 1191 | + ], |
| 1192 | + "text/plain": [ |
| 1193 | + "<IPython.core.display.HTML object>" |
| 1194 | + ] |
| 1195 | + }, |
| 1196 | + "metadata": {}, |
| 1197 | + "output_type": "display_data" |
| 1198 | + }, |
| 1199 | + { |
| 1200 | + "data": { |
| 1201 | + "text/html": [ |
| 1202 | + "Query job 7c2b62df-3bed-4469-9ffc-131843efe25e is DONE. 30.7 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:7c2b62df-3bed-4469-9ffc-131843efe25e&page=queryresults\">Open Job</a>" |
| 1203 | + ], |
| 1204 | + "text/plain": [ |
| 1205 | + "<IPython.core.display.HTML object>" |
| 1206 | + ] |
| 1207 | + }, |
| 1208 | + "metadata": {}, |
| 1209 | + "output_type": "display_data" |
| 1210 | + }, |
| 1211 | + { |
| 1212 | + "data": { |
| 1213 | + "text/html": [ |
| 1214 | + "Query job 74155e34-d8ca-4fba-8b93-33b1b325a5f1 is DONE. 138.9 kB processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:74155e34-d8ca-4fba-8b93-33b1b325a5f1&page=queryresults\">Open Job</a>" |
| 1215 | + ], |
| 1216 | + "text/plain": [ |
| 1217 | + "<IPython.core.display.HTML object>" |
| 1218 | + ] |
| 1219 | + }, |
| 1220 | + "metadata": {}, |
| 1221 | + "output_type": "display_data" |
| 1222 | + }, |
| 1223 | + { |
| 1224 | + "data": { |
| 1225 | + "text/html": [ |
| 1226 | + "Query job d9151043-a9c3-4388-8268-ef41162012b7 is DONE. 80 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:d9151043-a9c3-4388-8268-ef41162012b7&page=queryresults\">Open Job</a>" |
| 1227 | + ], |
| 1228 | + "text/plain": [ |
| 1229 | + "<IPython.core.display.HTML object>" |
| 1230 | + ] |
| 1231 | + }, |
| 1232 | + "metadata": {}, |
| 1233 | + "output_type": "display_data" |
| 1234 | + }, |
| 1235 | + { |
| 1236 | + "data": { |
| 1237 | + "text/html": [ |
| 1238 | + "Query job d2c4ad9a-c637-490e-a2cf-37d7f5a34024 is DONE. 170 Bytes processed. <a target=\"_blank\" href=\"https://console.cloud.google.com/bigquery?project=bigframes-dev&j=bq:US:d2c4ad9a-c637-490e-a2cf-37d7f5a34024&page=queryresults\">Open Job</a>" |
| 1239 | + ], |
| 1240 | + "text/plain": [ |
| 1241 | + "<IPython.core.display.HTML object>" |
| 1242 | + ] |
| 1243 | + }, |
| 1244 | + "metadata": {}, |
| 1245 | + "output_type": "display_data" |
| 1246 | + }, |
| 1247 | + { |
| 1248 | + "data": { |
| 1249 | + "text/html": [ |
| 1250 | + "<div>\n", |
| 1251 | + "<style scoped>\n", |
| 1252 | + " .dataframe tbody tr th:only-of-type {\n", |
| 1253 | + " vertical-align: middle;\n", |
| 1254 | + " }\n", |
| 1255 | + "\n", |
| 1256 | + " .dataframe tbody tr th {\n", |
| 1257 | + " vertical-align: top;\n", |
| 1258 | + " }\n", |
| 1259 | + "\n", |
| 1260 | + " .dataframe thead th {\n", |
| 1261 | + " text-align: right;\n", |
| 1262 | + " }\n", |
| 1263 | + "</style>\n", |
| 1264 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 1265 | + " <thead>\n", |
| 1266 | + " <tr style=\"text-align: right;\">\n", |
| 1267 | + " <th></th>\n", |
| 1268 | + " <th>Product</th>\n", |
| 1269 | + " <th>Cluster ID</th>\n", |
| 1270 | + " </tr>\n", |
| 1271 | + " </thead>\n", |
| 1272 | + " <tbody>\n", |
| 1273 | + " <tr>\n", |
| 1274 | + " <th>0</th>\n", |
| 1275 | + " <td>Smartphone</td>\n", |
| 1276 | + " <td>3</td>\n", |
| 1277 | + " </tr>\n", |
| 1278 | + " <tr>\n", |
| 1279 | + " <th>1</th>\n", |
| 1280 | + " <td>Laptop</td>\n", |
| 1281 | + " <td>3</td>\n", |
| 1282 | + " </tr>\n", |
| 1283 | + " <tr>\n", |
| 1284 | + " <th>2</th>\n", |
| 1285 | + " <td>Coffee Maker</td>\n", |
| 1286 | + " <td>1</td>\n", |
| 1287 | + " </tr>\n", |
| 1288 | + " <tr>\n", |
| 1289 | + " <th>3</th>\n", |
| 1290 | + " <td>T-shirt</td>\n", |
| 1291 | + " <td>2</td>\n", |
| 1292 | + " </tr>\n", |
| 1293 | + " <tr>\n", |
| 1294 | + " <th>4</th>\n", |
| 1295 | + " <td>Jeans</td>\n", |
| 1296 | + " <td>2</td>\n", |
| 1297 | + " </tr>\n", |
| 1298 | + " </tbody>\n", |
| 1299 | + "</table>\n", |
| 1300 | + "<p>5 rows × 2 columns</p>\n", |
| 1301 | + "</div>[5 rows x 2 columns in total]" |
| 1302 | + ], |
| 1303 | + "text/plain": [ |
| 1304 | + " Product Cluster ID\n", |
| 1305 | + "0 Smartphone 3\n", |
| 1306 | + "1 Laptop 3\n", |
| 1307 | + "2 Coffee Maker 1\n", |
| 1308 | + "3 T-shirt 2\n", |
| 1309 | + "4 Jeans 2\n", |
| 1310 | + "\n", |
| 1311 | + "[5 rows x 2 columns]" |
| 1312 | + ] |
| 1313 | + }, |
| 1314 | + "execution_count": 5, |
| 1315 | + "metadata": {}, |
| 1316 | + "output_type": "execute_result" |
| 1317 | + } |
| 1318 | + ], |
| 1319 | + "source": [ |
| 1320 | + "df = bpd.DataFrame({'Product': ['Smartphone', 'Laptop', 'Coffee Maker', 'T-shirt', 'Jeans']})\n", |
| 1321 | + "\n", |
| 1322 | + "df.semantics.cluster_by(column='Product', output_column='Cluster ID', model=text_embedding_model, n=3)" |
| 1323 | + ] |
1159 | 1324 | } |
1160 | 1325 | ], |
1161 | 1326 | "metadata": { |
|
1174 | 1339 | "name": "python", |
1175 | 1340 | "nbconvert_exporter": "python", |
1176 | 1341 | "pygments_lexer": "ipython3", |
1177 | | - "version": "3.11.9" |
| 1342 | + "version": "3.12.1" |
1178 | 1343 | } |
1179 | 1344 | }, |
1180 | 1345 | "nbformat": 4, |
|
0 commit comments