Skip to content

Commit 62cab85

Browse files
[DOC] Enhance Docstrings of Flows Core Public Functions
1 parent 618b9b4 commit 62cab85

File tree

1 file changed

+160
-77
lines changed

1 file changed

+160
-77
lines changed

openml/flows/functions.py

Lines changed: 160 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -71,39 +71,59 @@ def _get_cached_flow(fid: int) -> OpenMLFlow:
7171

7272
@openml.utils.thread_safe_if_oslo_installed
7373
def get_flow(flow_id: int, reinstantiate: bool = False, strict_version: bool = True) -> OpenMLFlow: # noqa: FBT001, FBT002
74-
"""Download the OpenML flow for a given flow ID.
74+
"""Fetch an OpenMLFlow by its server-assigned ID.
75+
76+
Queries the OpenML REST API for the flow metadata and returns an
77+
:class:`OpenMLFlow` instance. If the flow is already cached locally,
78+
the cached copy is returned. Optionally the flow can be re-instantiated
79+
into a concrete model instance using the registered extension.
7580
7681
Parameters
7782
----------
7883
flow_id : int
7984
The OpenML flow id.
80-
81-
reinstantiate: bool, optional, default=False
82-
Whether to reinstantiate the flow to a model instance.
83-
84-
strict_version : bool, optional, default=True
85-
Whether to fail if version requirements are not fulfilled.
85+
reinstantiate : bool, optional (default=False)
86+
If True, convert the flow description into a concrete model instance
87+
using the flow's extension (e.g., sklearn). If conversion fails and
88+
``strict_version`` is True, an exception will be raised.
89+
strict_version : bool, optional (default=True)
90+
When ``reinstantiate`` is True, whether to enforce exact version
91+
requirements for the extension/model. If False, a fallback flow may
92+
be returned when versions differ.
8693
8794
Returns
8895
-------
89-
flow : OpenMLFlow
90-
the flow
96+
OpenMLFlow
97+
The flow object with metadata; ``model`` may be populated when
98+
``reinstantiate=True``.
99+
100+
Raises
101+
------
102+
OpenMLCacheException
103+
When cached flow files are corrupted or cannot be read.
104+
OpenMLServerException
105+
When the REST API call fails.
106+
107+
Side Effects
108+
------------
109+
- Writes to ``openml.config.cache_directory/flows/{flow_id}/flow.xml``
110+
when the flow is downloaded from the server.
111+
112+
Preconditions
113+
-------------
114+
- Network access to the OpenML server is required unless the flow is cached.
115+
- For private flows, ``openml.config.apikey`` must be set.
116+
117+
Notes
118+
-----
119+
Results are cached to speed up subsequent calls. When ``reinstantiate`` is
120+
True and version mismatches occur, a new flow may be returned to reflect
121+
the converted model (only when ``strict_version`` is False).
91122
92123
Examples
93124
--------
94125
>>> import openml
95-
Download the flow with flow_id = 5
96-
>>> flow_id = 5
97-
>>> flow = openml.flows.get_flow()
98-
>>> print(flow)
99-
OpenML Flow
100-
===========
101-
Flow ID.........: 5 (version 1)
102-
Flow URL........: https://www.openml.org/f/5
103-
Flow Name.......: openml.evaluation.average_cost
104-
Flow Description: An implementation of the evaluation measure "average_cost"
105-
Upload Date.....: 2014-01-16 14:12:56
106-
Dependencies....: Build on top of Weka API (Jar version 3.?.?)
126+
>>> flow = openml.flows.get_flow(5) # doctest: +SKIP
107127
"""
108128
flow_id = int(flow_id)
109129
flow = _get_flow_description(flow_id)
@@ -154,39 +174,47 @@ def list_flows(
154174
tag: str | None = None,
155175
uploader: str | None = None,
156176
) -> pd.DataFrame:
157-
"""
158-
Return a list of all flows which are on OpenML.
159-
(Supports large amount of results)
177+
"""List flows available on the OpenML server.
178+
179+
This function supports paging and filtering and returns a pandas
180+
DataFrame with one row per flow and columns for id, name, version,
181+
external_version, full_name and uploader.
160182
161183
Parameters
162184
----------
163185
offset : int, optional
164-
the number of flows to skip, starting from the first
186+
Number of flows to skip, starting from the first (for paging).
165187
size : int, optional
166-
the maximum number of flows to return
188+
Maximum number of flows to return.
167189
tag : str, optional
168-
the tag to include
169-
kwargs: dict, optional
170-
Legal filter operators: uploader.
190+
Only return flows having this tag.
191+
uploader : str, optional
192+
Only return flows uploaded by this user.
171193
172194
Returns
173195
-------
174-
flows : dataframe
175-
Each row maps to a dataset
176-
Each column contains the following information:
177-
- flow id
178-
- full name
179-
- name
180-
- version
181-
- external version
182-
- uploader
196+
pandas.DataFrame
197+
Rows correspond to flows. Columns include ``id``, ``full_name``,
198+
``name``, ``version``, ``external_version``, and ``uploader``.
199+
200+
Raises
201+
------
202+
OpenMLServerException
203+
When the API call fails.
204+
205+
Side Effects
206+
------------
207+
- None: results are fetched and returned; no local state is modified.
208+
209+
Preconditions
210+
-------------
211+
- Network access is required to list flows unless caching mechanisms are
212+
used by the underlying API helper.
183213
184214
Examples
185215
--------
186216
>>> import openml
187-
>>> flows = openml.flows.list_flows()
188-
A df of all flows which are on openML is returned
189-
[123519 rows x 6 columns]
217+
>>> flows = openml.flows.list_flows(size=100) # doctest: +SKIP
190218
"""
191219
listing_call = partial(_list_flows, tag=tag, uploader=uploader)
192220
batches = openml.utils._list_all(listing_call, offset=offset, limit=size)
@@ -229,33 +257,35 @@ def _list_flows(limit: int, offset: int, **kwargs: Any) -> pd.DataFrame:
229257

230258

231259
def flow_exists(name: str, external_version: str) -> int | bool:
232-
"""Retrieves the flow id.
260+
"""Check whether a flow (name + external_version) exists on the server.
233261
234-
A flow is uniquely identified by name + external_version.
262+
The OpenML server defines uniqueness of flows by the pair
263+
``(name, external_version)``. This helper queries the server and
264+
returns the corresponding flow id when present.
235265
236266
Parameters
237267
----------
238-
name : string
239-
Name of the flow
240-
external_version : string
268+
name : str
269+
Flow name (e.g., ``sklearn.tree._classes.DecisionTreeClassifier``).
270+
external_version : str
241271
Version information associated with flow.
242272
243273
Returns
244274
-------
245-
flow_exist : int or bool
246-
flow id iff exists, False otherwise
275+
int or bool
276+
The flow id if the flow exists on the server, otherwise ``False``.
277+
278+
Raises
279+
------
280+
ValueError
281+
If ``name`` or ``external_version`` is empty or not a string.
282+
OpenMLServerException
283+
When the API request fails.
247284
248285
Examples
249286
--------
250287
>>> import openml
251-
>>> flow = openml.flows.flow_exists(name = "openml.evaluation.EuclideanDistance", \
252-
>>> external_version = "1")
253-
>>> print(flow)
254-
255-
256-
Notes
257-
-----
258-
see https://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
288+
>>> openml.flows.flow_exists("weka.JRip", "Weka_3.9.0_10153") # doctest: +SKIP
259289
"""
260290
if not (isinstance(name, str) and len(name) > 0):
261291
raise ValueError("Argument 'name' should be a non-empty string")
@@ -278,35 +308,58 @@ def get_flow_id(
278308
name: str | None = None,
279309
exact_version: bool = True, # noqa: FBT001, FBT002
280310
) -> int | bool | list[int]:
281-
"""Retrieves the flow id for a model or a flow name.
311+
"""Retrieve flow id(s) for a model instance or a flow name.
282312
283-
Provide either a model or a name to this function. Depending on the input, it does
313+
Provide either a concrete ``model`` (which will be converted to a flow by
314+
the appropriate extension) or a flow ``name``. Behavior depends on
315+
``exact_version``:
284316
285-
* ``model`` and ``exact_version == True``: This helper function first queries for the necessary
286-
extension. Second, it uses that extension to convert the model into a flow. Third, it
287-
executes ``flow_exists`` to potentially obtain the flow id the flow is published to the
288-
server.
289-
* ``model`` and ``exact_version == False``: This helper function first queries for the
290-
necessary extension. Second, it uses that extension to convert the model into a flow. Third
291-
it calls ``list_flows`` and filters the returned values based on the flow name.
292-
* ``name``: Ignores ``exact_version`` and calls ``list_flows``, then filters the returned
293-
values based on the flow name.
317+
- ``model`` + ``exact_version=True``: convert ``model`` to a flow and call
318+
:func:`flow_exists` to get a single flow id (or False).
319+
- ``model`` + ``exact_version=False``: convert ``model`` to a flow and
320+
return all server flow ids with the same flow name.
321+
- ``name``: ignore ``exact_version`` and return all server flow ids that
322+
match ``name``.
294323
295324
Parameters
296325
----------
297-
model : object
298-
Any model. Must provide either ``model`` or ``name``.
299-
name : str
300-
Name of the flow. Must provide either ``model`` or ``name``.
301-
exact_version : bool
302-
Whether to return the flow id of the exact version or all flow ids where the name
303-
of the flow matches. This is only taken into account for a model where a version number
304-
is available (requires ``model`` to be set).
326+
model : object, optional
327+
A model instance that can be handled by a registered extension. Either
328+
``model`` or ``name`` must be provided.
329+
name : str, optional
330+
Flow name to query for. Either ``model`` or ``name`` must be provided.
331+
exact_version : bool, optional (default=True)
332+
When True and ``model`` is provided, only return the id for the exact
333+
external version. When False, return a list of matching ids.
305334
306335
Returns
307336
-------
308-
int or bool, List
309-
flow id iff exists, ``False`` otherwise, List if ``exact_version is False``
337+
int or bool or list[int]
338+
With ``model`` and ``exact_version=True``: the flow id if found, otherwise ``False``.
339+
With ``name`` or ``exact_version=False``: a list of matching flow ids (may be empty).
340+
341+
Raises
342+
------
343+
ValueError
344+
If neither ``model`` nor ``name`` is provided, or if both are provided.
345+
OpenMLServerException
346+
If underlying API calls fail.
347+
348+
Side Effects
349+
------------
350+
- May call server APIs (``flow/exists``, ``flow/list``) and therefore
351+
depends on network access and API keys for private flows.
352+
353+
Examples
354+
--------
355+
>>> import openml
356+
>>> # Lookup by flow name
357+
>>> openml.flows.get_flow_id(name="weka.JRip") # doctest: +SKIP
358+
>>> # Lookup by model instance (requires a registered extension)
359+
>>> import sklearn.tree
360+
>>> import openml_sklearn
361+
>>> clf = sklearn.tree.DecisionTreeClassifier()
362+
>>> openml.flows.get_flow_id(model=clf) # doctest: +SKIP
310363
"""
311364
if model is not None and name is not None:
312365
raise ValueError("Must provide either argument `model` or argument `name`, but not both.")
@@ -422,6 +475,21 @@ def assert_flows_equal( # noqa: C901, PLR0912, PLR0913, PLR0915
422475
423476
check_description : bool
424477
Whether to ignore matching of flow descriptions.
478+
479+
Raises
480+
------
481+
TypeError
482+
When either argument is not an :class:`OpenMLFlow`.
483+
ValueError
484+
When a relevant mismatch is found between the two flows.
485+
486+
Examples
487+
--------
488+
>>> import openml
489+
>>> f1 = openml.flows.get_flow(5) # doctest: +SKIP
490+
>>> f2 = openml.flows.get_flow(5) # doctest: +SKIP
491+
>>> openml.flows.assert_flows_equal(f1, f2) # doctest: +SKIP
492+
>>> # If flows differ, a ValueError is raised
425493
"""
426494
if not isinstance(flow1, OpenMLFlow):
427495
raise TypeError(f"Argument 1 must be of type OpenMLFlow, but is {type(flow1)}")
@@ -581,5 +649,20 @@ def delete_flow(flow_id: int) -> bool:
581649
-------
582650
bool
583651
True if the deletion was successful. False otherwise.
652+
653+
Raises
654+
------
655+
OpenMLServerException
656+
If the server-side deletion fails due to permissions or other errors.
657+
658+
Side Effects
659+
------------
660+
- Removes the flow from the OpenML server (if permitted).
661+
662+
Examples
663+
--------
664+
>>> import openml
665+
>>> # Deletes flow 23 if you are the uploader and it's not linked to runs
666+
>>> openml.flows.delete_flow(23) # doctest: +SKIP
584667
"""
585668
return openml.utils._delete_entity("flow", flow_id)

0 commit comments

Comments
 (0)