@@ -71,39 +71,59 @@ def _get_cached_flow(fid: int) -> OpenMLFlow:
7171
7272@openml .utils .thread_safe_if_oslo_installed
7373def get_flow (flow_id : int , reinstantiate : bool = False , strict_version : bool = True ) -> OpenMLFlow : # noqa: FBT001, FBT002
74- """Download the OpenML flow for a given flow ID.
74+ """Fetch an OpenMLFlow by its server-assigned ID.
75+
76+ Queries the OpenML REST API for the flow metadata and returns an
77+ :class:`OpenMLFlow` instance. If the flow is already cached locally,
78+ the cached copy is returned. Optionally the flow can be re-instantiated
79+ into a concrete model instance using the registered extension.
7580
7681 Parameters
7782 ----------
7883 flow_id : int
7984 The OpenML flow id.
80-
81- reinstantiate: bool, optional, default=False
82- Whether to reinstantiate the flow to a model instance.
83-
84- strict_version : bool, optional, default=True
85- Whether to fail if version requirements are not fulfilled.
85+ reinstantiate : bool, optional (default=False)
86+ If True, convert the flow description into a concrete model instance
87+ using the flow's extension (e.g., sklearn). If conversion fails and
88+ ``strict_version`` is True, an exception will be raised.
89+ strict_version : bool, optional (default=True)
90+ When ``reinstantiate`` is True, whether to enforce exact version
91+ requirements for the extension/model. If False, a fallback flow may
92+ be returned when versions differ.
8693
8794 Returns
8895 -------
89- flow : OpenMLFlow
90- the flow
96+ OpenMLFlow
97+ The flow object with metadata; ``model`` may be populated when
98+ ``reinstantiate=True``.
99+
100+ Raises
101+ ------
102+ OpenMLCacheException
103+ When cached flow files are corrupted or cannot be read.
104+ OpenMLServerException
105+ When the REST API call fails.
106+
107+ Side Effects
108+ ------------
109+ - Writes to ``openml.config.cache_directory/flows/{flow_id}/flow.xml``
110+ when the flow is downloaded from the server.
111+
112+ Preconditions
113+ -------------
114+ - Network access to the OpenML server is required unless the flow is cached.
115+ - For private flows, ``openml.config.apikey`` must be set.
116+
117+ Notes
118+ -----
119+ Results are cached to speed up subsequent calls. When ``reinstantiate`` is
120+ True and version mismatches occur, a new flow may be returned to reflect
121+ the converted model (only when ``strict_version`` is False).
91122
92123 Examples
93124 --------
94125 >>> import openml
95- Download the flow with flow_id = 5
96- >>> flow_id = 5
97- >>> flow = openml.flows.get_flow()
98- >>> print(flow)
99- OpenML Flow
100- ===========
101- Flow ID.........: 5 (version 1)
102- Flow URL........: https://www.openml.org/f/5
103- Flow Name.......: openml.evaluation.average_cost
104- Flow Description: An implementation of the evaluation measure "average_cost"
105- Upload Date.....: 2014-01-16 14:12:56
106- Dependencies....: Build on top of Weka API (Jar version 3.?.?)
126+ >>> flow = openml.flows.get_flow(5) # doctest: +SKIP
107127 """
108128 flow_id = int (flow_id )
109129 flow = _get_flow_description (flow_id )
@@ -154,39 +174,47 @@ def list_flows(
154174 tag : str | None = None ,
155175 uploader : str | None = None ,
156176) -> pd .DataFrame :
157- """
158- Return a list of all flows which are on OpenML.
159- (Supports large amount of results)
177+ """List flows available on the OpenML server.
178+
179+ This function supports paging and filtering and returns a pandas
180+ DataFrame with one row per flow and columns for id, name, version,
181+ external_version, full_name and uploader.
160182
161183 Parameters
162184 ----------
163185 offset : int, optional
164- the number of flows to skip, starting from the first
186+ Number of flows to skip, starting from the first (for paging).
165187 size : int, optional
166- the maximum number of flows to return
188+ Maximum number of flows to return.
167189 tag : str, optional
168- the tag to include
169- kwargs: dict , optional
170- Legal filter operators: uploader .
190+ Only return flows having this tag.
191+ uploader : str, optional
192+ Only return flows uploaded by this user.
171193
172194 Returns
173195 -------
174- flows : dataframe
175- Each row maps to a dataset
176- Each column contains the following information:
177- - flow id
178- - full name
179- - name
180- - version
181- - external version
182- - uploader
196+ pandas.DataFrame
197+ Rows correspond to flows. Columns include ``id``, ``full_name``,
198+ ``name``, ``version``, ``external_version``, and ``uploader``.
199+
200+ Raises
201+ ------
202+ OpenMLServerException
203+ When the API call fails.
204+
205+ Side Effects
206+ ------------
207+ - None: results are fetched and returned; no local state is modified.
208+
209+ Preconditions
210+ -------------
211+ - Network access is required to list flows unless a caching mechanism is
212+ used by the underlying API helper.
183213
184214 Examples
185215 --------
186216 >>> import openml
187- >>> flows = openml.flows.list_flows()
188- A df of all flows which are on openML is returned
189- [123519 rows x 6 columns]
217+ >>> flows = openml.flows.list_flows(size=100) # doctest: +SKIP
190218 """
191219 listing_call = partial (_list_flows , tag = tag , uploader = uploader )
192220 batches = openml .utils ._list_all (listing_call , offset = offset , limit = size )
@@ -229,33 +257,35 @@ def _list_flows(limit: int, offset: int, **kwargs: Any) -> pd.DataFrame:
229257
230258
231259def flow_exists (name : str , external_version : str ) -> int | bool :
232- """Retrieves the flow id .
260+ """Check whether a flow (name + external_version) exists on the server.
233261
234- A flow is uniquely identified by name + external_version.
262+ The OpenML server defines uniqueness of flows by the pair
263+ ``(name, external_version)``. This helper queries the server and
264+ returns the corresponding flow id when present.
235265
236266 Parameters
237267 ----------
238- name : string
239- Name of the flow
240- external_version : string
268+ name : str
269+ Flow name (e.g., ``sklearn.tree._classes.DecisionTreeClassifier(1)``).
270+ external_version : str
241271 Version information associated with flow.
242272
243273 Returns
244274 -------
245- flow_exist : int or bool
246- flow id iff exists, False otherwise
275+ int or bool
276+ The flow id if the flow exists on the server, otherwise ``False``.
277+
278+ Raises
279+ ------
280+ ValueError
281+ If ``name`` or ``external_version`` is empty or not a string.
282+ OpenMLServerException
283+ When the API request fails.
247284
248285 Examples
249286 --------
250287 >>> import openml
251- >>> flow = openml.flows.flow_exists(name = "openml.evaluation.EuclideanDistance", \
252- >>> external_version = "1")
253- >>> print(flow)
254-
255-
256- Notes
257- -----
258- see https://www.openml.org/api_docs/#!/flow/get_flow_exists_name_version
288+ >>> openml.flows.flow_exists("weka.JRip", "Weka_3.9.0_10153") # doctest: +SKIP
259289 """
260290 if not (isinstance (name , str ) and len (name ) > 0 ):
261291 raise ValueError ("Argument 'name' should be a non-empty string" )
@@ -278,35 +308,58 @@ def get_flow_id(
278308 name : str | None = None ,
279309 exact_version : bool = True , # noqa: FBT001, FBT002
280310) -> int | bool | list [int ]:
281- """Retrieves the flow id for a model or a flow name.
311+ """Retrieve flow id(s) for a model instance or a flow name.
282312
283- Provide either a model or a name to this function. Depending on the input, it does
313+ Provide either a concrete ``model`` (which will be converted to a flow by
314+ the appropriate extension) or a flow ``name``. Behavior depends on
315+ ``exact_version``:
284316
285- * ``model`` and ``exact_version == True``: This helper function first queries for the necessary
286- extension. Second, it uses that extension to convert the model into a flow. Third, it
287- executes ``flow_exists`` to potentially obtain the flow id the flow is published to the
288- server.
289- * ``model`` and ``exact_version == False``: This helper function first queries for the
290- necessary extension. Second, it uses that extension to convert the model into a flow. Third
291- it calls ``list_flows`` and filters the returned values based on the flow name.
292- * ``name``: Ignores ``exact_version`` and calls ``list_flows``, then filters the returned
293- values based on the flow name.
317+ - ``model`` + ``exact_version=True``: convert ``model`` to a flow and call
318+ :func:`flow_exists` to get a single flow id (or False).
319+ - ``model`` + ``exact_version=False``: convert ``model`` to a flow and
320+ return all server flow ids with the same flow name.
321+ - ``name``: ignore ``exact_version`` and return all server flow ids that
322+ match ``name``.
294323
295324 Parameters
296325 ----------
297- model : object
298- Any model. Must provide either `` model`` or ``name``.
299- name : str
300- Name of the flow. Must provide either ``model`` or ``name``.
301- exact_version : bool
302- Whether to return the flow id of the exact version or all flow ids where the name
303- of the flow matches. This is only taken into account for a model where a version number
304- is available (requires ``model`` to be set) .
326+ model : object, optional
327+ A model instance that can be handled by a registered extension. Either
328+ ``model`` or ``name`` must be provided.
329+ name : str, optional
330+ Flow name to query for. Either ``model`` or ``name`` must be provided.
331+ exact_version : bool, optional (default=True)
332+ When True and ``model`` is provided, only return the id for the exact
333+ external version. When False, return a list of matching ids.
305334
306335 Returns
307336 -------
308- int or bool, List
309- flow id iff exists, ``False`` otherwise, List if ``exact_version is False``
337+ int or bool or list[int]
338+ If ``model`` is given and ``exact_version`` is True: the flow id if found, otherwise ``False``.
339+ If ``name`` is given, or ``exact_version`` is False: a list of matching flow ids (may be empty).
340+
341+ Raises
342+ ------
343+ ValueError
344+ If neither ``model`` nor ``name`` is provided, or if both are provided.
345+ OpenMLServerException
346+ If underlying API calls fail.
347+
348+ Side Effects
349+ ------------
350+ - May call server APIs (``flow/exists``, ``flow/list``) and therefore
351+ depends on network access and API keys for private flows.
352+
353+ Examples
354+ --------
355+ >>> import openml
356+ >>> # Lookup by flow name
357+ >>> openml.flows.get_flow_id(name="weka.JRip") # doctest: +SKIP
358+ >>> # Lookup by model instance (requires a registered extension)
359+ >>> import sklearn.tree
360+ >>> import openml_sklearn
361+ >>> clf = sklearn.tree.DecisionTreeClassifier()
362+ >>> openml.flows.get_flow_id(model=clf) # doctest: +SKIP
310363 """
311364 if model is not None and name is not None :
312365 raise ValueError ("Must provide either argument `model` or argument `name`, but not both." )
@@ -422,6 +475,21 @@ def assert_flows_equal( # noqa: C901, PLR0912, PLR0913, PLR0915
422475
423476 check_description : bool
424477 Whether to ignore matching of flow descriptions.
478+
479+ Raises
480+ ------
481+ TypeError
482+ When either argument is not an :class:`OpenMLFlow`.
483+ ValueError
484+ When a relevant mismatch is found between the two flows.
485+
486+ Examples
487+ --------
488+ >>> import openml
489+ >>> f1 = openml.flows.get_flow(5) # doctest: +SKIP
490+ >>> f2 = openml.flows.get_flow(5) # doctest: +SKIP
491+ >>> openml.flows.assert_flows_equal(f1, f2) # doctest: +SKIP
492+ >>> # If flows differ, a ValueError is raised
425493 """
426494 if not isinstance (flow1 , OpenMLFlow ):
427495 raise TypeError (f"Argument 1 must be of type OpenMLFlow, but is { type (flow1 )} " )
@@ -581,5 +649,20 @@ def delete_flow(flow_id: int) -> bool:
581649 -------
582650 bool
583651 True if the deletion was successful. False otherwise.
652+
653+ Raises
654+ ------
655+ OpenMLServerException
656+ If the server-side deletion fails due to permissions or other errors.
657+
658+ Side Effects
659+ ------------
660+ - Removes the flow from the OpenML server (if permitted).
661+
662+ Examples
663+ --------
664+ >>> import openml
665+ >>> # Deletes flow 23 if you are the uploader and it's not linked to runs
666+ >>> openml.flows.delete_flow(23) # doctest: +SKIP
584667 """
585668 return openml .utils ._delete_entity ("flow" , flow_id )
0 commit comments