diff --git a/Dockerfile.jinja b/Dockerfile.jinja index 49a1842..9bae565 100644 --- a/Dockerfile.jinja +++ b/Dockerfile.jinja @@ -1,5 +1,5 @@ # basic python3 image as base -FROM harbor2.vantage6.ai/infrastructure/algorithm-base +FROM harbor2.vantage6.ai/infrastructure/algorithm-base:5.0 # This is a placeholder that should be overloaded by invoking # docker build with '--build-arg PKG_NAME=...' @@ -9,23 +9,6 @@ ARG PKG_NAME="{{algorithm_name}}" COPY . /app RUN pip install /app -{% if use_vpn %} -# Specify the ports that are used for VPN communication, along with a label -# that helps you identify them. As an example, port 8888 is used here. The label -# must be specified as the port number with a 'p' prefix, e.g. 'p8888'. - {% if vpn_expose %} - {%- for port_dict in vpn_expose %} -EXPOSE {{port_dict.port}} -LABEL p{{port_dict.port}} = '{{port_dict.label}}' - {% endfor %} - {% else %} -# TODO provide a sensible label below. Feel free to add more ports if needed by -# adding additional EXPOSE and LABEL commands. -EXPOSE 8888 -LABEL p8888='some_label' - {% endif %} -{% endif %} - # Set environment variable to make name of the package available within the # docker image. ENV PKG_NAME=${PKG_NAME} diff --git a/README.md.jinja b/README.md.jinja index e36c6bd..c677894 100644 --- a/README.md.jinja +++ b/README.md.jinja @@ -18,7 +18,8 @@ Please ensure to execute the following steps. The steps are also indicated with TODO statements in the generated code - so you can also simply search the code for TODO instead of following the checklist below. -- [ ] Include a URL to your code repository in setup.py. +- [ ] Fill out the fields in the `pyproject.toml` file, such as a URL to your code + repository. Alternatively, remove these fields. - [ ] Implement your algorithm functions. - [ ] You are free to add more arguments to the functions. Be sure to add them *after* the `client` and dataframe arguments. @@ -26,7 +27,7 @@ code for TODO instead of following the checklist below. 
to include values for these arguments in the `client.task.create()` calls that are available there. - [ ] If you are using Python packages that are not in the standard library, add - them to the `requirements.txt` and `setup.py` file. + them to the `pyproject.toml` file. Note that `pandas` is already included by default. {% if has_docs %} - [ ] Fill in the documentation template. This will help others to understand your algorithm, be able to use it safely, and to contribute to it. @@ -37,15 +38,16 @@ code for TODO instead of following the checklist below. {% endif %} - [ ] If you want to submit your algorithm to a vantage6 algorithm store, be sure to fill in everything in ``algorithm_store.json`` (and be sure to update - it if you change function names, arguments, etc.). + it if you change function names, arguments, etc.). It is recommended to run + ``v6 algorithm generate-store-json`` to automatically generate the file - this + should work especially well if you have added proper docstrings to your functions. + Note that you do need the `vantage6` CLI to be able to use this command, which can be + installed by e.g. running `pip install vantage6` (or `uv pip install vantage6`). {% if has_gh_pipeline %} - [ ] Create a ``DOCKER_USER`` and ``DOCKER_PASSWORD`` secret in the GitHub repository settings. This will be used to push the Docker image to the registry in the github pipeline. {% endif %} -{% if use_vpn %} -- [ ] Review the EXPOSE and LABEL commands in the Dockerfile for VPN -{% endif %} - [ ] Finally, remove this checklist section to keep the README clean. 
### Dockerizing your algorithm diff --git a/algorithm_store.json.jinja b/algorithm_store.json.jinja index f116aeb..7e1ba30 100644 --- a/algorithm_store.json.jinja +++ b/algorithm_store.json.jinja @@ -1,7 +1,7 @@ { "name": "{{algorithm_name}}", "image": "{{docker_image}}", - "vantage6_version": "4.6", + "vantage6_version": "5.0", "code_url": "https://mygitrepo.org", "documentation_url": "", "partitioning": "horizontal", @@ -10,17 +10,12 @@ { "name": "{{central_function_name}}", "description": "{{algorithm_description}}", - "type": "central", + "step_type": "central_compute", "databases": [ - {%- for idx in range(partial_function_number_databases) -%} + {%- for idx in range(federated_function_number_databases) -%} { - "name": "Partial database {{idx + 1}}" - }{%- if not loop.last or central_function_number_databases > 0 -%},{%- endif -%} - {% endfor %} - {% for idx in range(central_function_number_databases) %} - { - "name": "Central database {{idx + 1}}" - }{%- if not loop.last -%},{%- endif -%} + "name": "Central database {{idx + 1}}" + }{%- if not loop.last or federated_function_number_databases > 0 -%},{%- endif -%} {% endfor %} ], "arguments": [ @@ -33,21 +28,21 @@ {% endfor %} ] }{%- endif -%} - {%- if has_central_function and has_partial_function -%},{%- endif -%} - {% if has_partial_function -%} + {%- if has_central_function and has_federated_function -%},{%- endif -%} + {% if has_federated_function -%} { - "name": "{{partial_function_name}}", + "name": "{{federated_function_name}}", "description": "", - "type": "federated", + "step_type": "federated_compute", "databases": [ - {%- for idx in range(partial_function_number_databases) -%} + {%- for idx in range(federated_function_number_databases) -%} { "name": "Database {{idx + 1}}" }{%- if not loop.last -%},{%- endif -%} {% endfor %} ], "arguments": [ - {%- for arg in partial_args -%} + {%- for arg in federated_args -%} { "name": "{{arg}}", "type": "", diff --git a/cleanup.py b/cleanup.py index 
a63f87d..6a886a9 100644 --- a/cleanup.py +++ b/cleanup.py @@ -24,10 +24,12 @@ def cleanup() -> None: print("Removing LICENSE file as no license was chosen...") Path("LICENSE").unlink() - # Remove partial function files if partial function is not defined - if not copier_config.get("has_partial_function"): - print("Removing partial function file as partial function is not defined...") - Path(algorithm_name, "partial.py").unlink() + # Remove federated function files if federated function is not defined + if not copier_config.get("has_federated_function"): + print( + "Removing federated function file as federated function is not defined..." + ) + Path(algorithm_name, "federated.py").unlink() # Remove central function files if central function is not defined if not copier_config.get("has_central_function"): diff --git a/copier.yml b/copier.yml index bcb1a28..7f8e78b 100644 --- a/copier.yml +++ b/copier.yml @@ -30,86 +30,103 @@ central_function_name: type: str when: "{{ has_central_function }}" help: "What is the name of your central function?" - default: central + default: central_function -central_function_client: - type: bool +central_args: + type: json when: "{{ has_central_function }}" - help: "Do you want to use a client in your central function?" + help: "Add a list of arguments to the central function '{{ central_function_name }}'" + default: ['arg1', ] + multiline: true + +# ------------- Define federated algorithm function ---------- # + +has_federated_function: + type: bool + help: "Do you want to use a federated function in your algorithm?" default: true -central_function_data: +federated_function_name: + type: str + when: "{{ has_federated_function }}" + help: "What is the name of your federated function?" + default: federated_function + +federated_function_data: type: bool - when: "{{ has_central_function }}" - help: "Do you want to use data in your central function?" 
- default: false + when: "{{ has_federated_function }}" + help: "Do you want to use data in your federated function?" + default: true -central_function_number_databases: +federated_function_number_databases: type: int - when: "{{ has_central_function and central_function_data }}" - help: "How many databases do you want to use in your central function?" + when: "{{ has_federated_function and federated_function_data }}" + help: "How many databases do you want to use in your federated function?" default: |- - {%- if has_central_function and central_function_data -%} + {%- if has_federated_function and federated_function_data -%} 1 {%- else -%} 0 {%- endif -%} validator: |- - {% if central_function_data and central_function_number_databases < 1 %} + {% if has_federated_function and federated_function_number_databases < 1 %} Must be at least 1 {% endif %} -central_args: +federated_args: type: json - when: "{{ has_central_function }}" - help: "Add a list of arguments to the central function '{{ central_function_name }}'" + when: "{{ has_federated_function }}" + help: "Add a list of arguments to the federated function '{{ federated_function_name }}'" default: ['arg1', ] multiline: true -# ------------- Define partial algorithm function ---------- # +# ----------------------- Data extraction ------------------- # -has_partial_function: +import_infra_extraction: type: bool - help: "Do you want to use a partial function in your algorithm?" + help: "Do you want to make the default vantage6 data extraction functions available in your algorithm?" default: true -partial_function_name: +has_data_extraction: + type: bool + help: "Do you want to define your own data extraction function in your algorithm?" + default: true + +data_extraction_function_name: type: str - when: "{{ has_partial_function }}" - help: "What is the name of your partial function?" - default: partial + when: "{{ has_data_extraction }}" + help: "What is the name of your data extraction function?" 
+ default: data_extraction_function -partial_function_client: +data_extraction_args: + type: json + when: "{{ has_data_extraction }}" + help: "Add a list of arguments to the data extraction function '{{ data_extraction_function_name }}'" + default: ['arg1', ] + multiline: true + +# ----------------------- Data preprocessing ------------------- # + +import_infra_preprocessing: type: bool - when: "{{ has_partial_function }}" - help: "Do you want to use a client in your partial function?" - default: false + help: "Do you want to make the default vantage6 data preprocessing functions available in your algorithm?" + default: true -partial_function_data: +has_data_preprocessing: type: bool - when: "{{ has_partial_function }}" - help: "Do you want to use data in your partial function?" + help: "Do you want to define your own data preprocessing function in your algorithm?" default: true -partial_function_number_databases: - type: int - when: "{{ has_partial_function and partial_function_data }}" - help: "How many databases do you want to use in your partial function?" - default: |- - {%- if has_partial_function and partial_function_data -%} - 1 - {%- else -%} - 0 - {%- endif -%} - validator: |- - {% if has_partial_function and partial_function_number_databases < 1 %} - Must be at least 1 - {% endif %} +data_preprocessing_function_name: + type: str + when: "{{ has_data_preprocessing }}" + help: "What is the name of your data preprocessing function?" + default: data_preprocessing_function -partial_args: +data_preprocessing_args: type: json - when: "{{ has_partial_function }}" - help: "Add a list of arguments to the partial function '{{ partial_function_name }}'" + when: "{{ has_data_preprocessing }}" + help: "Add a list of arguments to the data preprocessing function '{{ data_preprocessing_function_name }}'" default: ['arg1', ] multiline: true @@ -144,31 +161,6 @@ advanced: help: "Do you want to see the advanced options?" 
default: true -# ----------------------- Set up VPN ----------------------- # -use_vpn: - type: bool - when: "{{ advanced }}" - help: "Do you want to use the VPN network in your algorithm?" - default: false - -vpn_expose: - type: json - when: "{{ advanced and use_vpn }}" - help: | - Configure the ports to expose in the VPN. You can add multiple ports - by adding more dictionaries with keys 'label' and 'port'. - default: [{'port': 8888, 'label': 'my_label'}] - multiline: true - validator: | - {% for port_dict in vpn_expose %} - {% if 'port' not in port_dict %} - Each dictionary should have a 'port' key. - {% endif %} - {% if 'label' not in port_dict %} - Each dictionary should have a 'label' key. - {% endif %} - {% endfor %} - has_gh_pipeline: type: bool when: "{{ advanced }}" @@ -219,7 +211,7 @@ _tasks: # {% elif _copier_conf.os == 'windows' %} # Remove-Item LICENSE # {% endif %} - # TODO: similarly, delete if empty: central.py, partial.py + # TODO: similarly, delete if empty: central.py, federated.py # Call python script that cleans up - ["{{ _copier_python }}", cleanup.py] _message_after_copy: | diff --git a/macros/kwargs_definition.jinja b/macros/arguments_definition.jinja similarity index 82% rename from macros/kwargs_definition.jinja rename to macros/arguments_definition.jinja index 04953a4..f1f2c95 100644 --- a/macros/kwargs_definition.jinja +++ b/macros/arguments_definition.jinja @@ -1,4 +1,4 @@ -{%- macro kwargs_define(func_name, func_args, num_indents) -%} +{%- macro arguments_define(func_name, func_args, num_indents) -%} {%- if func_args -%} # TODO add sensible values{{"\n"}} {%- for arg in func_args -%} diff --git a/macros/function_definition.jinja b/macros/function_definition.jinja index 1428fa1..f6b9856 100644 --- a/macros/function_definition.jinja +++ b/macros/function_definition.jinja @@ -1,25 +1,32 @@ -{% macro func_def(func_name, data_decorator, client_decorator, num_dbs, - func_args) %} -{# Data decorator #} -{% if data_decorator -%} 
-@data({{num_dbs}}) -{% endif -%} - -{# Algorithm client decorator #} -{%- if client_decorator -%} +{% macro func_def(func_name, func_type, num_dbs, func_args) %} +{# Function type decorator #} +{% if func_type == "central_compute" -%} +@central @algorithm_client +{% elif func_type == "federated_compute" -%} +@federated +{% elif func_type == "preprocessing" -%} +@preprocessing +{% elif func_type == "data_extraction" -%} +@data_extraction +{% endif -%} +{# Data decorator #} +{%- if num_dbs > 0 and func_type != "preprocessing" -%} +@dataframe({{num_dbs}}) {% endif -%} {# Function definition #} def {{func_name}}( {{" "}}{# <- Add 4 whitespaces before function args start #} {# Add client argument #} -{%- if client_decorator %} +{%- if func_type == "central_compute" -%} client: AlgorithmClient +{%- elif func_type == "data_extraction" -%} +connection_details: dict {%- endif -%} {# Add data arguments #} -{%- if data_decorator -%} +{%- if num_dbs > 0 -%} {# Add comma if previous arguments exist #} - {%- if client_decorator -%} + {%- if func_type == "central_compute" or func_type == "data_extraction" -%} ,{{" "}} {%- endif -%} {%- for db_num in range(num_dbs)-%} @@ -32,7 +39,7 @@ client: AlgorithmClient {# Add additional arguments to function #} {%- if func_args %} {# Add comma if previous arguments exist #} - {%- if client_decorator or data_decorator -%} + {%- if func_type == "central_compute" or func_type == "data_extraction" or num_dbs > 0 -%} ,{{" "}} {%- endif -%} {# Add arguments #} @@ -46,4 +53,4 @@ client: AlgorithmClient {# Finalize function definition #} ) -> Any: -{% endmacro %} \ No newline at end of file +{%- endmacro -%} \ No newline at end of file diff --git a/pyproject.toml.jinja b/pyproject.toml.jinja new file mode 100644 index 0000000..ad4fb57 --- /dev/null +++ b/pyproject.toml.jinja @@ -0,0 +1,39 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{algorithm_name}}" +version = "1.0.0" +description = 
"{{algorithm_description}}" +readme = "README.md" +requires-python = ">=3.13" +dependencies = [ + "vantage6-algorithm-tools>=5.0.0a43", + "pandas", +] +authors = [ + # TODO add authors +] +{% if open_source_license == 'mit' %} +license = { text = "MIT" } +{% elif open_source_license == 'apache' %} +license = { text = "Apache Software License 2.0" } +{% elif open_source_license == 'gpl' %} +license = { text = "GNU General Public License v3" } +{% endif %} +keywords = ["vantage6", "algorithm", "federated-learning"] +classifiers = [ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.13", +] + +[project.urls] +# TODO add urls + +[tool.hatch.build.targets.wheel] +packages = ["{{algorithm_name}}"] + +[tool.uv] +prereleases = "allow" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1f33787..0000000 --- a/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -vantage6-algorithm-tools -pandas diff --git a/setup.py.jinja b/setup.py.jinja deleted file mode 100644 index 8b18bf3..0000000 --- a/setup.py.jinja +++ /dev/null @@ -1,28 +0,0 @@ -from os import path -from codecs import open -from setuptools import setup, find_packages - -# we're using a README.md, if you do not have this in your folder, simply -# replace this with a string. -here = path.abspath(path.dirname(__file__)) -with open(path.join(here, 'README.md'), encoding='utf-8') as f: - long_description = f.read() - -# Here you specify the meta-data of your package. The `name` argument is -# needed in some other steps. 
-setup( - name='{{algorithm_name}}', - version="1.0.0", - description='{{algorithm_description}}', - long_description=long_description, - long_description_content_type='text/markdown', - # TODO add a url to your github repository here (or remove this line if - # you do not want to make your source code public) - # url='https://github.com/....', - packages=find_packages(), - python_requires='>=3.10', - install_requires=[ - 'vantage6-algorithm-tools', - 'pandas' - ] -) diff --git a/test/test.py.jinja b/test/test.py.jinja deleted file mode 100644 index 10ce466..0000000 --- a/test/test.py.jinja +++ /dev/null @@ -1,77 +0,0 @@ -""" -Run this script to test your algorithm locally (without building a Docker -image) using the mock client. - -Run as: - - python test.py - -Make sure to do so in an environment where `vantage6-algorithm-tools` is -installed. This can be done by running: - - pip install vantage6-algorithm-tools -""" -from vantage6.algorithm.tools.mock_client import MockAlgorithmClient -from pathlib import Path - -# get path of current directory -current_path = Path(__file__).parent - -## Mock client -client = MockAlgorithmClient( - datasets=[ - # Data for first organization - [{ - "database": current_path / "test_data.csv", - "db_type": "csv", - "input_data": {} - }], - # Data for second organization - [{ - "database": current_path / "test_data.csv", - "db_type": "csv", - "input_data": {} - }] - ], - module="{{algorithm_name}}" -) - -# list mock organizations -organizations = client.organization.list() -print(organizations) -org_ids = [organization["id"] for organization in organizations] - -{% if has_central_function %} -# Run the central method on 1 node and get the results -central_task = client.task.create( - input_={ - "method":"{{central_function_name}}", - "kwargs": { - {% from 'macros/kwargs_definition.jinja' import kwargs_define %} - {{ kwargs_define(central_function_name, central_args, 3) }} - } - }, - organizations=[org_ids[0]], -) -results = 
client.wait_for_results(central_task.get("id")) -print(results) -{% endif %} - -{% if has_partial_function %} -# Run the partial method for all organizations -task = client.task.create( - input_={ - "method":"{{partial_function_name}}", - "kwargs": { - {% from 'macros/kwargs_definition.jinja' import kwargs_define %} - {{ kwargs_define(partial_function_name, partial_args, 3) }} - } - }, - organizations=org_ids -) -print(task) - -# Get the results from the task -results = client.wait_for_results(task.get("id")) -print(results) -{% endif %} diff --git a/test/test_data.csv b/test/test_data.csv index 34cbe41..1606203 100644 --- a/test/test_data.csv +++ b/test/test_data.csv @@ -1,19 +1,19 @@ -"Name","Gender","Age","Height(in)","Weight(lbs)" -"Alex","M",41,74,170 -"Bert","M",42,68,166 -"Carl","M",32,70,155 -"Dave","M",39,72,167 -"Elly","F",30,66,124 -"Fran","F",33,66,115 -"Gwen","F",26,64,121 -"Hank","M",30,71,158 -"Ivan","M",53,72,175 -"Jake","M",32,69,143 -"Kate","F",47,69,139 -"Luke","M",34,72,163 -"Myra","F",23,62,98 -"Neil","M",36,75,160 -"Omar","M",38,70,145 -"Page","F",31,67,135 -"Quin","M",29,71,176 -"Ruth","F",28,65,131 \ No newline at end of file +"Name","Gender","Age","Height","Weight" +"Alex","M",41,182,77 +"Bert","M",42,172,75 +"Carl","M",32,177,70 +"Dave","M",39,182,77 +"Elly","F",30,167,65 +"Fran","F",33,167,56 +"Gwen","F",26,162,55 +"Hank","M",30,178,72 +"Ivan","M",53,182,85 +"Jake","M",32,175,65 +"Kate","F",47,175,58 +"Luke","M",34,182,85 +"Myra","F",23,160,54 +"Neil","M",36,187,86 +"Omar","M",38,175,65 +"Page","F",31,175,58 +"Quin","M",29,178,72 +"Ruth","F",28,165,55 \ No newline at end of file diff --git a/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja new file mode 100644 index 0000000..a3e0295 --- /dev/null +++ b/test/{% if has_data_extraction %}test_extraction.py{% endif %}.jinja @@ -0,0 +1,75 @@ +""" +Run this script to test your extraction function locally 
(without building a Docker +image) using the mock client. + +Run as: + + python test_extraction.py + +Make sure to do so in an environment where `vantage6-algorithm-tools` is +installed. This can be done by running: + + pip install vantage6-algorithm-tools +""" +from vantage6.algorithm.mock.network import MockNetwork +from pathlib import Path + +# get path of current directory +current_path = Path(__file__).parent + +# The MockNetwork expects a list of datasets. In the case of an extraction job, this +# needs to be a URI. In this example, we use a CSV file that was included in this +# template. In case you want to connect to a database you need to make sure that the +# database is reachable. +DATABASE_LABEL = "default" +network = MockNetwork( + datasets=[ + { + DATABASE_LABEL: { + "database": current_path / "test_data.csv", + "db_type": "csv", + }, + }, + { + DATABASE_LABEL: { + "database": current_path / "test_data.csv", + "db_type": "csv", + }, + }, + { + DATABASE_LABEL: { + "database": current_path / "test_data.csv", + "db_type": "csv", + }, + }, + ], + module_name="{{algorithm_name}}" +) + +# Once the network is created, we can get the client to interact with the MockNetwork. 
+client = network.user_client + +# List mock organizations +organizations = client.organization.list() +print(organizations) +org_ids = [organization["id"] for organization in organizations] + +# Run the data extraction function +task = client.dataframe.create( + method="{{data_extraction_function_name}}", + arguments={ + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(data_extraction_function_name, data_extraction_args, 2) }} + }, + organizations=org_ids, + label=DATABASE_LABEL, +) + +# Wait for the task to complete +results = client.wait_for_results(task.get("id")) +print("results:", results) + +{# TODO implement checking the dataframe that has been created at the node: #} +print("dataframes:") +for node in network.nodes: + print(node.dataframes) diff --git a/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja new file mode 100644 index 0000000..b66d663 --- /dev/null +++ b/test/{% if has_data_preprocessing %}test_preprocessing.py{% endif %}.jinja @@ -0,0 +1,64 @@ +""" +Run this script to test your preprocessing function locally (without building a Docker +image) using the mock client. + +Run as: + + python test_preprocessing.py + +Make sure to do so in an environment where `vantage6-algorithm-tools` is +installed. This can be done by running: + + pip install vantage6-algorithm-tools +""" +import pandas as pd + +from vantage6.algorithm.mock.network import MockNetwork +from pathlib import Path + +# get path of current directory +current_path = Path(__file__).parent + +# The MockNetwork expects a list of datasets. In this instance we are not interested in +# extracting the data from its source. 
Therefore, we supply the data as a Pandas +# dataframe avoiding the need to extract the data first +data = pd.read_csv(current_path / "test_data.csv") +DATABASE_LABEL = "default" +network = MockNetwork( + datasets=[ + {DATABASE_LABEL: {"database": data}}, + {DATABASE_LABEL: {"database": data}}, + {DATABASE_LABEL: {"database": data}}, + ], + module_name="{{algorithm_name}}" +) + +# Once the network is created, we can get the client to interact with the MockNetwork. +client = network.user_client + +# List mock organizations +organizations = client.organization.list() +print(organizations) +org_ids = [organization["id"] for organization in organizations] + +# Run the data preprocessing function +task = client.dataframe.preprocess( + id_=network.server.dataframes[0]["id"], + image="{{docker_image}}", + method="{{preprocessing_function_name}}", + arguments={ + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(preprocessing_function_name, preprocessing_args, 2) }} + }, + organizations=org_ids, + databases=[{"label": DATABASE_LABEL}], +) + +# Wait for the task to complete +results = client.wait_for_results(task.get("id")) +print("results:", results) + +{# TODO implement checking the dataframe that has been created at the node: #} +print("dataframes:") +for node in network.nodes: + print(node.dataframes) diff --git a/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja b/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja new file mode 100644 index 0000000..dc43806 --- /dev/null +++ b/test/{% if has_has_central_function or has_federated_function %}test_compute.py{% endif %}.jinja @@ -0,0 +1,80 @@ +""" +Run this script to test your compute function locally (without building a Docker image) +using the mock client. + +Run as: + + python test_compute.py + +Make sure to do so in an environment where `vantage6-algorithm-tools` is +installed. 
This can be done by running: + + pip install vantage6-algorithm-tools +""" +import pandas as pd + +from vantage6.algorithm.mock.network import MockNetwork +from pathlib import Path + +# get path of current directory +current_path = Path(__file__).parent + +# The MockNetwork expects a list of datasets. In this instance we are not interested in +# extracting the data from its source. Therefore, we supply the data as a Pandas +# dataframe avoiding the need to extract the data first +data = pd.read_csv(current_path / "test_data.csv") +DATABASE_LABEL = "default" + +# Create a MockNetwork with identical datasets for three nodes +network = MockNetwork( + datasets=[ + {DATABASE_LABEL: {"database": data}}, + {DATABASE_LABEL: {"database": data}}, + {DATABASE_LABEL: {"database": data}}, + ], + module_name="{{algorithm_name}}" +) + +# Once the network is created, we can get the client to interact with the MockNetwork. +client = network.user_client + +# List mock organizations +organizations = client.organization.list() +print(organizations) +org_ids = [organization["id"] for organization in organizations] + +{% if has_central_function %} +# Run the central method on 1 node and get the results +central_task = client.task.create( + method="{{central_function_name}}", + arguments={ + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(central_function_name, central_args, 2) }} + }, + organizations=[org_ids[0]], + databases=[ + {"type": "dataframe", "dataframe_id": network.server.dataframes[0]["id"]} + ], +) +results = client.wait_for_results(central_task.get("id")) +print(results) +{% endif %} + +{% if has_federated_function %} +# Run the federated method for all organizations +task = client.task.create( + method="{{federated_function_name}}", + arguments={ + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(federated_function_name, federated_args, 2) }} }, + organizations=org_ids, + databases=[ + 
{"type": "dataframe", "dataframe_id": network.server.dataframes[0]["id"]} + ], +) +print(task) + +# Get the results from the task +results = client.wait_for_results(task.get("id")) +print(results) +{% endif %} diff --git a/{% if has_docs %}docs{% endif %}/index.rst.jinja b/{% if has_docs %}docs{% endif %}/index.rst.jinja index 7dcdbee..29fb4f6 100644 --- a/{% if has_docs %}docs{% endif %}/index.rst.jinja +++ b/{% if has_docs %}docs{% endif %}/index.rst.jinja @@ -11,6 +11,8 @@ Authors .. List authors. +{{author}} + Source code ----------- diff --git a/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja b/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja index 1677f8e..f743f0c 100644 --- a/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja +++ b/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/implementation.rst.jinja @@ -12,16 +12,35 @@ The central part is responsible for the orchestration and aggregation of the alg .. Describe the central function here. {% endif %} -{% if has_partial_function %} -Partials +{% if has_federated_function %} +Federated functions -------- -Partials are the computations that are executed on each node. The partials have access -to the data that is stored on the node. The partials are executed in parallel on each -node. +Federated functions are the computations that are executed on each node. The federated +functions have access to the data that is stored on the node. These functions are +executed in parallel on each node. -``{{partial_function_name}}`` +``{{federated_function_name}}`` ~~~~~~~~~~~~~~~~ -.. Describe the partial function. +.. Describe the function. 
{% endif %} +{% if has_data_extraction %} +Data extraction (``{{data_extraction_function_name}}``) +-------------------------------- +The data extraction function is responsible for extracting data from the databases and +storing it locally on the node, so that the data can be easily used in subsequent +analyses. + +.. Describe the function. + +{% endif %} +{% if has_data_preprocessing %} +Data preprocessing (``{{data_preprocessing_function_name}}``) +-------------------------------- +The data preprocessing function is responsible for preprocessing the data extracted from +the databases. + +.. Describe the function. + +{% endif %} \ No newline at end of file diff --git a/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/usage.rst.jinja b/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/usage.rst.jinja index a8e3d74..2870962 100644 --- a/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/usage.rst.jinja +++ b/{% if has_docs %}docs{% endif %}/{{algorithm_name}}/usage.rst.jinja @@ -23,40 +23,34 @@ first, especially the part about the from vantage6.client import Client - server = 'http://localhost' - port = 7601 - api_path = '/api' - private_key = None - username = 'root' - password = 'password' + server_url = "http://localhost:7601/api" + auth_url = "http://localhost:8080" collaboration_id = 1 organization_ids = [2] # Create connection with the vantage6 server - client = Client(server, port, api_path) - client.setup_encryption(private_key) - client.authenticate(username, password) + client = Client(server_url, auth_url) + client.authenticate() input_ = { - 'method': '{{central_function_name}}', - 'args': [], - 'kwargs': { + "method": "{{central_function_name}}", + "arguments": { {% for arg in central_args %} - '{{arg}}': 'my_value', + "{{arg}}": "my_value", {% endfor %} }, - 'output_format': 'json' + "output_format": "json" } my_task = client.task.create( collaboration=collaboration_id, organizations=organization_ids, - name='{{algorithm_name}}', - 
description='{{algorithm_description}}', - image='{{docker_image}}', + name="{{algorithm_name}}", + description="{{algorithm_description}}", + image="{{docker_image}}", input_=input_, databases=[{"label": "default"}], ) - task_id = my_task.get('id') + task_id = my_task.get("id") results = client.wait_for_results(task_id) \ No newline at end of file diff --git a/{{algorithm_name}}/__init__.py.jinja b/{{algorithm_name}}/__init__.py.jinja index c84baf2..160a14c 100644 --- a/{{algorithm_name}}/__init__.py.jinja +++ b/{{algorithm_name}}/__init__.py.jinja @@ -1,7 +1,23 @@ +{% if import_infra_extraction -%} +from vantage6.algorithm.data_extraction import * +{% endif %} + +{%- if import_infra_preprocessing -%} +from vantage6.algorithm.preprocessing import * +{% endif %} + {% if has_central_function -%} from .central import * {% endif %} -{%- if has_partial_function %} -from .partial import * +{%- if has_federated_function %} +from .federated import * {% endif %} + +{%- if has_data_extraction -%} +from .extract import * +{% endif %} + +{%- if has_data_preprocessing -%} +from .preprocess import * +{% endif %} \ No newline at end of file diff --git a/{{algorithm_name}}/central.py.jinja b/{{algorithm_name}}/central.py.jinja index 9577a0f..9a2020b 100644 --- a/{{algorithm_name}}/central.py.jinja +++ b/{{algorithm_name}}/central.py.jinja @@ -6,34 +6,23 @@ The results in a return statement are sent to the vantage6 server (after encryption if that is enabled). 
""" {% if has_central_function -%} - {% if central_function_data -%} - import pandas as pd - {% endif %} from typing import Any from vantage6.algorithm.tools.util import info, warn, error - {% if central_function_client %} -from vantage6.algorithm.tools.decorators import algorithm_client - {% endif %} - {% if central_function_data %} -from vantage6.algorithm.tools.decorators import data - {% endif %} - {% if central_function_client %} +from vantage6.algorithm.decorator.algorithm_client import algorithm_client +from vantage6.algorithm.decorator.action import central from vantage6.algorithm.client import AlgorithmClient - {% endif %} {# Include the function definition from a macro #} {% from 'macros/function_definition.jinja' import func_def %} {{ func_def( - central_function_name, central_function_data, central_function_client, - central_function_number_databases, central_args + central_function_name, "central_compute", 0, central_args, ) }} {# Implementation of the algorithm #} """ Central part of the algorithm """ - {% if central_function_client %} # TODO implement this function. Below is an example of a simple but typical # central function. @@ -42,51 +31,24 @@ from vantage6.algorithm.client import AlgorithmClient organizations = client.organization.list() org_ids = [organization.get("id") for organization in organizations] - # Define input parameters for a subtask - info("Defining input parameters") - input_ = { - {% if has_partial_function %} - "method": "{{partial_function_name}}", + # create a subtask for all organizations in the collaboration. 
+ info("Creating subtask for all organizations in the collaboration") + task = client.task.create( + {% if has_federated_function %} + method="{{federated_function_name}}", {% else %} # TODO you should define a federated method here (which should also be # implemented in this repository) - "method": "some_example_method", + method="some_example_method", {% endif %} - "kwargs": { - {% from 'macros/kwargs_definition.jinja' import kwargs_define %} - {{ kwargs_define(partial_function_name, partial_args, 3) }} - } - } - - # create a subtask for all organizations in the collaboration. - info("Creating subtask for all organizations in the collaboration") - task = client.task.create( - input_=input_, + arguments={ + {% from 'macros/arguments_definition.jinja' import arguments_define %} + {{ arguments_define(federated_function_name, federated_args, 3) }} }, organizations=org_ids, name="My subtask", description="This is a very important subtask" ) - {% if use_vpn %} - # To communicate over the VPN network, get VPN addresses of the subtasks - vpn_addresses = client.vpn.get_child_addresses() - # Note that you can also use client.vpn.get_addresses() to not only get the - # subtasks. - # The variable `vpn_addresses` will be something like: - # { - # [ - # 'ip': '1.2.3.4', - # 'port': 5678, - # 'label': 'label_defined_in_dockerfile', - # 'organization_id': 1, - # 'task_id': 1, (task id of the subtask) - # 'parent_id': 1, (task id of this central task) - # ], ... (one for each algorithm container) - # } - # Use the `ip` and `port` to communicate with the subtasks. - # TODO write your own code here to communicate with the subtasks. - {% endif %} - # wait for node to return results of the subtask. 
info("Waiting for results") results = client.wait_for_results(task_id=task.get("id")) @@ -98,11 +60,6 @@ from vantage6.algorithm.client import AlgorithmClient # return the final results of the algorithm return results - {% else %} - # TODO write your own code here - return {'my_results': 'some_value'} - {% endif %} - -{%- endif %} +{% endif %} # TODO Feel free to add more central functions here. diff --git a/{{algorithm_name}}/extract.py.jinja b/{{algorithm_name}}/extract.py.jinja new file mode 100644 index 0000000..f8cb8df --- /dev/null +++ b/{{algorithm_name}}/extract.py.jinja @@ -0,0 +1,55 @@ +""" +This file contains all data extraction algorithm functions. + +Data extraction functions can be used to extract data from the databases present at a +vantage6 node. The extracted data are stored at the node and can then be used in +subsequent analyses. +""" +{% if has_data_extraction -%} +import os +from typing import Any +import pandas as pd + +from vantage6.algorithm.tools.util import info, warn, error +from vantage6.algorithm.decorator.action import data_extraction + +{# Include the function definition from a macro #} +{% from 'macros/function_definition.jinja' import func_def %} +{{ + func_def( + data_extraction_function_name, "data_extraction", 0, data_extraction_args, + ) +}} +{# Implementation of data extraction algorithm #} + """ This function extracts data from ... to the vantage6 node """ + # TODO this is a simple example to show you how to write a data extraction function. + # Replace it by your own code. 
+ database_uri = connection_details["uri"] + + # Example for using environment variables: + # if the node configuration looks like this: + # databases: + # serviceBased: + # - name: my_postgres_db + # uri: postgresql://postgres:password@localhost:5432/my_postgres_db + # type: other + # env: + # USER: postgres + # PASSWORD: password + # then you can retrieve the environment variables: + user = os.getenv("user") + password = os.getenv("password") + + # Example for using the database URI: + df = pd.read_csv(database_uri) + + # or, if you e.g. have a SQL database, maybe something like this: + # df = pd.read_sql_query( + # "SELECT * FROM my_table", database_uri, user=user, password=password + # ) + + # Return the extracted data, which is then stored at the node. + return df +{% endif %} + +# TODO Feel free to add more data extraction functions here. \ No newline at end of file diff --git a/{{algorithm_name}}/partial.py.jinja b/{{algorithm_name}}/federated.py.jinja similarity index 55% rename from {{algorithm_name}}/partial.py.jinja rename to {{algorithm_name}}/federated.py.jinja index 981593d..a60d6ba 100644 --- a/{{algorithm_name}}/partial.py.jinja +++ b/{{algorithm_name}}/federated.py.jinja @@ -1,26 +1,21 @@ """ -This file contains all partial algorithm functions, that are normally executed +This file contains all federated algorithm functions, that are normally executed on all nodes for which the algorithm is executed. The results in a return statement are sent to the vantage6 server (after -encryption if that is enabled). From there, they are sent to the partial task -or directly to the user (if they requested partial results). +encryption if that is enabled). From there, they are sent to the central task +or directly to the user (if they requested federated results).
""" -{% if has_partial_function -%} - {% if partial_function_data -%} +{% if has_federated_function -%} + {% if federated_function_data -%} import pandas as pd {% endif %} from typing import Any from vantage6.algorithm.tools.util import info, warn, error - {% if partial_function_client %} -from vantage6.algorithm.tools.decorators import algorithm_client - {% endif %} - {% if partial_function_data %} -from vantage6.algorithm.tools.decorators import data - {% endif %} - {% if partial_function_client %} -from vantage6.algorithm.client import AlgorithmClient +from vantage6.algorithm.decorator.action import federated + {% if federated_function_data %} +from vantage6.algorithm.decorator.data import dataframe {% endif %} @@ -28,13 +23,13 @@ from vantage6.algorithm.client import AlgorithmClient {% from 'macros/function_definition.jinja' import func_def %} {{ func_def( - partial_function_name, partial_function_data, partial_function_client, - partial_function_number_databases, partial_args + federated_function_name, "federated_compute", federated_function_number_databases, + federated_args, ) }} -{# Implementation of partial algorithm #} +{# Implementation of federated algorithm #} """ Decentral part of the algorithm """ - {% if partial_function_data %} + {% if federated_function_data %} # TODO this is a simple example to show you how to return something simple. # Replace it by your own code info("Computing mean age by gender") @@ -49,4 +44,4 @@ from vantage6.algorithm.client import AlgorithmClient {% endif %} {%- endif %} -# TODO Feel free to add more partial functions here. +# TODO Feel free to add more federated functions here. diff --git a/{{algorithm_name}}/preprocess.py.jinja b/{{algorithm_name}}/preprocess.py.jinja new file mode 100644 index 0000000..c5a7f13 --- /dev/null +++ b/{{algorithm_name}}/preprocess.py.jinja @@ -0,0 +1,31 @@ +""" +This file contains all data preprocessing algorithm functions. 
+ +Data preprocessing functions can be used to preprocess the data extracted from the +databases. For instance, you can bin data into categories, or remove outliers. +""" +{% if has_data_preprocessing -%} +from typing import Any +import pandas as pd + +from vantage6.algorithm.tools.util import info, warn, error +from vantage6.algorithm.decorator.action import preprocessing + +{# Include the function definition from a macro. Note that preprocessing functions + have one database. #} +{% from 'macros/function_definition.jinja' import func_def %} +{{ + func_def( + data_preprocessing_function_name, "preprocessing", 1, data_preprocessing_args, + ) +}} +{# Implementation of data preprocessing algorithm #} + """ This function preprocesses the data by ...""" + # TODO this is a simple example to show you how to write a data preprocessing function. + # Replace it by your own code. Example adds a new BMI column based on height and + # weight. + df1["BMI"] = df1["Weight"] / (df1["Height"] ** 2) + return df1 +{% endif %} + +# TODO Feel free to add more data preprocessing functions here. \ No newline at end of file