diff --git a/docs/api_reference/file.rst b/docs/api_reference/file.rst index 058c5002..0d27a03f 100644 --- a/docs/api_reference/file.rst +++ b/docs/api_reference/file.rst @@ -10,6 +10,7 @@ nisystemlink.clients.file .. automethod:: api_info .. automethod:: get_files .. automethod:: query_files_linq + .. automethod:: search_files .. automethod:: delete_file .. automethod:: delete_files .. automethod:: upload_file diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 63377030..cb131f02 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -252,7 +252,7 @@ default connection. The default connection depends on your environment. With a :class:`.FileClient` object, you can: -* Get the list of files, download and delete files +* Get the list of files, query and search for files, download and delete files Examples ~~~~~~~~ @@ -266,6 +266,14 @@ Get the metadata of a File using its Id and download it. Upload a File from disk or memory to SystemLink .. literalinclude:: ../examples/file/upload_file.py + :language: python + :linenos: + +Search for files with filtering and pagination + +.. 
literalinclude:: ../examples/file/search_files.py + :language: python + :linenos: Feeds API ------- diff --git a/examples/file/search_files.py b/examples/file/search_files.py new file mode 100644 index 00000000..6ac9e3a8 --- /dev/null +++ b/examples/file/search_files.py @@ -0,0 +1,128 @@ +"""Example demonstrating how to search for files using the File API.""" + +import io +import time + +from nisystemlink.clients.core import HttpConfiguration +from nisystemlink.clients.file import FileClient, models +from nisystemlink.clients.file.models import SearchFilesOrderBy, UpdateMetadataRequest + +# Configure connection to SystemLink server +server_configuration = HttpConfiguration( + server_uri="https://yourserver.yourcompany.com", + api_key="YourAPIKeyGeneratedFromSystemLink", +) + +client = FileClient(configuration=server_configuration) + +# Upload a test file first +print("Uploading test file...") +test_file_content = b"This is a test file for search demonstration." +test_file = io.BytesIO(test_file_content) +test_file.name = "search-example-test-file.txt" + +file_id = client.upload_file(file=test_file) +print(f"Uploaded test file with ID: {file_id}") + +# Wait for the file to be indexed for search +# Note: Files may take a few seconds to appear in search results after upload +time.sleep(5) +print() + +# Example 1: Basic file search with filter - search for the uploaded file +print("Example 1: Search for the uploaded test file") +search_request = models.SearchFilesRequest( + filter='name:("search-example-test-file.txt")', + skip=0, + take=10, +) + +response = client.search_files(search_request) +print( + f"Found {response.total_count.value if response.total_count else 0} file(s) matching the filter" +) +if response.available_files: + for file in response.available_files: + if file.properties: + print( + f"- {file.properties.get('Name')} (ID: {file.id}, Size: {file.size} bytes)" + ) + +# Example 2: Search with wildcard pattern +print("\nExample 2: Search with wildcard 
pattern") +search_request = models.SearchFilesRequest( + filter='name:("search-example*")', + skip=0, + take=20, + order_by=SearchFilesOrderBy.CREATED, + order_by_descending=True, +) + +response = client.search_files(search_request) +print( + f"Found {response.total_count.value if response.total_count else 0} file(s) starting with 'search-example'" +) +if response.available_files: + for file in response.available_files: + if file.properties: + print( + f"- {file.properties.get('Name')} created at {file.created} (Size: {file.size} bytes)" + ) + +# Example 3: Search by size range +print("\nExample 3: Search by size range") +search_request = models.SearchFilesRequest( + filter="size:([1 TO 1000])", + skip=0, + take=10, +) + +response = client.search_files(search_request) +print( + f"Found {response.total_count.value if response.total_count else 0} file(s) between 1 and 1000 bytes" +) +if response.available_files: + for file in response.available_files: + if file.properties: + print(f"- {file.properties.get('Name')} (Size: {file.size} bytes)") + +# Example 4: Search by multiple custom properties +print("\nExample 4: Search by multiple custom properties") +print("Adding custom properties to existing file...") + +# Update the existing file with custom properties +custom_metadata = UpdateMetadataRequest( + replace_existing=False, + properties={ + "TestProperty1": "TestValue1", + "TestProperty2": "TestValue2", + }, +) +client.update_metadata(metadata=custom_metadata, id=file_id) + +# Wait for indexing +time.sleep(5) + +# Search by multiple custom properties using AND operator +search_request = models.SearchFilesRequest( + filter='(properties.TestProperty1:"TestValue1") AND (properties.TestProperty2:"TestValue2")', + skip=0, + take=10, +) + +response = client.search_files(search_request) +print( + f"Found {response.total_count.value if response.total_count else 0} file(s) with " + "TestProperty1=TestValue1 AND TestProperty2=TestValue2" +) +if response.available_files: + for 
file in response.available_files: + if file.properties: + print(f"- {file.properties.get('Name')}") + print(f" TestProperty1: {file.properties.get('TestProperty1')}") + print(f" TestProperty2: {file.properties.get('TestProperty2')}") + +# Clean up: delete the test file +print("\nCleaning up...") +client.delete_file(id=file_id) +print(f"Deleted test file with ID: {file_id}") diff --git a/nisystemlink/clients/file/_file_client.py b/nisystemlink/clients/file/_file_client.py index 052091fd..e88a3401 100644 --- a/nisystemlink/clients/file/_file_client.py +++ b/nisystemlink/clients/file/_file_client.py @@ -163,6 +163,29 @@ def query_files_linq( """ ... + @post("service-groups/Default/search-files") + def search_files( + self, request: models.SearchFilesRequest + ) -> models.SearchFilesResponse: + """Search for files based on filter criteria. + + Note: + This endpoint requires Elasticsearch to be available in the SystemLink cluster. + If Elasticsearch is not configured, this method will fail with an ApiException. + For deployments without Elasticsearch, use `query_files_linq()` instead. + + Args: + request: The search request containing filter, pagination, and sorting parameters. + + Returns: + SearchFilesResponse: Response containing matching files and total count. + + Raises: + ApiException: if unable to communicate with the File Service or if Elasticsearch + is not available in the cluster. + """ + ... 
+ @params({"force": True}) # type: ignore @delete("service-groups/Default/files/{id}", args=[Path]) def delete_file(self, id: str) -> None: diff --git a/nisystemlink/clients/file/models/__init__.py b/nisystemlink/clients/file/models/__init__.py index 4d1dede8..9624128b 100644 --- a/nisystemlink/clients/file/models/__init__.py +++ b/nisystemlink/clients/file/models/__init__.py @@ -1,9 +1,17 @@ from ._file_metadata import FileMetadata -from ._file_query_order_by import FileQueryOrderBy, FileLinqQueryOrderBy +from ._file_query_order_by import ( + FileQueryOrderBy, + FileLinqQueryOrderBy, + SearchFilesOrderBy, +) from ._file_query_response import FileQueryResponse from ._link import Link from ._operations import V1Operations from ._update_metadata import UpdateMetadataRequest from ._file_linq_query import FileLinqQueryRequest, FileLinqQueryResponse +from ._search_files_request import SearchFilesRequest +from ._search_files_response import SearchFilesResponse +from ._base_file_response import BaseFileResponse, TotalCount, TotalCountRelation +from ._base_file_request import BaseFileRequest # flake8: noqa diff --git a/nisystemlink/clients/file/models/_base_file_request.py b/nisystemlink/clients/file/models/_base_file_request.py new file mode 100644 index 00000000..955bfcc8 --- /dev/null +++ b/nisystemlink/clients/file/models/_base_file_request.py @@ -0,0 +1,25 @@ +from nisystemlink.clients.core._uplink._json_model import JsonModel + + +class BaseFileRequest(JsonModel): + """Base class for file request models containing common query parameters.""" + + filter: str | None = None + """ + Filter string for searching/querying files. + """ + + skip: int | None = None + """ + How many files to skip in the result when paging. + """ + + take: int | None = None + """ + How many files to return in the result. + """ + + order_by_descending: bool | None = False + """ + Whether to sort in descending order. 
+ """ diff --git a/nisystemlink/clients/file/models/_base_file_response.py b/nisystemlink/clients/file/models/_base_file_response.py new file mode 100644 index 00000000..9bf23c26 --- /dev/null +++ b/nisystemlink/clients/file/models/_base_file_response.py @@ -0,0 +1,39 @@ +from enum import Enum +from typing import List + +from nisystemlink.clients.core._uplink._json_model import JsonModel + +from ._file_metadata import LinqQueryFileMetadata + + +class TotalCountRelation(str, Enum): + """Describes the relation the returned total count value has with respect to the total number of files.""" + + EQUALS = "eq" + """Equals, meaning that the returned items are all the items that matched the filter.""" + + GREATER_THAN_OR_EQUAL = "gte" + """Greater or equal, meaning that the take limit has been hit, but there are further items that match the query.""" + + +class TotalCount(JsonModel): + """The total number of files that match the query regardless of skip and take values""" + + relation: TotalCountRelation + """ + Describes the relation the returned total count value has with respect to the total number of + files matched by the query. 
+ """ + + value: int + """The number of files counted for the query; `relation` states whether this is the exact number of matches or a lower bound""" + + +class BaseFileResponse(JsonModel): + """Base class for file response models containing a list of files and total count.""" + + available_files: List[LinqQueryFileMetadata] + """The list of files returned by the query""" + + total_count: TotalCount + """The total number of files that match the query regardless of skip and take values""" diff --git a/nisystemlink/clients/file/models/_file_linq_query.py b/nisystemlink/clients/file/models/_file_linq_query.py index 8874fa6b..640b31ad 100644 --- a/nisystemlink/clients/file/models/_file_linq_query.py +++ b/nisystemlink/clients/file/models/_file_linq_query.py @@ -1,53 +1,18 @@ -from typing import List - -from nisystemlink.clients.core._uplink._json_model import JsonModel -from nisystemlink.clients.file.models._file_metadata import LinqQueryFileMetadata +from nisystemlink.clients.file.models._base_file_request import BaseFileRequest +from nisystemlink.clients.file.models._base_file_response import BaseFileResponse from nisystemlink.clients.file.models._file_query_order_by import FileLinqQueryOrderBy -class FileLinqQueryRequest(JsonModel): - filter: str | None = None - """ - The filter criteria for files. Consists of a string of queries composed using AND/OR operators. - String values and date strings need to be enclosed in double quotes. Parentheses can be used - around filters to better define the order of operations. 
- - Example Filter syntax: '[property name][operator][operand] and [property name][operator][operand]' - """ +class FileLinqQueryRequest(BaseFileRequest): + """Request model for LINQ query operations.""" order_by: FileLinqQueryOrderBy | None = None - """The property by which to order the files in the response.""" - - order_by_descending: bool | None = False - """If true, the files are ordered in descending order based on the property specified in `order_by`.""" - - take: int | None = None - """The maximum number of files to return in the response. Default value is 1000""" - - -class TotalCount(JsonModel): - """The total number of files that match the query regardless of skip and take values""" - - relation: str """ - Describes the relation the returned total count value has with respect to the total number of - files matched by the query. - - Possible values: - - - "eq" -> equals, meaning that the returned items are all the items that matched the filter. - - - "gte" -> greater or equal, meaning that there the take limit has been hit, but there are further - items that match the query in the database. + The property by which to order the files in the response. 
""" - value: int - """Describes the number of files that were returned as a result of the query in the database""" - -class FileLinqQueryResponse(JsonModel): - available_files: List[LinqQueryFileMetadata] - """The list of files returned by the query""" +class FileLinqQueryResponse(BaseFileResponse): + """Response model for LINQ query operations.""" - total_count: TotalCount - """The total number of files that match the query regardless of skip and take values""" + pass diff --git a/nisystemlink/clients/file/models/_file_query_order_by.py b/nisystemlink/clients/file/models/_file_query_order_by.py index 4ce9cb3c..6cd889dd 100644 --- a/nisystemlink/clients/file/models/_file_query_order_by.py +++ b/nisystemlink/clients/file/models/_file_query_order_by.py @@ -10,9 +10,21 @@ class FileQueryOrderBy(Enum): LAST_UPDATED_TIMESTAMP = "lastUpdatedTimestamp" -class FileLinqQueryOrderBy(Enum): +class FileLinqQueryOrderBy(str, Enum): """Order Files LINQ Query by Metadata for POST /query-files-linq endpoint.""" + NAME = "name" + CREATED = "created" + UPDATED = "updated" + EXTENSION = "extension" + SIZE = "size" + WORKSPACE = "workspace" + + +class SearchFilesOrderBy(str, Enum): + """Order Files Search by Metadata for POST /search-files endpoint.""" + + NAME = "name" CREATED = "created" UPDATED = "updated" EXTENSION = "extension" diff --git a/nisystemlink/clients/file/models/_search_files_request.py b/nisystemlink/clients/file/models/_search_files_request.py new file mode 100644 index 00000000..c7371600 --- /dev/null +++ b/nisystemlink/clients/file/models/_search_files_request.py @@ -0,0 +1,11 @@ +from nisystemlink.clients.file.models._base_file_request import BaseFileRequest +from nisystemlink.clients.file.models._file_query_order_by import SearchFilesOrderBy + + +class SearchFilesRequest(BaseFileRequest): + """Request model for searching files.""" + + order_by: SearchFilesOrderBy | None = None + """ + The property by which to order the files in the response. 
+ """ diff --git a/nisystemlink/clients/file/models/_search_files_response.py b/nisystemlink/clients/file/models/_search_files_response.py new file mode 100644 index 00000000..33ea9c32 --- /dev/null +++ b/nisystemlink/clients/file/models/_search_files_response.py @@ -0,0 +1,7 @@ +from ._base_file_response import BaseFileResponse + + +class SearchFilesResponse(BaseFileResponse): + """Response model for search files operation.""" + + pass diff --git a/poetry.lock b/poetry.lock index 4d51dd04..b0b6309b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -45,6 +45,18 @@ typing_extensions = {version = ">=4.5", markers = "python_version < \"3.13\""} [package.extras] trio = ["trio (>=0.31.0)"] +[[package]] +name = "backoff" +version = "2.2.1" +description = "Function decoration for backoff and retry" +optional = false +python-versions = ">=3.7,<4.0" +groups = ["dev"] +files = [ + {file = "backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8"}, + {file = "backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba"}, +] + [[package]] name = "black" version = "24.10.0" @@ -1553,4 +1565,4 @@ pyarrow = ["pyarrow"] [metadata] lock-version = "2.1" python-versions = "^3.10" -content-hash = "bf33275736a4381c4bb2bca4394b83ddcc4bfd3aa37edb927cdec587c0c46cc5" +content-hash = "d22edb04521c040d6362689f7118d092f97f1d3d2e21232b92fdad71b35f0f63" diff --git a/pyproject.toml b/pyproject.toml index 2be62858..732bef7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ pyarrow = { version = "^21.0.0", optional = true } pyarrow = ["pyarrow"] [tool.poetry.group.dev.dependencies] +backoff = "^2.2.1" black = ">=22.10,<25.0" flake8 = "^7.3.0" flake8-import-order = "^0.19.2" diff --git a/tests/integration/file/test_file_client.py b/tests/integration/file/test_file_client.py index 4ae5d7e5..84aab477 100644 --- a/tests/integration/file/test_file_client.py +++ 
b/tests/integration/file/test_file_client.py @@ -6,12 +6,16 @@ from random import choices, randint from typing import BinaryIO +import backoff # type: ignore import pytest # type: ignore from nisystemlink.clients.core import ApiException from nisystemlink.clients.file import FileClient from nisystemlink.clients.file.models import ( FileLinqQueryOrderBy, FileLinqQueryRequest, + SearchFilesOrderBy, + SearchFilesRequest, + TotalCountRelation, UpdateMetadataRequest, ) from nisystemlink.clients.file.utilities import rename_file @@ -227,7 +231,7 @@ def test__query_files_linq__filter_by_name_succeeds( assert response.available_files is not None assert response.total_count is not None assert response.total_count.value == 1 - assert response.total_count.relation == "eq" + assert response.total_count.relation == TotalCountRelation.EQUALS assert len(response.available_files) == 1 assert response.available_files[0].id == file_id assert response.available_files[0].created is not None @@ -262,4 +266,172 @@ def test__query_files_linq__filter_returns_no_results(self, client: FileClient): assert len(response.available_files) == 0 assert response.total_count is not None assert response.total_count.value == 0 + assert response.total_count.relation == TotalCountRelation.EQUALS + + def test__query_files_linq__total_count_relation_accepts_string( + self, client: FileClient, test_file, random_filename_extension + ): + """Test backward compatibility: TotalCountRelation should accept string values. + + TotalCountRelation was previously a plain str type. This test ensures that string + values like 'eq' and 'gte' are still accepted for backward compatibility. 
+ """ + test_file(file_name=random_filename_extension) + + query_request = FileLinqQueryRequest( + filter=f'name == "{random_filename_extension}"', + ) + response = client.query_files_linq(query=query_request) + + assert response.total_count is not None + # Test that the relation can be compared with string values assert response.total_count.relation == "eq" + assert response.total_count.relation in ["eq", "gte"] + # Also verify enum comparison still works + assert response.total_count.relation == TotalCountRelation.EQUALS # type: ignore[comparison-overlap] + + def test__query_files_linq__skip_and_take_pagination( + self, client: FileClient, test_file + ): + """Test query_files_linq with skip and take for pagination.""" + # Upload 5 files to test pagination + NUM_FILES = 5 + file_ids = [] + file_prefix = f"{PREFIX}pagination_test_" + + for i in range(NUM_FILES): + file_name = f"{file_prefix}{i:02d}.bin" + file_id = test_file(file_name=file_name) + file_ids.append(file_id) + + # Query with skip=1, take=3 + query_request = FileLinqQueryRequest( + filter=f'name.StartsWith("{file_prefix}")', + skip=1, + take=3, + order_by=FileLinqQueryOrderBy.CREATED, + order_by_descending=False, + ) + response = client.query_files_linq(query=query_request) + + assert response.available_files is not None + assert response.total_count is not None + assert response.total_count.value == 3 # skip=1, take=3 + assert len(response.available_files) == 3 + + # Verify that every returned file is one of the files uploaded by this test + returned_ids = [f.id for f in response.available_files] + for file_id in returned_ids: + assert file_id in file_ids + + # Verify skip=1 excluded the first file in creation order + returned_names = [ + f.properties.get("Name", "") + for f in response.available_files + if f.properties + ] + expected_skipped_file = f"{file_prefix}00.bin" + assert expected_skipped_file not in returned_names + + def test__search_files__succeeds( + self, client: FileClient, test_file, random_filename_extension: str + ): + 
"""Test search_files with filtering, pagination, and ordering. + + Note: search_files() is not guaranteed to return newly created files immediately + due to indexing delay (a few seconds). Retry logic with backoff is used to handle + this eventual consistency behavior. + """ + # Upload 5 files to test various scenarios + NUM_FILES = 5 + file_ids = [] + file_prefix = f"{PREFIX}search_test_" + + for i in range(NUM_FILES): + file_name = f"{file_prefix}_{i}.bin" + file_id = test_file(file_name=file_name) + file_ids.append(file_id) + + # Search with filter (by name pattern), pagination, and ordering + search_request = SearchFilesRequest( + filter=f'(name: ("{file_prefix}*"))', + skip=1, + take=3, + order_by=SearchFilesOrderBy.NAME, + order_by_descending=True, + ) + + # Search with retry logic + @backoff.on_exception( + backoff.expo, + (AssertionError, ApiException), + max_tries=5, + max_time=10, + ) + def search_and_verify() -> None: + response = client.search_files(request=search_request) + + assert response.available_files is not None + assert response.total_count is not None + assert response.total_count.value == 3 + assert response.total_count.relation is not None + assert len(response.available_files) == 3 # skip=1, take=3 + + # Verify all fields in response + for file_metadata in response.available_files: + assert file_metadata.id in file_ids + assert file_metadata.properties is not None + assert file_metadata.properties.get("Name") is not None + assert file_metadata.properties.get("Name", "").startswith(file_prefix) + assert file_metadata.created is not None + assert isinstance(file_metadata.created, datetime) + assert file_metadata.updated is not None + assert isinstance(file_metadata.updated, datetime) + assert file_metadata.workspace is not None + assert file_metadata.size is not None + + # Verify descending order by name + returned_names = [ + f.properties.get("Name", "") + for f in response.available_files + if f.properties + ] + assert returned_names == 
sorted(returned_names, reverse=True) + + # Verify skip=1 excluded the first file in descending order + expected_skipped_file = f"{file_prefix}_4.bin" + assert expected_skipped_file not in returned_names + + search_and_verify() + + def test__search_files__no_filter_succeeds(self, client: FileClient, test_file): + test_file() + + search_request = SearchFilesRequest(skip=0, take=10) + response = client.search_files(request=search_request) + + assert response.available_files is not None + assert response.total_count is not None + assert response.total_count.value >= 1 + assert len(response.available_files) >= 1 + + def test__search_files__invalid_filter_raises(self, client: FileClient): + search_request = SearchFilesRequest(filter="invalid filter syntax") + + with pytest.raises(ApiException): + client.search_files(request=search_request) + + def test__search_files__filter_returns_no_results(self, client: FileClient): + unique_nonexistent_name = ( + f"{PREFIX}nonexistent_search_file_{randint(100000, 999999)}.random_ext" + ) + + search_request = SearchFilesRequest( + filter=f'(name: ("{unique_nonexistent_name}"))', skip=0, take=10 + ) + response = client.search_files(request=search_request) + + assert response.available_files is not None + assert len(response.available_files) == 0 + assert response.total_count is not None + assert response.total_count.value == 0