Skip to content

Commit ba76bee

Browse files
committed
Release v0.10.0
1 parent 155fa99 commit ba76bee

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

87 files changed

+48769
-3025
lines changed

openprotein/__init__.py

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
isort:skip_file
77
"""
88

9-
from typing import TYPE_CHECKING
10-
import warnings
9+
import os
10+
from pathlib import Path
1111

1212
from openprotein._version import __version__
1313
from openprotein.data import DataAPI
@@ -161,4 +161,48 @@ def models(self) -> "ModelsAPI":
161161
return self._models
162162

163163

164-
connect = OpenProtein
164+
def connect(
    username: str | None = None,
    password: str | None = None,
    backend: str | None = None,
    timeout: int = 180,
) -> OpenProtein:
    """
    Connect and create a :py:class:`OpenProtein` session.

    Every setting is resolved in this order: the explicit argument,
    then the environment variable, then the configuration file at
    ``~/.openprotein/config.toml``.

    Parameters
    ----------
    username : str, optional
        The username of the user. If not provided, taken from the
        environment variable ``OPENPROTEIN_USERNAME`` or a
        configuration file at ``~/.openprotein/config.toml``.
    password : str, optional
        The password of the user. If not provided, taken from the
        environment variable ``OPENPROTEIN_PASSWORD`` or a
        configuration file at ``~/.openprotein/config.toml``.
    backend : str, optional
        The API backend URL. If not provided, taken from the
        environment variable ``OPENPROTEIN_API_BACKEND`` or the
        ``backend`` key of the configuration file, and finally
        defaults to ``https://api.openprotein.ai/api/``.
    timeout : int, optional
        Passed through unchanged to :py:class:`OpenProtein`.
        Defaults to 180.

    Returns
    -------
    OpenProtein
        The session built from the resolved settings.

    Examples
    --------
    >>> session = openprotein.connect("username", "password")
    """

    credentials_file_path = Path.home() / ".openprotein/config.toml"
    if credentials_file_path.exists():
        # Prefer the standard-library parser (Python 3.11+); fall back to
        # the ``tomli`` backport, which exposes the same ``load`` API.
        try:
            import tomllib as toml_reader
        except ModuleNotFoundError:
            import tomli as toml_reader

        with open(credentials_file_path, "rb") as f:
            file_config = toml_reader.load(f)
    else:
        file_config = {}

    def _from_env_or_file(env_name: str, key: str) -> str | None:
        """Return the env value, else the stringified file value, else None."""
        env_value = os.getenv(env_name)
        if env_value is not None:
            return env_value
        file_value = file_config.get(key)
        # Only stringify real values: ``str(None)`` would yield the truthy
        # literal "None" and hide the fact that no credential was supplied.
        return None if file_value is None else str(file_value)

    resolved_username = _from_env_or_file("OPENPROTEIN_USERNAME", "username")
    resolved_password = _from_env_or_file("OPENPROTEIN_PASSWORD", "password")
    resolved_backend = os.getenv(
        "OPENPROTEIN_API_BACKEND",
        file_config.get("backend", "https://api.openprotein.ai/api/"),
    )

    # Missing credentials collapse to "" so a falsy check downstream can
    # report them. NOTE(review): presumably OpenProtein forwards these to a
    # session constructor that raises on empty credentials - confirm.
    return OpenProtein(
        username=username or resolved_username or "",
        password=password or resolved_password or "",
        backend=backend or resolved_backend,
        timeout=timeout,
    )

openprotein/align/align.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from openprotein.base import APISession
88
from openprotein.errors import DeprecationError
99
from openprotein.jobs import Job
10-
from openprotein.protein import Protein
10+
from openprotein.molecules import Protein
1111

1212
from . import api
1313
from .msa import MSAFuture
@@ -285,7 +285,7 @@ def upload_msa(self, msa_file: BinaryIO) -> MSAFuture:
285285
session=self.session, job=api.msa_post(self.session, msa_file=msa_file)
286286
)
287287

288-
def create_msa(self, seed: bytes) -> MSAFuture:
288+
def create_msa(self, seed: bytes | str) -> MSAFuture:
289289
"""
290290
Construct an MSA via homology search with the seed sequence.
291291

openprotein/base.py

Lines changed: 23 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
1-
import os
21
import sys
32
import warnings
4-
from collections.abc import Container, Mapping
5-
from typing import Union
3+
from typing import Mapping, Sequence
64
from urllib.parse import urljoin
75

86
import requests
@@ -13,10 +11,6 @@
1311
import openprotein.config as config
1412
from openprotein.errors import APIError, AuthError, HTTPError
1513

16-
USERNAME = os.getenv("OPENPROTEIN_USERNAME")
17-
PASSWORD = os.getenv("OPENPROTEIN_PASSWORD")
18-
BACKEND = os.getenv("OPENPROTEIN_API_BACKEND", "https://api.openprotein.ai/api/")
19-
2014

2115
class BearerAuth(requests.auth.AuthBase):
2216
"""
@@ -34,29 +28,18 @@ def __call__(self, r):
3428
class APISession(requests.Session):
3529
"""
3630
A class to handle API sessions. This class provides a connection session to the OpenProtein API.
37-
38-
Parameters
39-
----------
40-
username : str
41-
The username of the user.
42-
password : str
43-
The password of the user.
44-
45-
Examples
46-
--------
47-
>>> session = APISession("username", "password")
4831
"""
4932

5033
def __init__(
5134
self,
52-
username: str | None = USERNAME,
53-
password: str | None = PASSWORD,
54-
backend: str = BACKEND,
35+
username: str,
36+
password: str,
37+
backend: str,
5538
timeout: int = 180,
5639
):
5740
if not username or not password:
5841
raise AuthError(
59-
"Expected username and password. Or use environment variables `OPENPROTEIN_USERNAME` and `OPENPROTEIN_PASSWORD`"
42+
"Expected username and password. Or use environment variables `OPENPROTEIN_USERNAME` and `OPENPROTEIN_PASSWORD`. Or provide these variables (`username` and `password`) in ~/.openprotein/config.toml."
6043
)
6144
super().__init__()
6245
self.backend = backend
@@ -132,7 +115,7 @@ def request(self, method: str, url: str, *args, **kwargs):
132115
response = super().request(method, full_url, *args, **kwargs)
133116

134117
if (js := kwargs.get("json")) and js is not None:
135-
if total_size(js) > 1e6:
118+
if _total_size(js) > 1e6:
136119
warnings.warn(
137120
"The requested payload is >1MB. There might be some delays or issues in processing. If the request fails, please try again with smaller sizes."
138121
)
@@ -153,7 +136,7 @@ def request(self, method: str, url: str, *args, **kwargs):
153136
return response
154137

155138

156-
def total_size(o, seen=None):
139+
def _total_size(o: Sequence | Mapping, seen=None):
157140
"""Recursively finds size of objects including contents."""
158141
if seen is None:
159142
seen = set()
@@ -163,19 +146,27 @@ def total_size(o, seen=None):
163146
seen.add(obj_id)
164147
size = sys.getsizeof(o)
165148
if isinstance(o, dict):
166-
size += sum((total_size(k, seen) + total_size(v, seen)) for k, v in o.items())
149+
size += sum((_total_size(k, seen) + _total_size(v, seen)) for k, v in o.items())
167150
elif isinstance(o, (list, tuple, set, frozenset)):
168-
size += sum(total_size(i, seen) for i in o)
151+
size += sum(_total_size(i, seen) for i in o)
169152
return size
170153

171154

172-
class RestEndpoint:
173-
pass
174-
175-
176155
class TimeoutError(requests.exceptions.HTTPError):
177-
pass
156+
"""
157+
An Exception raised due to timeout, possibly from overly large
158+
requests.
159+
"""
178160

179161

180162
class CloudFrontError(requests.exceptions.HTTPError):
181-
pass
163+
"""
164+
An Exception raised due to CloudFront.
165+
166+
This is usually due to the strict timeout from CloudFront.
167+
AWS CloudFront limits responses to return within 2 minutes.
168+
This can be a bit prohibitive for our system that tends to
169+
deal with large data. It is usually safe to just ignore/retry upon
170+
hitting this error. Our system will scale up and still handle
171+
the job.
172+
"""

openprotein/chains.py

Lines changed: 9 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1,88 +1,12 @@
1-
"""Additional chains that can be used with OpenProtein."""
1+
import warnings
22

3-
from dataclasses import dataclass
3+
warnings.warn(
4+
"openprotein.chains is deprecated and will be removed in v0.11. "
5+
"Use `from openprotein.molecules import DNA, RNA, Ligand` instead.",
6+
FutureWarning,
7+
stacklevel=2,
8+
)
49

10+
from openprotein.molecules import DNA, RNA, Ligand
511

6-
@dataclass
7-
class DNA:
8-
"""
9-
Represents a DNA sequence.
10-
11-
Attributes:
12-
sequence (str): The nucleotide sequence of the DNA.
13-
"""
14-
15-
sequence: str
16-
chain_id: str | list[str] | None = None
17-
cyclic: bool = False
18-
19-
def __init__(
20-
self,
21-
sequence: str,
22-
chain_id: str | list[str] | None = None,
23-
cyclic: bool = False,
24-
):
25-
# validate the sequence matches DNA
26-
if not all(nt in set("ACGT") for nt in sequence.upper()):
27-
raise ValueError("Sequence contains invalid DNA nucleotides.")
28-
self.sequence = sequence
29-
self.chain_id = chain_id
30-
self.cyclic = cyclic
31-
32-
33-
@dataclass
34-
class RNA:
35-
"""
36-
Represents an RNA sequence.
37-
38-
Attributes:
39-
sequence (str): The nucleotide sequence of the RNA.
40-
"""
41-
42-
sequence: str
43-
chain_id: str | list[str] | None = None
44-
cyclic: bool = False
45-
46-
def __init__(
47-
self,
48-
sequence: str,
49-
chain_id: str | list[str] | None = None,
50-
cyclic: bool = False,
51-
):
52-
# validate the sequence matches RNA
53-
if not all(nt in set("ACGU") for nt in sequence.upper()):
54-
raise ValueError("Sequence contains invalid RNA nucleotides.")
55-
self.sequence = sequence
56-
self.chain_id = chain_id
57-
self.cyclic = cyclic
58-
59-
60-
@dataclass
61-
class Ligand:
62-
"""
63-
Represents a ligand with optional Chemical Component Dictionary (CCD) identifier and SMILES string.
64-
65-
Requires either a CCD identifier or SMILES string.
66-
67-
Attributes:
68-
ccd (str | None): The CCD identifier for the ligand.
69-
smiles (str | None): The SMILES representation of the ligand.
70-
"""
71-
72-
chain_id: str | list[str] | None = None
73-
ccd: str | None = None
74-
smiles: str | None = None
75-
76-
def __init__(
77-
self,
78-
*,
79-
chain_id: str | list[str] | None = None,
80-
ccd: str | None = None,
81-
smiles: str | None = None,
82-
):
83-
self.chain_id = chain_id
84-
if (ccd is None and smiles is None) or (ccd is not None and smiles is not None):
85-
raise ValueError("Exactly one of 'ccd' or 'smiles' must be provided.")
86-
# TODO add validation
87-
self.ccd = ccd
88-
self.smiles = smiles
12+
__all__ = ["DNA", "RNA", "Ligand"]

openprotein/common/model_metadata.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ class ModelMetadata(BaseModel):
2626
id: str = Field(..., alias="model_id")
2727
description: ModelDescription
2828
max_sequence_length: int | None = None
29-
dimension: int
30-
output_types: list[str]
31-
input_tokens: list[str] | None
29+
dimension: int | None = None
30+
output_types: list[str] | None = None
31+
input_tokens: list[str] | None = None
3232
output_tokens: list[str] | None = None
33-
token_descriptions: list[list[TokenInfo]]
33+
token_descriptions: list[list[TokenInfo]] | None = None

openprotein/common/reduction.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,14 @@
55

66

77
class ReductionType(str, Enum):
8+
"""
9+
ReductionType is an enumeration of the possible reduction types available.
10+
11+
Attributes:
12+
MEAN : Mean reduction takes the mean of the embeddings across the sequence length dimension.
13+
SUM : Sum reduction takes the sum of the embeddings across the sequence length dimension.
14+
"""
15+
816
MEAN = "MEAN"
917
SUM = "SUM"
1018

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
# fmt: off
2+
residue_atoms = {
3+
"ALA": ["C", "CA", "CB", "N", "O"],
4+
"ARG": ["C", "CA", "CB", "CG", "CD", "CZ", "N", "NE", "O", "NH1", "NH2"],
5+
"ASP": ["C", "CA", "CB", "CG", "N", "O", "OD1", "OD2"],
6+
"ASN": ["C", "CA", "CB", "CG", "N", "ND2", "O", "OD1"],
7+
"CYS": ["C", "CA", "CB", "N", "O", "SG"],
8+
"GLU": ["C", "CA", "CB", "CG", "CD", "N", "O", "OE1", "OE2"],
9+
"GLN": ["C", "CA", "CB", "CG", "CD", "N", "NE2", "O", "OE1"],
10+
"GLY": ["C", "CA", "N", "O"],
11+
"HIS": ["C", "CA", "CB", "CG", "CD2", "CE1", "N", "ND1", "NE2", "O"],
12+
"ILE": ["C", "CA", "CB", "CG1", "CG2", "CD1", "N", "O"],
13+
"LEU": ["C", "CA", "CB", "CG", "CD1", "CD2", "N", "O"],
14+
"LYS": ["C", "CA", "CB", "CG", "CD", "CE", "N", "NZ", "O"],
15+
"MET": ["C", "CA", "CB", "CG", "CE", "N", "O", "SD"],
16+
"PHE": ["C", "CA", "CB", "CG", "CD1", "CD2", "CE1", "CE2", "CZ", "N", "O"],
17+
"PRO": ["C", "CA", "CB", "CG", "CD", "N", "O"],
18+
"SER": ["C", "CA", "CB", "N", "O", "OG"],
19+
"THR": ["C", "CA", "CB", "CG2", "N", "O", "OG1"],
20+
"TRP": [
21+
"C",
22+
"CA",
23+
"CB",
24+
"CG",
25+
"CD1",
26+
"CD2",
27+
"CE2",
28+
"CE3",
29+
"CZ2",
30+
"CZ3",
31+
"CH2",
32+
"N",
33+
"NE1",
34+
"O",
35+
],
36+
"TYR": [
37+
"C",
38+
"CA",
39+
"CB",
40+
"CG",
41+
"CD1",
42+
"CD2",
43+
"CE1",
44+
"CE2",
45+
"CZ",
46+
"N",
47+
"O",
48+
"OH",
49+
],
50+
"VAL": ["C", "CA", "CB", "CG1", "CG2", "N", "O"],
51+
}
52+
53+
restype_1to3 = {
54+
"A": "ALA",
55+
"R": "ARG",
56+
"N": "ASN",
57+
"D": "ASP",
58+
"C": "CYS",
59+
"Q": "GLN",
60+
"E": "GLU",
61+
"G": "GLY",
62+
"H": "HIS",
63+
"I": "ILE",
64+
"L": "LEU",
65+
"K": "LYS",
66+
"M": "MET",
67+
"F": "PHE",
68+
"P": "PRO",
69+
"S": "SER",
70+
"T": "THR",
71+
"W": "TRP",
72+
"Y": "TYR",
73+
"V": "VAL",
74+
}
75+
# fmt: on

0 commit comments

Comments
 (0)