From f98d139784377477619cb512033452682fd01d3a Mon Sep 17 00:00:00 2001 From: Xavier Medrano Date: Thu, 10 Jul 2025 16:40:15 -0400 Subject: [PATCH 1/3] include stub file for package methods --- usaddress/__init__.pyi | 109 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 usaddress/__init__.pyi diff --git a/usaddress/__init__.pyi b/usaddress/__init__.pyi new file mode 100644 index 0000000..2271935 --- /dev/null +++ b/usaddress/__init__.pyi @@ -0,0 +1,109 @@ +import typing + +def parse(address_string: str) -> list[tuple[str, str]]: + """ + Split an address string into components, and label each component. + + Args: + address_string (str): The address to parse + + Returns: + list[ tuple[str, str] ]: The parsed address + """ + ... + + +def tag(address_string: str, tag_mapping=None) -> tuple[dict[str, str], str]: + """ + Parse and merge consecutive components & strip commas. + Also return an address type (`Street Address`, `Intersection`, `PO Box`, or `Ambiguous`). + + Because this method returns an OrderedDict with labels as keys, it will throw a + `RepeatedLabelError` error when multiple areas of an address have the same label, + and thus can't be concatenated. When `RepeatedLabelError` is raised, it is likely + that either (1) the input string is not a valid address, or (2) some tokens were + labeled incorrectly. + + It is also possible to pass a mapping dict to this method to remap the labels to your own format. + + Args: + address_string (str): The address to parse + tag_mapping (dict): Optional - The tags you'd like to remap, formatted as: `{'OldTag': 'NewTag'}` + + Returns: + tuple[ dict[str, str], str ]: The tagged address + + """ + ... + + +def tokenize(address_string: str) -> list[str]: + """ + Split each component of an address into a list of unlabeled tokens. + + Args: + address_string (str): The address to tokenize + + Returns: + list[str]: The tokenized address + """ + ... 
+ + +Feature = dict[str, typing.Union[str, bool, "Feature"]] + + +def tokenFeatures(token: str) -> Feature: + """ + Return a `Feature` dict with attributes that describe a token. + + Args: + token (str): The token to analyze + + Returns: + Feature: A type of dict with attributes that describe the token + (`abbrev`, `digits`, `word`, `trailing.zeros`, `length`, `endsinpunc`, `directional`, `street_name`, `has.vowels`) + """ + ... + + +def tokens2features(address: list[str]) -> list[Feature]: + """ + Turn every token into a `Feature` dict, and return a list of each token as a `Feature`. + Each attribute in a `Feature` describes the corresponding token. + + Args: + address (list[str]): The address as a list of tokens. + + Returns: + list[Feature]: A list of all tokens with various details about each one. + """ + ... + + +def digits(token: str) -> typing.Literal["all_digits", "some_digits", "no_digits"]: + """ + Identify whether the token string is all digits, has some digits, or has no digits + + Args: + token (str): The token to parse + + Returns: + str: A label denoting the presence of digits in the token (`all_digits`, `some_digits`, or `no_digits`) + """ + ... + + +# for some reason mypy can't believe that this will return a str as of 10/2024 +def trailingZeros(token: str) -> str: + """ + Return any trailing zeros found at the end of a token. + If none are found, then return an empty string. + + Args: + token (str): The token to search for zeros. + + Returns: + str: The trailing zeros found, if any. Otherwise, an empty string. + """ + ... 
From 502ab2b836026e832b801e478838e2843b8284af Mon Sep 17 00:00:00 2001 From: Xavier Medrano Date: Thu, 10 Jul 2025 17:11:15 -0400 Subject: [PATCH 2/3] bump version number --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 63a0a41..e057955 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "usaddress" -version = "0.5.15" +version = "0.5.16" description = "Parse US addresses using conditional random fields" readme = "README.md" license = {text = "MIT License", url = "http://www.opensource.org/licenses/mit-license.php"} From da714796367c086fda5c9440390b1907bbe21621 Mon Sep 17 00:00:00 2001 From: Xavier Medrano Date: Thu, 10 Jul 2025 17:15:00 -0400 Subject: [PATCH 3/3] lint --- usaddress/__init__.pyi | 7 ------- 1 file changed, 7 deletions(-) diff --git a/usaddress/__init__.pyi b/usaddress/__init__.pyi index 2271935..80e3a47 100644 --- a/usaddress/__init__.pyi +++ b/usaddress/__init__.pyi @@ -12,7 +12,6 @@ def parse(address_string: str) -> list[tuple[str, str]]: """ ... - def tag(address_string: str, tag_mapping=None) -> tuple[dict[str, str], str]: """ Parse and merge consecutive components & strip commas. @@ -36,7 +35,6 @@ def tag(address_string: str, tag_mapping=None) -> tuple[dict[str, str], str]: """ ... - def tokenize(address_string: str) -> list[str]: """ Split each component of an address into a list of unlabeled tokens. @@ -49,10 +47,8 @@ def tokenize(address_string: str) -> list[str]: """ ... - Feature = dict[str, typing.Union[str, bool, "Feature"]] - def tokenFeatures(token: str) -> Feature: """ Return a `Feature` dict with attributes that describe a token. @@ -66,7 +62,6 @@ def tokenFeatures(token: str) -> Feature: """ ... - def tokens2features(address: list[str]) -> list[Feature]: """ Turn every token into a `Feature` dict, and return a list of each token as a `Feature`. @@ -80,7 +75,6 @@ def tokens2features(address: list[str]) -> list[Feature]: """ ... 
- def digits(token: str) -> typing.Literal["all_digits", "some_digits", "no_digits"]: """ Identify whether the token string is all digits, has some digits, or has no digits @@ -93,7 +87,6 @@ def digits(token: str) -> typing.Literal["all_digits", "some_digits", "no_digits """ ... - # for some reason mypy can't believe that this will return a str as of 10/2024 def trailingZeros(token: str) -> str: """