|
| 1 | +#!/bin/python3 |
| 2 | + |
| 3 | +import argparse |
| 4 | +import arxiv |
| 5 | +import re |
| 6 | +import os |
| 7 | +import textwrap |
| 8 | + |
| 9 | + |
| 10 | +def _first_non_stopword(title: str) -> str: |
| 11 | + for word in re.split("\W", title.lower()): |
| 12 | + if word in ("a", "an", "the", "is", "are", "what", "who", "your"): |
| 13 | + continue |
| 14 | + return word |
| 15 | + raise ValueError(f'The title seems to have only stopwords! "{title}"') |
| 16 | + |
| 17 | + |
| 18 | +def _author_lastname(author_name: str) -> str: |
| 19 | + return author_name.split(" ")[-1].lower() |
| 20 | + |
| 21 | + |
def get_info(paper_id: str, out_dir: str) -> None:
    """Fetch an arXiv paper and write a publication stub into *out_dir*.

    The stub is a markdown file with a YAML front-matter header (layout,
    title, authors, year, arXiv link, placeholder tags) followed by the
    paper's abstract. The file is named
    ``<first author's last name><year><first non-stopword of title>.markdown``.

    Args:
        paper_id: The arXiv identifier to look up.
        out_dir: Directory in which the markdown file is created.

    Raises:
        ValueError: if the search returns no paper for *paper_id*, or if the
            title consists only of stopwords.
    """
    search = arxiv.Search(id_list=[paper_id])
    paper = next(search.results(), None)
    if paper is None:
        # A bare StopIteration from next() would be an unhelpful error.
        raise ValueError(f'No arXiv paper found for id "{paper_id}"')

    # Keep blank-line paragraph breaks, but unwrap the hard line breaks
    # inside each paragraph.
    paragraphs = paper.summary.strip().split("\n\n")
    summary = "\n\n".join(p.replace("\n", " ") for p in paragraphs)

    # Collapse embedded newlines/runs of spaces in the title, then escape it
    # for a double-quoted YAML scalar.
    title = " ".join(paper.title.split())
    yaml_title = title.replace("\\", "\\\\").replace('"', '\\"')
    authors = ", ".join(a.name for a in paper.authors)

    # The header is assembled line by line instead of dedenting an f-string:
    # the interpolated summary can contain lines with no indentation, in
    # which case dedent finds no common margin and leaves the header
    # indented. Building it this way also makes "---" the first line of the
    # file rather than a blank line.
    front_matter = "\n".join(
        [
            "---",
            "layout: publication",
            f'title: "{yaml_title}"',
            f"authors: {authors}",
            "conference:",
            f"year: {paper.published.year}",
            "additional_links:",
            f'  - {{name: "ArXiV", url: "https://arxiv.org/abs/{paper_id}"}}',
            'tags: ["TODO"]',
            "---",
        ]
    )
    content = f"{front_matter}\n{summary}\n"

    filename = (
        f"{_author_lastname(paper.authors[0].name)}"
        f"{paper.published.year}"
        f"{_first_non_stopword(paper.title)}.markdown"
    )
    # Explicit UTF-8: titles, author names and abstracts are not ASCII-only.
    with open(os.path.join(out_dir, filename), "w", encoding="utf-8") as f:
        f.write(content)

    print(f'Output at: {filename}')
| 53 | + |
if __name__ == "__main__":
    # Command-line entry point: two required positional arguments that are
    # forwarded to get_info.
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in (
        ("paper_id", "The id of the paper to retrieve."),
        ("out_path", "The path to output the file."),
    ):
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()

    get_info(paper_id=options.paper_id, out_dir=options.out_path)
0 commit comments