Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 46 additions & 3 deletions gemmapy/gemmapy_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,18 @@
import logging
import os
import subprocess
import tarfile
import tempfile
import warnings
from getpass import getpass
from io import StringIO
from typing import Optional, List, Callable
from io import StringIO, BytesIO
from os.path import join
from typing import Optional, List, Callable, Any

import anndata as ad
import numpy as np
import pandas as pd
import scanpy
from anndata import AnnData
from pandas import DataFrame

Expand Down Expand Up @@ -1667,7 +1671,46 @@ def make_anndata(pack):
pass
return out

def get_differential_expression_values(self,
def get_single_cell_dataset_object(self, dataset: str | int,
                                   download_dir=None) -> AnnData:
    """
    Download the single-cell expression data of a dataset and load it as an
    AnnData object.

    The data is obtained from
    ``self.raw.get_dataset_single_cell_expression`` as a TAR archive, which
    is extracted to a temporary directory. Every top-level entry of the
    archive is then read with ``scanpy.read_10x_mtx`` and the results are
    concatenated.

    :param dataset: Identifier of the dataset (a string or an integer)
    :param download_dir: Directory where datasets can be downloaded, or else
      the data will be retrieved in-memory. An archive already present in
      that directory is reused instead of being downloaded again.
    :return: The concatenation of all the samples found in the archive
    """
    # The identifier may be an integer, but it is used to build a file name
    dataset_name = str(dataset)

    def resolve():
        """Return a binary file object positioned on the TAR archive."""
        if download_dir:
            dest = join(download_dir, dataset_name + '.tar')
            if not os.path.exists(dest):
                logger.info('Downloading single-cell data for %s to %s...',
                            dataset_name, download_dir)
                # Fetch before touching the filesystem, then publish the
                # file atomically: a failed download or an interrupted
                # write must never leave a truncated archive that a later
                # call would mistake for a valid cached copy. A leftover
                # '.part' file is simply overwritten by the next attempt.
                payload = self.raw.get_dataset_single_cell_expression(
                    dataset)
                partial = dest + '.part'
                with open(partial, 'wb') as f:
                    f.write(payload)
                os.replace(partial, dest)
            return open(dest, 'rb')
        else:
            logger.info("Downloading single-cell data for %s...",
                        dataset_name)
            return BytesIO(
                self.raw.get_dataset_single_cell_expression(dataset))

    with (resolve() as f, tarfile.open(fileobj=f) as tf,
          tempfile.TemporaryDirectory() as tmpdir):
        logger.info('Extracting TAR file for %s to %s...', dataset_name,
                    tmpdir)
        # The archive comes from the network: when the interpreter supports
        # extraction filters, refuse members that would escape tmpdir
        if hasattr(tarfile, 'data_filter'):
            tf.extractall(tmpdir, filter='data')
        else:
            tf.extractall(tmpdir)
        samples = []
        # os.listdir() returns entries in arbitrary order; sort them so the
        # order of the concatenated samples is reproducible
        for sample_dir in sorted(os.listdir(tmpdir)):
            logger.info('Reading MEX data for %s...', sample_dir)
            # Gemma already guarantees unicity of cell identifiers and
            # scanpy cannot deal with numeric gene identifiers when
            # make_unique is True, so we skip that part
            samples.append(scanpy.read_10x_mtx(join(tmpdir, sample_dir),
                                               make_unique=False))
        # NOTE(review): samples are concatenated along the "var" axis;
        # confirm this is intended rather than stacking cells along "obs"
        return scanpy.concat(samples, axis="var")

def get_differential_expression_values(self,
dataset:Optional[str|int] = None,
keep_non_specific:bool = False,
result_sets:Optional[List[str|int]] = None,
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ install_requires =
pandas
numpy
anndata
scanpy
typing

#[options.packages.find]
Expand Down
13 changes: 13 additions & 0 deletions tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,19 @@ def test_auth(monkeypatch):
monkeypatch.setitem(os.environ, 'GEMMA_PASSWORD_CMD', '')
gemmapy.GemmaPy()

def test_get_single_cell_data():
    """Check that single-cell data can be downloaded and loaded."""
    # TODO: use a publicly available dataset
    client = gemmapy.GemmaPy()
    # Named adata rather than ad, which is the usual alias of the anndata
    # module
    adata = client.get_single_cell_dataset_object('GSE227313',
                                                  download_dir='.')
    print(adata)
    # Printing alone can never fail: make sure something was actually loaded
    assert adata.n_vars > 0

def test_get_genes():
    """Gene lookup returns results for a symbol, a numeric identifier, and
    lists of either or both."""
    queries = ('BRCA1', ['BRCA1'], 672, [672], [672, 'BRCA1'])
    for query in queries:
        found = api.get_genes(query)
        assert len(found) > 0

def test_get_result_sets():
res = api.get_result_sets([200])

Expand Down
Loading