Source code for capalyzer.packet_builder.api


from os import makedirs
from os.path import isfile, getsize, dirname, join
from .summary_table_factory import SummaryTableFactory
import warnings

from ..constants import (
    CARD_RPKM,
    CARD_RPKMG,
    MEGARES_CLASS_RPKM,
    MEGARES_CLASS_RPKMG,
    MEGARES_GENE_RPKM,
    MEGARES_GENE_RPKMG,
    MEGARES_GROUP_RPKM,
    MEGARES_GROUP_RPKMG,
    MEGARES_MECH_RPKM,
    MEGARES_MECH_RPKMG,
    AVE_GENOME_SIZE,
    HMP_COMPARISON,
    MACROBES,
    READ_PROPORTIONS,
    READ_STATS,
    UNIREF90_COV,
    UNIREF90_RELAB,
    MPA_RELAB,
    KRAKENHLL_REFSEQ,
    KRAKENHLL_REFSEQ_MEDIUM,
    KRAKENHLL_REFSEQ_STRICT,
    KRAKENHLL_REFSEQ_LONG,
)



def write_csv(df_func, fname, overwrite=False, **kwargs):
    """Build a dataframe and write it to a csv file.

    Args:
        df_func: Callable invoked as ``df_func(**kwargs)``; its result must
            expose ``shape`` and ``to_csv`` (e.g. a pandas DataFrame).
        fname: Destination path. A name ending in ``.gz`` is written
            gzip-compressed.
        overwrite: When False, refuse to replace an existing non-empty file.
        **kwargs: Forwarded unchanged to ``df_func``.

    Returns:
        ``fname``, exactly as it was passed in.

    Raises:
        FileExistsError: If ``fname`` is an existing non-empty file and
            ``overwrite`` is False.
    """
    # dirname() is '' for a bare file name and makedirs('') raises, so only
    # create the parent directory when the path actually has one.
    parent = dirname(fname)
    if parent:
        makedirs(parent, exist_ok=True)

    # Zero-byte files are treated as leftovers and may always be replaced.
    if (isfile(fname) and getsize(fname) > 0) and not overwrite:
        raise FileExistsError(f'{fname} exists, set overwrite to True to overwrite.')

    df = df_func(**kwargs)
    if sum(df.shape) == 0:
        warnings.warn(f'Table {fname} is empty.')

    # Decide on the file suffix only; a '.gz' elsewhere in the path (for
    # instance in a directory name) must not turn on compression.
    if str(fname).endswith('.gz'):
        df.to_csv(fname, compression='gzip')
    else:
        df.to_csv(fname)
    return fname
def make_long_taxa(dirname, tables, overwrite=False):
    """Write the long-format taxa table and yield the path it was written to.

    This is a generator: nothing happens until it is iterated.

    Args:
        dirname: Passed to ``SummaryTableFactory`` to build the table source.
        tables: Output directory; created if it does not exist.
        overwrite: Forwarded to ``write_csv``.

    Yields:
        The value returned by ``write_csv`` for the table.

    Raises:
        Exception: Any error from ``write_csv`` is reported on stdout and
            then re-raised.
    """
    makedirs(tables, exist_ok=True)
    factory = SummaryTableFactory(dirname)

    builder = factory.taxonomy.krakenhll_long
    table_name = KRAKENHLL_REFSEQ_LONG
    try:
        written = write_csv(builder, join(tables, table_name), overwrite=overwrite)
    except Exception:
        print(f'{builder} failed with file {table_name}')
        raise
    yield written
def make_all_tables(dirname, tables, overwrite=False):
    """Write every summary table, yielding one result per table.

    This is a generator: nothing happens until it is iterated, and each
    table is built only when its turn comes.

    Args:
        dirname: Passed to ``SummaryTableFactory`` to build the table source.
        tables: Output directory; created if it does not exist.
        overwrite: Forwarded to ``write_csv``.

    Yields:
        For each table, the value returned by ``write_csv``, or ``None`` when
        writing that table raised. Failures are reported on stdout and do not
        stop the remaining tables.
    """
    makedirs(tables, exist_ok=True)
    factory = SummaryTableFactory(dirname)

    # (factory attribute, builder attribute, output file name, builder kwargs)
    # Builders are looked up by name inside the loop so that each attribute
    # access happens right before its table is written.
    jobs = (
        ('taxonomy', 'krakenhll', KRAKENHLL_REFSEQ, {}),
        ('taxonomy', 'krakenhll', KRAKENHLL_REFSEQ_STRICT, {'level': 'strict'}),
        ('taxonomy', 'krakenhll', KRAKENHLL_REFSEQ_MEDIUM, {'level': 'medium'}),
        ('taxonomy', 'krakenhll_long', KRAKENHLL_REFSEQ_LONG, {}),
        ('taxonomy', 'metaphlan2', MPA_RELAB, {}),
        ('amr', 'mech', MEGARES_MECH_RPKM, {}),
        ('amr', 'gene', MEGARES_GENE_RPKM, {}),
        ('amr', 'classus', MEGARES_CLASS_RPKM, {}),
        ('amr', 'group', MEGARES_GROUP_RPKM, {}),
        ('amr', 'mech', MEGARES_MECH_RPKMG, {'rpkmg': True}),
        ('amr', 'gene', MEGARES_GENE_RPKMG, {'rpkmg': True}),
        ('amr', 'classus', MEGARES_CLASS_RPKMG, {'rpkmg': True}),
        ('amr', 'group', MEGARES_GROUP_RPKMG, {'rpkmg': True}),
        ('amr', 'card_rpkm', CARD_RPKM, {}),
        ('amr', 'card_rpkmg', CARD_RPKMG, {}),
        ('macrobes', 'table', MACROBES, {}),
        ('ags', 'tbl', AVE_GENOME_SIZE, {}),
        ('hmp', 'raw_table', HMP_COMPARISON, {}),
        ('readprops', 'table', READ_PROPORTIONS, {}),
        ('readstats', 'table', READ_STATS, {}),
        ('pathways', 'pathways', UNIREF90_RELAB, {}),
        ('pathways', 'pathways', UNIREF90_COV, {'coverage': True}),
    )

    for module_name, builder_name, table_name, options in jobs:
        builder = getattr(getattr(factory, module_name), builder_name)
        try:
            written = write_csv(
                builder, join(tables, table_name), overwrite=overwrite, **options
            )
        except Exception:
            # Best effort: report the failure and carry on with the next table.
            print(f'{builder} failed with file {table_name}')
            written = None
        yield written