Source code for scripts.metrics.eval_overlap

import argparse
import logging
import pathlib
from typing import List

import numpy as np
import pandas as pd


def get_args() -> argparse.Namespace:
    """Parse the command-line arguments of the script.

    Both options are mandatory: without ``--input`` there is nothing to
    score, and without ``--output`` the computed scores would be silently
    discarded instead of being written to disk.

    Returns:
        The parsed namespace with ``input`` (a list of one or more path
        strings) and ``output`` (a single path string).

    Raises:
        SystemExit: Raised by ``argparse`` when a required option is missing
            or the command line is otherwise malformed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--input",
        "-i",
        nargs="+",
        required=True,
        help="List of path to meta signatures stored in .csv.",
    )
    parser.add_argument(
        "--output",
        "-o",
        type=str,
        required=True,
        help="Final results.",
    )
    return parser.parse_args()
def get_score(overlap: pd.DataFrame) -> float:
    """Score an overlap matrix with a greedy one-to-one row/column matching.

    The largest remaining entry is selected repeatedly, its value is added
    to the running total, and its row and column are removed from further
    consideration. This continues until either every column has been matched
    or no rows are left. The total is then divided by the number of columns
    of ``overlap``.

    Rows and columns are eliminated by position rather than by label, so
    duplicate index or column labels do not cause several rows/columns to be
    removed at once.

    Args:
        overlap: Matrix of overlap values. The input is not modified.

    Returns:
        Sum of the greedily selected entries divided by ``overlap.shape[1]``.
        ``0.0`` when ``overlap`` has columns but no rows.

    Raises:
        ZeroDivisionError: If ``overlap`` has no columns.
    """
    n_columns = overlap.shape[1]
    # Only read from this array; ``np.delete`` below always returns a copy,
    # so the caller's frame is never mutated.
    remaining = overlap.values
    score = 0
    for _ in range(n_columns):
        if remaining.shape[0] == 0:
            # More columns than rows: every row is already matched.
            break
        row, col = np.unravel_index(np.argmax(remaining, axis=None), remaining.shape)
        score += remaining[row, col]
        # Positional removal keeps the matching strictly one-to-one even when
        # labels repeat.
        remaining = np.delete(np.delete(remaining, row, axis=0), col, axis=1)
    return score / n_columns
def main() -> None:
    """Score every input overlap matrix and write the scores to one CSV.

    Each path given via ``--input`` is read as a CSV whose first column is
    the index, and is scored with ``get_score``. The result is a single-row
    table (row label ``"scores"``) with one column per input file, labelled
    with the stem of that file's parent directory name (presumably the
    number of clusters of the run; confirm against the producing pipeline).
    Inputs that share a parent directory name overwrite each other. The
    table is written to the ``--output`` path.
    """
    args = get_args()

    scores_by_label = {}
    for raw_path in args.input:
        csv_path = pathlib.Path(raw_path)
        label = csv_path.parent.stem
        matrix = pd.read_csv(csv_path, index_col=0)
        scores_by_label[label] = get_score(matrix)

    summary = pd.DataFrame(scores_by_label, index=["scores"])
    summary.to_csv(args.output)
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()