Spaces:
Sleeping
Sleeping
| import matplotlib.pyplot as plt | |
| import matplotlib | |
| matplotlib.use('agg') | |
| import plot_utils | |
| from constants import * | |
class MatplotlibDataPlotter:
    """Draw bar plots of the best-scoring profile per CDS region.

    The plotter holds two tables (single-domain and pair-domain rows) plus a
    table giving the number of domains per region, and owns one persistent
    matplotlib figure per plot kind. Each ``plot_*`` call clears its figure,
    redraws it and returns it.

    Columns read from ``single_df`` / ``pair_df``: ``cds_region_id``,
    ``biosyn_class``, ``biosyn_class_index``, ``profile_name`` and
    ``cosine_similarity_<split_name>``.
    Columns read from ``num_domains_in_region_df``: ``cds_region_id`` and
    ``num_domains``.
    """

    # Layout settings forwarded unchanged to plot_utils.draw_barplots.
    _TOP_N = 5
    _BIN_WIDTH = 1
    _HUE_GROUP_OFFSET = 0.5
    _BAR_WIDTH = 0.9
    _FIGSIZE = (5, 10)

    def __init__(self, single_df, pair_df, num_domains_in_region_df):
        """Store the input tables and create the two reusable figures.

        Args:
            single_df: table with one row per single-domain profile match.
            pair_df: table with one row per domain-pair profile match.
            num_domains_in_region_df: table mapping ``cds_region_id`` to
                ``num_domains``.
        """
        self.single_df = single_df
        self.pair_df = pair_df
        self.num_domains_in_region_df = num_domains_in_region_df
        # The figures are created once and reused; every plot call starts
        # with fig.clf(), so repeated calls do not allocate new figures.
        self.single_domains_fig = plt.figure(figsize=self._FIGSIZE)
        self.pair_domains_fig = plt.figure(figsize=self._FIGSIZE)

    def plot_single_domains(self, num_domains, split_name="stratified"):
        """Redraw and return the single-domain figure.

        Args:
            num_domains: keep only regions whose ``num_domains`` is greater
                than or equal to this value.
            split_name: suffix selecting the ``cosine_similarity_<split_name>``
                score column.

        Returns:
            ``self.single_domains_fig`` after it has been redrawn.
        """
        return self._plot_top_profiles(
            self.single_df, self.single_domains_fig, num_domains, split_name
        )

    def plot_pair_domains(self, num_domains, split_name="stratified"):
        """Redraw and return the pair-domain figure.

        Args:
            num_domains: keep only regions whose ``num_domains`` is greater
                than or equal to this value.
            split_name: suffix selecting the ``cosine_similarity_<split_name>``
                score column.

        Returns:
            ``self.pair_domains_fig`` after it has been redrawn.
        """
        return self._plot_top_profiles(
            self.pair_df, self.pair_domains_fig, num_domains, split_name
        )

    def _plot_top_profiles(self, df, fig, num_domains, split_name):
        """Shared implementation behind both public ``plot_*`` methods.

        Filters ``df`` to regions with at least ``num_domains`` domains, picks
        the highest-scoring row of each region and hands the resulting class
        indices and profile names to ``plot_utils.draw_barplots``.
        """
        region_table = self.num_domains_in_region_df
        selected_region_ids = region_table.loc[
            region_table.num_domains >= num_domains, 'cds_region_id'
        ].values
        subset = df.loc[df.cds_region_id.isin(selected_region_ids)]

        # Count distinct (region, class) pairs per class. With
        # as_index=False the result has two columns, so its rows convert
        # directly into a {biosyn_class: count} dict.
        class_counts = (
            subset[['cds_region_id', 'biosyn_class']]
            .drop_duplicates()
            .groupby("biosyn_class", as_index=False)
            .count()
        )
        hue2count = dict(class_counts.values)

        # For every region keep the index label of the row with the maximum
        # similarity score for the requested split.
        column_name = f'cosine_similarity_{split_name}'
        best_row_index = (
            subset.groupby('cds_region_id')
            .agg({column_name: 'idxmax'})
            .values.flatten()
        )
        targets_list = subset.loc[best_row_index, 'biosyn_class_index'].values
        label_list = subset.loc[best_row_index, 'profile_name'].values

        # Wipe whatever the previous call drew before reusing the figure.
        fig.clf()
        ax = fig.gca()
        plot_utils.draw_barplots(
            targets_list,
            label_list=label_list,
            top_n=self._TOP_N,
            bin_width=self._BIN_WIDTH,
            hue_group_offset=self._HUE_GROUP_OFFSET,
            hue_order=BIOSYN_CLASS_NAMES,
            hue2count=hue2count,
            width=self._BAR_WIDTH,
            ax=ax,
            show_legend=False,
            palette=COLOR_PALETTE,
        )
        fig.tight_layout()
        return fig