0% found this document useful (0 votes)
2 views

code

The document outlines a Python script that processes an Excel file to compute mean values for different groups and simplifies taxon names. It prepares the data for plotting and identifies the top 20 taxa based on mean abundance, followed by creating a bar plot without error bars. Finally, the generated plot is saved in both PNG and TIFF formats.
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
2 views

code

The document outlines a Python script that processes an Excel file to compute mean values for different groups and simplifies taxon names. It prepares the data for plotting and identifies the top 20 taxa based on mean abundance, followed by creating a bar plot without error bars. Finally, the generated plot is saved in both PNG and TIFF formats.
Copyright
© All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 2

import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the processed sample table from the Excel workbook.
file_path = "/mnt/data/processed_sample_analysis.xlsx"
mean_df = pd.read_excel(file_path)

# Collapse each group's two per-timepoint mean columns (6w and 10w) into a
# single row-wise average stored under the group's own name (G1, G2, G3).
for group_name in ("G1", "G2", "G3"):
    timepoint_columns = [f"{group_name}_6w_mean", f"{group_name}_10w_mean"]
    mean_df[group_name] = mean_df[timepoint_columns].mean(axis=1)

# Simplify taxon name


def simplify_taxonomy(clade):
    """Return the most specific named level of a '|'-delimited clade string.

    The string is split on '|'. A level counts as "known" when it contains a
    rank separator ('__') and none of the placeholder tags GGB, SGB, CFGB or
    OFGB (presumably auto-generated bin identifiers -- confirm against the
    profiler that produced the table). The last known level is returned.

    Args:
        clade: Full clade string, e.g. "k__Bacteria|p__Firmicutes|s__GGB1_SGB2".

    Returns:
        The last known level, or ``clade`` unchanged when no level qualifies
        or when ``clade`` is not a string (e.g. NaN/None from a blank cell).
    """
    # Blank spreadsheet cells arrive as NaN/None; pass them through instead of
    # failing on ``.split``.
    if not isinstance(clade, str):
        return clade

    placeholder_tags = ("GGB", "SGB", "CFGB", "OFGB")
    known = [
        level
        for level in clade.split("|")
        if "__" in level and not any(tag in level for tag in placeholder_tags)
    ]
    return known[-1] if known else clade

# Derive a short display name for every row from its full clade string.
mean_df["Taxon"] = mean_df["clade_name"].apply(simplify_taxonomy)

# Prepare data for plotting: reshape to long form so that each source row
# contributes one record per group column (G1, G2, G3).
group_columns = ["G1", "G2", "G3"]
group_labels = {"G1": "ND", "G2": "HFD", "G3": "HFD+EF-2001"}
plot_ready_df = mean_df[["Taxon", *group_columns]].melt(
    id_vars="Taxon",
    var_name="Group",
    value_name="Abundance",
)

# The 1e-6 pseudocount keeps log10 finite when an abundance is exactly zero.
plot_ready_df["log10_abundance"] = np.log10(plot_ready_df["Abundance"] + 1e-6)

# Human-readable label for each group code.
plot_ready_df["Group_Label"] = plot_ready_df["Group"].map(group_labels)

# Get top 20 taxa

# Mean abundance per taxon over all of its rows, broadcast back onto each row.
# The groupby / column selection / transform chain must stay one expression;
# split across two statements it assigns a GroupBy object to the column.
plot_ready_df["Mean_Abundance"] = (
    plot_ready_df.groupby("Taxon")["Abundance"].transform("mean")
)

# One representative row per taxon, highest mean first, capped at 20 taxa.
top_20_df = (
    plot_ready_df.sort_values("Mean_Abundance", ascending=False)
    .drop_duplicates("Taxon")
    .head(20)
)
top_taxa_names = top_20_df["Taxon"].tolist()

# Keep every row belonging to a top-20 taxon. The explicit copy makes this an
# independent frame, so columns added to it later do not write into a slice of
# plot_ready_df (which triggers pandas' SettingWithCopyWarning).
top_plot_df = plot_ready_df[plot_ready_df["Taxon"].isin(top_taxa_names)].copy()

# Organize plotting order
#
# assign() returns a new frame, so the added columns never write into a slice
# of another DataFrame. Keyword arguments are evaluated in order, which lets
# Sort_Index read the Group_Order column created just before it.
top_plot_df = top_plot_df.assign(
    # Numeric rank of each group code, used to order the bars by group.
    Group_Order=top_plot_df["Group"].map({"G1": 0, "G2": 1, "G3": 2}),
    # Y-axis category text: "<taxon> (<group label>)".
    Taxon_Group=top_plot_df["Taxon"] + " (" + top_plot_df["Group_Label"] + ")",
    # Group rank dominates; the existing row index orders rows within a group.
    Sort_Index=lambda frame: frame["Group_Order"] * 1000 + frame.index,
)

# Plot without error bars
group_palette = {"ND": "green", "HFD": "blue", "HFD+EF-2001": "red"}
ordered_plot_df = top_plot_df.sort_values("Sort_Index")

fig, ax = plt.subplots(figsize=(10, 12))
sns.barplot(
    data=ordered_plot_df,
    x="log10_abundance",
    y="Taxon_Group",
    hue="Group_Label",
    dodge=False,  # one full-width bar per y category rather than hue-dodged bars
    # NOTE(review): newer seaborn releases deprecate `ci` in favour of
    # `errorbar=None`; confirm the installed version before switching.
    ci=None,  # No error bars
    palette=group_palette,
    ax=ax,
)

# NOTE(review): the x values are the `log10_abundance` column; confirm that
# "LDA SCORE" and "Discriminative" are the intended wording for this figure.
ax.set_xlabel("LDA SCORE (log10 abundance)", fontsize=12)
ax.set_ylabel("Taxonomic Group by Sample Group", fontsize=12)
ax.set_title("Top 20 Discriminative Taxa", fontsize=14)
ax.legend(title="Group")
plt.tight_layout()

# Save images
png_path_no_error = "/mnt/data/top20_taxa_grouped_plot_no_error.png"
tiff_path_no_error = "/mnt/data/top20_taxa_grouped_plot_no_error.tiff"

# Write the same figure to both targets at 600 dpi, PNG first and then TIFF.
for output_path in (png_path_no_error, tiff_path_no_error):
    fig.savefig(output_path, dpi=600)

# Bare expression: echoes the two paths as cell output in a notebook and has
# no effect when the file runs as a plain script.
png_path_no_error, tiff_path_no_error

You might also like