-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcode2_scatter_plot.py
77 lines (65 loc) · 2.57 KB
/
code2_scatter_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import pandas as pd
import matplotlib.pyplot as plt
import zipfile
from google.colab import files
def upload_file():
    """Prompt the user to upload a CSV file through the Colab upload widget.

    Returns:
        The name of the first uploaded file, as reported by
        ``files.upload()``. If several files are uploaded, only the first
        key of the returned mapping is used.

    Raises:
        ValueError: If the upload finished without any file (for example,
            the dialog was cancelled).
    """
    print("Please upload your CSV file:")
    uploaded = files.upload()
    if not uploaded:
        # Without this guard, next(iter(...)) would leak a bare
        # StopIteration, which gives the user no hint about what went wrong.
        raise ValueError("No file was uploaded.")
    return next(iter(uploaded))
def preprocess_data(filename):
    """Load the CSV and average replicate columns per condition.

    The 'Alternate_ID' and 'Identified_Proteins' columns are dropped. Every
    column after the first one (assumed to be 'Accession_Number') is treated
    as a replicate column whose condition name is the text before the first
    '-' in its header.

    Args:
        filename: Path (or file-like object) handed to ``pd.read_csv``.

    Returns:
        A tuple ``(condition_data, conditions)`` where ``condition_data``
        maps each condition name to a Series holding the row-wise mean of
        its replicate columns, and ``conditions`` lists the condition names
        in the order they first appear in the file.

    Raises:
        KeyError: If 'Alternate_ID' or 'Identified_Proteins' is missing.
    """
    df = pd.read_csv(filename)
    df = df.drop(columns=['Alternate_ID', 'Identified_Proteins'])  # Drop unused identifier columns

    # Group replicate columns by their exact condition prefix. A regex prefix
    # match (e.g. '^Ctrl') would also pick up 'Ctrl2-1' and would misbehave
    # on names containing regex metacharacters such as '+' or '('.
    replicate_columns = {}
    for column in df.columns[1:]:  # assuming 'Accession_Number' is the first column
        condition = column.split('-')[0]
        replicate_columns.setdefault(condition, []).append(column)

    condition_data = {
        condition: df[columns].mean(axis=1)
        for condition, columns in replicate_columns.items()
    }
    # A dict keeps insertion order, so the condition list is deterministic
    # (a set would yield a different order from run to run).
    return condition_data, list(replicate_columns)
def get_condition_pairs(conditions):
    """Interactively collect (condition 1, condition 2) pairs from the user.

    The available conditions are printed before every prompt. Typing 'done'
    (any letter case) at the first prompt ends the loop. A pair is kept only
    when both names are present in ``conditions``; otherwise a message is
    printed and the user is asked again.

    Args:
        conditions: Collection of valid condition names.

    Returns:
        List of ``(cond1, cond2)`` tuples in the order they were entered.
    """
    selected = []
    finished = False
    while not finished:
        print("Available conditions:", conditions)
        first = input("Enter name of Condition 1 (or type 'done' to finish): ")
        if first.lower() == 'done':
            finished = True
            continue

        second = input("Enter name of Condition 2: ")
        if first not in conditions or second not in conditions:
            print("Invalid conditions entered. Please try again.")
            continue

        selected.append((first, second))
    return selected
def plot_conditions(condition_data, pairs):
    """Draw, save and display one scatter plot per condition pair.

    For each ``(cond1, cond2)`` pair the values of ``condition_data[cond1]``
    are plotted against ``condition_data[cond2]``. Each plot is written to
    '<cond1>_vs_<cond2>_scatter.svg' in the current working directory and
    then shown.

    Args:
        condition_data: Mapping of condition name to its values.
        pairs: Iterable of ``(cond1, cond2)`` name tuples.

    Returns:
        List of the SVG file names that were written, one per pair and in
        the same order as ``pairs``.

    Raises:
        KeyError: If a name in ``pairs`` is not a key of ``condition_data``.
    """
    # Font size settings
    title_fontsize = 16
    label_fontsize = 14
    tick_fontsize = 12

    svg_files = []
    for cond1, cond2 in pairs:
        x = condition_data[cond1]
        y = condition_data[cond2]

        fig = plt.figure(figsize=(6, 6))
        try:
            plt.scatter(x, y, alpha=0.6, edgecolors='w')
            plt.title(f'{cond1} vs {cond2}', fontsize=title_fontsize)
            plt.xlabel(f'{cond1} Mean Values', fontsize=label_fontsize)
            plt.ylabel(f'{cond2} Mean Values', fontsize=label_fontsize)
            plt.xticks(fontsize=tick_fontsize)
            plt.yticks(fontsize=tick_fontsize)
            plt.grid(True)

            file_name = f'{cond1}_vs_{cond2}_scatter.svg'
            plt.savefig(file_name, format='svg', bbox_inches='tight')
            plt.show()  # Display each plot as it is generated
        finally:
            # pyplot keeps every figure alive until it is explicitly closed;
            # release it here so memory does not grow with the pair count.
            plt.close(fig)

        svg_files.append(file_name)
    return svg_files
def zip_and_download_files(svg_files):
    """Bundle the given files into 'scatter_plots.zip' and download it.

    Args:
        svg_files: Iterable of file paths to add to the archive. Paths that
            occur more than once are added a single time.
    """
    zip_filename = 'scatter_plots.zip'
    # The same pair entered twice yields the same file name twice; writing it
    # again would create a duplicate archive member (and a zipfile warning).
    unique_files = list(dict.fromkeys(svg_files))
    with zipfile.ZipFile(zip_filename, 'w') as zipf:
        for svg_path in unique_files:
            zipf.write(svg_path)
    # Download only after the with-block has closed (finalized) the archive.
    files.download(zip_filename)
def main():
    """Run the full workflow: upload, preprocess, choose pairs, plot, download."""
    csv_name = upload_file()
    per_condition_means, available_conditions = preprocess_data(csv_name)
    chosen_pairs = get_condition_pairs(available_conditions)
    written_svgs = plot_conditions(per_condition_means, chosen_pairs)
    zip_and_download_files(written_svgs)
# Run the interactive workflow only when executed as a script or notebook
# cell, not when this module is imported.
if __name__ == "__main__":
    main()