Skip to content

Commit

Permalink
Improved data handling in memory
Browse files Browse the repository at this point in the history
Still suboptimal, but performs better than the previous implementation
  • Loading branch information
npalacioescat committed Jun 18, 2024
1 parent 05f576b commit 19b0f64
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 10 deletions.
2 changes: 0 additions & 2 deletions src/funki/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
app.layout = html.Div(
children=[
dcc.Store(id='data', storage_type=storage_type),
#dcc.Store(id='ann-data', storage_type=storage_type),
#dcc.Store(id='proc-data', storage_type=storage_type),
html.Div(
html.Img(
src='assets/logos/funki_logo.svg',
Expand Down
5 changes: 2 additions & 3 deletions src/funki/pages/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def update_param_panel(embedding, data):
children.extend(['Select embedding parameters: ', html.Br(), html.Br()])

if embedding == 'tsne':
max_per = len(data['index']) - 1 if data else 50
max_per = len(data['obs_names']) - 1 if data else 50
min_per = 1 if max_per < 10 else 5
step = 1 if max_per < 10 else 5

Expand Down Expand Up @@ -220,7 +220,7 @@ def apply_clustering(n_clicks, data, algorithm, resolution):
)
def update_dropdown(data):
try:
options = list(data['obs']['records'][0].keys())
options = list(data['obs']['var_names'])

except (KeyError, TypeError):
options = []
Expand Down Expand Up @@ -248,7 +248,6 @@ def plot_embedding(n_clicks, data, embedding, param_panel, color):
elif embedding == 'tsne':
# TODO: There is probably a more elegant way to do this
perplexity = param_panel[-1]['props']['value']
print(perplexity)
fig = fpl.plot_tsne(dset, perplexity=perplexity, color=color)

elif embedding == 'umap':
Expand Down
16 changes: 11 additions & 5 deletions src/funki/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,18 @@ def parse_contents(content, filename):
return df

def dataframe_to_serial(df):
    """Convert a DataFrame into a plain dictionary of lists.

    The table is stored once as a nested list instead of one dictionary
    per row, so column labels are not repeated for every record.

    Args:
        df: The ``pandas.DataFrame`` to convert.

    Returns:
        A dictionary with three keys:

        * ``'obs_names'``: row labels (``df.index``) as a list.
        * ``'var_names'``: column labels (``df.columns``) as a list.
        * ``'X'``: the values as a list of rows (``df.values.tolist()``).
    """

    # ``tolist()`` turns the pandas Index objects into built-in lists so the
    # whole payload is made of plain Python containers (e.g. usable with
    # ``json.dumps``).
    # NOTE(review): presumably this ends up in the app's ``dcc.Store`` --
    # confirm against the callers.
    return {
        'obs_names': df.index.tolist(),
        'var_names': df.columns.tolist(),
        'X': df.values.tolist(),
    }

def serial_to_dataframe(data):
    """Rebuild a DataFrame from its serialized dictionary form.

    Two layouts are understood:

    * Current: ``{'obs_names': [...], 'var_names': [...], 'X': [[...], ...]}``
      where ``'X'`` is a list of rows. ``'obs_names'`` and ``'var_names'``
      are optional; pandas' default labels are used when they are missing.
    * Legacy: ``{'index': [...], 'records': [{column: value, ...}, ...]}``
      where ``'index'`` is optional. Used only when ``'X'`` is absent.

    Args:
        data: Dictionary in one of the layouts above.

    Returns:
        The reconstructed ``pandas.DataFrame``.

    Raises:
        KeyError: If ``data`` contains neither ``'X'`` nor ``'records'``.
    """

    if 'X' not in data:
        # Legacy per-record layout.
        # NOTE(review): kept in case payloads written in the old layout are
        # still around -- drop once confirmed unnecessary.
        df = pd.DataFrame(data['records'])

        if 'index' in data:
            df.index = data['index']

        return df

    # Hand the nested list straight to pandas: going through ``np.array``
    # first would force every cell to one common dtype (e.g. numbers become
    # strings when any column holds text).
    return pd.DataFrame(
        data['X'],
        index=data.get('obs_names'),
        columns=data.get('var_names'),
    )

Expand All @@ -60,6 +65,7 @@ def serial_to_dataset(data):

def dataset_to_serial(dset):
data = dataframe_to_serial(dset.to_df())

data.update({
k: dataframe_to_serial(getattr(dset, k))
for k in ('obs', 'var')
Expand Down

0 comments on commit 19b0f64

Please sign in to comment.