Skip to content

Commit

Permalink
to do: fix the taxonomy
Browse files Browse the repository at this point in the history
  • Loading branch information
Vlasovets committed Dec 20, 2023
1 parent 6dd8088 commit 0a0ec18
Showing 1 changed file with 41 additions and 24 deletions.
65 changes: 41 additions & 24 deletions q2_gglasso/_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from gglasso.helper.basic_linalg import scale_array_by_diagonal
from gglasso.helper.ext_admm_helper import create_group_array, construct_indexer, check_G

from .utils import if_2d_array, get_hyperparameters, list_to_array
from .utils import if_2d_array, get_hyperparameters, list_to_array, rename_index_with_sum
from .utils import normalize, log_transform, zero_imputation, check_lambda_path, get_lambda_mask
from sklearn import preprocessing

Expand Down Expand Up @@ -44,9 +44,11 @@ def transform_features(table: Table, taxonomy: pd.Series, sample_metadata: qiime
Count data projected to Euclidean space.
"""
X = table.to_dataframe()
X = table.to_dataframe() # p, N
X = X.sparse.to_dense()

X = rename_index_with_sum(X)

if transformation == "clr":
X = zero_imputation(X, pseudo_count=pseudo_count)
X = normalize(X)
Expand All @@ -63,16 +65,17 @@ def transform_features(table: Table, taxonomy: pd.Series, sample_metadata: qiime

X = pd.DataFrame(X, columns=X.columns, index=X.index)

# TODO: test taxonomy assignment
taxa = pd.DataFrame(taxonomy.view(pd.Series))
ASV_names = X.index.values

for i in ASV_names:
if i in taxa.index:
name = taxa.loc[i, "Taxon"]
if name not in X.index: # avoid duplicated names
X = X.rename(index={i: name})
# for i in ASV_names:
# if i in taxa.index:
# name = taxa.loc[i, "Taxon"]
# if name not in X.index: # avoid duplicated names
# X = X.rename(index={i: name})

X = X.T # p, N
X = X.T # N,p

if add_metadata:
# metadata is an (N, p) matrix
Expand All @@ -92,12 +95,13 @@ def transform_features(table: Table, taxonomy: pd.Series, sample_metadata: qiime
else:
result = X

result = result.T # N, p
result = result.T # p, N

return result


def calculate_covariance(table: pd.DataFrame, method: str = "scaled", bias: bool = True) -> pd.DataFrame:
def calculate_covariance(table: pd.DataFrame, method: str = "scaled",
bias: bool = True) -> pd.DataFrame:
"""
Calculate the covariance matrix.
Expand Down Expand Up @@ -135,7 +139,7 @@ def calculate_covariance(table: pd.DataFrame, method: str = "scaled", bias: bool

result = pd.DataFrame(result, index=table.index, columns=table.index)

return result
return result.round(10)


def build_groups(tables: Table, check_groups: bool = True) -> np.ndarray:
Expand Down Expand Up @@ -203,7 +207,8 @@ def build_groups(tables: Table, check_groups: bool = True) -> np.ndarray:


def solve_SGL(S: np.ndarray, N: list, latent: bool = None, model_selection: bool = None,
lambda1: list = None, mu1: list = None, lambda1_mask: list = None, gamma: float=None):
lambda1: list = None, mu1: list = None, lambda1_mask: list = None,
gamma: float = None):
"""
Solve the Single Graphical Lasso (SGL) problem; see Friedman et al. (2007).
Expand Down Expand Up @@ -239,16 +244,19 @@ def solve_SGL(S: np.ndarray, N: list, latent: bool = None, model_selection: bool
"""
if model_selection:
print("\tDD MODEL SELECTION:")
modelselect_params = {'lambda1_range': lambda1, 'mu1_range': mu1, 'lambda1_mask': lambda1_mask}
modelselect_params = {'lambda1_range': lambda1, 'mu1_range': mu1,
'lambda1_mask': lambda1_mask}
P = glasso_problem(S, N=N, latent=latent)
P.model_selection(modelselect_params=modelselect_params, gamma=gamma)

boundary_lambdas = check_lambda_path(P)
if boundary_lambdas:
warnings.warn("lambda is on the edge of the interval, the solution might have not reached global minimum!")
warnings.warn(
"lambda is on the edge of the interval, the solution might have not reached global minimum!")
else:
print("\tWITH LAMBDA={0} and MU={1}".format(lambda1, mu1))
P = glasso_problem(S, N=N, reg_params={'lambda1': lambda1, "mu1": mu1, 'lambda1_mask': lambda1_mask},
P = glasso_problem(S, N=N, reg_params={'lambda1': lambda1, "mu1": mu1,
'lambda1_mask': lambda1_mask},
latent=latent)
P.solve()

Expand Down Expand Up @@ -311,7 +319,8 @@ def solve_MGL(S: np.ndarray, N: list, reg: str, latent: bool = None, model_selec
return P


def solve_non_conforming(S: np.ndarray, N: list, G: list, latent: bool = None, model_selection: bool = None,
def solve_non_conforming(S: np.ndarray, N: list, G: list, latent: bool = None,
model_selection: bool = None,
lambda1: list = None, lambda2: list = None, mu1: list = None,
gamma: float = None):
"""
Expand Down Expand Up @@ -360,10 +369,12 @@ def solve_non_conforming(S: np.ndarray, N: list, G: list, latent: bool = None, m

boundary_lambdas = check_lambda_path(P, mgl_problem=True)
if boundary_lambdas:
warnings.warn("lambda is on the edge of the interval, the solution might have not reached global minimum!")
warnings.warn(
"lambda is on the edge of the interval, the solution might have not reached global minimum!")
else:
print("\tWITH LAMBDA1={0}, LAMBDA2={1} and MU={2}".format(lambda1, lambda2, mu1))
P = glasso_problem(S, N=N, G=G, reg_params={'lambda1': lambda1, 'lambda2': lambda2, "mu1": mu1},
P = glasso_problem(S, N=N, G=G,
reg_params={'lambda1': lambda1, 'lambda2': lambda2, "mu1": mu1},
latent=latent, reg='GGL')
P.solve()

Expand Down Expand Up @@ -442,8 +453,10 @@ def solve_problem(covariance_matrix: pd.DataFrame, n_samples: list, latent: bool

n_samples = list_to_array(n_samples)

h_params = get_hyperparameters(lambda1_min=lambda1_min, lambda1_max=lambda1_max, n_lambda1=n_lambda1,
lambda2_min=lambda2_min, lambda2_max=lambda2_max, n_lambda2=n_lambda2,
h_params = get_hyperparameters(lambda1_min=lambda1_min, lambda1_max=lambda1_max,
n_lambda1=n_lambda1,
lambda2_min=lambda2_min, lambda2_max=lambda2_max,
n_lambda2=n_lambda2,
mu1_min=mu1_min, mu1_max=mu1_max, n_mu1=n_mu1)

model_selection = h_params["model_selection"]
Expand All @@ -461,13 +474,15 @@ def solve_problem(covariance_matrix: pd.DataFrame, n_samples: list, latent: bool
if latent:
print("\n----SOLVING SINGLE GRAPHICAL LASSO PROBLEM WITH LATENT VARIABLES-----")

P = solve_SGL(S=S, N=n_samples, latent=latent, model_selection=model_selection, lambda1=lambda1, mu1=mu1,
P = solve_SGL(S=S, N=n_samples, latent=latent, model_selection=model_selection,
lambda1=lambda1, mu1=mu1,
lambda1_mask=lambda1_mask, gamma=gamma)

else:
print("----SOLVING SINGLE GRAPHICAL LASSO PROBLEM-----")

P = solve_SGL(S=S, N=n_samples, latent=latent, model_selection=model_selection, lambda1=lambda1, mu1=mu1,
P = solve_SGL(S=S, N=n_samples, latent=latent, model_selection=model_selection,
lambda1=lambda1, mu1=mu1,
lambda1_mask=lambda1_mask, gamma=gamma)

# if 3d array => solve MGL
Expand All @@ -492,12 +507,14 @@ def solve_problem(covariance_matrix: pd.DataFrame, n_samples: list, latent: bool
if latent:
print("\n----SOLVING {0} PROBLEM WITH LATENT VARIABLES-----".format(reg))

P = solve_MGL(S=S, N=n_samples, reg=reg, latent=latent, model_selection=model_selection,
P = solve_MGL(S=S, N=n_samples, reg=reg, latent=latent,
model_selection=model_selection,
lambda1=lambda1, lambda2=lambda2, mu1=mu1, gamma=gamma)
else:
print("\n----SOLVING {0} PROBLEM-----".format(reg))

P = solve_MGL(S=S, N=n_samples, reg=reg, latent=latent, model_selection=model_selection,
P = solve_MGL(S=S, N=n_samples, reg=reg, latent=latent,
model_selection=model_selection,
lambda1=lambda1, lambda2=lambda2, mu1=mu1, gamma=gamma)

labels = list(covariance_matrix.columns)
Expand Down

0 comments on commit 0a0ec18

Please sign in to comment.