
Commit

Merge branch 'main' into develop
MariaGabrielaReis committed Nov 30, 2023
2 parents 446c55b + f7a6663 commit 3b1a6af
Showing 12 changed files with 356 additions and 79 deletions.
3 changes: 1 addition & 2 deletions .github/workflows/sonarcloud.yml
@@ -33,10 +33,9 @@ jobs:
      - name: Run tests
        run: python main_tests.py

      - name: SonarCloud Scan
        uses: SonarSource/sonarcloud-github-action@master
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Needed to get PR information, if any
          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}

4 changes: 3 additions & 1 deletion main.py
@@ -31,6 +31,8 @@ def pipe():
tempo = timeit("pipe()", globals=globals(), number=1)
timedelta_time = timedelta(seconds=tempo)
format = str(timedelta_time)
-data_format = {"metrics": {"stage": "Pipeline completa", "day": datetime.now(), "time": format}}
+data_format = {
+    "metrics": {"stage": "Pipeline completa", "day": datetime.now(), "time": format}
+}

step5_update_data(data_format)
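
For reference, the block above times one full pipeline run and stores the elapsed time as a metric. A minimal standalone sketch of the same timing pattern, with a stub pipe() in place of the real pipeline and elapsed in place of the original's format name (which shadows a Python builtin):

from datetime import datetime, timedelta
from timeit import timeit


def pipe():
    # Stand-in for the real pipeline steps
    sum(range(1_000_000))


# Run once; timeit returns elapsed seconds, timedelta renders them as H:MM:SS.ffffff
tempo = timeit("pipe()", globals=globals(), number=1)
elapsed = str(timedelta(seconds=tempo))
data_format = {
    "metrics": {"stage": "Pipeline completa", "day": datetime.now(), "time": elapsed}
}
print(data_format)
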
11 changes: 2 additions & 9 deletions main_tests.py
@@ -4,7 +4,7 @@
import coverage


-tests_path = os.path.dirname(os.path.realpath(__file__)) + '/tests'
+tests_path = os.path.dirname(os.path.realpath(__file__)) + "/tests"
sys.path.insert(0, tests_path)

cov = coverage.Coverage()
@@ -21,11 +21,4 @@

cov.report()

-cov.xml_report(outfile='coverage.xml')
-
-
-
-
-
-
-
+cov.xml_report(outfile="coverage.xml")
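
The two hunks above change only quote style and trailing blank lines; the coverage workflow itself is unchanged. For context, a sketch of how the coverage API is typically driven around a test run (the unittest discovery is an assumption; main_tests.py may collect its tests differently):

import os
import sys
import unittest

import coverage

tests_path = os.path.dirname(os.path.realpath(__file__)) + "/tests"
sys.path.insert(0, tests_path)

cov = coverage.Coverage()
cov.start()

# Run the suite while coverage is recording
suite = unittest.defaultTestLoader.discover(tests_path)
unittest.TextTestRunner().run(suite)

cov.stop()
cov.save()
cov.report()  # terminal summary
cov.xml_report(outfile="coverage.xml")  # report consumed by SonarCloud
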
4 changes: 2 additions & 2 deletions pipeline/clear_data.py
@@ -1,6 +1,7 @@
import pandas as pd
from pipeline.storage import update_stats, insert_stats

+
# =============================================================================
# Exploratory analysis and adaptations
# =============================================================================
@@ -58,9 +59,8 @@ def clear_data(df):

    update_stats(null_avaluations)

-
    duplicated_avaluations = {
-        "erros":{
+        "erros": {
            "value": int(before_duplicated),
            "type": "duplicate_avaluations",
        },
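
The "erros" payloads in clear_data record how many rows were dropped. A hedged sketch of how such counts are typically computed with pandas (before_duplicated comes from the hunk above; the sample DataFrame and the null-count variant are illustrative):

import pandas as pd

df = pd.DataFrame(
    {
        "review": ["bom", "bom", None, "ruim"],
        "sentiment": ["POSITIVO", "POSITIVO", None, "NEGATIVO"],
    }
)

# Count problem rows before cleaning them away
before_null = int(df.isnull().any(axis=1).sum())  # rows with any null field
before_duplicated = int(df.duplicated().sum())    # exact duplicate rows

df = df.dropna().drop_duplicates()

duplicated_avaluations = {
    "erros": {
        "value": before_duplicated,
        "type": "duplicate_avaluations",
    },
}
print(before_null, duplicated_avaluations)
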
67 changes: 56 additions & 11 deletions pipeline/pipeline.py
@@ -10,22 +10,28 @@
from datetime import datetime


-
# =============================================================================
# Accessing the provided data
# =============================================================================
def step1_access_data():
    init_topic("Acessando os dados disponibilizados...")
    df = access_data()
    exec_time = finish_topic_default()
-    insert_stats([{"erros":[]}])
-
-    update_stats({"metrics": {"stage": "Acesso aos dados", "day": datetime.now(), "time": exec_time}})
+    insert_stats([{"erros": []}])
+
+    update_stats(
+        {
+            "metrics": {
+                "stage": "Acesso aos dados",
+                "day": datetime.now(),
+                "time": exec_time,
+            }
+        }
+    )

    return df


-
# =============================================================================
# Exploratory analysis and adaptations
# =============================================================================
@@ -35,7 +41,15 @@ def step2_pre_processing(df):
    df = pre_processing(df)
    exec_time = finish_topic_default()

-    update_stats({"metrics": {"stage": "Pré processamento", "day": datetime.now(), "time": exec_time}})
+    update_stats(
+        {
+            "metrics": {
+                "stage": "Pré processamento",
+                "day": datetime.now(),
+                "time": exec_time,
+            }
+        }
+    )

    return df

@@ -48,7 +62,15 @@ def step_extra_testing_classification_model(df):
    testing(df)
    exec_time = finish_topic_default()

-    update_stats({"metrics": {"stage": "Treinamento do modelo", "day": datetime.now(), "time": exec_time}})
+    update_stats(
+        {
+            "metrics": {
+                "stage": "Treinamento do modelo",
+                "day": datetime.now(),
+                "time": exec_time,
+            }
+        }
+    )


# =============================================================================
@@ -60,7 +82,15 @@ def step3_processing(df):
    df = processing(df)
    exec_time = finish_topic_default()

-    update_stats({"metrics": {"stage": "Processamento de dados", "day": datetime.now(), "time": exec_time}})
+    update_stats(
+        {
+            "metrics": {
+                "stage": "Processamento de dados",
+                "day": datetime.now(),
+                "time": exec_time,
+            }
+        }
+    )

    return df

@@ -73,13 +103,28 @@ def step4_storage_data(df):
    insert(df)
    exec_time = finish_topic_default()

-    update_stats({"metrics": {"stage":"Armazenamento de dados", "day": datetime.now(), "time":exec_time}})
-
+    update_stats(
+        {
+            "metrics": {
+                "stage": "Armazenamento de dados",
+                "day": datetime.now(),
+                "time": exec_time,
+            }
+        }
+    )


def step5_update_data(df):
    init_topic("Armazenando tempo de execução...")
    update_stats(df)
    exec_time = finish_topic_default()

-    update_stats({"metrics": {"stage":"Atualizando métricas", "day": datetime.now(), "time":exec_time}})
+    update_stats(
+        {
+            "metrics": {
+                "stage": "Atualizando métricas",
+                "day": datetime.now(),
+                "time": exec_time,
+            }
+        }
+    )
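
After this reformat, all five steps repeat the same nine-line update_stats(...) block with only the stage label changing. A hypothetical helper that would centralize the pattern; it assumes pipeline.py's existing imports (update_stats, init_topic, finish_topic_default, pre_processing) and is a sketch, not part of the commit:

from datetime import datetime


def record_stage(stage, exec_time):
    # Same payload shape the steps currently build by hand
    update_stats(
        {
            "metrics": {
                "stage": stage,
                "day": datetime.now(),
                "time": exec_time,
            }
        }
    )


def step2_pre_processing(df):
    init_topic("Pré processamento...")  # message text is illustrative
    df = pre_processing(df)
    record_stage("Pré processamento", finish_topic_default())
    return df
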
2 changes: 1 addition & 1 deletion pipeline/select_data.py
@@ -8,7 +8,7 @@
# =============================================================================
def select_data(df):
    positive = random_select(100, df, "POSITIVO", 0)  # maximum: 78210
-    neutral = random_select(100, df, "NEUTRO", 0)  # maximum for neutral: 15834
+    neutral = random_select(100, df, "NEUTRO", 0)  # maximum: 15834
    negative = random_select(100, df, "NEGATIVO", 0)  # maximum: 32594
    select = np.concatenate([positive, neutral, negative])

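
random_select is defined elsewhere in the repository; from its call sites it appears to draw up to 100 reviews per sentiment class (the trailing comments give each class's ceiling). A hypothetical pandas equivalent, with the name and semantics assumed from the call sites only:

import numpy as np
import pandas as pd


def random_select_sketch(n, df, sentiment, seed):
    # Assumed reading of random_select(n, df, label, seed)
    rows = df[df["sentiment"] == sentiment]
    return rows.sample(n=min(n, len(rows)), random_state=seed).index.to_numpy()


df = pd.DataFrame({"sentiment": ["POSITIVO"] * 150 + ["NEUTRO"] * 120 + ["NEGATIVO"] * 130})
positive = random_select_sketch(100, df, "POSITIVO", 0)
neutral = random_select_sketch(100, df, "NEUTRO", 0)
negative = random_select_sketch(100, df, "NEGATIVO", 0)
select = np.concatenate([positive, neutral, negative])
print(len(select))  # 300
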
13 changes: 13 additions & 0 deletions pipeline/show_results.py
@@ -0,0 +1,13 @@
+# =============================================================================
+# Results visualization
+# =============================================================================
+
+def show_results(df):
+    # Overall corpus distribution
+    print("Distribuição geral do corpus:\n", df['topic'].value_counts(normalize=True))
+
+    # Distribution of negative reviews
+    # print("\nDistribuição das avaliações Negativas:\n", df['topic'][df['sentiment'] == 'NEGATIVO'].value_counts(normalize=True))
+
+    # Distribution of positive reviews
+    # print("\nDistribuição das avaliações Positivas:\n", df['topic'][df['sentiment'] == 'POSITIVO'].value_counts(normalize=True))
7 changes: 3 additions & 4 deletions pipeline/storage.py
@@ -23,6 +23,7 @@

current_stats_id = None

+
def insert(data):
    dados_pln.drop()

@@ -54,9 +55,7 @@ def insert_stats(data):
        if result.inserted_ids:
            current_stats_id = result.inserted_ids[0]

-            return (
-                f"{len(documents)} documentos inseridos na coleção 'stats' com sucesso. "
-            )
+            return f"{len(documents)} documentos inseridos na coleção 'stats' com sucesso. "
        else:
            return "Erro ao inserir documentos na coleção 'stats'."
    else:
@@ -69,4 +68,4 @@ def update_stats(payload):
        stats_pln.update_one({"_id": current_stats_id}, {"$push": payload})
        return "Informação atualizada"
    else:
-        return "Erro: ID do documento não encontrado. Insert_stats não foi executado."
+        return "Erro: ID do documento não encontrado. Insert_stats não foi executado."