From f69a3140913bfb3c08b9be7f8631cf4971e571bb Mon Sep 17 00:00:00 2001 From: Lameus <1vany1@mail.ru> Date: Mon, 11 Dec 2023 14:03:41 +0300 Subject: [PATCH] add flag for results of congr, fix token for diar --- expert/core/congruence/congruence_analysis.py | 27 ++++++++++++------- expert/data/annotation/speech_to_text.py | 2 +- .../data/diarization/speaker_diarization.py | 2 +- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/expert/core/congruence/congruence_analysis.py b/expert/core/congruence/congruence_analysis.py index d985ad8..b3c84c2 100644 --- a/expert/core/congruence/congruence_analysis.py +++ b/expert/core/congruence/congruence_analysis.py @@ -65,6 +65,7 @@ def __init__( sr: int = 44100, device: torch.device | None = None, output_dir: str | PathLike | None = None, + return_path: bool = False, ): if lang not in ["en", "ru"]: raise NotImplementedError("'lang' must be 'en' or 'ru'.") @@ -92,6 +93,8 @@ def __init__( if not os.path.exists(self.temp_path): os.makedirs(self.temp_path) + self.return_path = return_path + @property def device(self) -> torch.device: """Check the device type. 
@@ -194,15 +197,19 @@ def get_congruence(self): emotions_data["audio"] = audio_data.to_dict(orient="records") emotions_data["text"] = text_data.to_dict(orient="records") - with open( - os.path.join(self.temp_path, "emotions.json"), "w" - ) as filename: - json.dump(emotions_data, filename) + if self.return_path: + with open( + os.path.join(self.temp_path, "emotions.json"), "w" + ) as filename: + json.dump(emotions_data, filename) - cong_data[["video_path", "time_sec", "congruence"]].to_json( - os.path.join(self.temp_path, "congruence.json"), orient="records" - ) + cong_data[["video_path", "time_sec", "congruence"]].to_json( + os.path.join(self.temp_path, "congruence.json"), + orient="records", + ) - return os.path.join(self.temp_path, "emotions.json"), os.path.join( - self.temp_path, "congruence.json" - ) + return os.path.join(self.temp_path, "emotions.json"), os.path.join( + self.temp_path, "congruence.json" + ) + else: + return {"emotions": emotions_data, "congruence": cong_data} diff --git a/expert/data/annotation/speech_to_text.py b/expert/data/annotation/speech_to_text.py index e139cba..86a63ae 100644 --- a/expert/data/annotation/speech_to_text.py +++ b/expert/data/annotation/speech_to_text.py @@ -13,7 +13,7 @@ def transcribe_video( video_path: Union[str, PathLike], lang: Optional[str] = "en", - model: Optional[str] = "server", + model: Optional[str] = "local", device: Optional[Union[torch.device, None]] = None, ) -> Dict: """Speech recognition module from video. 
diff --git a/expert/data/diarization/speaker_diarization.py b/expert/data/diarization/speaker_diarization.py index 8867b74..cbebab6 100644 --- a/expert/data/diarization/speaker_diarization.py +++ b/expert/data/diarization/speaker_diarization.py @@ -56,7 +56,7 @@ def __init__( if device is not None: self._device = device - token = "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" # FIXME убрать харкод пароля # nosec + token = os.getenv("HF_AUTH_TOKEN") # read from environment; never commit secrets self.pipeline = Pipeline.from_pretrained( "pyannote/speaker-diarization@2.1", use_auth_token=token )