Skip to content

Commit

Permalink
Allow passing no_timestamps to predict.whisper #76
Browse files Browse the repository at this point in the history
  • Loading branch information
jwijffels committed Dec 23, 2024
1 parent 4b5c6a2 commit ca36a07
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 14 deletions.
5 changes: 2 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: audio.whisper
Type: Package
Title: Transcribe Audio Files using the "Whisper" Automatic Speech Recognition Model
Version: 0.4.1
Version: 0.4.2
Maintainer: Jan Wijffels <jwijffels@bnosac.be>
Authors@R: c(
person('Jan', 'Wijffels', role = c('aut', 'cre', 'cph'), email = 'jwijffels@bnosac.be', comment = "R wrapper"),
Expand Down Expand Up @@ -29,6 +29,5 @@ Suggests:
audio.vadwebrtc (>= 0.2.0)
LinkingTo: Rcpp
SystemRequirements: GNU make
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
Remotes: bnosac/audio.vadwebrtc

4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## CHANGES IN audio.whisper VERSION 0.4.2

- Allow passing no_timestamps to predict.whisper

## CHANGES IN audio.whisper VERSION 0.4.1

- Added function predict.whisper_transcription, which allows assigning a transcription segment to either the left or the right channel based on Voice Activity Detection
Expand Down
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ whisper_load_model <- function(model, use_gpu = FALSE) {
.Call('_audio_whisper_whisper_load_model', PACKAGE = 'audio.whisper', model, use_gpu)
}

whisper_encode <- function(model, path, language, token_timestamps = FALSE, translate = FALSE, duration = 0L, offset = 0L, trace = 1L, n_threads = 1L, n_processors = 1L, entropy_thold = 2.40, logprob_thold = -1.00, beam_size = -1L, best_of = 5L, split_on_word = FALSE, max_context = -1L, prompt = "", print_special = FALSE, diarize = FALSE, diarize_percent = 1.1) {
.Call('_audio_whisper_whisper_encode', PACKAGE = 'audio.whisper', model, path, language, token_timestamps, translate, duration, offset, trace, n_threads, n_processors, entropy_thold, logprob_thold, beam_size, best_of, split_on_word, max_context, prompt, print_special, diarize, diarize_percent)
whisper_encode <- function(model, path, language, token_timestamps = FALSE, translate = FALSE, duration = 0L, offset = 0L, trace = 1L, n_threads = 1L, n_processors = 1L, entropy_thold = 2.40, logprob_thold = -1.00, beam_size = -1L, best_of = 5L, split_on_word = FALSE, max_context = -1L, prompt = "", print_special = FALSE, diarize = FALSE, diarize_percent = 1.1, no_timestamps = FALSE) {
.Call('_audio_whisper_whisper_encode', PACKAGE = 'audio.whisper', model, path, language, token_timestamps, translate, duration, offset, trace, n_threads, n_processors, entropy_thold, logprob_thold, beam_size, best_of, split_on_word, max_context, prompt, print_special, diarize, diarize_percent, no_timestamps)
}

whisper_print_benchmark <- function(model, n_threads = 1L) {
Expand Down
8 changes: 4 additions & 4 deletions man/whisper_download_model.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 5 additions & 4 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ BEGIN_RCPP
END_RCPP
}
// whisper_encode
Rcpp::List whisper_encode(SEXP model, std::string path, std::string language, bool token_timestamps, bool translate, Rcpp::IntegerVector duration, Rcpp::IntegerVector offset, int trace, int n_threads, int n_processors, float entropy_thold, float logprob_thold, int beam_size, int best_of, bool split_on_word, int max_context, std::string prompt, bool print_special, bool diarize, float diarize_percent);
RcppExport SEXP _audio_whisper_whisper_encode(SEXP modelSEXP, SEXP pathSEXP, SEXP languageSEXP, SEXP token_timestampsSEXP, SEXP translateSEXP, SEXP durationSEXP, SEXP offsetSEXP, SEXP traceSEXP, SEXP n_threadsSEXP, SEXP n_processorsSEXP, SEXP entropy_tholdSEXP, SEXP logprob_tholdSEXP, SEXP beam_sizeSEXP, SEXP best_ofSEXP, SEXP split_on_wordSEXP, SEXP max_contextSEXP, SEXP promptSEXP, SEXP print_specialSEXP, SEXP diarizeSEXP, SEXP diarize_percentSEXP) {
Rcpp::List whisper_encode(SEXP model, std::string path, std::string language, bool token_timestamps, bool translate, Rcpp::IntegerVector duration, Rcpp::IntegerVector offset, int trace, int n_threads, int n_processors, float entropy_thold, float logprob_thold, int beam_size, int best_of, bool split_on_word, int max_context, std::string prompt, bool print_special, bool diarize, float diarize_percent, bool no_timestamps);
RcppExport SEXP _audio_whisper_whisper_encode(SEXP modelSEXP, SEXP pathSEXP, SEXP languageSEXP, SEXP token_timestampsSEXP, SEXP translateSEXP, SEXP durationSEXP, SEXP offsetSEXP, SEXP traceSEXP, SEXP n_threadsSEXP, SEXP n_processorsSEXP, SEXP entropy_tholdSEXP, SEXP logprob_tholdSEXP, SEXP beam_sizeSEXP, SEXP best_ofSEXP, SEXP split_on_wordSEXP, SEXP max_contextSEXP, SEXP promptSEXP, SEXP print_specialSEXP, SEXP diarizeSEXP, SEXP diarize_percentSEXP, SEXP no_timestampsSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Expand All @@ -43,7 +43,8 @@ BEGIN_RCPP
Rcpp::traits::input_parameter< bool >::type print_special(print_specialSEXP);
Rcpp::traits::input_parameter< bool >::type diarize(diarizeSEXP);
Rcpp::traits::input_parameter< float >::type diarize_percent(diarize_percentSEXP);
rcpp_result_gen = Rcpp::wrap(whisper_encode(model, path, language, token_timestamps, translate, duration, offset, trace, n_threads, n_processors, entropy_thold, logprob_thold, beam_size, best_of, split_on_word, max_context, prompt, print_special, diarize, diarize_percent));
Rcpp::traits::input_parameter< bool >::type no_timestamps(no_timestampsSEXP);
rcpp_result_gen = Rcpp::wrap(whisper_encode(model, path, language, token_timestamps, translate, duration, offset, trace, n_threads, n_processors, entropy_thold, logprob_thold, beam_size, best_of, split_on_word, max_context, prompt, print_special, diarize, diarize_percent, no_timestamps));
return rcpp_result_gen;
END_RCPP
}
Expand Down Expand Up @@ -71,7 +72,7 @@ END_RCPP

static const R_CallMethodDef CallEntries[] = {
{"_audio_whisper_whisper_load_model", (DL_FUNC) &_audio_whisper_whisper_load_model, 2},
{"_audio_whisper_whisper_encode", (DL_FUNC) &_audio_whisper_whisper_encode, 20},
{"_audio_whisper_whisper_encode", (DL_FUNC) &_audio_whisper_whisper_encode, 21},
{"_audio_whisper_whisper_print_benchmark", (DL_FUNC) &_audio_whisper_whisper_print_benchmark, 2},
{"_audio_whisper_whisper_language_info", (DL_FUNC) &_audio_whisper_whisper_language_info, 0},
{NULL, NULL, 0}
Expand Down
4 changes: 3 additions & 1 deletion src/rcpp_whisper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ Rcpp::List whisper_encode(SEXP model, std::string path, std::string language,
std::string prompt = "",
bool print_special = false,
bool diarize = false,
float diarize_percent = 1.1) {
float diarize_percent = 1.1,
bool no_timestamps = false) {
float audio_duration=0;

whisper_params params;
Expand All @@ -249,6 +250,7 @@ Rcpp::List whisper_encode(SEXP model, std::string path, std::string language,
params.max_context = max_context;
params.prompt = prompt;
params.diarize = diarize;
params.no_timestamps = no_timestamps;
if (params.fname_inp.empty()) {
Rcpp::stop("error: no input files specified");
}
Expand Down

0 comments on commit ca36a07

Please sign in to comment.