Skip to content

Commit 67d21dd

Browse files
committed
exit/fprintf/printf/fflush
1 parent 4052e99 commit 67d21dd

File tree

7 files changed

+190
-156
lines changed

7 files changed

+190
-156
lines changed

src/whisper_cpp/common-ggml.cpp

+16-15
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <Rcpp.h>
12
#include "common-ggml.h"
23

34
#include <regex>
@@ -27,7 +28,7 @@ enum ggml_ftype ggml_parse_ftype(const char * str) {
2728
if (str[0] == 'q') {
2829
const auto it = GGML_FTYPE_MAP.find(str);
2930
if (it == GGML_FTYPE_MAP.end()) {
30-
fprintf(stderr, "%s: unknown ftype '%s'\n", __func__, str);
31+
Rprintf("%s: unknown ftype '%s'\n", __func__, str);
3132
return GGML_FTYPE_UNKNOWN;
3233
}
3334
ftype = it->second;
@@ -63,13 +64,13 @@ bool ggml_common_quantize_0(
6364
case GGML_FTYPE_MOSTLY_F16:
6465
case GGML_FTYPE_MOSTLY_Q4_1_SOME_F16:
6566
{
66-
fprintf(stderr, "%s: invalid model type %d\n", __func__, ftype);
67+
Rprintf("%s: invalid model type %d\n", __func__, ftype);
6768
return false;
6869
}
6970
};
7071

7172
if (!ggml_is_quantized(qtype)) {
72-
fprintf(stderr, "%s: invalid quantization type %d (%s)\n", __func__, qtype, ggml_type_name(qtype));
73+
Rprintf("%s: invalid quantization type %d (%s)\n", __func__, qtype, ggml_type_name(qtype));
7374
return false;
7475
}
7576

@@ -107,7 +108,7 @@ bool ggml_common_quantize_0(
107108
std::string name(length, 0);
108109
finp.read (&name[0], length);
109110

110-
printf("%64s - [%5d, %5d, %5d], type = %6s ", name.data(), ne[0], ne[1], ne[2], ggml_type_name((ggml_type) ttype));
111+
Rprintf("%64s - [%5d, %5d, %5d], type = %6s ", name.data(), ne[0], ne[1], ne[2], ggml_type_name((ggml_type) ttype));
111112

112113
bool quantize = false;
113114

@@ -132,7 +133,7 @@ bool ggml_common_quantize_0(
132133

133134
if (quantize) {
134135
if (ttype != GGML_TYPE_F32 && ttype != GGML_TYPE_F16) {
135-
fprintf(stderr, "%s: unsupported ttype %d (%s) for integer quantization\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
136+
Rprintf("%s: unsupported ttype %d (%s) for integer quantization\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
136137
return false;
137138
}
138139

@@ -193,46 +194,46 @@ bool ggml_common_quantize_0(
193194
case GGML_TYPE_Q8_K:
194195
case GGML_TYPE_COUNT:
195196
{
196-
fprintf(stderr, "%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
197+
Rprintf("%s: unsupported quantization type %d (%s)\n", __func__, ttype, ggml_type_name((ggml_type) ttype));
197198
return false;
198199
}
199200
}
200201

201202
fout.write(reinterpret_cast<char *>(work.data()), cur_size);
202203
total_size_new += cur_size;
203204

204-
printf("size = %8.2f MB -> %8.2f MB | hist: ", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0);
205+
Rprintf("size = %8.2f MB -> %8.2f MB | hist: ", nelements * sizeof(float)/1024.0/1024.0, cur_size/1024.0/1024.0);
205206
for (int i = 0; i < (int) hist_cur.size(); ++i) {
206207
hist_all[i] += hist_cur[i];
207208
}
208209

209210
for (int i = 0; i < (int) hist_cur.size(); ++i) {
210-
printf("%5.3f ", hist_cur[i] / (float)nelements);
211+
Rprintf("%5.3f ", hist_cur[i] / (float)nelements);
211212
}
212-
printf("\n");
213+
Rprintf("\n");
213214
} else {
214-
printf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0);
215+
Rprintf("size = %8.3f MB\n", data_u8.size()/1024.0/1024.0);
215216
fout.write(reinterpret_cast<char *>(data_u8.data()), data_u8.size());
216217
total_size_new += data_u8.size();
217218
}
218219

219220
total_size_org += nelements * sizeof(float);
220221
}
221222

222-
printf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
223-
printf("%s: quant size = %8.2f MB | ftype = %d (%s)\n", __func__, total_size_new/1024.0/1024.0, ftype, ggml_type_name(qtype));
223+
Rprintf("%s: model size = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
224+
Rprintf("%s: quant size = %8.2f MB | ftype = %d (%s)\n", __func__, total_size_new/1024.0/1024.0, ftype, ggml_type_name(qtype));
224225

225226
{
226227
int64_t sum_all = 0;
227228
for (int i = 0; i < (int) hist_all.size(); ++i) {
228229
sum_all += hist_all[i];
229230
}
230231

231-
printf("%s: hist: ", __func__);
232+
Rprintf("%s: hist: ", __func__);
232233
for (int i = 0; i < (int) hist_all.size(); ++i) {
233-
printf("%5.3f ", hist_all[i] / (float)sum_all);
234+
Rprintf("%5.3f ", hist_all[i] / (float)sum_all);
234235
}
235-
printf("\n");
236+
Rprintf("\n");
236237
}
237238

238239
return true;

src/whisper_cpp/common.cpp

+69-68
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <Rcpp.h>
12
#define _USE_MATH_DEFINES // for M_PI
23

34
#include "common.h"
@@ -24,7 +25,7 @@ std::string get_next_arg(int& i, int argc, char** argv, const std::string& flag,
2425
if (i + 1 < argc && argv[i + 1][0] != '-') {
2526
return argv[++i];
2627
} else {
27-
fprintf(stderr, "error: %s requires one argument.\n", flag.c_str());
28+
Rprintf("error: %s requires one argument.\n", flag.c_str());
2829
gpt_print_usage(argc, argv, params);
2930
exit(0);
3031
}
@@ -76,7 +77,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
7677
get_next_arg(i, argc, argv, arg, params);
7778
std::ifstream file(argv[i]);
7879
if (!file) {
79-
fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
80+
Rprintf("error: failed to open file '%s'\n", argv[i]);
8081
break;
8182
}
8283
std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
@@ -87,7 +88,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
8788
params.token_test = get_next_arg(i, argc, argv, arg, params);
8889
}
8990
else {
90-
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
91+
Rprintf("error: unknown argument: %s\n", arg.c_str());
9192
gpt_print_usage(argc, argv, params);
9293
exit(0);
9394
}
@@ -97,31 +98,31 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
9798
}
9899

99100
void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
100-
fprintf(stderr, "usage: %s [options]\n", argv[0]);
101-
fprintf(stderr, "\n");
102-
fprintf(stderr, "options:\n");
103-
fprintf(stderr, " -h, --help show this help message and exit\n");
104-
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n");
105-
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
106-
fprintf(stderr, " -p PROMPT, --prompt PROMPT\n");
107-
fprintf(stderr, " prompt to start generation with (default: random)\n");
108-
fprintf(stderr, " -f FNAME, --file FNAME\n");
109-
fprintf(stderr, " load prompt from a file\n");
110-
fprintf(stderr, " -tt TOKEN_TEST, --token_test TOKEN_TEST\n");
111-
fprintf(stderr, " test tokenization\n");
112-
fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d)\n", params.n_predict);
113-
fprintf(stderr, " --top_k N top-k sampling (default: %d)\n", params.top_k);
114-
fprintf(stderr, " --top_p N top-p sampling (default: %.1f)\n", params.top_p);
115-
fprintf(stderr, " --temp N temperature (default: %.1f)\n", params.temp);
116-
fprintf(stderr, " --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled)\n", params.repeat_last_n);
117-
fprintf(stderr, " --repeat-penalty N penalize repeat sequence of tokens (default: %.2f, 1.0 = disabled)\n", (double)params.repeat_penalty);
118-
fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch);
119-
fprintf(stderr, " -c N, --context N context / KV cache size (default: %d)\n", params.n_ctx);
120-
fprintf(stderr, " --ignore-eos ignore EOS token during generation\n");
121-
fprintf(stderr, " -ngl N, --gpu-layers N number of layers to offload to GPU on supported models (default: %d)\n", params.n_gpu_layers);
122-
fprintf(stderr, " -m FNAME, --model FNAME\n");
123-
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
124-
fprintf(stderr, "\n");
101+
Rprintf("usage: %s [options]\n", argv[0]);
102+
Rprintf("\n");
103+
Rprintf("options:\n");
104+
Rprintf(" -h, --help show this help message and exit\n");
105+
Rprintf(" -s SEED, --seed SEED RNG seed (default: -1)\n");
106+
Rprintf(" -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
107+
Rprintf(" -p PROMPT, --prompt PROMPT\n");
108+
Rprintf(" prompt to start generation with (default: random)\n");
109+
Rprintf(" -f FNAME, --file FNAME\n");
110+
Rprintf(" load prompt from a file\n");
111+
Rprintf(" -tt TOKEN_TEST, --token_test TOKEN_TEST\n");
112+
Rprintf(" test tokenization\n");
113+
Rprintf(" -n N, --n_predict N number of tokens to predict (default: %d)\n", params.n_predict);
114+
Rprintf(" --top_k N top-k sampling (default: %d)\n", params.top_k);
115+
Rprintf(" --top_p N top-p sampling (default: %.1f)\n", params.top_p);
116+
Rprintf(" --temp N temperature (default: %.1f)\n", params.temp);
117+
Rprintf(" --repeat-last-n N last n tokens to consider for penalize (default: %d, 0 = disabled)\n", params.repeat_last_n);
118+
Rprintf(" --repeat-penalty N penalize repeat sequence of tokens (default: %.2f, 1.0 = disabled)\n", (double)params.repeat_penalty);
119+
Rprintf(" -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch);
120+
Rprintf(" -c N, --context N context / KV cache size (default: %d)\n", params.n_ctx);
121+
Rprintf(" --ignore-eos ignore EOS token during generation\n");
122+
Rprintf(" -ngl N, --gpu-layers N number of layers to offload to GPU on supported models (default: %d)\n", params.n_gpu_layers);
123+
Rprintf(" -m FNAME, --model FNAME\n");
124+
Rprintf(" model path (default: %s)\n", params.model.c_str());
125+
Rprintf("\n");
125126
}
126127

127128
std::string gpt_random_prompt(std::mt19937 & rng) {
@@ -170,7 +171,7 @@ std::map<std::string, int32_t> json_parse(const std::string & fname) {
170171
{
171172
std::ifstream ifs(fname);
172173
if (!ifs) {
173-
fprintf(stderr, "Failed to open %s\n", fname.c_str());
174+
Rprintf("Failed to open %s\n", fname.c_str());
174175
exit(1);
175176
}
176177

@@ -233,7 +234,7 @@ std::map<std::string, int32_t> json_parse(const std::string & fname) {
233234
try {
234235
result[str_key] = std::stoi(str_val);
235236
} catch (...) {
236-
//fprintf(stderr, "%s: ignoring key '%s' with value '%s'\n", fname.c_str(), str_key.c_str(), str_val.c_str());
237+
//Rprintf("%s: ignoring key '%s' with value '%s'\n", fname.c_str(), str_key.c_str(), str_val.c_str());
237238

238239
}
239240
str_key = "";
@@ -326,7 +327,7 @@ std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::stri
326327
break;
327328
}
328329
else if (j == i){ // word.substr(i, 1) has no matching
329-
fprintf(stderr, "%s: unknown token '%s'\n", __func__, word.substr(i, 1).data());
330+
Rprintf("%s: unknown token '%s'\n", __func__, word.substr(i, 1).data());
330331
i++;
331332
}
332333
}
@@ -350,7 +351,7 @@ std::vector<gpt_vocab::id> parse_tokens_from_string(const std::string& input, ch
350351

351352
std::map<std::string, std::vector<gpt_vocab::id>> extract_tests_from_file(const std::string & fpath_test){
352353
if (fpath_test.empty()){
353-
fprintf(stderr, "%s : No test file found.\n", __func__);
354+
Rprintf("%s : No test file found.\n", __func__);
354355
return std::map<std::string, std::vector<gpt_vocab::id>>();
355356
}
356357

@@ -383,37 +384,37 @@ void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test){
383384
n_fails++;
384385

385386
// print out failure cases
386-
fprintf(stderr, "%s : failed test: '%s'\n", __func__, test.first.c_str());
387-
fprintf(stderr, "%s : tokens in hf: ", __func__);
387+
Rprintf("%s : failed test: '%s'\n", __func__, test.first.c_str());
388+
Rprintf("%s : tokens in hf: ", __func__);
388389
for (const auto & t : test.second) {
389-
fprintf(stderr, "%s(%d), ", vocab.id_to_token[t].c_str(), t);
390+
Rprintf("%s(%d), ", vocab.id_to_token[t].c_str(), t);
390391
}
391-
fprintf(stderr, "\n");
392-
fprintf(stderr, "%s : tokens in ggml: ", __func__);
392+
Rprintf("\n");
393+
Rprintf("%s : tokens in ggml: ", __func__);
393394
for (const auto & t : tokens) {
394-
fprintf(stderr, "%s(%d), ", vocab.id_to_token[t].c_str(), t);
395+
Rprintf("%s(%d), ", vocab.id_to_token[t].c_str(), t);
395396
}
396-
fprintf(stderr, "\n");
397+
Rprintf("\n");
397398
}
398399
}
399400

400-
fprintf(stderr, "%s : %zu tests failed out of %zu tests.\n", __func__, n_fails, tests.size());
401+
Rprintf("%s : %zu tests failed out of %zu tests.\n", __func__, n_fails, tests.size());
401402
}
402403

403404
bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab) {
404-
printf("%s: loading vocab from '%s'\n", __func__, fname.c_str());
405+
Rprintf("%s: loading vocab from '%s'\n", __func__, fname.c_str());
405406

406407
vocab.token_to_id = ::json_parse(fname);
407408

408409
for (const auto & kv : vocab.token_to_id) {
409410
vocab.id_to_token[kv.second] = kv.first;
410411
}
411412

412-
printf("%s: vocab size = %d\n", __func__, (int) vocab.token_to_id.size());
413+
Rprintf("%s: vocab size = %d\n", __func__, (int) vocab.token_to_id.size());
413414

414415
// print the vocabulary
415416
//for (auto kv : vocab.token_to_id) {
416-
// printf("'%s' -> %d\n", kv.first.data(), kv.second);
417+
// Rprintf("'%s' -> %d\n", kv.first.data(), kv.second);
417418
//}
418419

419420
return true;
@@ -489,7 +490,7 @@ gpt_vocab::id gpt_sample_top_k_top_p(
489490

490491
//printf("\n");
491492
//for (int i = 0; i < (int) probs.size(); i++) {
492-
// printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]);
493+
// Rprintf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]);
493494
//}
494495
//exit(0);
495496

@@ -602,10 +603,10 @@ gpt_vocab::id gpt_sample_top_k_top_p_repeat(
602603
}
603604
}
604605

605-
// printf("\n");
606+
// Rprintf("\n");
606607
// for (int i = 0; i < (int) probs.size(); i++) {
607608
// for (int i = 0; i < 10; i++) {
608-
// printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]);
609+
// Rprintf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]);
609610
// }
610611

611612
std::discrete_distribution<> dist(probs.begin(), probs.end());
@@ -633,34 +634,34 @@ bool read_wav(const std::string & fname, std::vector<float>& pcmf32, std::vector
633634
}
634635

635636
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
636-
fprintf(stderr, "error: failed to open WAV file from stdin\n");
637+
Rprintf("error: failed to open WAV file from stdin\n");
637638
return false;
638639
}
639640

640-
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
641+
Rprintf("%s: read %zu bytes from stdin\n", __func__, wav_data.size());
641642
}
642643
else if (drwav_init_file(&wav, fname.c_str(), nullptr) == false) {
643-
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname.c_str());
644+
Rprintf("error: failed to open '%s' as WAV file\n", fname.c_str());
644645
return false;
645646
}
646647

647648
if (wav.channels != 1 && wav.channels != 2) {
648-
fprintf(stderr, "%s: WAV file '%s' must be mono or stereo\n", __func__, fname.c_str());
649+
Rprintf("%s: WAV file '%s' must be mono or stereo\n", __func__, fname.c_str());
649650
return false;
650651
}
651652

652653
if (stereo && wav.channels != 2) {
653-
fprintf(stderr, "%s: WAV file '%s' must be stereo for diarization\n", __func__, fname.c_str());
654+
Rprintf("%s: WAV file '%s' must be stereo for diarization\n", __func__, fname.c_str());
654655
return false;
655656
}
656657

657658
if (wav.sampleRate != COMMON_SAMPLE_RATE) {
658-
fprintf(stderr, "%s: WAV file '%s' must be %i kHz\n", __func__, fname.c_str(), COMMON_SAMPLE_RATE/1000);
659+
Rprintf("%s: WAV file '%s' must be %i kHz\n", __func__, fname.c_str(), COMMON_SAMPLE_RATE/1000);
659660
return false;
660661
}
661662

662663
if (wav.bitsPerSample != 16) {
663-
fprintf(stderr, "%s: WAV file '%s' must be 16-bit\n", __func__, fname.c_str());
664+
Rprintf("%s: WAV file '%s' must be 16-bit\n", __func__, fname.c_str());
664665
return false;
665666
}
666667

@@ -739,7 +740,7 @@ bool vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float
739740
energy_last /= n_samples_last;
740741

741742
if (verbose) {
742-
fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
743+
Rprintf("%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
743744
}
744745

745746
if (energy_last > vad_thold*energy_all) {
@@ -791,7 +792,7 @@ bool sam_params_parse(int argc, char ** argv, sam_params & params) {
791792
sam_print_usage(argc, argv, params);
792793
exit(0);
793794
} else {
794-
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
795+
Rprintf("error: unknown argument: %s\n", arg.c_str());
795796
sam_print_usage(argc, argv, params);
796797
exit(0);
797798
}
@@ -801,17 +802,17 @@ bool sam_params_parse(int argc, char ** argv, sam_params & params) {
801802
}
802803

803804
void sam_print_usage(int /*argc*/, char ** argv, const sam_params & params) {
804-
fprintf(stderr, "usage: %s [options]\n", argv[0]);
805-
fprintf(stderr, "\n");
806-
fprintf(stderr, "options:\n");
807-
fprintf(stderr, " -h, --help show this help message and exit\n");
808-
fprintf(stderr, " -s SEED, --seed SEED RNG seed (default: -1)\n");
809-
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
810-
fprintf(stderr, " -m FNAME, --model FNAME\n");
811-
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
812-
fprintf(stderr, " -i FNAME, --inp FNAME\n");
813-
fprintf(stderr, " input file (default: %s)\n", params.fname_inp.c_str());
814-
fprintf(stderr, " -o FNAME, --out FNAME\n");
815-
fprintf(stderr, " output file (default: %s)\n", params.fname_out.c_str());
816-
fprintf(stderr, "\n");
805+
Rprintf("usage: %s [options]\n", argv[0]);
806+
Rprintf("\n");
807+
Rprintf("options:\n");
808+
Rprintf(" -h, --help show this help message and exit\n");
809+
Rprintf(" -s SEED, --seed SEED RNG seed (default: -1)\n");
810+
Rprintf(" -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
811+
Rprintf(" -m FNAME, --model FNAME\n");
812+
Rprintf(" model path (default: %s)\n", params.model.c_str());
813+
Rprintf(" -i FNAME, --inp FNAME\n");
814+
Rprintf(" input file (default: %s)\n", params.fname_inp.c_str());
815+
Rprintf(" -o FNAME, --out FNAME\n");
816+
Rprintf(" output file (default: %s)\n", params.fname_out.c_str());
817+
Rprintf("\n");
817818
}

0 commit comments

Comments
 (0)