From 5fbcb35e9b17d3d1909ae6ef4cfb1d1719514396 Mon Sep 17 00:00:00 2001 From: kaegi Date: Wed, 14 Aug 2019 17:31:39 +0200 Subject: [PATCH] Major overhaul... move from aligner to alass-core and alass-cli --- .gitignore | 1 + Cargo.lock | 718 ++++--- Cargo.toml | 33 +- MakefileWindows64 | 25 + README.md | 128 +- alass-cli/Cargo.toml | 44 + alass-cli/src/errors.rs | 215 ++ alass-cli/src/main.rs | 818 +++++++ alass-cli/src/video_decoder/ffmpeg_binary.rs | 419 ++++ alass-cli/src/video_decoder/ffmpeg_library.rs | 276 +++ alass-cli/src/video_decoder/mod.rs | 90 + alass-core/Cargo.toml | 30 + alass-core/README.md | 22 + alass-core/src/alass.rs | 778 +++++++ {src => alass-core/src}/lib.rs | 95 +- alass-core/src/rating_type.rs | 88 + alass-core/src/segments.rs | 1900 +++++++++++++++++ alass-core/src/statistics.rs | 257 +++ .../internal => alass-core/src}/time_types.rs | 78 +- .../src}/timespan_ops.rs | 57 +- rustfmt.toml | 9 +- src/binary/errors.rs | 58 - src/binary/mod.rs | 18 - src/internal/aligner.rs | 645 ------ src/internal/delta_compression.rs | 765 ------- src/internal/mod.rs | 31 - src/internal/rating_type.rs | 132 -- src/internal/utils.rs | 381 ---- src/main.rs | 460 ---- .../download_subtitles_for_movie.py | 177 ++ 30 files changed, 5829 insertions(+), 2919 deletions(-) create mode 100644 MakefileWindows64 create mode 100644 alass-cli/Cargo.toml create mode 100644 alass-cli/src/errors.rs create mode 100644 alass-cli/src/main.rs create mode 100644 alass-cli/src/video_decoder/ffmpeg_binary.rs create mode 100644 alass-cli/src/video_decoder/ffmpeg_library.rs create mode 100644 alass-cli/src/video_decoder/mod.rs create mode 100644 alass-core/Cargo.toml create mode 100644 alass-core/README.md create mode 100644 alass-core/src/alass.rs rename {src => alass-core/src}/lib.rs (64%) create mode 100644 alass-core/src/rating_type.rs create mode 100644 alass-core/src/segments.rs create mode 100644 alass-core/src/statistics.rs rename {src/internal => alass-core/src}/time_types.rs 
(86%) rename {src/internal => alass-core/src}/timespan_ops.rs (83%) delete mode 100644 src/binary/errors.rs delete mode 100644 src/binary/mod.rs delete mode 100644 src/internal/aligner.rs delete mode 100644 src/internal/delta_compression.rs delete mode 100644 src/internal/mod.rs delete mode 100644 src/internal/rating_type.rs delete mode 100644 src/internal/utils.rs delete mode 100644 src/main.rs create mode 100755 test_data/opensubtitles_download_helper/download_subtitles_for_movie.py diff --git a/.gitignore b/.gitignore index f2a39f9..15ba0d1 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ target *.srt *.svg data +*.rs.bk diff --git a/Cargo.lock b/Cargo.lock index dd6d517..9135676 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,36 +1,62 @@ -[root] -name = "aligner" -version = "0.1.6" +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "aho-corasick" +version = "0.6.10" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "arrayvec 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", - "clap 2.26.0 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", - "error-chain 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", - "pbr 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", - "rand 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)", - "subparse 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "aho-corasick" -version = "0.6.3" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "alass-cli" +version = "1.0.0" 
+dependencies = [ + "alass-core 1.0.0", + "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", + "clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)", + "env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "pbr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", + "subparse 0.6.0", + "webrtc-vad 0.1.0", +] + +[[package]] +name = "alass-core" +version = "1.0.0" +dependencies = [ + "arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.3.23 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "ansi_term" -version = "0.9.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "arrayvec" -version = "0.4.0" +version = "0.4.11" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "nodrop 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", - "odds 0.2.25 (registry+https://github.com/rust-lang/crates.io-index)", + "nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -40,50 +66,46 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "atty" -version = "0.2.2" +version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 
0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "autocfg" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "backtrace" -version = "0.3.3" +version = "0.3.34" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "backtrace-sys 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", - "cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "dbghelp-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", - "rustc-demangle 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace-sys 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "rustc-demangle 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "backtrace-sys" -version = "0.1.12" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "gcc 0.3.53 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", + "cc 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "bitflags" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name 
= "bitflags" -version = "0.9.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "byteorder" -version = "1.1.0" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -91,161 +113,138 @@ name = "cast" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "cc" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "cfg-if" -version = "0.1.2" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "clap" -version = "2.26.0" +version = "2.33.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", - "atty 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", - "strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)", - "term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "textwrap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", - "vec_map 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)", + "bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "vec_map 0.8.1 
(registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "combine" -version = "2.5.1" +version = "2.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "ascii 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", - "byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "conv" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "custom_derive 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)", -] - -[[package]] -name = "custom_derive" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "dbghelp-sys" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "either" -version = "1.1.0" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "encoding" -version = "0.2.33" +name = "encoding_rs" +version = "0.8.17" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "encoding-index-japanese" 
-version = "1.20141219.5" +name = "enum_primitive" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "encoding-index-korean" -version = "1.20141219.5" +name = "env_logger" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)", + "humantime 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "encoding-index-simpchinese" -version = "1.20141219.5" +name = "error-chain" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace 0.3.34 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "encoding-index-singlebyte" -version = "1.20141219.5" +name = "failure" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "backtrace 0.3.34 (registry+https://github.com/rust-lang/crates.io-index)", + "failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "encoding-index-tradchinese" -version = "1.20141219.5" +name = "failure_derive" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - 
"encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)", + "synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "encoding_index_tests" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" - -[[package]] -name = "enum_primitive" +name = "fuchsia-cprng" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", -] [[package]] -name = "error-chain" -version = "0.10.0" +name = "humantime" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "backtrace 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", + "quick-error 1.2.2 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "gcc" -version = "0.3.53" -source = "registry+https://github.com/rust-lang/crates.io-index" - [[package]] name = "image" version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "enum_primitive 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", - "num-iter 0.1.34 (registry+https://github.com/rust-lang/crates.io-index)", - "num-rational 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "num-iter 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)", + "num-rational 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.43 
(registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "itertools" -version = "0.6.2" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "itoa" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "kernel32-sys" version = "0.2.2" @@ -257,51 +256,44 @@ dependencies = [ [[package]] name = "lazy_static" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "libc" -version = "0.2.30" +name = "lazy_static" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "log" -version = "0.3.8" +name = "libc" +version = "0.2.61" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "magenta" -version = "0.1.1" +name = "log" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "conv 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "magenta-sys 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "magenta-sys" -version = "0.1.1" +name = "log" +version = "0.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", + "cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "memchr" -version = "1.0.1" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", -] [[package]] name = "nodrop" -version = "0.1.9" +version = 
"0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "odds 0.2.25 (registry+https://github.com/rust-lang/crates.io-index)", -] [[package]] name = "nom" @@ -310,85 +302,185 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "num-integer" -version = "0.1.35" +version = "0.1.41" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "num-iter" -version = "0.1.34" +version = "0.1.39" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "num-integer 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "autocfg 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "num-integer 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "num-rational" -version = "0.1.39" +version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "num-integer 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)", - "num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)", + "num-integer 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "num-traits" -version = "0.1.40" +version = "0.1.43" source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] -name = "odds" -version = "0.2.25" +name = "num-traits" 
+version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "autocfg 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "numtoa" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "pbr" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", - "time 0.1.38 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "termion 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "proc-macro2" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "quick-error" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "quote" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.3.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "rand" -version = "0.3.16" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "fuchsia-cprng 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand_core 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand_core" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "rdrand" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", - "magenta 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "redox_syscall" -version = "0.1.31" +version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "redox_termios" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "regex" -version = "0.2.2" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = 
"regex" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", - "utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "regex-syntax" -version = "0.4.1" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "ucd-util 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "rustc-demangle" -version = "0.1.5" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "ryu" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -396,88 +488,139 @@ name = "safemem" version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "serde" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde_derive 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_derive" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.30 
(registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "serde_json" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", + "ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "strsim" -version = "0.6.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "subparse" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" +version = "0.6.0" dependencies = [ - "combine 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)", - "error-chain 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", - "itertools 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "combine 2.5.2 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)", + "failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", + "itertools 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)", "vobsub 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "term_size" -version = "0.3.0" +name = "syn" +version = "0.15.44" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "proc-macro2 0.4.30 
(registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "synstructure" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "termcolor" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "wincolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "termion" +version = "1.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "numtoa 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "textwrap" -version = "0.7.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "thread_local" -version = "0.3.4" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", - "unreachable 1.0.0 
(registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "time" -version = "0.1.38" +version = "0.1.42" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)", - "redox_syscall 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "unicode-segmentation" -version = "1.2.0" +name = "ucd-util" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "unicode-width" -version = "0.1.4" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] -name = "unreachable" -version = "1.0.0" +name = "unicode-xid" +version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", -] [[package]] name = "utf8-ranges" -version = "1.0.0" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "vec_map" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -488,92 +631,149 @@ dependencies = [ "cast 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "error-chain 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", "image 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)", - "lazy_static 0.2.8 
(registry+https://github.com/rust-lang/crates.io-index)", - "log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "nom 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] -name = "void" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" +name = "webrtc-vad" +version = "0.1.0" +dependencies = [ + "cc 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)", +] [[package]] name = "winapi" version = "0.2.8" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "winapi" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "winapi-build" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "wincolor" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ 
+ "winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [metadata] -"checksum aho-corasick 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "500909c4f87a9e52355b26626d890833e9e1d53ac566db76c36faa984b889699" -"checksum ansi_term 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "23ac7c30002a5accbf7e8987d0632fa6de155b7c3d39d0067317a391e00a2ef6" -"checksum arrayvec 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d1154a1f6c2d8ca95194edbee2380f23f2a51ba321a1217ea00e78166636b2a0" +"checksum aho-corasick 0.6.10 (registry+https://github.com/rust-lang/crates.io-index)" = "81ce3d38065e618af2d7b77e10c5ad9a069859b4be3c2250f674af3840d9c8a5" +"checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d" +"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" +"checksum arrayvec 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b8d73f9beda665eaa98ab9e4f7442bd4e7de6652587de55b2525e52e29c1b0ba" "checksum ascii 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3ae7d751998c189c1d4468cf0a39bb2eae052a9c58d50ebb3b9591ee3813ad50" -"checksum atty 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d912da0db7fa85514874458ca3651fe2cddace8d0b0505571dbdcd41ab490159" -"checksum backtrace 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "99f2ce94e22b8e664d95c57fff45b98a966c2252b60691d0b7aeeccd88d70983" -"checksum backtrace-sys 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "afccc5772ba333abccdf60d55200fa3406f8c59dcf54d5f7998c9107d3799c7c" -"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d" -"checksum bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4efd02e230a02e18f92fc2735f44597385ed02ad8f831e7c1c1156ee5e1ab3a5" -"checksum byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff81738b726f5d099632ceaffe7fb65b90212e8dce59d518729e7e8634032d3d" +"checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90" +"checksum autocfg 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "22130e92352b948e7e82a49cdb0aa94f2211761117f29e052dd397c1ac33542b" +"checksum backtrace 0.3.34 (registry+https://github.com/rust-lang/crates.io-index)" = "b5164d292487f037ece34ec0de2fcede2faa162f085dd96d2385ab81b12765ba" +"checksum backtrace-sys 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)" = "82a830b4ef2d1124a711c71d263c5abdc710ef8e907bd508c88be475cebc422b" +"checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd" +"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5" "checksum cast 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "926013f2860c46252efceabb19f4a6b308197505082c609025aa6706c011d427" -"checksum cfg-if 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d4c819a1287eb618df47cc647173c5c4c66ba19d888a6e50d605672aed3140de" -"checksum clap 2.26.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2267a8fdd4dce6956ba6649e130f62fb279026e5e84b92aa939ac8f85ce3f9f0" -"checksum combine 2.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f5c279781ad7b9b480fe265770ea5cb87fcbdc8ea1b2b6930d60a206e781dc6c" -"checksum conv 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = 
"78ff10625fd0ac447827aa30ea8b861fead473bb60aeb73af6c1c58caf0d1299" -"checksum custom_derive 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "ef8ae57c4978a2acd8b869ce6b9ca1dfe817bff704c220209fdef2c0b75a01b9" -"checksum dbghelp-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "97590ba53bcb8ac28279161ca943a924d1fd4a8fb3fa63302591647c4fc5b850" -"checksum either 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "18785c1ba806c258137c937e44ada9ee7e69a37e3c72077542cd2f069d78562a" -"checksum encoding 0.2.33 (registry+https://github.com/rust-lang/crates.io-index)" = "6b0d943856b990d12d3b55b359144ff341533e516d94098b1d3fc1ac666d36ec" -"checksum encoding-index-japanese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "04e8b2ff42e9a05335dbf8b5c6f7567e5591d0d916ccef4e0b1710d32a0d0c91" -"checksum encoding-index-korean 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "4dc33fb8e6bcba213fe2f14275f0963fd16f0a02c878e3095ecfdf5bee529d81" -"checksum encoding-index-simpchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d87a7194909b9118fc707194baa434a4e3b0fb6a5a757c73c3adb07aa25031f7" -"checksum encoding-index-singlebyte 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "3351d5acffb224af9ca265f435b859c7c01537c0849754d3db3fdf2bfe2ae84a" -"checksum encoding-index-tradchinese 1.20141219.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fd0e20d5688ce3cab59eb3ef3a2083a5c77bf496cb798dc6fcdb75f323890c18" -"checksum encoding_index_tests 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "a246d82be1c9d791c5dfde9a2bd045fc3cbba3fa2b11ad558f27d01712f00569" +"checksum cc 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)" = "b548a4ee81fccb95919d4e22cfea83c7693ebfd78f0495493178db20b3139da7" +"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = 
"b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33" +"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9" +"checksum combine 2.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1645a65a99c7c8d345761f4b75a6ffe5be3b3b27a93ee731fccc5050ba6be97c" +"checksum either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5527cfe0d098f36e3f8839852688e63c8fff1c90b2b405aef730615f9a7bcf7b" +"checksum encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)" = "4155785c79f2f6701f185eb2e6b4caf0555ec03477cb4c70db67b465311620ed" "checksum enum_primitive 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "be4551092f4d519593039259a9ed8daedf0da12e5109c5280338073eaeb81180" +"checksum env_logger 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "aafcde04e90a5226a6443b7aabdb016ba2f8307c847d524724bd9b346dd1a2d3" "checksum error-chain 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d9435d864e017c3c6afeac1654189b06cdb491cf2ff73dbf0d73b0f292f42ff8" -"checksum gcc 0.3.53 (registry+https://github.com/rust-lang/crates.io-index)" = "e8310f7e9c890398b0e80e301c4f474e9918d2b27fca8f48486ca775fa9ffc5a" +"checksum failure 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "795bd83d3abeb9220f257e597aa0080a508b27533824adf336529648f6abf7e2" +"checksum failure_derive 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "ea1063915fd7ef4309e222a5a07cf9c319fb9c7836b1f89b85458672dbb127e1" +"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" +"checksum humantime 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3ca7e5f2e110db35f93b837c81797f3714500b81d517bf20c431b16d3ca4f114" "checksum image 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"1c3f4f5ea213ed9899eca760a8a14091d4b82d33e27cf8ced336ff730e9f6da8" -"checksum itertools 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "22c285d60139cf413244894189ca52debcfd70b57966feed060da76802e415a0" +"checksum itertools 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "d3f2be4da1690a039e9ae5fd575f706a63ad5a2120f161b1d653c9da3930dd21" +"checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" -"checksum lazy_static 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "3b37545ab726dd833ec6420aaba8231c5b320814b9029ad585555d2a03e94fbf" -"checksum libc 0.2.30 (registry+https://github.com/rust-lang/crates.io-index)" = "2370ca07ec338939e356443dac2296f581453c35fe1e3a3ed06023c49435f915" -"checksum log 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "880f77541efa6e5cc74e76910c9884d9859683118839d6a1dc3b11e63512565b" -"checksum magenta 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4bf0336886480e671965f794bc9b6fce88503563013d1bfb7a502c81fe3ac527" -"checksum magenta-sys 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "40d014c7011ac470ae28e2f76a02bfea4a8480f73e701353b49ad7a8d75f4699" -"checksum memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1dbccc0e46f1ea47b9f17e6d67c5a96bd27030519c519c9c91327e31275a47b4" -"checksum nodrop 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "52cd74cd09beba596430cc6e3091b74007169a56246e1262f0ba451ea95117b2" +"checksum lazy_static 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" +"checksum lazy_static 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"bc5729f27f159ddd61f4df6228e827e86643d4d3e7c32183cb30a1c08f604a14" +"checksum libc 0.2.61 (registry+https://github.com/rust-lang/crates.io-index)" = "c665266eb592905e8503ba3403020f4b8794d26263f412ca33171600eca9a6fa" +"checksum log 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "e19e8d5c34a3e0e2223db8e060f9e8264aeeb5c5fc64a4ee9965c062211c024b" +"checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7" +"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" +"checksum nodrop 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9667ddcc6cc8a43afc9b7917599d7216aa09c463919ea32c59ed6cac8bc945" "checksum nom 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e5d4598834859fedb9a0a69d5b862a970e77982a92f544d547257a4d49469067" -"checksum num-integer 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)" = "d1452e8b06e448a07f0e6ebb0bb1d92b8890eea63288c0b627331d53514d0fba" -"checksum num-iter 0.1.34 (registry+https://github.com/rust-lang/crates.io-index)" = "7485fcc84f85b4ecd0ea527b14189281cf27d60e583ae65ebc9c088b13dffe01" -"checksum num-rational 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "288629c76fac4b33556f4b7ab57ba21ae202da65ba8b77466e6d598e31990790" -"checksum num-traits 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "99843c856d68d8b4313b03a17e33c4bb42ae8f6610ea81b28abe076ac721b9b0" -"checksum odds 0.2.25 (registry+https://github.com/rust-lang/crates.io-index)" = "c3df9b730298cea3a1c3faa90b7e2f9df3a9c400d0936d6015e6165734eefcba" -"checksum pbr 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e048e3afebb6c454bb1c5d0fe73fda54698b4715d78ed8e7302447c37736d23a" -"checksum rand 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)" = 
"eb250fd207a4729c976794d03db689c9be1d634ab5a1c9da9492a13d8fecbcdf" -"checksum redox_syscall 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)" = "8dde11f18c108289bef24469638a04dce49da56084f2d50618b226e47eb04509" -"checksum regex 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "1731164734096285ec2a5ec7fea5248ae2f5485b3feeb0115af4fda2183b2d1b" -"checksum regex-syntax 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad890a5eef7953f55427c50575c680c42841653abd2b028b68cd223d157f62db" -"checksum rustc-demangle 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "aee45432acc62f7b9a108cc054142dac51f979e69e71ddce7d6fc7adf29e817e" +"checksum num-integer 0.1.41 (registry+https://github.com/rust-lang/crates.io-index)" = "b85e541ef8255f6cf42bbfe4ef361305c6c135d10919ecc26126c4e5ae94bc09" +"checksum num-iter 0.1.39 (registry+https://github.com/rust-lang/crates.io-index)" = "76bd5272412d173d6bf9afdf98db8612bbabc9a7a830b7bfc9c188911716132e" +"checksum num-rational 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "ee314c74bd753fc86b4780aa9475da469155f3848473a261d2d18e35245a784e" +"checksum num-traits 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "92e5113e9fd4cc14ded8e499429f396a20f98c772a47cc8622a736e1ec843c31" +"checksum num-traits 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba9a427cfca2be13aa6f6403b0b7e7368fe982bfa16fccc450ce74c46cd9b32" +"checksum numtoa 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" +"checksum pbr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "deb73390ab68d81992bd994d145f697451bb0b54fd39738e72eef32458ad6907" +"checksum proc-macro2 0.4.30 (registry+https://github.com/rust-lang/crates.io-index)" = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +"checksum quick-error 1.2.2 
(registry+https://github.com/rust-lang/crates.io-index)" = "9274b940887ce9addde99c4eee6b5c44cc494b182b97e73dc8ffdcb3397fd3f0" +"checksum quote 0.6.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +"checksum rand 0.3.23 (registry+https://github.com/rust-lang/crates.io-index)" = "64ac302d8f83c0c1974bf758f6b041c6c8ada916fbb44a609158ca8b064cc76c" +"checksum rand 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" +"checksum rand_core 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" +"checksum rand_core 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" +"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" +"checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" +"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" +"checksum regex 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9329abc99e39129fcceabd24cf5d85b4671ef7c29c50e972bc5afe32438ec384" +"checksum regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88c3d9193984285d544df4a30c23a4e62ead42edf70a4452ceb76dac1ce05c26" +"checksum regex-syntax 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7d707a4fa2637f2dca2ef9fd02225ec7661fe01a53623c1e6515b6916511f7a7" +"checksum regex-syntax 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b143cceb2ca5e56d5671988ef8b15615733e7ee16cd348e064333b251b89343f" +"checksum rustc-demangle 0.1.16 
(registry+https://github.com/rust-lang/crates.io-index)" = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" +"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997" "checksum safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f" -"checksum strsim 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d15c810519a91cf877e7e36e63fe068815c678181439f2f29e2562147c3694" -"checksum subparse 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f02e5df667f15c8c8319b211bc5be180e127a5f002ab49524c92407b06d33045" -"checksum term_size 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2b6b55df3198cc93372e85dd2ed817f0e38ce8cc0f22eb32391bfad9c4bf209" -"checksum textwrap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f728584ea33b0ad19318e20557cb0a39097751dbb07171419673502f848c7af6" -"checksum thread_local 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1697c4b57aeeb7a536b647165a2825faddffb1d3bad386d507709bd51a90bb14" -"checksum time 0.1.38 (registry+https://github.com/rust-lang/crates.io-index)" = "d5d788d3aa77bc0ef3e9621256885555368b47bd495c13dd2e7413c89f845520" -"checksum unicode-segmentation 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a8083c594e02b8ae1654ae26f0ade5158b119bd88ad0e8227a5d8fcd72407946" -"checksum unicode-width 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "bf3a113775714a22dcb774d8ea3655c53a32debae63a063acc00a91cc586245f" -"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" -"checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" -"checksum vec_map 0.8.0 
(registry+https://github.com/rust-lang/crates.io-index)" = "887b5b631c2ad01628bbbaa7dd4c869f80d3186688f8d0b6f58774fbe324988c" +"checksum serde 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)" = "7fe5626ac617da2f2d9c48af5515a21d5a480dbd151e01bb1c355e26a3e68113" +"checksum serde_derive 1.0.98 (registry+https://github.com/rust-lang/crates.io-index)" = "01e69e1b8a631f245467ee275b8c757b818653c6d704cdbcaeb56b56767b529c" +"checksum serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)" = "051c49229f282f7c6f3813f8286cc1e3323e8051823fce42c7ea80fe13521704" +"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" +"checksum syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)" = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" +"checksum synstructure 0.10.2 (registry+https://github.com/rust-lang/crates.io-index)" = "02353edf96d6e4dc81aea2d8490a7e9db177bf8acb0e951c24940bf866cb313f" +"checksum termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "96d6098003bde162e4277c70665bd87c326f5a0c3f3fbfb285787fa482d54e6e" +"checksum termion 1.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6a8fb22f7cde82c8220e5aeacb3258ed7ce996142c77cba193f203515e26c330" +"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" +"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b" +"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f" +"checksum ucd-util 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa9b3b49edd3468c0e6565d85783f51af95212b6fa3986a5500954f00b460874" +"checksum unicode-width 0.1.5 
(registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" +"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" +"checksum utf8-ranges 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b4ae116fef2b7fea257ed6440d3cfcff7f190865f170cdad00bb6465bf18ecba" +"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum vobsub 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "aa122d660e26d9b6aa8f3436304b667ec81cbc0d48a5d19640d7e55ca8eac812" -"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" +"checksum winapi 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "f10e386af2b13e47c89e7236a7a14a086791a2b88ebad6df9bf42040195cf770" "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" +"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +"checksum winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7168bab6e1daee33b4557efd0e95d5ca70a03706d39fa5f3fe7a236f584b03c9" +"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +"checksum wincolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "561ed901ae465d6185fa7864d63fbd5720d0ef718366c9a4dc83cf6170d7e9ba" diff --git 
a/Cargo.toml b/Cargo.toml index ba456aa..f3738eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,29 +1,6 @@ -[package] -name = "aligner" -version = "0.1.6" -authors = ["kaegi "] -description = "Automatically corrects subtitle timings given a second correct subtitle" -repository = "https://github.com/kaegi/aligner" -documentation = "https://docs.rs/aligner" -readme = "README.md" -keywords = ["subtitle", "aligner", "automatic", "api", "tool"] -license = "GPL-3.0" +[workspace] - -[dependencies] -arrayvec = "0.4.0" - -# dependencies of binary -clap = "2.22.2" -error-chain = "~0.10.0" -pbr = "1.0.0-alpha.2" -subparse = "0.3.0" -encoding = "0.2.33" - -[[bin]] -doc = false -name = "aligner" - - -[dev-dependencies] -rand = "0.3" +members = [ + "alass-core", + "alass-cli" +] diff --git a/MakefileWindows64 b/MakefileWindows64 new file mode 100644 index 0000000..c69dc5e --- /dev/null +++ b/MakefileWindows64 @@ -0,0 +1,25 @@ +package_windows64: + curl https://ffmpeg.zeranoe.com/builds/win64/shared/ffmpeg-4.2-win64-shared.zip -o target/ffmpeg.zip + unzip target/ffmpeg.zip -d target + mv target/ffmpeg-4.2-win64-shared target/ffmpeg + echo + mkdir target/alass-windows64 + mkdir target/alass-windows64/ffmpeg + mkdir target/alass-windows64/bin + curl https://www.gnu.org/licenses/gpl-3.0.txt > target/alass-windows64/bin/LICENSE.txt + cp target/ffmpeg/LICENSE.txt target/alass-windows64/ffmpeg/LICENSE.txt + cp target/ffmpeg/README.txt target/alass-windows64/ffmpeg/README.txt + cp -r target/ffmpeg/bin target/alass-windows64/ffmpeg/bin + rm target/alass-windows64/ffmpeg/bin/ffplay.exe + cargo build --release --target x86_64-pc-windows-gnu + cp target/x86_64-pc-windows-gnu/release/alass-cli.exe target/alass-windows64/bin + echo -ne '@echo off\r\nset ALASS_FFMPEG_PATH=.\\ffmpeg\\bin\\ffmpeg.exe\r\nset ALASS_FFPROBE_PATH=.\\ffmpeg\\bin\\ffprobe.exe\r\n.\\bin\\alass-cli.exe %*\r\n' > target/alass-windows64/alass.bat + ( cd target; zip -J -r alass-windows64.zip alass-windows64 ) + + 
+clean_windows64: + rm target/alass-windows64.zip -f + rm target/ffmpeg-4.2-win64-shared.zip -f + rm target/ffmpeg-4.2-win64-shared -rf + rm target/ffmpeg -rf + rm target/alass-windows64 -rf diff --git a/README.md b/README.md index ee69eb7..f51ad42 100644 --- a/README.md +++ b/README.md @@ -1,96 +1,136 @@ # Introduction -`aligner` is a Rust library and command-line tool that corrects a subtitle given -a second "correct" subtitle. It will figure out offsets and where to +`alass` is a command line tool to synchronize subtitles to movies. It will figure out offsets and where to introduce or remove advertisement breaks to get the best alignment possible. It does not use any language information so it even works for image based subtitles like VobSub. +`alass` stands for "Automatic Language-Agnostic Subtitle Synchronization". + +## Executable for Windows + +Get the lastest executable from [here](https://github.com/kaegi/alass/releases)! Just download and extract the archive. The file `alass.bat` is the command line tool. + ## Usage The most basic command is: ```bash -$ aligner reference_subtitle.ssa incorrect_subtitle.srt output.srt +$ alass movie.mp4 incorrect_subtitle.srt output.srt +``` + +You can also use `alass` to align the incorrect subtitle to a different subtitle: + +```bash +$ alass reference_subtitle.ssa incorrect_subtitle.srt output.srt ``` You can additionally adjust how much the algorithm tries to avoid introducing or removing a break: ```bash -# split-penalty is a value between 0 and 100 -$ aligner reference_subtitle.ssa incorrect_subtitle.srt output.srt --split-penalty 2.6 +# split-penalty is a value between 0 and 100 (default 4) +$ alass reference_subtitle.ssa incorrect_subtitle.srt output.srt --split-penalty 2.6 ``` -Currently supported are `.srt`, `.ssa`/`.ass` and `.idx` files. +Values between 0.1 and 10 are the most useful. Anything above 10 probably does not split the subtitle and anything below 0.1 introduces many unnecessary splits. 
+ +If you only want to shift the subtitle, without introducing splits, you can use `--no-splits`: + +```bash +# synchronizing the subtitles in this mode is very fast +$ alass movie.mp4 incorrect_subtitle.srt output.srt --no-splits +``` + +Currently supported are `.srt`, `.ssa`/`.ass` and `.idx` files. Every common video format is supported for the reference file. + + +## Performance + +The extraction of the audio from a video takes about 10 to 20 seconds. Computing the alignment usually takes between 5 and 10 seconds. + +The alignment is usually very good in my test (subtitles are within 0.1s of the target position). Adjusting the split penalty can help in a few cases if aligning does not work out-of-the box. More extensive testing and statistics will be performed in the future. ## How to compile the binary Install [Rust and Cargo](https://www.rust-lang.org/en-US/install.html) then run: ```bash -# this will create ~/.cargo/bin/aligner -$ cargo install aligner +# this will create the lastest release in ~/.cargo/bin/alass-cli +$ cargo install alass-cli ``` -## How to use the library -Add this to your `Cargo.toml`: -```toml -[dependencies] -aligner = "~0.1.6" +The voice-activity module this project uses is written in C. Therefore a C compiler (`gcc` or `clang`) is needed to compile this project. + +To use `alass-cli` with video files, `ffmpeg` and `ffprobe` have to be installed. It is used to extract the raw audio data. You can set the paths used by `alass` using the environment variables `ALASS_FFMPEG_PATH` (default `ffmpeg`) and `ALASS_FFPROBE_PATH` (default `ffprobe`). 
+ +### Building from Source + +If you want to build and run the project from source code: + +```bash +$ git clone https://github.com/kaegi/alass +$ cd alass +$ cargo build +$ cargo run -- movie.mp4 input.srt output.srt ``` -[Documentation](https://docs.rs/aligner) +### Configuration -[Crates.io](https://crates.io/crates/aligner) +All parameters are shown for `cargo build` can also be used for `cargo install` and `cargo run`. +#### Statistics +You can activate the statistics module inside `alass` using: -## Algorithm +```bash +# Important: you have to be inside `alass-cli`! Otherwise the parameter is ignored. +$ cargo build --features statistics +$ cargo run -- --statistics-path ./some/folder +``` -At the core of the algorithm is the _rating_ of an alignment. For each pair of subtitles (one from the reference subtitle on from the incorrect subtitle) the rating is +This will create the statistics file in `./some/folder`. You can use `--statistics-required-tag` to only generate statistics. -`overlapping_time(refsub, incsub) / max(length(refsub), length(incsub))` +The statistics module allows you to understand/debug the algorihm better. -The maximum of this rating is 1, if and only if `refsub = incsub`. The total rating of an alignment is (for the most part) the sum of all ratings of all possible pairs. By moving the `incsubs` around, we might get a better alignment. As a basic constraint, the order of the `incsubs` will not be changed. So if we have to consecutive subtitles `start(incsubN) <= start(incsubN+1)`, the corrected `'incsubs` will still have `start('incsubN) <= start('incsubN+1)`. +**Warning**: Using this configuration slows down the algorithm by 50% or more _even_ if no statistics files are generated. -If only this formula was used, the algorithm will probably create different offsets for each subtitle line. To avoid that, we have to use `split_penalty` value. 
For each consecutive subtitles where `start(incsubN) - start(incsubN+1) = start('incsubN) - start('incsubN+1)` we add another `split_penalty` to the total rating. That way, with every extra split we lose `split_penalty` of rating. +#### FFmpeg as a library -This algorithm computes the alignment which yields the maximum of all possible ratings. The algorithm is powered by the principle of [dynamic programming](https://en.wikipedia.org/wiki/Dynamic_programming). +You can also link `ffmpeg` as a dynamic library during compile time. The library implementation can extract the audio about 2 to 3 seconds faster. Unfortunately it is harder to compile, the error handling is only very basic and might still have bugs. -To simplify the problem, we assume `start(sub)` is the start timestamp in milliseconds of a subtitle line `sub` and `0 <= start(sub)` is true. Let's say `get_rating(t, n)` computes the best rating/alignment for the first `n` incorrect subtitle lines with the additional constraint `0 <= start(sub) <= t` for each of these `n` subtitles. +You have to remove "`# FFMPEG-LIB`" from every line that starts with it in `alass-cli/Cargo.toml`. Then use: -Of course we can now simply set `get_rating(t, 0) = 0` because if we have no incorrect subtitles to align, we have a rating of zero (independent of `t`). +```bash +# Important: you have to be inside `alass-cli`! Otherwise the parameters get ignored. +$ cargo build --no-default-features --features ffmpeg-library +``` -Now we handle the case `get_rating(0, 1)`. We can simply compute the overlapping rating (where the first incorrect subtitle starts at the "zero timepoint") with every reference subtitle and add up these values. With `get_rating(1, 1)` things get interesting. We can either have `start(sub) = 0` or `start(sub) = 1`. Fortunaly we already have `get_rating(0, 1)`, so we only need the rating where `start(sub) = 1`. This can be computed by adding up all overlapping ratings. 
Similarly we can compute `get_rating(2, 1)` by taking the maximum of `get_rating(1, 1)` and the rating where `start(sub) = 2`. In this vein we can create `get_rating(t+1, 1)` from `get_rating(t, 1)`. We can also speed up computing the overlapping rating, because the subtitle line will only be shifted by 1ms from `start(sub) = t` to `start(sub) = t + 1`. The subtitle will lose the rating for the segment `[t, t + 1]` and gain the overlapping rating for the segment `[t + length(sub), t + 1 + length(sub)]` on the other side. By creating a lookup-table for reference subtitles for every `t` this process has a runtime of `O(1)`. -When do we stop? Well, the rating won't change anymore if `t` gets big. At the latest when `start(sub)` is be greater than any of the reference subtitle lines, because after that the overlapping rating will always be zero. Let's call `max_t` the timepoint where all incorrect subtitles have been moved behind the reference subtitles. The best total rating is then `get_rating(max_t, number_of_incorrect_subtitles)`. +### Alias Setup -Now we have all `get_rating(0, 1)` to `get_rating(max_t, 1)`. To compute `get_rating(0, 2)`, which means that `0 <= start(sub0) <= 0` and `0 <= start(sub1) <= 0`. We already have the rating for `sub0` in form of `get_rating(0, 1)`. We only need to add the overlapping rating for `sub1`. To get `get_rating(1, 2)` we can either use `get_rating(0, 2)` (we leave `sub1` where it is), or move `start(sub1)` to 1, which allows `start(sub0)` to be in `0 <= start(sub0) <= start(sub1) = 1`. The best rating for `sub0` for that range has been computed with `get_rating(1, 1)`, we only need to add the overlapping rating for `start(sub1) == 1`. We proceed similarly to get `get_rating(t+1, 2)`: leave the `sub1` like it was for `get_rating(t,2)` or reposition the subtitle to `start(sub1) == t+1` and use `get_rating(t+1,1) + overlapping_rating(sub1,t+1)`. 
+*For Linux users:* It is recommended to add the folder path to your system path as well as setup an alias for `alass` to `alass-cli`. Add this to your `~/.bashrc` (or the setup file of your favorite shell): -With the same principle we proceed with `subN`: +```bash +export PATH="$PATH:$HOME/.cargo/bin" +alias alass="alass-cli" +``` -- initialize `get_rating(0, n) = get_rating(0, n - 1) + overlapping_rating(subN, 0)` -- choose for `get_rating(t+1, n)` the maximum of - - `get_rating(t, n)` which means "leaving `subN`" and - - `get_rating(t+1, n-1) + overlapping_rating(t+1, subN)` which means repositioning the `subN` +## Folder structure +This `cargo` workspace contains two projects: -Until now we didn't use the `split_penalty`. We need to add the split penalty when `start(subN) - start(subN+1)` is a specific value (the original distance `diff(N)`). The trick here is seeing that we only need to consider the "repostion choice". The only time `get_rating(_, n-1)` is consulted after the inital phase is when `subN` gets repositioned. `subN` will then start at `t+1` and we consult `get_rating(t+1, n-1)`. So if `subN-1` were positioned at `t+1-diff(N-1)` for `get_rating(t+1-diff(N-1), n-1)` we'd be able to get the `split_penalty`. This is exactly the thing we will do when we are in a phase `n`: We will not only have the "leave choice" or "reposition choice" but also the "nosplit choice". If we compute `get_rating(t, n)`, we can also compare the two other values with `get_rating(t-diffN, n-1) + overlapping_rating(t-diffN, subN) + split_penalty`. The `get_rating(t-diffN, n-1) + overlapping_rating(t-diffN, subN)` is again the best rating where `start(subN) = t-diffN`. We are allowed to add the `split_penalty` because in the next phase `n+1`, `subN+1` will start at `t` when `get_rating(t-diffN, n)` is looked up. So the final rating algorithm is: + - `alass-core` which provides the algorithm + + It is targeted at *developers* who want to use the same algorithm in their project. 
-- initialize `get_rating(t, 0)` with 0 -- initialize `get_rating(0, n) = get_rating(0, n - 1) + overlapping_rating(subN, 0)` -- choose for `get_rating(t+1, n)` the maximum of - - `get_rating(t, n)` which means "leaving `subN`" and - - `get_rating(t+1, n-1) + overlapping_rating(t+1, subN)` which means repositioning the `subN` and - - `get_rating(t+1-diffN, n-1) + overlapping_rating(t+1-diffN, subN) + split_penalty` which means doing a nosplit-repositioning for `subN` + - `alass-cli` which is the official command line tool + It is targeted at *end users* who want to correct their subtitles. -To get the final alignment, we save for each phase `n` and `t+1` where `subN` was positioned (can be `t+1`, `t+1-diffN` or the previous position). If we look up that value for `n = number_of_incorrect_subtitles` and `t = max_t`, we know where the last subtitles `subN` has to be. We then know `start(subN)`. The best alignment of all previous subtitles is then computed with `get_rating(start(subN), n-1)`. So we look up the position for `subN-1` in that table with `n' = n-1` and `t' = start(subN)`. That way we get all corrected positions of all incorrect subtitles and are done! +## Library Documentation
But as it turns out we can compress that table in under 2 MB (most of the time; probably the best compression I've ever seen) with `delta encoding`. The empirical foundation is that the choices almost never change from one `t` to `t+1` (about 10 to 1000 times for one phase). If we always take the +[Open README](./alass-core/README.md) from `alass-core`. -- "leave choice", the position will always be `t+1` for every `t+1` (rise by 1) -- "nosplit-reposition choice", the position will always be `t+1-diffN` (rise by 1) -- "reposition choice", the position won't change from `t - 1` (constant) +## Notes -So if we store values in a `(start, delta, length)` tuple, where the first uncompressed value is `start + 0 * delta`, the second is `start + 1 * delta`, the third is `start + 2 * delta`, ..., the last is `start + length * delta`, we can compress an entire phase into a few bytes. The same thing is applicable to the ratings. Without going into details: if we take the overlapping rating of a incorrect subtitle to a reference subtitle and "move" the incorrect subtitle from the far left to the far right we will have five segments of compressed values (first the rating will be zero, then rise linearly, then be constant, then fall linearly, then be zero again). The comparisons/choices can then be done for rating segments instead of single `t`. This yields a speedup of at least one order of magnitude. +This program was called `aligner` in the past. This made it nearly impossible to find on a search engine, so `alass` was chosen instead. 
\ No newline at end of file diff --git a/alass-cli/Cargo.toml b/alass-cli/Cargo.toml new file mode 100644 index 0000000..e37bda2 --- /dev/null +++ b/alass-cli/Cargo.toml @@ -0,0 +1,44 @@ +[package] +name = "alass-cli" +version = "1.0.0" +authors = ["kaegi "] +description = "Automatic Language-Agnostic Subtitle Synchronization (Command Line Tool)" +repository = "https://github.com/kaegi/alass" +documentation = "https://docs.rs/alass-cli" +readme = "../README.md" +keywords = ["align", "subtitle", "automatic", "api", "tool"] +license = "GPL-3.0" +edition = "2018" + +[features] +default = ["ffmpeg-binary"] + +# this enables writing of statistics but incurs a performance penalty +# of 15% even when not used by the user of the program +statistics = ["alass-core/statistics"] + +# use exactly one of these two features +ffmpeg-binary = ["byteorder"] +# FFMPEG-LIB ffmpeg-library = ["ffmpeg-sys"] + + +[dependencies] +alass-core = { version = "1.0.0", path = "../alass-core" } +webrtc-vad = { version = "0.1.0", path = "../../webrtc-vad" } +subparse = { version = "0.6.0", path = "../../subparse" } + +clap = "2.33.0" +pbr = "1.0.0-alpha.2" +encoding_rs = "0.8.17" +libc = "0.2.60" +failure = "0.1.5" +log = "0.4.8" +env_logger = "0.6.2" +serde_json = "1.0.40" +serde = { version = "1.0.98", features = ["derive"] } +byteorder = { version = "1.3.2", optional = true } + +# FFMPEG-LIB [dependencies.ffmpeg-sys] +# FFMPEG-LIB optional = true +# FFMPEG-LIB git = "https://github.com/meh/rust-ffmpeg-sys" +# FFMPEG-LIB rev = "4f14151b9b8134f1f029d49d02cbea5c7337dedb" diff --git a/alass-cli/src/errors.rs b/alass-cli/src/errors.rs new file mode 100644 index 0000000..77e88fb --- /dev/null +++ b/alass-cli/src/errors.rs @@ -0,0 +1,215 @@ +// This file is part of the Rust library and binary `alass`. 
+// +// Copyright (C) 2017 kaegi +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use failure::{Backtrace, Context, Fail}; +use std::fmt; +use std::path::PathBuf; +use subparse::SubtitleFormat; + +#[macro_export] +macro_rules! define_error { + ($error:ident, $errorKind:ident) => { + #[derive(Debug)] + pub(crate) struct $error { + inner: Context<$errorKind>, + } + + impl Fail for $error { + fn name(&self) -> Option<&str> { + self.inner.name() + } + + fn cause(&self) -> Option<&Fail> { + self.inner.cause() + } + + fn backtrace(&self) -> Option<&Backtrace> { + self.inner.backtrace() + } + } + + impl fmt::Display for $error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + fmt::Display::fmt(&self.inner, f) + } + } + + #[allow(dead_code)] + impl $error { + pub fn kind(&self) -> &$errorKind { + self.inner.get_context() + } + } + + #[allow(dead_code)] + impl $errorKind { + pub fn into_error(self) -> $error { + $error { + inner: Context::new(self), + } + } + } + + impl From<$errorKind> for $error { + fn from(kind: $errorKind) -> $error { + $error { + inner: Context::new(kind), + } + } + } + + impl From> for $error { + fn from(inner: Context<$errorKind>) -> $error { + $error { inner: inner } + } + } + }; +} + +define_error!(InputFileError, InputFileErrorKind); + +#[derive(Clone, Eq, PartialEq, Debug, Fail)] +pub(crate) enum InputFileErrorKind { + VideoFile(PathBuf), + SubtitleFile(PathBuf), 
+} + +impl fmt::Display for InputFileErrorKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + InputFileErrorKind::VideoFile(p) => write!(f, "processing video file '{}' failed", p.display()), + InputFileErrorKind::SubtitleFile(p) => write!(f, "processing subtitle file '{}' failed", p.display()), + } + } +} + +define_error!(FileOperationError, FileOperationErrorKind); + +#[derive(Clone, Eq, PartialEq, Debug, Fail)] +pub(crate) enum FileOperationErrorKind { + FileOpen { path: PathBuf }, + FileRead { path: PathBuf }, + FileWrite { path: PathBuf }, +} + +impl fmt::Display for FileOperationErrorKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FileOperationErrorKind::FileOpen { path } => write!(f, "failed to open file '{}'", path.display()), + FileOperationErrorKind::FileRead { path } => write!(f, "failed to read file '{}'", path.display()), + FileOperationErrorKind::FileWrite { path } => write!(f, "failed to write file '{}'", path.display()), + } + } +} + +define_error!(InputVideoError, InputVideoErrorKind); + +#[derive(Clone, Eq, PartialEq, Debug, Fail)] +pub(crate) enum InputVideoErrorKind { + FailedToDecode { path: PathBuf }, + VadCreationFailed, + VadAnalysisFailed, +} + +impl fmt::Display for InputVideoErrorKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + InputVideoErrorKind::FailedToDecode { path } => { + write!(f, "failed to extract voice segments from file '{}'", path.display()) + } + InputVideoErrorKind::VadCreationFailed => write!(f, "failed to create voice-activity-detection module"), + InputVideoErrorKind::VadAnalysisFailed => write!(f, "failed to analyse audio segment for voice activity"), + } + } +} + +define_error!(InputSubtitleError, InputSubtitleErrorKind); + +#[derive(Clone, Eq, PartialEq, Debug, Fail)] +pub(crate) enum InputSubtitleErrorKind { + ReadingSubtitleFileFailed(PathBuf), + UnknownSubtitleFormat(PathBuf), + ParsingSubtitleFailed(PathBuf), + 
RetreivingSubtitleLinesFailed(PathBuf), +} + +impl fmt::Display for InputSubtitleErrorKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + InputSubtitleErrorKind::ReadingSubtitleFileFailed(path) => { + write!(f, "reading subtitle file '{}' failed", path.display()) + } + InputSubtitleErrorKind::UnknownSubtitleFormat(path) => { + write!(f, "unknown subtitle format for file '{}'", path.display()) + } + InputSubtitleErrorKind::ParsingSubtitleFailed(path) => { + write!(f, "parsing subtitle file '{}' failed", path.display()) + } + InputSubtitleErrorKind::RetreivingSubtitleLinesFailed(path) => { + write!(f, "retrieving subtitle file '{}' failed", path.display()) + } + } + } +} + +define_error!(InputArgumentsError, InputArgumentsErrorKind); + +#[derive(Clone, PartialEq, Debug, Fail)] +pub(crate) enum InputArgumentsErrorKind { + #[fail( + display = "expected value '{}' to be in range '{}'-'{}', found value '{}'", + argument_name, min, max, value + )] + ValueNotInRange { + argument_name: String, + min: f64, + max: f64, + value: f64, + }, + #[fail(display = "expected positive number for '{}', found '{}'", argument_name, value)] + ExpectedPositiveNumber { argument_name: String, value: i64 }, + + #[fail(display = "expected non-negative number for '{}', found '{}'", argument_name, value)] + ExpectedNonNegativeNumber { argument_name: String, value: f64 }, + + #[fail(display = "argument '{}' with value '{}' could not be parsed", argument_name, value)] + ArgumentParseError { argument_name: String, value: String }, +} + +define_error!(TopLevelError, TopLevelErrorKind); + +pub(crate) enum TopLevelErrorKind { + FileFormatMismatch { + input_file_path: PathBuf, + output_file_path: PathBuf, + input_file_format: SubtitleFormat, + }, + FailedToUpdateSubtitle, + FailedToGenerateSubtitleData, + FailedToInstantiateSubtitleFile, +} + +impl fmt::Display for TopLevelErrorKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + 
TopLevelErrorKind::FileFormatMismatch { input_file_path, output_file_path, input_file_format } => write!(f, "output file '{}' seems to have a different format than input file '{}' with format '{}' (this program does not perform conversions)", output_file_path.display(), input_file_path.display(), input_file_format.get_name()), + TopLevelErrorKind::FailedToUpdateSubtitle => write!(f, "failed to change lines in the subtitle"), + TopLevelErrorKind::FailedToGenerateSubtitleData => write!(f, "failed to generate data for subtitle"), + TopLevelErrorKind::FailedToInstantiateSubtitleFile => write!(f, "failed to instantiate subtitle file"), + } + } +} diff --git a/alass-cli/src/main.rs b/alass-cli/src/main.rs new file mode 100644 index 0000000..0561071 --- /dev/null +++ b/alass-cli/src/main.rs @@ -0,0 +1,818 @@ +// This file is part of the Rust library and binary `alass`. +// +// Copyright (C) 2017 kaegi +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +#![allow(unknown_lints)] // for clippy + +// TODO: search for unsafe, panic, unimplemented + +extern crate clap; +extern crate encoding_rs; +extern crate pbr; +extern crate subparse; + +const PKG_VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION"); +const PKG_NAME: Option<&'static str> = option_env!("CARGO_PKG_NAME"); +const PKG_DESCRIPTION: Option<&'static str> = option_env!("CARGO_PKG_DESCRIPTION"); + +// Alg* stands for algorithm (the internal alass algorithm types) + +use crate::subparse::SubtitleFileInterface; + +use alass_core::{align, Statistics, TimeDelta as AlgTimeDelta, TimePoint as AlgTimePoint, TimeSpan as AlgTimeSpan}; +use clap::{App, Arg}; +use encoding_rs::Encoding; +use encoding_rs::UTF_8; +use failure::ResultExt; +use pbr::ProgressBar; +use std::cmp::{max, min}; +use std::ffi::OsStr; +use std::fs::File; +use std::io::{Read, Write}; +use std::path::{Path, PathBuf}; +use std::result::Result; +use std::str::FromStr; + +mod video_decoder; + +mod errors; +use errors::*; + +// subparse +use subparse::timetypes::*; +use subparse::{get_subtitle_format_err, parse_bytes, SubtitleEntry, SubtitleFile, SubtitleFormat}; + +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +enum VideoFileFormat { + /// we don't need to differentiate between video file formats in current code + NotImplemented, +} + +#[derive(Debug, Clone, PartialEq, Eq, Copy)] +enum InputFileFormat { + Subtitle(subparse::SubtitleFormat), + Video(VideoFileFormat), +} + +impl InputFileFormat { + fn subtitle_format(&self) -> Option { + match self { + InputFileFormat::Subtitle(f) => Some(*f), + _ => None, + } + } +} + +struct ProgressInfo { + prescaler: i64, + counter: i64, + progress_bar: Option>, +} + +impl ProgressInfo { + fn new(prescaler: i64) -> ProgressInfo { + ProgressInfo { + prescaler, + counter: 0, + progress_bar: None, + } + } +} + +impl alass_core::ProgressHandler for ProgressInfo { + fn init(&mut self, steps: i64) { + self.progress_bar = Some(ProgressBar::new(steps as u64)); 
+ } + fn inc(&mut self) { + self.counter = self.counter + 1; + if self.counter == self.prescaler { + self.progress_bar.as_mut().unwrap().inc(); + self.counter = 0; + } + } + fn finish(&mut self) { + self.progress_bar.as_mut().unwrap().finish_println("\n"); + } +} + +impl video_decoder::ProgressHandler for ProgressInfo { + fn init(&mut self, steps: i64) { + self.progress_bar = Some(ProgressBar::new((steps / self.prescaler) as u64)); + } + fn inc(&mut self) { + self.counter = self.counter + 1; + if self.counter == self.prescaler { + self.progress_bar.as_mut().unwrap().inc(); + self.counter = 0; + } + } + fn finish(&mut self) { + self.progress_bar.as_mut().unwrap().finish_println("\n"); + } +} + +fn read_file_to_bytes(path: &Path) -> std::result::Result, FileOperationError> { + let mut file = File::open(path).with_context(|_| FileOperationErrorKind::FileOpen { + path: path.to_path_buf(), + })?; + let mut v = Vec::new(); + file.read_to_end(&mut v) + .with_context(|_| FileOperationErrorKind::FileRead { + path: path.to_path_buf(), + })?; + Ok(v) +} + +fn write_data_to_file(path: &Path, d: Vec) -> std::result::Result<(), FileOperationError> { + let mut file = File::create(path).with_context(|_| FileOperationErrorKind::FileOpen { + path: path.to_path_buf(), + })?; + file.write_all(&d).with_context(|_| FileOperationErrorKind::FileWrite { + path: path.to_path_buf(), + })?; + Ok(()) +} + +fn timing_to_alg_timepoint(t: TimePoint, interval: i64) -> AlgTimePoint { + assert!(interval > 0); + AlgTimePoint::from(t.msecs() / interval) +} + +fn alg_delta_to_delta(t: AlgTimeDelta, interval: i64) -> TimeDelta { + assert!(interval > 0); + let time_int: i64 = t.into(); + TimeDelta::from_msecs(time_int * interval) +} + +fn timings_to_alg_timespans(v: &[TimeSpan], interval: i64) -> Vec { + v.iter() + .cloned() + .map(|timespan| { + AlgTimeSpan::new_safe( + timing_to_alg_timepoint(timespan.start, interval), + timing_to_alg_timepoint(timespan.end, interval), + ) + }) + .collect() +} +fn 
alg_deltas_to_timing_deltas(v: &[AlgTimeDelta], interval: i64) -> Vec { + v.iter().cloned().map(|x| alg_delta_to_delta(x, interval)).collect() +} + +/// Groups consecutive timespans with the same delta together. +fn get_subtitle_delta_groups(mut v: Vec<(AlgTimeDelta, TimeSpan)>) -> Vec<(AlgTimeDelta, Vec)> { + v.sort_by_key(|t| min((t.1).start, (t.1).end)); + + let mut result: Vec<(AlgTimeDelta, Vec)> = Vec::new(); + + for (delta, original_timespan) in v { + let mut new_block = false; + + if let Some(last_tuple_ref) = result.last_mut() { + if delta == last_tuple_ref.0 { + last_tuple_ref.1.push(original_timespan); + } else { + new_block = true; + } + } else { + new_block = true; + } + + if new_block { + result.push((delta, vec![original_timespan])); + } + } + + result +} + +/// Does reading, parsing and nice error handling for a f64 clap parameter. +fn unpack_clap_number_f64( + matches: &clap::ArgMatches, + parameter_name: &'static str, +) -> Result { + let paramter_value_str: &str = matches.value_of(parameter_name).unwrap(); + f64::from_str(paramter_value_str) + .with_context(|_| { + InputArgumentsErrorKind::ArgumentParseError { + argument_name: parameter_name.to_string(), + value: paramter_value_str.to_string(), + } + .into() + }) + .map_err(|e| InputArgumentsError::from(e)) +} + +/// Does reading, parsing and nice error handling for an i64 clap parameter. 
+fn unpack_clap_number_i64( + matches: &clap::ArgMatches, + parameter_name: &'static str, +) -> Result { + let paramter_value_str: &str = matches.value_of(parameter_name).unwrap(); + i64::from_str(paramter_value_str) + .with_context(|_| { + InputArgumentsErrorKind::ArgumentParseError { + argument_name: parameter_name.to_string(), + value: paramter_value_str.to_string(), + } + .into() + }) + .map_err(|e| InputArgumentsError::from(e)) +} + +fn get_encoding(opt: Option<&str>) -> &'static Encoding { + match opt { + None => UTF_8, + Some(label) => { + match Encoding::for_label(label.as_bytes()) { + None => { + panic!("{} is not a known encoding label; exiting.", label); // TODO: error handling + } + Some(encoding) => encoding, + } + } + } +} + +struct InputFileHandler { + subtitle_file: Option, + file_format: InputFileFormat, + subparse_timespans: Vec, + aligner_timespans: Vec, +} + +impl InputFileHandler { + pub fn open( + file_path: &Path, + interval: i64, + sub_encoding: &'static Encoding, + sub_fps: f64, + ) -> Result { + let known_subitle_endings: [&str; 6] = ["srt", "vob", "idx", "ass", "ssa", "sub"]; + + let extension: Option<&OsStr> = file_path.extension(); + + for subtitle_ending in known_subitle_endings.into_iter() { + if extension == Some(OsStr::new(subtitle_ending)) { + return Ok(Self::open_sub_file(file_path, interval, sub_encoding, sub_fps) + .with_context(|_| InputFileErrorKind::SubtitleFile(file_path.to_path_buf()))?); + } + } + + return Ok(Self::open_video_file(file_path, interval) + .with_context(|_| InputFileErrorKind::VideoFile(file_path.to_path_buf()))?); + } + + pub fn open_video_file(file_path: &Path, interval: i64) -> Result { + //video_decoder::VideoDecoder::decode(file_path, ); + use webrtc_vad::*; + + struct WebRtcFvad { + fvad: Vad, + vad_buffer: Vec, + } + + impl video_decoder::AudioReceiver for WebRtcFvad { + type Output = Vec; + type Error = InputVideoError; + + fn push_samples(&mut self, samples: &[i16]) -> Result<(), InputVideoError> { + 
// the chunked audio receiver should only provide 10ms of 8000kHz -> 80 samples + assert!(samples.len() == 80); + + let is_voice = self + .fvad + .is_voice_segment(samples) + .map_err(|_| InputVideoErrorKind::VadAnalysisFailed)?; + + self.vad_buffer.push(is_voice); + + Ok(()) + } + + fn finish(self) -> Result, InputVideoError> { + Ok(self.vad_buffer) + } + } + + let vad_processor = WebRtcFvad { + fvad: Vad::new(8000).map_err(|_| InputVideoErrorKind::VadCreationFailed)?, + vad_buffer: Vec::new(), + }; + + let chunk_processor = video_decoder::ChunkedAudioReceiver::new(80, vad_processor); + + println!("extracting audio from reference file '{}'...", file_path.display()); + let vad_buffer = video_decoder::VideoDecoder::decode(file_path, chunk_processor, ProgressInfo::new(500)) + .with_context(|_| InputVideoErrorKind::FailedToDecode { + path: PathBuf::from(file_path), + })?; + + let mut voice_segments: Vec<(i64, i64)> = Vec::new(); + let mut voice_segment_start: i64 = 0; + let mut last_was_voice_segment = false; + + for (i, is_voice_segment) in vad_buffer.into_iter().chain(std::iter::once(false)).enumerate() { + match (last_was_voice_segment, is_voice_segment) { + (false, false) | (true, true) => {} + (false, true) => { + voice_segment_start = i as i64; + } + (true, false) => { + voice_segments.push((voice_segment_start, i as i64 - 1)); + } + } + + last_was_voice_segment = is_voice_segment; + } + + let min_span_length_ms = 200; + + let subparse_timespans: Vec = voice_segments + .into_iter() + .filter(|&(start, end)| start + min_span_length_ms / 10 < end) + .map(|(start, end)| { + subparse::timetypes::TimeSpan::new( + subparse::timetypes::TimePoint::from_msecs(start * 10), + subparse::timetypes::TimePoint::from_msecs(end * 10), + ) + }) + .collect(); + + let aligner_timespans: Vec = timings_to_alg_timespans(&subparse_timespans, interval); + + Ok(InputFileHandler { + file_format: InputFileFormat::Video(VideoFileFormat::NotImplemented), + subparse_timespans, + 
aligner_timespans, + subtitle_file: None, + }) + } + + pub fn open_sub_file( + file_path: &Path, + interval: i64, + sub_encoding: &'static Encoding, + sub_fps: f64, + ) -> Result { + let sub_data = read_file_to_bytes(file_path.as_ref()) + .with_context(|_| InputSubtitleErrorKind::ReadingSubtitleFileFailed(file_path.to_path_buf()))?; + + let file_format = get_subtitle_format_err(file_path.extension(), &sub_data) + .with_context(|_| InputSubtitleErrorKind::UnknownSubtitleFormat(file_path.to_path_buf()))?; + + let parsed_subtitle_data: SubtitleFile = parse_bytes(file_format, &sub_data, sub_encoding, sub_fps) + .with_context(|_| InputSubtitleErrorKind::ParsingSubtitleFailed(file_path.to_path_buf()))?; + + let subparse_timespans: Vec = parsed_subtitle_data + .get_subtitle_entries() + .with_context(|_| InputSubtitleErrorKind::RetreivingSubtitleLinesFailed(file_path.to_path_buf()))? + .into_iter() + .map(|subentry| subentry.timespan) + .map(|timespan: subparse::timetypes::TimeSpan| { + TimeSpan::new(min(timespan.start, timespan.end), max(timespan.start, timespan.end)) + }) + .collect(); + + let aligner_timespans: Vec = timings_to_alg_timespans(&subparse_timespans, interval); + + Ok(InputFileHandler { + file_format: InputFileFormat::Subtitle(file_format), + subparse_timespans, + aligner_timespans, + subtitle_file: Some(parsed_subtitle_data), + }) + } + + pub fn into_subtitle_file(self) -> Option { + self.subtitle_file + } + + pub fn subparse_timespans(&self) -> &[subparse::timetypes::TimeSpan] { + self.subparse_timespans.as_slice() + } + + pub fn aligner_timespans(&self) -> &[alass_core::TimeSpan] { + self.aligner_timespans.as_slice() + } + + pub fn file_format(&self) -> InputFileFormat { + self.file_format + } +} + +// ////////////////////////////////////////////////////////////////////////////////////////////////// + +struct Arguments { + reference_file_path: PathBuf, + incorrect_file_path: PathBuf, + output_file_path: PathBuf, + + statistics_folder_path_opt: Option, + 
statistics_required_tags: Vec, + + interval: i64, + + split_penalty: f64, + + sub_fps_inc: f64, + sub_fps_ref: f64, + + allow_negative_timestamps: bool, + encoding_ref: &'static Encoding, + encoding_inc: &'static Encoding, + + no_split_mode: bool, + speed_optimization: Option, +} + +fn parse_args() -> Result { + let matches = App::new(PKG_NAME.unwrap_or("unkown (not compiled with cargo)")) + .version(PKG_VERSION.unwrap_or("unknown (not compiled with cargo)")) + .about(PKG_DESCRIPTION.unwrap_or("unknown (not compiled with cargo)")) + .arg(Arg::with_name("reference-file") + .help("Path to the reference subtitle or video file") + .required(true)) + .arg(Arg::with_name("incorrect-sub-file") + .help("Path to the incorrect subtitle file") + .required(true)) + .arg(Arg::with_name("output-file-path") + .help("Path to corrected subtitle file") + .required(true)) + .arg(Arg::with_name("split-penalty") + .short("p") + .long("split-penalty") + .value_name("floating point number from 0 to 100") + .help("Determines how eager the algorithm is to avoid splitting of the subtitles. 100 means that all lines will be shifted by the same offset, while 0 will produce MANY segments with different offsets. Values from 0.1 to 20 are the most useful.") + .default_value("4")) + .arg(Arg::with_name("interval") + .short("i") + .long("interval") + .value_name("integer in milliseconds") + .help("The smallest recognized time interval, smaller numbers make the alignment more accurate, greater numbers make aligning faster.") + .default_value("1")) + .arg(Arg::with_name("allow-negative-timestamps") + .short("n") + .long("allow-negative-timestamps") + .help("Negative timestamps can lead to problems with the output file, so by default 0 will be written instead. 
This option allows you to disable this behavior.")) + .arg(Arg::with_name("sub-fps-ref") + .long("sub-fps-ref") + .value_name("floating-point number in frames-per-second") + .default_value("30") + .help("Specifies the frames-per-second for the accompanying video of MicroDVD `.sub` files (MicroDVD `.sub` files store timing information as frame numbers). Only affects the reference subtitle file.")) + .arg(Arg::with_name("sub-fps-inc") + .long("sub-fps-inc") + .value_name("floating-point number in frames-per-second") + .default_value("30") + .help("Specifies the frames-per-second for the accompanying video of MicroDVD `.sub` files (MicroDVD `.sub` files store timing information as frame numbers). Only affects the incorrect subtitle file.")) + .arg(Arg::with_name("encoding-ref") + .long("encoding-ref") + .value_name("encoding") + .help("Charset encoding of the reference subtitle file.")) + .arg(Arg::with_name("encoding-inc") + .long("encoding-inc") + .value_name("encoding") + .help("Charset encoding of the incorrect subtitle file.")) + .arg(Arg::with_name("statistics-path") + .long("statistics-path") + .short("s") + .value_name("path") + .help("enable statistics and put files in the specified folder") + .required(false) + ) + .arg(Arg::with_name("speed-optimization") + .long("speed-optimization") + .short("O") + .value_name("path") + .default_value("2") + .help("(greatly) speeds up synchronization by sacrificing some accuracy; set to 0 to disable speed optimization") + .required(false) + ) + .arg(Arg::with_name("statistics-required-tag") + .long("statistics-required-tag") + .short("t") + .value_name("tag") + .help("only output statistics containing this tag (you can find the tags in statistics file)") + .required(false) + ) + .arg(Arg::with_name("no-split") + .help("synchronize subtitles without looking for splits/breaks - this mode is much faster") + .short("l") + .long("no-split") + ) + .after_help("This program works with .srt, .ass/.ssa, .idx and .sub files. 
The corrected file will have the same format as the incorrect file.") + .get_matches(); + + let reference_file_path: PathBuf = matches.value_of("reference-file").unwrap().into(); + let incorrect_file_path: PathBuf = matches.value_of("incorrect-sub-file").unwrap().into(); + let output_file_path: PathBuf = matches.value_of("output-file-path").unwrap().into(); + + let statistics_folder_path_opt: Option = matches.value_of("statistics-path").map(|v| PathBuf::from(v)); + let statistics_required_tags: Vec = matches + .values_of("statistics-required-tag") + .map(|iter| iter.map(|s| s.to_string()).collect::>()) + .unwrap_or_else(|| Vec::new()); + + let interval: i64 = unpack_clap_number_i64(&matches, "interval")?; + if interval < 1 { + return Err(InputArgumentsErrorKind::ExpectedPositiveNumber { + argument_name: "interval".to_string(), + value: interval, + } + .into()); + } + + let split_penalty: f64 = unpack_clap_number_f64(&matches, "split-penalty")?; + if split_penalty < 0.0 || split_penalty > 100.0 { + return Err(InputArgumentsErrorKind::ValueNotInRange { + argument_name: "split-penalty".to_string(), + value: split_penalty, + min: 0.0, + max: 100.0, + } + .into()); + } + + let speed_optimization: f64 = unpack_clap_number_f64(&matches, "speed-optimization")?; + if speed_optimization < 0.0 { + return Err(InputArgumentsErrorKind::ExpectedNonNegativeNumber { + argument_name: "speed-optimization".to_string(), + value: speed_optimization, + } + .into()); + } + + let no_split_mode: bool = matches.is_present("no-split"); + + Ok(Arguments { + reference_file_path, + incorrect_file_path, + output_file_path, + statistics_folder_path_opt, + statistics_required_tags, + interval, + split_penalty, + sub_fps_ref: unpack_clap_number_f64(&matches, "sub-fps-ref")?, + sub_fps_inc: unpack_clap_number_f64(&matches, "sub-fps-inc")?, + allow_negative_timestamps: matches.is_present("allow-negative-timestamps"), + encoding_ref: get_encoding(matches.value_of("encoding-ref")), + encoding_inc: 
get_encoding(matches.value_of("encoding-inc")), + no_split_mode, + speed_optimization: if speed_optimization <= 0. { + None + } else { + Some(speed_optimization) + }, + }) +} + +// ////////////////////////////////////////////////////////////////////////////////////////////////// + +fn run() -> Result<(), failure::Error> { + let args = parse_args()?; + + let ref_file = InputFileHandler::open( + &args.reference_file_path, + args.interval, + args.encoding_ref, + args.sub_fps_ref, + )?; + + if args.incorrect_file_path.eq(OsStr::new("_")) { + // DEBUG MODE FOR REFERENCE FILE WAS ACTIVATED + + println!("input file path was given as '_'"); + println!("the output file is a .srt file only containing timing information from the reference file"); + println!("this can be used as a debugging tool"); + println!(); + + let lines: Vec<(subparse::timetypes::TimeSpan, String)> = ref_file + .subparse_timespans() + .iter() + .cloned() + .enumerate() + .map(|(i, time_span)| (time_span, format!("line {}", i))) + .collect(); + + let debug_file = + subparse::SrtFile::create(lines).with_context(|_| TopLevelErrorKind::FailedToInstantiateSubtitleFile)?; + + write_data_to_file( + &args.output_file_path, + debug_file.to_data().unwrap(), // error handling + )?; + + return Ok(()); + } + + let inc_file = InputFileHandler::open_sub_file( + args.incorrect_file_path.as_path(), + args.interval, + args.encoding_inc, + args.sub_fps_inc, + )?; + + let output_file_format; + + match inc_file.file_format() { + InputFileFormat::Subtitle(f) => output_file_format = f, + InputFileFormat::Video(_) => unreachable!(), + } + + // this program internally stores the files in a non-destructable way (so + // formatting is preserved) but has no abilty to convert between formats + if !subparse::is_valid_extension_for_subtitle_format(args.output_file_path.extension(), output_file_format) { + return Err(TopLevelErrorKind::FileFormatMismatch { + input_file_path: args.incorrect_file_path, + output_file_path: 
args.output_file_path, + input_file_format: inc_file.file_format().subtitle_format().unwrap(), + } + .into_error() + .into()); + } + + let statistics_module_opt: Option; + if let Some(statistics_folder_path) = args.statistics_folder_path_opt { + statistics_module_opt = Some(Statistics::new(statistics_folder_path, args.statistics_required_tags)); + } else { + statistics_module_opt = None; + } + + println!( + "synchronizing '{}' to reference file '{}'...", + args.incorrect_file_path.display(), + args.reference_file_path.display() + ); + let alg_deltas; + if args.no_split_mode { + let alg_delta = alass_core::align_nosplit( + inc_file.aligner_timespans().to_vec(), + ref_file.aligner_timespans().to_vec(), + Some(Box::new(ProgressInfo::new(1))), + statistics_module_opt, + ); + alg_deltas = std::vec::from_elem(alg_delta, inc_file.aligner_timespans().len()); + } else { + alg_deltas = align( + inc_file.aligner_timespans().to_vec(), + ref_file.aligner_timespans().to_vec(), + args.split_penalty / 100.0, + args.speed_optimization, + Some(Box::new(ProgressInfo::new(1))), + statistics_module_opt, + ); + } + let deltas = alg_deltas_to_timing_deltas(&alg_deltas, args.interval); + + // group subtitles lines which have the same offset + let shift_groups: Vec<(AlgTimeDelta, Vec)> = get_subtitle_delta_groups( + alg_deltas + .iter() + .cloned() + .zip(inc_file.subparse_timespans().iter().cloned()) + .collect(), + ); + + for (shift_group_delta, shift_group_lines) in shift_groups { + // computes the first and last timestamp for all lines with that delta + // -> that way we can provide the user with an information like + // "100 subtitles with 10min length" + let min = shift_group_lines + .iter() + .map(|subline| subline.start) + .min() + .expect("a subtitle group should have at least one subtitle line"); + let max = shift_group_lines + .iter() + .map(|subline| subline.start) + .max() + .expect("a subtitle group should have at least one subtitle line"); + + println!( + "shifted block of 
{} subtitles with length {} by {}", + shift_group_lines.len(), + max - min, + alg_delta_to_delta(shift_group_delta, args.interval) + ); + } + + println!(); + + if ref_file.subparse_timespans().is_empty() { + println!("warn: reference file has no subtitle lines"); + println!(); + } + if inc_file.subparse_timespans().is_empty() { + println!("warn: file with incorrect subtitles has no lines"); + println!(); + } + + let mut corrected_timespans: Vec = inc_file + .subparse_timespans() + .iter() + .zip(deltas.iter()) + .map(|(×pan, &delta)| timespan + delta) + .collect(); + + if corrected_timespans.iter().any(|ts| ts.start.is_negative()) { + println!("warn: some subtitles now have negative timings, which can cause invalid subtitle files"); + if args.allow_negative_timestamps { + println!( + "warn: negative timestamps will be written to file, because you passed '-n' or '--allow-negative-timestamps'", + ); + } else { + println!( + "warn: negative subtitles will therefore moved to the start of the subtitle file by default; pass '-n' or '--allow-negative-timestamps' to disable this behavior", + ); + + for corrected_timespan in &mut corrected_timespans { + if corrected_timespan.start.is_negative() { + let offset = subparse::timetypes::TimePoint::from_secs(0) - corrected_timespan.start; + corrected_timespan.start = corrected_timespan.start + offset; + corrected_timespan.end = corrected_timespan.end + offset; + } + } + } + println!(); + } + + // .idx only has start timepoints (the subtitle is shown until the next subtitle starts) - so retiming with gaps might + // produce errors + if output_file_format == SubtitleFormat::VobSubIdx { + println!("warn: writing to an '.idx' file can lead to unexpected results due to restrictions of this format"); + } + + // incorrect file -> correct file + let shifted_timespans: Vec = corrected_timespans + .into_iter() + .map(|timespan| SubtitleEntry::from(timespan)) + .collect(); + + // write corrected files + let mut correct_file = inc_file + 
.into_subtitle_file() + .expect("incorrect input file can only be a subtitle") + .clone(); + correct_file + .update_subtitle_entries(&shifted_timespans) + .with_context(|_| TopLevelErrorKind::FailedToUpdateSubtitle)?; + + write_data_to_file( + &args.output_file_path, + correct_file + .to_data() + .with_context(|_| TopLevelErrorKind::FailedToGenerateSubtitleData)?, + )?; + + Ok(()) +} + +// ////////////////////////////////////////////////////////////////////////////////////////////////// + +fn main() { + env_logger::init(); + + match run() { + Ok(_) => std::process::exit(0), + Err(error) => { + let show_bt_opt = std::env::vars() + .find(|(key, _)| key == "RUST_BACKTRACE") + .map(|(_, value)| value); + let show_bt = show_bt_opt != None && show_bt_opt != Some("0".to_string()); + + println!("error: {}", error); + if show_bt { + println!("stack trace: {}", error.backtrace()); + } + + for cause in error.as_fail().iter_causes() { + println!("caused by: {}", cause); + if show_bt { + if let Some(backtrace) = cause.backtrace() { + println!("stack trace: {}", backtrace); + } + } + } + + if !show_bt { + println!(""); + println!("not: run with environment variable 'RUST_BACKTRACE=1' for detailed stack traces"); + } + + std::process::exit(1) + } + } +} diff --git a/alass-cli/src/video_decoder/ffmpeg_binary.rs b/alass-cli/src/video_decoder/ffmpeg_binary.rs new file mode 100644 index 0000000..dcca23b --- /dev/null +++ b/alass-cli/src/video_decoder/ffmpeg_binary.rs @@ -0,0 +1,419 @@ +use failure::{Backtrace, Context, Fail, ResultExt}; +use log::debug; +use std::ffi::OsString; +use std::fmt; +use std::io::Read; +use std::path::{Path, PathBuf}; +use std::process::Child; +use std::process::{ChildStdout, Command, Output, Stdio}; +use std::str::from_utf8; + +use byteorder::ByteOrder; +use serde::{Deserialize, Deserializer}; + +use crate::define_error; + +#[derive(Debug, PartialEq, Eq)] +pub enum CodecType { + Audio, + Video, + Subtitle, + Other(String), +} + +impl<'de> Deserialize<'de> 
for CodecType { + fn deserialize>(d: D) -> Result { + let s = String::deserialize(d)?; + match &s[..] { + "audio" => Ok(CodecType::Audio), + "video" => Ok(CodecType::Video), + "subtitle" => Ok(CodecType::Subtitle), + s => Ok(CodecType::Other(s.to_owned())), + } + } +} + +#[derive(Debug, Deserialize)] +struct Stream { + pub index: usize, + pub codec_long_name: String, + pub channels: Option, + pub duration: String, + pub codec_type: CodecType, +} + +/// Metadata associated with a video. +#[derive(Debug, Deserialize)] +struct Metadata { + streams: Vec, +} + +define_error!(DecoderError, DecoderErrorKind); + +#[derive(Debug, Fail)] +pub(crate) enum DecoderErrorKind { + FailedToDecodeVideoStreamInfo, + ExtractingMetadataFailed { + cmd_path: PathBuf, + file_path: PathBuf, + args: Vec, + }, + NoAudioStream { + path: PathBuf, + }, + FailedExtractingAudio { + file_path: PathBuf, + cmd_path: PathBuf, + args: Vec, + }, + FailedSpawningSubprocess { + path: PathBuf, + args: Vec, + }, + WaitingForProcessFailed { + cmd_path: PathBuf, + }, + ProcessErrorCode { + cmd_path: PathBuf, + code: Option, + }, + ProcessErrorMessage { + msg: String, + }, + DeserializingMetadataFailed { + path: PathBuf, + }, + ReadError, + FailedToParseDuration { + s: String, + }, + AudioSegmentProcessingFailed, +} + +fn format_cmd(cmd_path: &PathBuf, args: &[OsString]) -> String { + let args_string: String = args + .iter() + .map(|x| format!("{}", x.to_string_lossy())) + .collect::>() + .join(" "); + format!("{} {}", cmd_path.display(), args_string) +} + +impl fmt::Display for DecoderErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + DecoderErrorKind::FailedToDecodeVideoStreamInfo => write!(f, "failed to decode video stream info"), + DecoderErrorKind::DeserializingMetadataFailed { path } => { + write!(f, "failed to deserialize metadata of file '{}'", path.display()) + } + DecoderErrorKind::NoAudioStream { path } => write!(f, "no audio stream in file '{}'", 
path.display()), + DecoderErrorKind::FailedExtractingAudio { + file_path, + cmd_path, + args, + } => write!( + f, + "failed to extract audio from '{}' with '{}' ", + file_path.display(), + format_cmd(cmd_path, args) + ), + DecoderErrorKind::FailedSpawningSubprocess { path, args } => { + write!(f, "failed to spawn subprocess '{}' ", format_cmd(path, args)) + } + DecoderErrorKind::WaitingForProcessFailed { cmd_path } => { + write!(f, "failed to check status of subprocess '{}'", cmd_path.display()) + } + DecoderErrorKind::ProcessErrorCode { cmd_path, code } => write!( + f, + "process '{}' returned error code '{}'", + cmd_path.display(), + code.map(|x| x.to_string()) + .unwrap_or_else(|| String::from("interrupted?")) + ), + DecoderErrorKind::ProcessErrorMessage { msg } => write!(f, "stderr: {}", msg), + DecoderErrorKind::ExtractingMetadataFailed { + file_path, + cmd_path, + args, + } => write!( + f, + "failed to extract metadata from '{}' using command '{}'", + file_path.display(), + format_cmd(cmd_path, args) + ), + DecoderErrorKind::ReadError => write!(f, "error while reading stdout"), + DecoderErrorKind::FailedToParseDuration { s } => { + write!(f, "failed to parse duration string '{}' from metadata", s) + } + DecoderErrorKind::AudioSegmentProcessingFailed => write!(f, "processing audio segment failed"), + } + } +} + +trait IntoOk { + fn into_ok(self) -> Result; +} +impl IntoOk for T { + fn into_ok(self) -> Result { + Ok(self) + } +} + +pub struct VideoDecoderFFmpegBinary {} + +static PROGRESS_PRESCALER: i64 = 200; + +impl VideoDecoderFFmpegBinary { + /// Samples are pushed in 8kHz mono/single-channel format. 
+ pub(crate) fn decode( + file_path: impl AsRef, + receiver: impl super::AudioReceiver, + mut progress_handler: impl super::ProgressHandler, + ) -> Result { + let file_path_buf: PathBuf = file_path.as_ref().into(); + + let args = vec![ + OsString::from("-v"), + OsString::from("error"), + OsString::from("-show_streams"), + OsString::from("-of"), + OsString::from("json"), + OsString::from(file_path.as_ref()), + ]; + + let ffprobe_path: PathBuf = std::env::var_os("ALASS_FFPROBE_PATH") + .unwrap_or(OsString::from("ffprobe")) + .into(); + + let best_stream_opt: Option = Self::get_metadata(file_path_buf.clone(), ffprobe_path.clone(), &args) + .with_context(|_| DecoderErrorKind::ExtractingMetadataFailed { + file_path: file_path_buf.clone(), + cmd_path: ffprobe_path.clone(), + args: args, + })? + .streams + .into_iter() + .filter(|s| s.codec_type == CodecType::Audio && s.channels.is_some()) + .min_by_key(|s| s.channels.unwrap()); + + let best_stream: Stream; + match best_stream_opt { + Some(x) => best_stream = x, + None => { + return Err(DecoderError::from(DecoderErrorKind::NoAudioStream { + path: file_path.as_ref().into(), + })) + } + } + + let ffmpeg_path: PathBuf = std::env::var_os("ALASS_FFMPEG_PATH") + .unwrap_or(OsString::from("ffprobe")) + .into(); + + let args: Vec = vec![ + // only print errors + OsString::from("-v"), + OsString::from("error"), + // "yes" -> disables user interaction + OsString::from("-y"), + // input file + OsString::from("-i"), + file_path.as_ref().into(), + // select stream + OsString::from("-map"), + format!("0:{}", best_stream.index).into(), + // audio codec: 16-bit signed little endian + OsString::from("-acodec"), + OsString::from("pcm_s16le"), + // resample to 8khz + OsString::from("-ar"), + OsString::from("8000"), + // resample to single channel + OsString::from("-ac"), + OsString::from("1"), + // output 16-bit signed little endian stream directly (no wav, etc.) 
+ OsString::from("-f"), + OsString::from("s16le"), + // output to stdout pipe + OsString::from("-"), + ]; + + let duration = + best_stream + .duration + .parse::() + .with_context(|_| DecoderErrorKind::FailedToParseDuration { + s: best_stream.duration, + })?; + let num_samples: i64 = (duration * 8000.0) as i64 / PROGRESS_PRESCALER; + + progress_handler.init(num_samples); + + return Self::extract_audio_stream(receiver, progress_handler, ffmpeg_path.clone(), &args) + .with_context(|_| DecoderErrorKind::FailedExtractingAudio { + file_path: file_path_buf.clone(), + cmd_path: ffmpeg_path.clone(), + args: args, + })? + .into_ok(); + } + + fn extract_audio_stream( + mut receiver: impl super::AudioReceiver, + mut progress_handler: impl super::ProgressHandler, + ffmpeg_path: PathBuf, + args: &[OsString], + ) -> Result { + let mut ffmpeg_process: Child = Command::new(ffmpeg_path.clone()) + .args(args) + .stdin(Stdio::null()) + .stderr(Stdio::piped()) + .stdout(Stdio::piped()) + .spawn() + .with_context(|_| DecoderErrorKind::FailedSpawningSubprocess { + path: ffmpeg_path.clone(), + args: args.to_vec(), + })?; + + let mut stdout: ChildStdout = ffmpeg_process.stdout.take().unwrap(); + + enum ParserState { + Start, + SingleByte(u8), + } + + let mut data: Vec = std::vec::from_elem(0, 200 * 1024 * 1024); + let data2_cap = 1024 * 1024; + let mut data2: Vec = Vec::with_capacity(data2_cap); + let mut parser_state: ParserState = ParserState::Start; + let mut progress_prescaler_counter = 0; + + loop { + // improves performance by allowing ffmpeg to generate more data in pipe + // TODO: an async tokio read might also have the same effect (without being as machine dependent) + // -> too low: does not do anything (+some otherhead) + // -> too high: slows down computaton because ffmpeg has to wait for this process to read + //std::thread::sleep(Duration::from_nanos(1000)); + + let read_bytes = stdout.read(&mut data).with_context(|_| DecoderErrorKind::ReadError)?; + //println!("{}", 
read_bytes); + + if read_bytes == 0 { + match ffmpeg_process + .wait() + .with_context(|_| DecoderErrorKind::WaitingForProcessFailed { + cmd_path: ffmpeg_path.clone(), + })? + .code() + { + Some(0) => { + receiver + .push_samples(&data2) + .with_context(|_| DecoderErrorKind::AudioSegmentProcessingFailed)?; + data2.clear(); + progress_handler.finish(); + return Ok(receiver + .finish() + .with_context(|_| DecoderErrorKind::AudioSegmentProcessingFailed)?); + } + code @ Some(_) | code @ None => { + let error_code_err: DecoderErrorKind = DecoderErrorKind::ProcessErrorCode { + cmd_path: ffmpeg_path, + code: code, + }; + + let mut stderr_data = Vec::new(); + ffmpeg_process + .stderr + .unwrap() + .read_to_end(&mut stderr_data) + .with_context(|_| DecoderErrorKind::ReadError)?; + + let stderr_str: String = String::from_utf8_lossy(&stderr_data).into(); + + if stderr_str.is_empty() { + return Err(error_code_err.into()); + } else { + return Err(DecoderError::from(DecoderErrorKind::ProcessErrorMessage { + msg: stderr_str, + })) + .with_context(|_| error_code_err) + .map_err(|x| DecoderError::from(x)); + } + } + } + } + + for &byte in &data[0..read_bytes] { + match parser_state { + ParserState::Start => parser_state = ParserState::SingleByte(byte), + ParserState::SingleByte(last_byte) => { + let two_bytes = [last_byte, byte]; + let sample = byteorder::LittleEndian::read_i16(&two_bytes); + receiver + .push_samples(&[sample]) + .with_context(|_| DecoderErrorKind::AudioSegmentProcessingFailed)?; + + if progress_prescaler_counter == PROGRESS_PRESCALER { + progress_handler.inc(); + progress_prescaler_counter = 0; + } + + progress_prescaler_counter = progress_prescaler_counter + 1; + + /*data2.push(sample); + if data2.len() == data2_cap { + receiver.push_samples(&data2); + data2.clear(); + }*/ + parser_state = ParserState::Start; + } + } + } + } + } + + fn get_metadata(file_path: PathBuf, ffprobe_path: PathBuf, args: &[OsString]) -> Result { + let ffprobe_process: Output = 
Command::new(ffprobe_path.clone()) + .args(args) + .stdin(Stdio::null()) + .stderr(Stdio::piped()) + .stdout(Stdio::piped()) + .output() + .with_context(|_| DecoderErrorKind::FailedSpawningSubprocess { + path: ffprobe_path.clone(), + args: args.to_vec(), + })?; + + if !ffprobe_process.status.success() { + let stderr: String = String::from_utf8_lossy(&ffprobe_process.stderr) + .to_string() + .trim_end() + .to_string(); + + let err = DecoderErrorKind::ProcessErrorCode { + cmd_path: ffprobe_path.clone(), + code: ffprobe_process.status.code(), + }; + + if stderr.is_empty() { + return Err(DecoderError::from(err)); + } else { + return Err(DecoderError::from(DecoderErrorKind::ProcessErrorMessage { + msg: stderr, + })) + .with_context(|_| err) + .map_err(|x| DecoderError::from(x)); + } + } + + let stdout = + from_utf8(&ffprobe_process.stdout).with_context(|_| DecoderErrorKind::FailedToDecodeVideoStreamInfo)?; + debug!("Video metadata: {}", stdout); + + let metadata: Metadata = serde_json::from_str(stdout) + .with_context(|_| DecoderErrorKind::DeserializingMetadataFailed { path: file_path })?; + + Ok(metadata) + } +} diff --git a/alass-cli/src/video_decoder/ffmpeg_library.rs b/alass-cli/src/video_decoder/ffmpeg_library.rs new file mode 100644 index 0000000..2360b06 --- /dev/null +++ b/alass-cli/src/video_decoder/ffmpeg_library.rs @@ -0,0 +1,276 @@ +use failure::{Backtrace, Context, Fail}; +use ffmpeg_sys::*; +use std::convert::TryInto; +use std::ffi::{CStr, CString, OsString}; +use std::fmt; +use std::path::{Path, PathBuf}; +use std::ptr::null_mut; + +use crate::define_error; + +fn av_err2str(errnum: libc::c_int) -> String { + let mut err_buffer: [libc::c_char; 256] = [0; 256]; + unsafe { + av_make_error_string(err_buffer.as_mut_ptr() as *mut i8, err_buffer.len(), errnum); + CStr::from_ptr(&err_buffer as *const libc::c_char) + .to_string_lossy() + .to_string() + } +} + +define_error!(DecoderError, DecoderErrorKind); + +#[derive(Debug, Fail)] +pub(crate) enum 
DecoderErrorKind {} + +fn format_cmd(cmd_path: &PathBuf, args: &[OsString]) -> String { + let args_string: String = args + .iter() + .map(|x| format!("{}", x.to_string_lossy())) + .collect::>() + .join(" "); + format!("{} {}", cmd_path.display(), args_string) +} + +impl fmt::Display for DecoderErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + unimplemented!() + } +} + +pub struct VideoDecoderFFmpegLibrary {} + +impl VideoDecoderFFmpegLibrary { + /// Samples are pushed in 8kHz mono/single-channel format. + pub(crate) fn decode( + file_path: impl AsRef, + mut receiver: impl super::AudioReceiver, + mut progress_handler: impl super::ProgressHandler, + ) -> Result { + unsafe { + let mut format_context: *mut AVFormatContext = avformat_alloc_context(); + + let file_path_: String = file_path.as_ref().to_string_lossy().into_owned(); + + let result: libc::c_int; + + result = avformat_open_input( + &mut format_context as *mut *mut AVFormatContext, + file_path_.as_bytes().as_ptr() as *const i8, + null_mut(), + null_mut(), + ); + + if result < 0 { + // TODO + panic!( + "Failed to open media file '{}': {}", + file_path.as_ref().display(), + av_err2str(result) + ); + } + + avformat_find_stream_info(format_context, null_mut()); + + let streams: &[*mut AVStream] = + std::slice::from_raw_parts((*format_context).streams, (*format_context).nb_streams as usize); + + let mut audio_stream_opt: Option<*mut AVStream> = None; + + for &stream in streams { + let local_codec_parameters: *mut AVCodecParameters = (*stream).codecpar; + + if (*local_codec_parameters).codec_type == AVMediaType::AVMEDIA_TYPE_AUDIO { + // choose the audio stream with the least amount of channels (it can be resampled faster) + if let Some(saved_audio_stream) = audio_stream_opt { + if (*(*saved_audio_stream).codecpar).channels > (*local_codec_parameters).channels { + audio_stream_opt = Some(stream); + } + } else { + audio_stream_opt = Some(stream); + } + } + } + + if audio_stream_opt.is_none() 
{ + /* TODO */ + panic!("no audio stream found"); + } + let audio_stream = audio_stream_opt.unwrap(); + + let local_codec_parameters: *mut AVCodecParameters = (*audio_stream).codecpar; + + let local_codec: *mut AVCodec = avcodec_find_decoder((*local_codec_parameters).codec_id); + //let local_codec_name: &CStr = CStr::from_ptr((*local_codec).long_name); + + /*println!( + "Audio Codec '{}': {} channels, sample rate {}", + local_codec_name.to_string_lossy(), + (*local_codec_parameters).channels, + (*local_codec_parameters).sample_rate + );*/ + + let codec_context: *mut AVCodecContext = avcodec_alloc_context3(local_codec as *const AVCodec); + avcodec_parameters_to_context(codec_context, local_codec_parameters); + avcodec_open2(codec_context, local_codec, null_mut()); + + let _av_opt_set_int = |swr: *mut SwrContext, name: &str, val: i64, search_flag: libc::c_int| { + av_opt_set_int( + swr as *mut libc::c_void, + CString::new(name).unwrap().into_raw(), + val, + search_flag, + ) + }; + + let _av_opt_set_int = |swr: *mut SwrContext, name: &str, val: i64, search_flag: libc::c_int| { + av_opt_set_int( + swr as *mut libc::c_void, + CString::new(name).unwrap().into_raw(), + val, + search_flag, + ) + }; + + let _av_opt_set_sample_fmt = + |obj: *mut SwrContext, name: &str, fmt: AVSampleFormat, search_flags: libc::c_int| -> libc::c_int { + av_opt_set_sample_fmt( + obj as *mut libc::c_void, + CString::new(name).unwrap().into_raw(), + fmt, + search_flags, + ) + }; + + let in_channel_layout = (*codec_context).channel_layout.try_into().unwrap(); + let in_channel_count: i64 = (*codec_context).channels.try_into().unwrap(); + let in_sample_rate: i64 = (*codec_context).sample_rate.try_into().unwrap(); + let in_sample_format = (*codec_context).sample_fmt; + + let out_channel_count = 1; + let out_channel_layout = AV_CH_LAYOUT_MONO.try_into().unwrap(); + let out_sample_rate = 8000; + let out_sample_format = AVSampleFormat::AV_SAMPLE_FMT_S16P; + + // prepare resampler + let swr: *mut 
SwrContext = swr_alloc(); + _av_opt_set_int(swr, "in_channel_count", in_channel_count, 0); + _av_opt_set_int(swr, "in_channel_layout", in_channel_layout, 0); + _av_opt_set_int(swr, "in_sample_rate", in_sample_rate, 0); + _av_opt_set_sample_fmt(swr, "in_sample_fmt", in_sample_format, 0); + + _av_opt_set_int(swr, "out_channel_count", out_channel_count, 0); + _av_opt_set_int(swr, "out_channel_layout", out_channel_layout, 0); + _av_opt_set_int(swr, "out_sample_rate", out_sample_rate, 0); + _av_opt_set_sample_fmt(swr, "out_sample_fmt", out_sample_format, 0); + + swr_init(swr); + if swr_is_initialized(swr) == 0 { + unimplemented!(); + //pri(stderr, "Resampler has not been properly initialized\n"); + //return -1; + } + + /* compute the number of converted samples: buffering is avoided + * ensuring that the output buffer will contain at least all the + * converted input samples */ + let src_nb_samples = 1024; // this is just a guess... + let mut max_out_samples: i32 = + av_rescale_rnd(src_nb_samples, out_sample_rate, in_sample_rate, AVRounding::AV_ROUND_UP) as i32; + + let mut buffer: *mut i16 = null_mut(); + av_samples_alloc( + &mut buffer as *mut *mut i16 as *mut *mut u8, + null_mut(), + out_channel_count as i32, + max_out_samples, + out_sample_format, + 0, + ); + + let packet: *mut AVPacket = av_packet_alloc(); + let frame: *mut AVFrame = av_frame_alloc(); + + progress_handler.init((*audio_stream).nb_frames); + + while av_read_frame(format_context, packet) >= 0 { + //println!("read frame {:?}", packet); + + if (*packet).stream_index != (*audio_stream).index { + continue; + } + + progress_handler.inc(); + + //println!("stream fits"); + + let mut response = avcodec_send_packet(codec_context, packet); + if response < 0 { + panic!("{}", av_err2str(response)); + } + + loop { + //println!("begin receive_frame"); + response = avcodec_receive_frame(codec_context, frame); + //println!("end receive_frame"); + + if response == AVERROR(EAGAIN) || response == AVERROR_EOF { + break; 
+ } else if response < 0 { + panic!("Error: {}", av_err2str(response)); + } + + //let out_samples = av_rescale_rnd(swr_get_delay(swr, 48000) + in_samples, 44100, 48000, AV_ROUND_UP); + let out_sample_count = swr_get_out_samples(swr, (*frame).nb_samples); + + // Resize output buffer to allow all samples (without buffering) to be stored. + if out_sample_count > max_out_samples { + max_out_samples = out_sample_count; + av_freep(&mut buffer as *mut *mut i16 as *mut libc::c_void); + av_samples_alloc( + &mut buffer as *mut *mut i16 as *mut *mut u8, + null_mut(), + out_channel_count as i32, + max_out_samples, + out_sample_format, + 0, + ); + } + + // resample frames + let frame_count = swr_convert( + swr, + &mut buffer as *mut *mut i16 as *mut *mut u8, + out_sample_count, + (*frame).data.as_mut_ptr() as *mut *const u8, + (*frame).nb_samples, + ); + + //println!("Samples: {} Predicted: {} Frames: {}", (*frame).nb_samples, out_sample_count, frame_count); + let out_slice = std::slice::from_raw_parts_mut(buffer, frame_count as usize); + + receiver.push_samples(out_slice); + + /*for v in out_slice { + println!("{}", v); + }*/ + + //println!("Frame count: {}", frame_count); + + //println!("freep done"); + } + + av_packet_unref(packet); + } + + av_freep(&mut buffer as *mut *mut i16 as *mut libc::c_void); + + avformat_free_context(format_context); + // TODO: cleanup everything + } + + progress_handler.finish(); + + Ok(receiver.finish()) + } +} diff --git a/alass-cli/src/video_decoder/mod.rs b/alass-cli/src/video_decoder/mod.rs new file mode 100644 index 0000000..2c7a840 --- /dev/null +++ b/alass-cli/src/video_decoder/mod.rs @@ -0,0 +1,90 @@ +#[cfg(feature = "ffmpeg-library")] +mod ffmpeg_library; + +#[cfg(feature = "ffmpeg-library")] +pub use ffmpeg_library::VideoDecoderFFmpegLibrary as VideoDecoder; + +#[cfg(feature = "ffmpeg-binary")] +mod ffmpeg_binary; + +#[cfg(feature = "ffmpeg-binary")] +pub use ffmpeg_binary::VideoDecoderFFmpegBinary as VideoDecoder; + +pub trait 
AudioReceiver { + type Output; + type Error: failure::Fail; + + /// Samples are in 8000kHz mono/single-channel format. + fn push_samples(&mut self, samples: &[i16]) -> Result<(), Self::Error>; + + fn finish(self) -> Result; +} + +pub struct ChunkedAudioReceiver { + buffer: Vec, + filled: usize, + next: R, +} + +impl ChunkedAudioReceiver { + pub fn new(size: usize, next: R) -> ChunkedAudioReceiver { + ChunkedAudioReceiver { + buffer: std::vec::from_elem(0, size), + filled: 0, + next, + } + } +} + +impl AudioReceiver for ChunkedAudioReceiver { + type Output = R::Output; + type Error = R::Error; + + fn push_samples(&mut self, mut samples: &[i16]) -> Result<(), R::Error> { + assert!(self.buffer.len() > self.filled); + + loop { + if samples.is_empty() { + break; + } + + let sample_count = std::cmp::min(self.buffer.len() - self.filled, samples.len()); + self.buffer[self.filled..self.filled + sample_count].clone_from_slice(&samples[..sample_count]); + + samples = &samples[sample_count..]; + + self.filled = self.filled + sample_count; + + if self.filled == self.buffer.len() { + self.next.push_samples(self.buffer.as_slice())?; + self.filled = 0; + } + } + + Ok(()) + } + + fn finish(self) -> Result { + self.next.finish() + } +} + +/// Use this trait if you want more detailed information about the progress of operations. +pub trait ProgressHandler { + /// Will be called one time before `inc()` is called. `steps` is the + /// number of times `inc()` will be called. + /// + /// The number of steps is around the number of lines in the "incorrect" subtitle. + /// Be aware that this number can be zero! + #[allow(unused_variables)] + fn init(&mut self, steps: i64) {} + + /// We made (small) progress! + fn inc(&mut self) {} + + /// Will be called after the last `inc()`, when `inc()` was called `steps` times. 
+ fn finish(&mut self) {} +} + +/*struct NoProgressHandler {} +impl ProgressHandler for NoProgressHandler {}*/ diff --git a/alass-core/Cargo.toml b/alass-core/Cargo.toml new file mode 100644 index 0000000..4e2b8c2 --- /dev/null +++ b/alass-core/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "alass-core" +version = "1.0.0" +authors = ["kaegi "] +description = "Automatically corrects subtitle timings given a second correct subtitle (binary)" +repository = "https://github.com/kaegi/alass/alass-core" +documentation = "https://docs.rs/alass-core" +readme = "README.md" +keywords = ["subtitle", "align", "automatic", "api", "tool"] +license = "GPL-3.0" +edition = "2018" + +[features] +default = [] + +# this enables writing of statistics but incurs a performance penalty +# of 15% even when not used by the user of the program +statistics = [] + +# In nosplit mode, the most expensive operation is sorting of sorted +# vectors. In runtime analysis, using a heap-sort-like algorithm +# is more performant than assuming a large unsorted array. In +# tests it is only half as fast. (1s vs 2s) +nosplit-heap-sort = [] + +[dependencies] +arrayvec = "0.4.0" + +[dev-dependencies] +rand = "0.3" diff --git a/alass-core/README.md b/alass-core/README.md new file mode 100644 index 0000000..a479931 --- /dev/null +++ b/alass-core/README.md @@ -0,0 +1,22 @@ +# alass-core + +This Rust library contains the core algorithm for `alass`, the "Automatic Language-Agnostic Subtitle Sychronization" tool. If you want to go to the command line tool instead, please click [here](https://github.com/kaegi/alass). + + +## How to use the library +Add this to your `Cargo.toml`: + +```toml +[dependencies] +alass-core = "1.0" +``` + +The library only contains one function that takes two sequences of time spans and returns the offsets to get the best possible alignment. 
+ +[Documentation](https://docs.rs/alass-core) + +[Crates.io](https://crates.io/crates/alass-core) + +### Documentaion + +For much more information, please see the workspace information [here](https://github.com/kaegi/alass). \ No newline at end of file diff --git a/alass-core/src/alass.rs b/alass-core/src/alass.rs new file mode 100644 index 0000000..943c2a0 --- /dev/null +++ b/alass-core/src/alass.rs @@ -0,0 +1,778 @@ +// This file is part of the Rust library and binary `alass`. +// +// Copyright (C) 2017 kaegi +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use crate::rating_type::{Rating, RatingDelta, RatingDeltaDelta, RatingDeltaExt, RATING_PRECISION}; +use crate::segments::{ + add_rating_iterators, combined_maximum_of_dual_iterators, zero_rating_iterator, DifferentialRatingBufferBuilder, + PositionBuffer, RatingBuffer, RatingIterator, RatingSegment, SeparateDualBuffer, +}; +use crate::statistics::Statistics; +use crate::time_types::{TimeDelta, TimePoint, TimeSpan}; + +use arrayvec::ArrayVec; +use std::cell::RefCell; +use std::cmp::min; +use std::iter::once; +use std::rc::Rc; + +/// Use this trait if you want more detailed information about the progress of the align operation +/// (which might take some seconds). +pub trait ProgressHandler { + /// Will be called one time before `inc()` is called. `steps` is the + /// number of times `inc()` will be called. 
+ /// + /// The number of steps is around the number of lines in the "incorrect" subtitle. + /// Be aware that this number can be zero! + #[allow(unused_variables)] + fn init(&mut self, steps: i64) {} + + /// We made (small) progress! + fn inc(&mut self) {} + + /// Will be called after the last `inc()`, when `inc()` was called `steps` times. + fn finish(&mut self) {} +} + +/// The "main" structure which holds the infomation needed to align the subtitles to each other. +pub struct Aligner { + /// List of incorrect subtitles which are aligned with this library. This + /// list will always be non-empty. + list: Vec, + + /// The fixed reference subtitles. This list will always be non-empty. + reference: Vec, + + /// Contains the range in which the incorrect subtitles can be moved. + buffer_timespan: TimeSpan, + + #[allow(dead_code)] + statistics: Option>>, +} + +impl Aligner { + /// In each list no time span should intersect any other and both list are + /// sorted by starting times. + pub fn new(list: Vec, reference: Vec, statistics_opt: Option) -> Aligner { + assert!(list.len() > 0); + assert!(reference.len() > 0); + + /*println!("{} reference lines", reference.len()); + println!("{} incorrect lines", list.len());*/ + + let incorrect_start: TimePoint = (*list.first().unwrap()).start(); + let incorrect_end: TimePoint = (*list.last().unwrap()).end(); + + let reference_start: TimePoint = (*reference.first().unwrap()).start(); + let reference_end: TimePoint = (*reference.last().unwrap()).end(); + + // this is the timespan length which can contain all incorrect subtitles + let list_timespan = incorrect_end - incorrect_start; + + let start = reference_start - list_timespan - TimeDelta::one(); + let end = reference_end + list_timespan + TimeDelta::one(); + + // It might be possible that all corrected subtiles fit in the reference list + // timeframe. 
It they don't + // we need to provide extra space, so that the produting corrected subtitles + // still fit into the + // whole [start, end] timeframe. Because `list_timespan` is the length of the + // whole incorrect subtitle file, + // we can just extend the reference timeframe by `list_timespan` on both ends. + let min_offset: TimeDelta = start - incorrect_start; + let max_offset: TimeDelta = end - incorrect_start; + + assert!(min_offset <= max_offset); + + Aligner { + list: list, + reference: reference, + buffer_timespan: TimeSpan::new(start, end), + statistics: statistics_opt.map(|s| Rc::new(RefCell::new(s))), + } + } + + pub fn get_start(&self) -> TimePoint { + self.buffer_timespan.start() + } + + pub fn get_end(&self) -> TimePoint { + self.buffer_timespan.end() + } + + #[allow(dead_code)] + pub fn align_constant_delta(&self) -> TimeDelta { + #[derive(PartialEq, Eq, Clone)] + struct DeltaCorrect { + rating: RatingDeltaDelta, + time: TimeDelta, + } + + impl DeltaCorrect { + fn new(rating: RatingDeltaDelta, time: TimeDelta) -> DeltaCorrect { + DeltaCorrect { + rating: rating, + time: time, + } + } + } + + type OrderedDeltaCorrect = Vec; + + let mut delta_corrects: Vec = Vec::new(); + + for incorrect_ts in &self.list { + let mut rise_ordered_delta_corrects: OrderedDeltaCorrect = OrderedDeltaCorrect::new(); + let mut up_ordered_delta_corrects: OrderedDeltaCorrect = OrderedDeltaCorrect::new(); + let mut fall_ordered_delta_corrects: OrderedDeltaCorrect = OrderedDeltaCorrect::new(); + let mut down_ordered_delta_corrects: OrderedDeltaCorrect = OrderedDeltaCorrect::new(); + + for reference_ts in &self.reference { + let rise_time; + let up_time; + let fall_time; + let down_time; + + if incorrect_ts.len() < reference_ts.len() { + rise_time = reference_ts.start() - incorrect_ts.len(); + up_time = reference_ts.start(); + fall_time = reference_ts.end() - incorrect_ts.len(); + down_time = reference_ts.end(); + } else { + rise_time = reference_ts.start() - 
incorrect_ts.len(); + up_time = reference_ts.end() - incorrect_ts.len(); + fall_time = reference_ts.start(); + down_time = reference_ts.end(); + } + + let rating_delta_delta: RatingDeltaDelta = + RatingDelta::compute_rating_delta(incorrect_ts.len(), reference_ts.len()); + + rise_ordered_delta_corrects + .push(DeltaCorrect::new(rating_delta_delta, rise_time - incorrect_ts.start())); + up_ordered_delta_corrects.push(DeltaCorrect::new(-rating_delta_delta, up_time - incorrect_ts.start())); + fall_ordered_delta_corrects + .push(DeltaCorrect::new(-rating_delta_delta, fall_time - incorrect_ts.start())); + down_ordered_delta_corrects + .push(DeltaCorrect::new(rating_delta_delta, down_time - incorrect_ts.start())); + } + + delta_corrects.push(rise_ordered_delta_corrects); + delta_corrects.push(up_ordered_delta_corrects); + delta_corrects.push(fall_ordered_delta_corrects); + delta_corrects.push(down_ordered_delta_corrects); + } + + // test if all delta correct arrays are sorted (should be true) + /*for dc in &delta_corrects { + for (a, b) in dc.iter().zip(dc.iter().skip(1)) { + assert!(a.time < b.time); + } + } + println!("b");*/ + + // we now have "4 * len(incorrect_list)" sorted arrays with each "4 * len(reference_list)" elements + // -> sort with max heap (pop from end) + + // in heap sort implementation, the delta corrects are sorted descending by time, in the "simple" sort ascending + let mut all_delta_corrects: Vec; + let sorted_delta_corrects_iter; // : impl Iter + let first_delta_correct: DeltaCorrect; + + #[cfg(not(feature = "nosplit-heap-sort"))] + { + all_delta_corrects = delta_corrects.into_iter().flat_map(|dc| dc).collect(); + all_delta_corrects.sort_unstable_by_key(|dc| dc.time); + + first_delta_correct = all_delta_corrects + .first() + .cloned() + .expect("delta corrects should have at least one element"); + + sorted_delta_corrects_iter = all_delta_corrects.iter(); + } + + #[cfg(feature = "nosplit-heap-sort")] + { + use std::cmp::Ordering; + use 
std::collections::BinaryHeap; + + #[derive(PartialEq, Eq)] + struct MaxHeapInfo { + heap_id: usize, + data: DeltaCorrect, + } + + impl Ord for MaxHeapInfo { + fn cmp(&self, other: &MaxHeapInfo) -> Ordering { + TimeDelta::cmp(&self.data.time, &other.data.time) + } + } + + impl PartialOrd for MaxHeapInfo { + fn partial_cmp(&self, other: &MaxHeapInfo) -> Option { + Some(self.cmp(other)) + } + } + + let mut heap = BinaryHeap::new(); + + for (heap_id, data) in delta_corrects.iter_mut().enumerate() { + let last_elem: DeltaCorrect = data + .pop() + .expect("at least one element should be in every delta correct list"); + heap.push(MaxHeapInfo { + heap_id: heap_id, + data: last_elem, + }); + } + + all_delta_corrects = Vec::with_capacity(4 * self.list.len() * self.reference.len()); + + loop { + let max_heap_elem: MaxHeapInfo; + + match heap.pop() { + Some(x) => max_heap_elem = x, + + // are all vectors empty? + None => break, + } + + all_delta_corrects.push(max_heap_elem.data); + + if let Some(new_delta_correct) = delta_corrects[max_heap_elem.heap_id].pop() { + heap.push(MaxHeapInfo { + heap_id: max_heap_elem.heap_id, + data: new_delta_correct, + }); + } + } + + assert!(all_delta_corrects.len() == 4 * self.list.len() * self.reference.len()); + sorted_delta_corrects_iter = all_delta_corrects.iter().rev(); + + first_delta_correct = all_delta_corrects + .last() + .cloned() + .expect("delta corrects should have at least one element"); + } + + // compute maximum rating + let mut delta: i64 = 0; + let mut rating: i64 = 0; + let mut maximum: (i64, TimeDelta) = (0, first_delta_correct.time); + for (delta_correct, next_delta_correct) in sorted_delta_corrects_iter + .clone() + .zip(sorted_delta_corrects_iter.skip(1)) + { + //println!("rating: {}", rating); + delta += delta_correct.rating; + rating += delta * (next_delta_correct.time - delta_correct.time).as_i64(); + if rating > maximum.0 { + maximum = (rating, next_delta_correct.time); + } + } + + assert!(rating == 0); + + return 
maximum.1; + } + + #[cfg(feature = "statistics")] + pub fn do_statistics(&self, f: impl Fn(&mut Statistics) -> std::io::Result<()>) { + if let Some(statistics) = &self.statistics { + f(&mut statistics.borrow_mut()).expect("failed to write statistics"); + } + } + + pub fn align_with_splits( + &self, + mut progress_handler_opt: Option>, + nopsplit_bonus_normalized: f64, + speed_optimization_opt: Option, + ) -> Vec { + // For each segment the full rating can only be 1. So the maximum rating + // without the nosplit bonus is `min(list.len(), reference.len())`. So to get + // from the normalized rating `[0, 1]` to a unnormalized rating (where only + // values between `[0, max_rating]` are interesting) we multiply by + // `min(list.len(), reference.len())`. + + if let Some(progress_handler) = progress_handler_opt.as_mut() { + progress_handler.init(self.list.len() as i64); + } + + let nopsplit_bonus_unnormalized: RatingDelta = ((min(self.list.len(), self.reference.len()) as f64 + * nopsplit_bonus_normalized) + * RATING_PRECISION as f64) as RatingDelta; + + let mut last_rating_buffer: Option = + Some(zero_rating_iterator(self.get_start(), self.get_end()).save()); + + assert!(self.list.len() > 0); + + /*: impl Iterator>*/ + let nosplit_delta_iter = self + .list + .iter() + .zip(self.list.iter().skip(1)) + .map(|(incorrect_timespan, next_timespan)| Some(next_timespan.start - incorrect_timespan.start)) + .chain(once(None)); + + // these buffers save the start position of a line dependent on the position of the next line, + // -> this allows to compute the final corrected line positions + let mut position_buffers: Vec = Vec::new(); + + for (line_nr, (&incorrect_span, nosplit_delta_opt)) in self.list.iter().zip(nosplit_delta_iter).enumerate() { + assert!(incorrect_span.len() > TimeDelta::zero()); // otherwise shift_simple/extend_to creates a zero-length segment + + let pline_tag = format!("line:{}", line_nr); + let nline_tag = format!("line:-{}", self.list.len() - 1 - line_nr); 
+ let _line_tags: Vec<&str> = vec![pline_tag.as_str(), nline_tag.as_str()]; + + let single_span_ratings; + let _single_span_ratings = self.single_span_ratings(incorrect_span.len()); + + #[cfg(not(feature = "statistics"))] + { + single_span_ratings = _single_span_ratings; + } + + #[cfg(feature = "statistics")] + { + let single_span_ratings_buffer = _single_span_ratings.save(); + + self.do_statistics(|s| { + s.save_rating_buffer( + "[1] INDIVIDUAL span ratings for start position", + &_line_tags + .clone() + .into_iter() + .chain(once("individual")) + .collect::>(), + &single_span_ratings_buffer, + ) + }); + + single_span_ratings = single_span_ratings_buffer.into_iter(); + } + + let added_buffer: RatingBuffer; + if let Some(speed_optimization) = speed_optimization_opt { + let progress_factor = line_nr as f64 / self.list.len() as f64; + let epsilon = (RATING_PRECISION as f64 * speed_optimization * 0.1 * progress_factor) as i64; + + added_buffer = add_rating_iterators(last_rating_buffer.unwrap().into_iter(), single_span_ratings) + .discard_start_times() + .save_aggressively_simplified(epsilon); + } else { + added_buffer = add_rating_iterators(last_rating_buffer.unwrap().into_iter(), single_span_ratings) + .discard_start_times() + .save(); + //.save_simplified(); // this seems to not change runtime very much (TODO: test with benchmark) + } + + #[cfg(feature = "statistics")] + self.do_statistics(|s| { + s.save_rating_buffer( + "[2] TOTAL span ratings for start position (last rating + new span rating)", + &_line_tags + .clone() + .into_iter() + .chain(once("individual")) + .collect::>(), + &buffer, + ) + }); + + if let Some(nosplit_delta) = nosplit_delta_opt { + assert!(nosplit_delta > TimeDelta::zero()); // otherwise shift_simple/extend_to creates a zero-length segment + + let best_split_positions; + let _best_split_positions = added_buffer + .iter() + .shift_simple(incorrect_span.len()) + .clamp_end(self.get_end()) + .annotate_with_segment_start_times() + 
.annotate_with_position_info(|segment_start_point| segment_start_point - incorrect_span.len()) + .left_to_right_maximum() + .discard_start_times() + .simplify() + .discard_start_times(); + + let nosplit_positions; + let _nosplit_positions = added_buffer + .iter() + .shift_simple(nosplit_delta) + .clamp_end(self.get_end()) + .annotate_with_segment_start_times() + .discard_start_times() + .add_rating(nopsplit_bonus_unnormalized) + .annotate_with_segment_start_times() + .annotate_with_position_info(|segment_start_point| segment_start_point - nosplit_delta) + .discard_start_times(); + //.simplify() + //.discard_start_times(); + + let combined_maximum_buffer: SeparateDualBuffer; + + #[cfg(feature = "statistics")] + { + let nosplit_positions_buffer = _nosplit_positions.save(); + + self.do_statistics(|s| { + s.save_rating_buffer( + "[3a] NOsplit ratings (positions are FIXED to end)", + &_line_tags + .clone() + .into_iter() + .chain(once("nosplit")) + .collect::>(), + &nosplit_positions_buffer.iter().only_ratings().save(), + ) + }); + + self.do_statistics(|s| { + s.save_position_buffer( + "[3b] NOsplit positions (positions are FIXED to end)", + &_line_tags + .clone() + .into_iter() + .chain(once("nosplit")) + .collect::>(), + &nosplit_positions_buffer.iter().only_positions().save(), + ) + }); + + let best_split_positions_buffer = _best_split_positions.save(); + + self.do_statistics(|s| { + s.save_rating_buffer( + "[4a] split ratings (positions computed by LEFT-TO-RIGHT maxmimum)", + &_line_tags.clone().into_iter().chain(once("split")).collect::>(), + &best_split_positions_buffer.iter().only_ratings().save(), + ) + }); + + self.do_statistics(|s| { + s.save_position_buffer( + "[4b] split positions (positions computed by LEFT-TO-RIGHT maxmimum)", + &_line_tags.clone().into_iter().chain(once("split")).collect::>(), + &best_split_positions_buffer.iter().only_positions().save(), + ) + }); + + nosplit_positions = nosplit_positions_buffer.into_iter(); + best_split_positions = 
best_split_positions_buffer.into_iter(); + } + + #[cfg(not(feature = "statistics"))] + { + nosplit_positions = _nosplit_positions; + best_split_positions = _best_split_positions; + } + + combined_maximum_buffer = combined_maximum_of_dual_iterators(nosplit_positions, best_split_positions) + .discard_start_times() + .save_separate(); + + #[cfg(feature = "statistics")] + self.do_statistics(|s| { + s.save_rating_buffer( + "[5] COMBINED ratings for this span (vertical maximum of split and nosplit)", + &_line_tags + .clone() + .into_iter() + .chain(once("combined")) + .collect::>(), + &combined_maximum_buffer.rating_buffer, + ) + }); + + /*println!( + "Last rating buffer length: {}", + combined_maximum_buffer.rating_buffer.buffer.len() + );*/ + + last_rating_buffer = Some(combined_maximum_buffer.rating_buffer); + + position_buffers.push(combined_maximum_buffer.position_buffer); + } else { + last_rating_buffer = None; + + let best_position = added_buffer + .iter() + .annotate_with_segment_start_times() + .annotate_with_position_info(|segment_start_point| segment_start_point) + .left_to_right_maximum() + .discard_start_times() + .only_positions(); + + position_buffers.push(best_position.save()); + } + + #[cfg(feature = "statistics")] + self.do_statistics(|s| { + s.save_position_buffer( + "[5] COMBINED positions span (by vertical rating maximum of split and nosplit)", + &_line_tags + .clone() + .into_iter() + .chain(once("combined")) + .collect::>(), + &position_buffers.last().unwrap(), + ) + }); + + if let Some(progress_handler) = progress_handler_opt.as_mut() { + progress_handler.inc(); + } + } + + // ------------------------------------------------------------------------------ + // Extract the best position for each incorrect span from position buffers + + assert!(self.list.len() == position_buffers.len()); + + let mut next_segment_position = self.get_end() - TimeDelta::one(); + let mut result_deltas = Vec::new(); + for (incorrect_span, position_buffer) in 
Iterator::zip(self.list.iter(), position_buffers.iter()).rev() { + let best_position = position_buffer.get_at(next_segment_position); + result_deltas.push(best_position - incorrect_span.start); + + next_segment_position = best_position; + } + + // the deltas were inserted back-to-front + result_deltas.reverse(); + + if let Some(progress_handler) = progress_handler_opt.as_mut() { + progress_handler.finish(); + } + + result_deltas + } + + /// Requires "start1 <= start2". Returns the compressed rating vector for + /// the overlapping ratings of a timespan of length + /// "length" on all start position from "start1" to "start2". + /// + /// This function has O(n) runtime, where n is the number of spans in the + /// reference list. + + fn single_span_ratings(&self, length: TimeDelta) -> RatingIterator> { + // If we fix one timespan and let an other timespan variable, we get such a + // curve for the rating: + // + // / --------- \ + // / \ + // ------- -------------------------- + // + // at first the rating be zero, then rise linearly, then it will be constant + // for a time and then fall to zero again + // + // The next function will return these special changepoints and their + // "delta"-change (delta-delta). + // Because the timespans in "self.reference" are sorted and non overlapping, + // the changepoints of a certain type (first rise, start of constant, ...) + // will also be sorted. That means we only have to compare the current first + // changepoints of each type to get the really first + // changepoint. We then apply this changepoint-delta to the current total delta + // and add the segment with the + // previous total delta to the buffer. This way we get the segments with the + // same delta very efficently in O(n). 
+ + let mut builder = DifferentialRatingBufferBuilder::new(self.get_start(), self.get_end()); + let mut timepoints: [Vec<(RatingDelta, TimePoint)>; 4] = [Vec::new(), Vec::new(), Vec::new(), Vec::new()]; + for &ref_ts in &self.reference { + let changepoints = Self::get_overlapping_rating_changepoints(length, ref_ts); + timepoints[0].push(changepoints[0]); + timepoints[1].push(changepoints[1]); + timepoints[2].push(changepoints[2]); + timepoints[3].push(changepoints[3]); + } + + // this is a vector of 4 iterators, each iterating over the contents of + // "timepoints[0]" to "timepoints[3]" + let mut iterators: ArrayVec<[_; 4]> = timepoints + .into_iter() + .cloned() + .map(|v| v.into_iter().peekable()) + .collect(); + loop { + // unpack the first value of each iterator + let next_timepoints: ArrayVec<[(usize, (Rating, TimePoint)); 4]> = iterators + .iter_mut() + .enumerate() + .map(|(i, iter)| iter.peek().map(|&v| (i, v))) + .filter_map(|opt| opt) + .collect(); + + // take the first next timepoint + let next_changepoint_opt = next_timepoints.into_iter().min_by_key::(|a| (a.1).1); + + // because each original array had the same length, all iterators should end at + // the same time + let (next_id, (segment_end_delta_delta, segment_end)) = match next_changepoint_opt { + Some(next_changepoint) => next_changepoint, + None => break, + }; + + builder.add_segment(segment_end, segment_end_delta_delta); + + // "next_id" contains the index of the iterator which contains + // "next_changepoint" -> pop that from the front so we don't have a endless loop + iterators[next_id].next(); + } + + // the rating values are continuous, so the first value of a segment is the + // last value of the previous segment. + // To avoid having each of these segment-break values two times in the buffer, + // every segments stops one timepoint + // before the real segment end. The real segment end is then the first value of + // the next value. 
+ // + // The last rating has to be 0, so we extend the last segment with the missing + // timepoint. + builder.extend_to_end(); + + builder.build().into_rating_iter() + } + + /// Returns the timepoints at which the rating delta changes if we move one + /// subtitle compared to + /// an other. + /// + /// If we fix one timespan and let an other timespan variable, we get such a + /// curve for the rating: + /// + /// ```text + /// + /// / --------- \ + /// / \ + /// ------- -------------------------- + /// ``` + /// + /// At first the rating be zero, then rise linearly, then it will be constant + /// for a time and then fall to zero again. This function computes these 4 + /// special timepoints. + pub fn get_overlapping_rating_changepoints( + length: TimeDelta, + constspan: TimeSpan, + ) -> [(RatingDeltaDelta, TimePoint); 4] { + let start_of_rise = constspan.start() - length; + let end_of_rise = constspan.end() - length; + let start_of_fall = constspan.start(); + let end_of_fall = constspan.end(); + + let timepoints: [TimePoint; 4] = if end_of_rise <= start_of_fall { + [start_of_rise, end_of_rise, start_of_fall, end_of_fall] + } else { + [start_of_rise, start_of_fall, end_of_rise, end_of_fall] + }; + + assert!(timepoints[0] <= timepoints[1]); + assert!(timepoints[1] <= timepoints[2]); + assert!(timepoints[2] <= timepoints[3]); + + let rise_delta = RatingDelta::compute_rating_delta(length, constspan.len()); + + [ + (rise_delta, timepoints[0]), + (-rise_delta, timepoints[1]), + (-rise_delta, timepoints[2]), + (rise_delta, timepoints[3]), + ] + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::rating_type::RatingExt; + use crate::segments::RatingFullSegment; + use crate::tests::get_random_prepared_test_time_spans; + + fn get_dummy_aligner() -> Aligner { + loop { + let reference_ts = get_random_prepared_test_time_spans(); + let incorrect_ts = get_random_prepared_test_time_spans(); + + // this is unlikely + if reference_ts.is_empty() || 
incorrect_ts.is_empty() { + continue; + } + + // new will return None, if both lists are empty -> highly unlikely + return Aligner::new(reference_ts, incorrect_ts, None); + } + } + + #[test] + /// Aligns random timespans to each other and calls alass. General test whether any internal + /// assertions are invalidated. + fn run_aligner() { + for _ in 0..20 { + get_dummy_aligner().align_with_splits(None, 0.1, None); + } + } + + #[test] + fn test_single_span_ratings() { + for _ in 0..30 { + let alass = get_dummy_aligner(); + + for span in alass.list.clone() { + let last: RatingFullSegment = alass + .single_span_ratings(span.len()) + .annotate_with_segment_start_times() + .into_iter() + .last() + .unwrap(); + assert_eq!(last.end_rating(), Rating::zero()); + //assert_eq!(dbg!(last.data.delta), RatingDelta::zero()); + } + } + } + + /*#[test] + /// `get_compressed_overlapping_ratings()` is highly optimized -> compare the results of slow and fast + /// implemntations. + fn get_compressed_overlapping_ratings() { + let mut rng = rand::thread_rng(); + + for _ in 0..30 { + let alass = get_dummy_aligner(); + let len: i64 = (rng.next_u32() % 100) as i64; + let rating_buffer1 = alass.get_compressed_overlapping_ratings( + alass.get_start(), + alass.get_end(), + TimeDelta::one() * len, + ); + let rating_buffer2 = alass.get_compressed_overlapping_ratings_slow( + alass.get_start(), + alass.get_end(), + TimeDelta::one() * len, + ); + assert_eq!( + rating_buffer1.iter().collect::>(), + rating_buffer2.iter().collect::>() + ); + } + }*/ + +} diff --git a/src/lib.rs b/alass-core/src/lib.rs similarity index 64% rename from src/lib.rs rename to alass-core/src/lib.rs index 0237892..5c92ff6 100644 --- a/src/lib.rs +++ b/alass-core/src/lib.rs @@ -1,4 +1,4 @@ -// This file is part of the Rust library and binary `aligner`. +// This file is part of the Rust library and binary `alass`. 
// // Copyright (C) 2017 kaegi // @@ -15,36 +15,71 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . - -#![deny(missing_docs, - missing_debug_implementations, missing_copy_implementations, - trivial_casts, - unsafe_code, - unstable_features, - unused_import_braces, unused_qualifications)] +#![deny( + //missing_docs, + missing_debug_implementations, + //missing_copy_implementations, + trivial_casts, + //unsafe_code, + unstable_features, + unused_import_braces, + unused_qualifications +)] #![allow(unknown_lints)] // for clippy -//! `aligner` takes two timespan arrays (e.g. from two subtitle files) and +//! `alass` takes two timespan arrays (e.g. from two subtitle files) and //! tries to align the `incorrect` subtitles //! to the `reference` subtitle. It automatically fixes offsets and //! introduces/removes breaks between subtitles in the `incorrect` //! subtitle to achive the best alignment. +extern crate arrayvec; #[cfg(test)] extern crate rand; -extern crate arrayvec; -// for internal use (in sub-modules) -mod internal; +mod alass; +mod rating_type; +mod segments; +mod statistics; +mod time_types; +mod timespan_ops; -// for external use (in other crates) - -use internal::{Aligner, prepare_time_spans}; -pub use internal::{ProgressHandler, TimeDelta, TimePoint, TimeSpan}; +use crate::alass::Aligner; +pub use crate::alass::ProgressHandler; +pub use crate::statistics::Statistics; +pub use crate::time_types::{TimeDelta, TimePoint, TimeSpan}; +use crate::timespan_ops::prepare_time_spans; // for use in this module (in lib.rs) use std::vec::from_elem; +pub fn align_nosplit( + list: Vec, + reference: Vec, + mut progress_handler_opt: Option>, + statistics_opt: Option, +) -> TimeDelta { + if let Some(p) = progress_handler_opt.as_mut() { + p.init(1); + } + + let (list_nonoverlapping, _) = prepare_time_spans(list.clone()); + let (ref_nonoverlapping, _) = prepare_time_spans(reference.clone()); + + if 
list_nonoverlapping.is_empty() || ref_nonoverlapping.is_empty() { + return TimeDelta::zero(); + } + + let alass = Aligner::new(list_nonoverlapping, ref_nonoverlapping, statistics_opt); + + if let Some(p) = progress_handler_opt.as_mut() { + p.inc(); + p.finish(); + } + + // get deltas for non-overlapping timespans + return alass.align_constant_delta(); +} /// Matches an `incorrect` subtitle list to a `reference` subtitle list. /// @@ -56,14 +91,20 @@ use std::vec::from_elem; /// so only one/the best offset is applied to ALL lines. The most common useful values are in the /// 0.2 to 0.01 range. /// -/// Especially for larger subtitles(e.g. 1 hour in millisecond resolution and 1000 subtitle lines) this +/// Especially for larger subtitles (e.g. 1 hour in millisecond resolution and 1000 subtitle lines) this /// process might take some seconds. To provide user feedback one can pass a `ProgressHandler` to /// this function. +/// +/// If you want to increase the speed of the alignment process, you can use the `speed_optimization` +/// parameter. This value can be between `0` and `+inf`, altough after `10` the accuracy +/// will have greatly degraded. It is recommended to supply a value around `5`. 
pub fn align( list: Vec, reference: Vec, split_penalty_normalized: f64, - progress_handler: Option>, + speed_optimization: Option, + progress_handler_opt: Option>, + statistics_opt: Option, ) -> Vec { let (list_nonoverlapping, list_indices) = prepare_time_spans(list.clone()); let (ref_nonoverlapping, _) = prepare_time_spans(reference.clone()); @@ -72,29 +113,19 @@ pub fn align( return from_elem(TimeDelta::zero(), list.len()); } - let list_len = list_nonoverlapping.len(); - let aligner_opt = Aligner::new( - list_nonoverlapping, - ref_nonoverlapping, - split_penalty_normalized, - progress_handler, - ); + let alass = Aligner::new(list_nonoverlapping, ref_nonoverlapping, statistics_opt); // get deltas for non-overlapping timespans - let deltas = match aligner_opt { - Some(mut aligner) => aligner.align_all_spans(), - None => (0..list_len).map(|_| TimeDelta::zero()).collect(), - }; + let deltas = alass.align_with_splits(progress_handler_opt, split_penalty_normalized, speed_optimization); // get deltas for overlapping timspan-list list_indices.into_iter().map(|i| deltas[i]).collect() } - #[cfg(test)] mod tests { use super::*; - use internal::{TimePoint, prepare_time_spans}; + use crate::{prepare_time_spans, TimePoint}; use rand; use rand::Rng; @@ -138,7 +169,7 @@ mod tests { /// All test time span sequences (some are predefined some are random). pub fn get_test_time_spans() -> Vec> { - (0..100) + (0..1000) .map(|_| generate_random_time_spans()) .chain(predefined_time_spans().into_iter()) .collect() diff --git a/alass-core/src/rating_type.rs b/alass-core/src/rating_type.rs new file mode 100644 index 0000000..1755a98 --- /dev/null +++ b/alass-core/src/rating_type.rs @@ -0,0 +1,88 @@ +// This file is part of the Rust library and binary `alass`. 
+// +// Copyright (C) 2017 kaegi +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use crate::TimeDelta; +use std::cmp::{max, min}; + +// these objects determine the precision/length of the rating (i32/i64) - lower +// values take less space and time, higher values have higher precision +pub type Rating = i64; +pub type RatingDelta = i64; +pub type RatingDeltaDelta = i64; +pub const RATING_PRECISION: Rating = (1 << 32); + +pub trait RatingExt { + #[inline] + fn add_mul(r: Rating, rd: RatingDelta, td: TimeDelta) -> Rating { + r + rd * td.as_i64() + } + + #[inline] + fn compute(a: TimeDelta, b: TimeDelta) -> Rating { + let min: i64 = min(a, b).as_i64(); + let max: i64 = max(a, b).as_i64(); + (min * RATING_PRECISION) / max + } + + #[inline] + fn nosplit_bonus(unnormalized: f64) -> Rating { + (RATING_PRECISION as f64 * unnormalized) as Rating + } + + #[inline] + fn zero() -> Rating { + 0 + } + + #[inline] + fn as_f32(self) -> f32; + + #[inline] + fn as_f64(self) -> f32; + + #[inline] + fn as_readable_f32(self) -> f32; +} +impl RatingExt for Rating { + #[inline] + fn as_f32(self) -> f32 { + self as f32 + } + + #[inline] + fn as_f64(self) -> f32 { + self as f32 + } + + #[inline] + fn as_readable_f32(self) -> f32 { + self as f32 / RATING_PRECISION as f32 + } +} + +pub trait RatingDeltaExt { + #[inline] + fn compute_rating_delta(a: TimeDelta, b: TimeDelta) -> RatingDelta { + Rating::compute(a, b) / 
min(a, b).as_i64() + } + + #[inline] + fn from_i64(v: i64) -> RatingDelta { + v + } +} +impl RatingDeltaExt for RatingDelta {} diff --git a/alass-core/src/segments.rs b/alass-core/src/segments.rs new file mode 100644 index 0000000..5e64052 --- /dev/null +++ b/alass-core/src/segments.rs @@ -0,0 +1,1900 @@ +use crate::rating_type::{Rating, RatingDelta, RatingDeltaDelta, RatingExt}; +use crate::time_types::{TimeDelta, TimePoint, TimeSpan}; +use std::cmp::{max, min}; +use std::iter::once; +use std::ops::Add; + +#[derive(Clone, Copy, Debug)] +pub struct PositionInfo { + pub position: TimePoint, + pub drag: bool, // if true, position has a "delta" of 1; if false it has a delta of 0 +} + +impl PositionInfo { + #[inline] + fn constant(position: TimePoint) -> PositionInfo { + PositionInfo { position, drag: false } + } + + #[inline] + pub fn start_position(&self) -> TimePoint { + self.position + } + + #[inline] + pub fn end_position(&self, span_length: TimeDelta) -> TimePoint { + if self.drag { + self.position + span_length - TimeDelta::one() + } else { + self.position + } + } + + #[inline] + pub fn exclusive_end_position(&self, span_length: TimeDelta) -> TimePoint { + if self.drag { + self.position + span_length + } else { + self.position + } + } + + #[inline] + fn advanced_position(self, time_delta: TimeDelta) -> TimePoint { + if self.drag { + self.position + time_delta + } else { + self.position + } + } + + #[inline] + fn advanced(self, time_delta: TimeDelta) -> PositionInfo { + if self.drag { + PositionInfo { + position: self.position + time_delta, + drag: true, + } + } else { + PositionInfo { + position: self.position, + drag: false, + } + } + } + + #[inline] + fn advance(&mut self, time_delta: TimeDelta) { + if self.drag { + self.position += time_delta; + } + } +} + +#[derive(Default, Clone, Copy, Debug)] +pub struct RatingInfo { + pub rating: Rating, + pub delta: RatingDelta, +} + +impl RatingInfo { + #[inline] + fn constant(rating: Rating) -> RatingInfo { + 
RatingInfo { + rating, + delta: RatingDelta::zero(), + } + } + + #[inline] + fn advanced(self, len: TimeDelta) -> RatingInfo { + RatingInfo { + rating: Rating::add_mul(self.rating, self.delta, len), + delta: self.delta, + } + } + + #[inline] + fn get_at(self, len: TimeDelta) -> Rating { + Rating::add_mul(self.rating, self.delta, len) + } + + #[inline] + fn advance(&mut self, len: TimeDelta) { + self.rating = Rating::add_mul(self.rating, self.delta, len); + } + + #[inline] + pub fn start_rating(self) -> Rating { + self.rating + } + + #[inline] + pub fn end_rating(self, len: TimeDelta) -> Rating { + Rating::add_mul(self.rating, self.delta, len - TimeDelta::one()) + } + + #[inline] + pub fn exclusive_end_rating(self, len: TimeDelta) -> Rating { + Rating::add_mul(self.rating, self.delta, len) + } +} + +#[derive(Clone, Debug)] +pub struct DualInfo { + position_info: PositionInfo, + rating_info: RatingInfo, +} + +impl DualInfo { + #[inline] + fn advanced(self, len: TimeDelta) -> DualInfo { + DualInfo { + rating_info: self.rating_info.advanced(len), + position_info: self.position_info.advanced(len), + } + } +} + +impl Add for RatingInfo { + type Output = RatingInfo; + + #[inline] + fn add(self, rhs: RatingInfo) -> RatingInfo { + RatingInfo { + rating: self.rating + rhs.rating, + delta: self.delta + rhs.delta, + } + } +} + +#[derive(Debug)] +pub struct RatingBuffer { + pub start: TimePoint, + pub buffer: Vec, +} + +impl RatingBuffer { + #[inline] + pub fn into_iter(self) -> RatingIterator { + RatingIterator::<_> { + start: self.start, + iter: self.buffer.into_iter(), + } + } + + #[inline] + pub fn iter(&self) -> RatingIterator>> { + RatingIterator::<_> { + start: self.start, + iter: self.buffer.iter().cloned(), + } + } + + #[inline] + pub fn first_end_point(&self) -> Option { + self.buffer.first().map(|rating_segment| rating_segment.end_point) + } + + #[inline] + pub fn end(&self) -> Option { + self.buffer.last().map(|rating_segment| rating_segment.end_point) + } + + 
#[inline] + pub fn start(&self) -> TimePoint { + self.start + } + + #[inline] + pub fn maximum(&self) -> Rating { + self.buffer + .iter() + .fold( + (Rating::zero(), self.start), + #[inline] + |(current_max, segment_start): (Rating, TimePoint), segment: &RatingSegment| { + assert!(segment_start < segment.end_point); + let start_rating = segment.start_rating(); + let end_rating = segment.end_rating(segment.end_point - segment_start); + + let new_max = max(max(current_max, start_rating), end_rating); + + (new_max, segment.end_point) + }, + ) + .0 + } +} + +pub struct DifferentialRatingBufferBuilder { + start: TimePoint, + end: TimePoint, + buffer: Vec>, +} + +impl DifferentialRatingBufferBuilder { + #[inline] + pub fn new(start: TimePoint, end: TimePoint) -> DifferentialRatingBufferBuilder { + assert!(start < end); + + DifferentialRatingBufferBuilder { + start: start, + end: end, + buffer: Vec::new(), + } + } + + #[inline] + pub fn add_segment(&mut self, segment_end: TimePoint, segment_end_delta_delta: RatingDeltaDelta) { + if let Some(last_segment) = self.buffer.last_mut() { + assert!(last_segment.end_point <= segment_end); + + if last_segment.end_point == segment_end { + last_segment.data += segment_end_delta_delta; + return; + } + } else { + assert!(self.start < segment_end); + } + + self.buffer.push(Segment { + end_point: segment_end, + data: segment_end_delta_delta, + }); + } + + #[inline] + pub fn extend_to_end(&mut self) { + self.add_segment(self.end, RatingDeltaDelta::zero()) + } + + #[inline] + pub fn build(self) -> DifferentialRatingBuffer { + DifferentialRatingBuffer { + start: self.start, + buffer: self.buffer, + } + } +} + +pub struct DifferentialRatingBuffer { + start: TimePoint, + buffer: Vec>, +} + +impl DifferentialRatingBuffer { + #[inline] + pub fn into_rating_iter(self) -> RatingIterator> { + struct ScanState { + rating: Rating, + delta: RatingDelta, + last_segment_end: TimePoint, + } + + let start_state = ScanState { + rating: Rating::zero(), + 
delta: RatingDelta::zero(), + last_segment_end: self.start, + }; + + let iter = self.buffer.into_iter().scan( + start_state, + #[inline] + |state: &mut ScanState, segment: Segment| { + let result = Segment { + end_point: segment.end_point, + data: RatingInfo { + rating: state.rating, + delta: state.delta, + }, + }; + + state.rating = Rating::add_mul(state.rating, state.delta, segment.end_point - state.last_segment_end); + state.delta += segment.data; + state.last_segment_end = segment.end_point; + + Some(result) + }, + ); + + RatingIterator::<_> { + start: self.start, + iter: iter, + } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct Segment { + pub end_point: TimePoint, + pub data: D, +} + +impl Segment { + #[inline] + fn with_start_point(self, start_point: TimePoint) -> FullSegment { + assert!(start_point < self.end_point); + + FullSegment { + span: TimeSpan::new(start_point, self.end_point), + data: self.data, + } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct FullSegment { + pub span: TimeSpan, + pub data: D, +} +impl FullSegment { + #[inline] + fn discard_start_time(self) -> Segment { + Segment { + end_point: self.span.end, + data: self.data, + } + } +} + +#[derive(Debug)] +pub struct FullSegmentIterator +where + I: Iterator>, +{ + start: TimePoint, + iter: I, +} + +impl FullSegmentIterator +where + I: Iterator>, +{ + pub fn into_iter(self) -> I { + self.iter + } +} + +#[derive(Debug)] +pub struct SegmentIterator +where + I: Iterator>, +{ + start: TimePoint, + iter: I, +} + +impl SegmentIterator +where + I: Iterator>, +{ + #[inline] + pub fn annotate_with_segment_start_times(self) -> FullSegmentIterator>> { + FullSegmentIterator:: { + start: self.start, + iter: self.iter.scan( + self.start, + #[inline] + |last_segment_end: &mut TimePoint, segment: Segment| { + /*println!( + "ANNOTATE segment start {} segment end {}", + *last_segment_end, segment.end_point + );*/ + if *last_segment_end >= segment.end_point { + // DEBUG TODO: remove + println!( + 
"ANNOTATE segment start {} segment end {}", + *last_segment_end, segment.end_point + ); + } + + assert!(*last_segment_end < segment.end_point); + + let span = TimeSpan::new(*last_segment_end, segment.end_point); + + *last_segment_end = segment.end_point; + + Some(FullSegment { + span: span, + data: segment.data, + }) + }, + ), + } + } + + #[inline] + pub fn into_iter(self) -> I { + self.iter + } + + #[inline] + pub fn shift(self, t: TimeDelta) -> SegmentIterator> { + SegmentIterator:: { + start: self.start + t, + iter: self.iter.map( + #[inline] + move |mut segment: Segment| { + segment.end_point += t; + segment + }, + ), + } + } + + #[inline] + pub fn shift_simple(self, t: TimeDelta) -> SegmentIterator> { + SegmentIterator:: { + start: self.start, + iter: self.iter.map( + #[inline] + move |mut segment: Segment| { + segment.end_point += t; + segment + }, + ), + } + } + + #[inline] + pub fn append(self, end_point: TimePoint, data: D) -> SegmentIterator> { + SegmentIterator:: { + start: self.start, + iter: self.iter.chain(once(Segment:: { + end_point: end_point, + data: data, + })), + } + } +} + +impl> FullSegmentIterator { + #[inline] + pub fn discard_start_times(self) -> SegmentIterator> { + SegmentIterator:: { + start: self.start, + iter: self.iter.map(|segment: FullSegment| Segment:: { + end_point: segment.span.end, + data: segment.data, + }), + } + } +} + +pub type PositionSegment = Segment; +impl PositionSegment { + #[inline] + pub fn start_position(&self) -> TimePoint { + self.data.start_position() + } + + #[inline] + pub fn end_position(&self, len: TimeDelta) -> TimePoint { + self.data.end_position(len) + } +} + +pub type PositionFullSegment = FullSegment; +impl PositionFullSegment { + #[inline] + pub fn exclusive_end_position(&self) -> TimePoint { + self.data.exclusive_end_position(self.span.len()) + } +} + +pub type RatingSegment = Segment; +impl RatingSegment { + #[inline] + pub fn advance(&mut self, delta: TimeDelta) { + self.data.advance(delta); + } + + 
#[inline] + pub fn start_rating(&self) -> Rating { + self.data.start_rating() + } + + #[inline] + pub fn end_rating(&self, len: TimeDelta) -> Rating { + self.data.end_rating(len) + } +} + +pub type DualSegment = Segment; +impl DualSegment { + #[inline] + fn advance(&mut self, delta: TimeDelta) { + self.data.rating_info.advance(delta); + self.data.position_info.advance(delta); + } + + #[inline] + fn as_rating_segment(&self) -> RatingSegment { + RatingSegment { + end_point: self.end_point, + data: self.data.rating_info, + } + } + + #[inline] + fn as_position_segment(&self) -> PositionSegment { + PositionSegment { + end_point: self.end_point, + data: self.data.position_info, + } + } + + #[inline] + pub fn start_rating(&self) -> Rating { + self.data.rating_info.rating + } + + #[inline] + pub fn start_position(&self) -> TimePoint { + self.data.position_info.position + } +} + +pub type RatingFullSegment = FullSegment; +pub type DualFullSegment = FullSegment; +impl DualFullSegment { + #[inline] + pub fn start_rating(&self) -> Rating { + self.data.rating_info.rating + } + + #[inline] + pub fn start_position(&self) -> TimePoint { + self.data.position_info.position + } + + #[inline] + pub fn end_rating(&self) -> Rating { + Rating::add_mul( + self.data.rating_info.rating, + self.data.rating_info.delta, + self.span.len() - TimeDelta::one(), + ) + } + + #[inline] + pub fn exclusive_end_position(&self) -> TimePoint { + self.data.position_info.exclusive_end_position(self.span.len()) + } + + #[inline] + pub fn exclusive_end_rating(&self) -> Rating { + self.data.rating_info.exclusive_end_rating(self.span.len()) + } +} + +pub trait SI: Iterator> {} +impl SI for T where T: Iterator> {} + +pub trait RI: Iterator {} +impl RI for T where T: Iterator {} + +pub trait PI: Iterator {} +impl PI for T where T: Iterator {} + +pub trait DI: Iterator {} +impl DI for T where T: Iterator {} + +pub trait SFI: Iterator> {} +impl SFI for T where T: Iterator> {} + +pub trait RFI: Iterator {} +impl RFI 
for T where T: Iterator {} + +pub trait PFI: Iterator {} +impl PFI for T where T: Iterator {} + +pub trait DFI: Iterator {} +impl DFI for T where T: Iterator {} + +pub type DualIterator = SegmentIterator; +impl DualIterator { + #[inline] + pub fn save(self) -> DualBuffer { + DualBuffer { + start: self.start, + buffer: self.iter.collect(), + } + } + + pub fn save_separate(self) -> SeparateDualBuffer { + let (rating_buffer, position_buffer): (Vec, Vec) = into_push_iter( + self.iter, + dual_push_iter( + only_ratings_push_iter(simplifiy_ratings_push_iter( + self.start, + discard_start_times_push_iter(collect_to_vec_push_iter()), + )), + only_positions_push_iter(simplifiy_positions_push_iter( + self.start, + discard_start_times_push_iter(collect_to_vec_push_iter()), + )), + ), + /*dual_push_iter( + only_ratings_push_iter( + collect_to_vec_push_iter(), + ), + only_positions_push_iter( + collect_to_vec_push_iter(), + ), + ),*/ + ); + + SeparateDualBuffer { + rating_buffer: RatingBuffer { + start: self.start, + buffer: rating_buffer, + }, + position_buffer: PositionBuffer { + start: self.start, + buffer: position_buffer, + }, + } + } + + pub fn only_positions(self) -> PositionIterator { + PositionIterator::<_> { + start: self.start, + iter: self.iter.map(|dual_segment| dual_segment.as_position_segment()), + } + } + + pub fn only_ratings(self) -> RatingIterator { + RatingIterator::<_> { + start: self.start, + iter: self.iter.map(|dual_segment| dual_segment.as_rating_segment()), + } + } + + pub fn simplify(mut self) -> DualFullSegmentIterator { + DualFullSegmentIterator { + start: self.start, + iter: DualSimplifyIterator { + current_segment: self.iter.next().map(|seg| seg.with_start_point(self.start)), + iter: self.iter, + }, + } + } +} + +pub type PositionIterator = SegmentIterator; +impl PositionIterator { + #[inline] + pub fn save(self) -> PositionBuffer { + PositionBuffer { + start: self.start, + buffer: self.iter.collect(), + } + } +} + +pub type RatingIterator = 
SegmentIterator; +impl RatingIterator { + #[inline] + pub fn save(self) -> RatingBuffer { + RatingBuffer { + start: self.start, + buffer: self.iter.collect(), + } + } + + #[inline] + pub fn save_simplified(self) -> RatingBuffer { + RatingBuffer { + start: self.start, + buffer: into_push_iter( + self.iter, + simplifiy_ratings_push_iter(self.start, discard_start_times_push_iter(collect_to_vec_push_iter())), + ), + } + } + + #[inline] + pub fn save_aggressively_simplified(self, epsilon: RatingDelta) -> RatingBuffer { + RatingBuffer { + start: self.start, + buffer: into_push_iter( + self.iter, + aggressive_simplifiy_ratings_push_iter( + self.start, + epsilon, + discard_start_times_push_iter(collect_to_vec_push_iter()), + ), + ), + } + } + + #[inline] + pub fn extend_to(self, end_point: TimePoint) -> RatingIterator { + self.append( + end_point, + RatingInfo { + rating: Rating::zero(), + delta: RatingDelta::zero(), + }, + ) + } + + #[inline] + pub fn add_rating(self, rating_delta: RatingDelta) -> RatingIterator { + RatingIterator::<_> { + start: self.start, + iter: self.iter.map(move |rating_segment| RatingSegment { + end_point: rating_segment.end_point, + data: RatingInfo { + rating: rating_segment.data.rating + rating_delta, + delta: rating_segment.data.delta, + }, + }), + } + } + + #[inline] + pub fn clamp_end(self, clamp: TimePoint) -> RatingIterator { + //println!("CLAMP {}", clamp); + RatingIterator::<_> { + start: self.start, + iter: self.iter.map(move |rating_segment| { + /*if rating_segment.end_point >= clamp { + println!("CLAMPED {} to {} (prev {})", rating_segment.end_point, clamp, last_start); + }*/ + RatingSegment { + end_point: min(rating_segment.end_point, clamp), + data: rating_segment.data, + } + }), + } + } +} + +struct LeftToRightMaximumIterator +where + I: DFI, +{ + input_iter: I, + + current_best_rating: Rating, + + /// The start timepoint of the position, which has the maximum rating + current_best_timepoint: TimePoint, + + stored_segment: Option, 
+} + +impl LeftToRightMaximumIterator { + #[inline] + fn new(i: I, start: TimePoint) -> LeftToRightMaximumIterator { + LeftToRightMaximumIterator:: { + input_iter: i, + current_best_rating: Rating::zero(), + current_best_timepoint: start, + stored_segment: None, + } + } + + #[inline] + fn constant_dual_info(&self) -> DualInfo { + DualInfo { + rating_info: RatingInfo::constant(self.current_best_rating), + position_info: PositionInfo::constant(self.current_best_timepoint), + } + } + + #[inline] + fn constant_segment(&self, span: TimeSpan) -> DualFullSegment { + DualFullSegment { + span: span, + data: self.constant_dual_info(), + } + } +} + +impl Iterator for LeftToRightMaximumIterator { + type Item = DualFullSegment; + + #[inline] // XXX: is this really faster? + fn next(&mut self) -> Option { + // TODO: rewrite when changing to 2018 edition + if let Some(stored_segment) = self.stored_segment.take() { + return Some(stored_segment); + } + + // TODO: unify segments when possible + let segment: DualFullSegment; + + match self.input_iter.next() { + None => return None, + Some(_segment) => segment = _segment, + } + + let segment_start_rating = segment.start_rating(); + let segment_end_rating = segment.end_rating(); + + let start_position = segment.data.position_info.position; + let end_position = segment.data.position_info.end_position(segment.span.len()); + + if segment_start_rating <= self.current_best_rating && segment_end_rating <= self.current_best_rating { + return Some(self.constant_segment(segment.span)); + } else if segment_start_rating >= self.current_best_rating { + if segment_start_rating >= segment_end_rating { + self.current_best_rating = segment_start_rating; + self.current_best_timepoint = start_position; + + return Some(self.constant_segment(segment.span)); + } else { + self.current_best_rating = segment_end_rating; + self.current_best_timepoint = end_position; + + return Some(segment); + } + } else { + /* implicit: + segment_start_rating < 
self.current_best_rating && + ( + segment_start_rating > self.current_best_rating || + segment_end_rating > self.current_best_rating + ) + + which is equivalent to + + segment_start_rating < self.current_best_rating && + segment_end_rating > self.current_best_rating + */ + + assert!(segment_start_rating < self.current_best_rating); + assert!(segment_end_rating > self.current_best_rating); + + let switch_i64 = (self.current_best_rating - segment_start_rating) / segment.data.rating_info.delta + 1; + assert!(0 < switch_i64); + assert!(switch_i64 < segment.span.len().as_i64()); + + let switch_timedelta = TimeDelta::from_i64(switch_i64); + + let segment1 = DualFullSegment { + span: TimeSpan::new(segment.span.start, segment.span.start + switch_timedelta), + data: self.constant_dual_info(), + }; + + self.current_best_rating = segment_end_rating; + self.current_best_timepoint = end_position; + + let segment2 = DualFullSegment { + span: TimeSpan::new(segment.span.start + switch_timedelta, segment.span.end), + data: segment.data.advanced(switch_timedelta), + }; + + self.stored_segment = Some(segment2); + return Some(segment1); + } + } +} + +#[derive(Debug)] +pub struct PositionBuffer { + start: TimePoint, + buffer: Vec, +} + +impl PositionBuffer { + #[inline] + pub fn into_iter(self) -> PositionIterator { + PositionIterator::<_> { + start: self.start, + iter: self.buffer.into_iter(), + } + } + + #[inline] + pub fn iter(&self) -> PositionIterator>> { + PositionIterator::<_> { + start: self.start, + iter: self.buffer.iter().cloned(), + } + } + + #[inline] + pub fn end_point(&self) -> Option { + self.buffer.last().map(|rating_segment| rating_segment.end_point) + } + + #[inline] + pub fn end(&self) -> Option { + self.buffer.last().map(|rating_segment| rating_segment.end_point) + } + + #[inline] + pub fn start(&self) -> TimePoint { + self.start + } + + #[inline] + pub fn end_position(&self) -> TimePoint { + assert!(self.buffer.len() > 0); + + let segment_start: TimePoint; + if 
self.buffer.len() == 1 { + segment_start = self.start; + } else { + segment_start = self.buffer[self.buffer.len() - 2].end_point; + } + + let last_segment = *self.buffer.last().unwrap(); + last_segment.data.end_position(last_segment.end_point - segment_start) + } + + #[inline] + pub fn get_at(&self, t: TimePoint) -> TimePoint { + assert!(t >= self.start && t < self.end_point().unwrap()); + let mut segment_start = self.start; + + for segment in &self.buffer { + if t >= segment_start && t < segment.end_point { + return segment.data.advanced_position(t - segment_start); + } + segment_start = segment.end_point; + } + + unreachable!() + } + + #[inline] + pub fn maximum(&self) -> TimePoint { + let state: (TimePoint, TimePoint) = (self.buffer.first().unwrap().start_position(), self.start); + + self.buffer + .iter() + .fold( + state, + #[inline] + |(current_max, segment_start): (TimePoint, TimePoint), segment: &PositionSegment| { + assert!(segment_start < segment.end_point); + let start_rating: TimePoint = segment.start_position(); + let end_rating: TimePoint = segment.end_position(segment.end_point - segment_start); + + let new_max = max(max(current_max, start_rating), end_rating); + + (new_max, segment.end_point) + }, + ) + .0 + } + + #[inline] + pub fn minimum(&self) -> TimePoint { + let state: (TimePoint, TimePoint) = (self.buffer.first().unwrap().start_position(), self.start); + + self.buffer + .iter() + .fold( + state, + #[inline] + |(current_max, segment_start): (TimePoint, TimePoint), segment: &PositionSegment| { + assert!(segment_start < segment.end_point); + let start_rating: TimePoint = segment.start_position(); + let end_rating: TimePoint = segment.end_position(segment.end_point - segment_start); + + let new_max = min(min(current_max, start_rating), end_rating); + + (new_max, segment.end_point) + }, + ) + .0 + } +} + +#[derive(Debug)] +pub struct DualBuffer { + pub start: TimePoint, + pub buffer: Vec, +} + +impl DualBuffer { + #[inline] + pub fn into_iter(self) 
-> DualIterator { + DualIterator::<_> { + start: self.start, + iter: self.buffer.into_iter(), + } + } + + #[inline] + pub fn iter(&self) -> DualIterator>> { + DualIterator::<_> { + start: self.start, + iter: self.buffer.iter().cloned(), + } + } +} + +#[derive(Debug)] +pub struct SeparateDualBuffer { + pub rating_buffer: RatingBuffer, + pub position_buffer: PositionBuffer, +} + +pub type DualFullSegmentIterator = FullSegmentIterator; // TODO: rename to DualFullIterator +impl DualFullSegmentIterator { + #[inline] + pub fn left_to_right_maximum(self) -> DualFullSegmentIterator { + DualFullSegmentIterator::<_> { + start: self.start, + iter: LeftToRightMaximumIterator::<_>::new(self.iter, self.start), + } + } +} + +struct CombinedMaximumDualIterator +where + I1: DI, + I2: DI, +{ + stored_segment: Option, + segment_start: TimePoint, + dual_seg1: DualSegment, + dual_seg2: DualSegment, + input_iter1: I1, + input_iter2: I2, + finished: bool, +} + +impl CombinedMaximumDualIterator { + #[inline] + fn generate_maximum_segments(&mut self, len: TimeDelta, segment_end: TimePoint) -> DualFullSegment { + let start_rating1 = self.dual_seg1.data.rating_info.rating; + let start_rating2 = self.dual_seg2.data.rating_info.rating; + let end_rating1 = self.dual_seg1.data.rating_info.end_rating(len); + let end_rating2 = self.dual_seg2.data.rating_info.end_rating(len); + + let delta1 = self.dual_seg1.data.rating_info.delta; + let delta2 = self.dual_seg2.data.rating_info.delta; + + if start_rating1 >= start_rating2 && end_rating1 >= end_rating2 { + // first segment is better + + DualFullSegment { + span: TimeSpan::new(self.segment_start, segment_end), + data: self.dual_seg1.data.clone(), + } + } else if start_rating1 <= start_rating2 && end_rating1 <= end_rating2 { + // second segment is better + + DualFullSegment { + span: TimeSpan::new(self.segment_start, segment_end), + data: self.dual_seg2.data.clone(), + } + } else { + // segments switch somewhere in this segment + + // spoint is the 
first position where the second better segment is better + let spoint = Self::get_switch_point(start_rating1, start_rating2, delta1, delta2); + assert!(0 < spoint); + assert!(spoint < len.as_i64()); + + let spoint_delta = TimeDelta::from_i64(spoint); + + let segment1; + let segment2; + + if start_rating1 > start_rating2 && end_rating1 < end_rating2 { + // first segment starts above second segment + + segment1 = DualFullSegment { + span: TimeSpan::new(self.segment_start, self.segment_start + spoint_delta), + data: self.dual_seg1.data.clone(), + }; + segment2 = DualFullSegment { + span: TimeSpan::new(self.segment_start + spoint_delta, segment_end), + data: self.dual_seg2.data.clone().advanced(spoint_delta), + }; + } else { + // second segment starts above first segment + + segment1 = DualFullSegment { + span: TimeSpan::new(self.segment_start, self.segment_start + spoint_delta), + data: self.dual_seg2.data.clone(), + }; + segment2 = DualFullSegment { + span: TimeSpan::new(self.segment_start + spoint_delta, segment_end), + data: self.dual_seg1.data.clone().advanced(spoint_delta), + } + } + + self.stored_segment = Some(segment2); + + segment1 + } + } + + #[inline] + fn get_switch_point(start_rating1: Rating, start_rating2: Rating, delta1: RatingDelta, delta2: RatingDelta) -> i64 { + // start_rating1 + delta1 * x = start_rating2 + delta2 * x + // delta1 * x - delta2 * x = start_rating2 - start_rating1 + // (delta1 - delta2) * x = start_rating2 - start_rating1 + // + // solving for x: + // + // x = (start_rating2 - start_rating1) / (delta1 - delta2) + (start_rating2 - start_rating1) / (delta1 - delta2) + 1 + } +} + +impl Iterator for CombinedMaximumDualIterator { + type Item = DualFullSegment; + + #[inline] + fn next(&mut self) -> Option { + if let Some(stored_segment) = self.stored_segment.take() { + return Some(stored_segment); + } + + if self.finished { + return None; + } + + assert!(self.segment_start < self.dual_seg1.end_point); + assert!(self.segment_start < 
self.dual_seg2.end_point); + + let len; + let result: DualFullSegment; + + if self.dual_seg1.end_point < self.dual_seg2.end_point { + len = self.dual_seg1.end_point - self.segment_start; + + /*println!( + "COMBINED1 {} {}", + self.dual_seg2.end_point, self.segment_start + );*/ + result = self.generate_maximum_segments(len, self.dual_seg1.end_point); + + self.segment_start = self.dual_seg1.end_point; + + self.dual_seg1 = self + .input_iter1 + .next() + .expect("CombinedMaximumDualIterator: First iterator ended before second"); + self.dual_seg2.advance(len); + } else if self.dual_seg2.end_point < self.dual_seg1.end_point { + len = self.dual_seg2.end_point - self.segment_start; + + /*println!( + "COMBINED2 {} {}", + self.segment_start, self.dual_seg2.end_point + );*/ + result = self.generate_maximum_segments(len, self.dual_seg2.end_point); + + self.segment_start = self.dual_seg2.end_point; + + self.dual_seg1.advance(len); + self.dual_seg2 = self + .input_iter2 + .next() + .expect("CombinedMaximumDualIterator: Second iterator ended before first"); + } else { + match (self.input_iter1.next(), self.input_iter2.next()) { + (Some(dual_seg1), Some(dual_seg2)) => { + len = self.dual_seg1.end_point - self.segment_start; + + result = self.generate_maximum_segments(len, self.dual_seg1.end_point); + + self.segment_start = self.dual_seg1.end_point; + + self.dual_seg1 = dual_seg1; + self.dual_seg2 = dual_seg2; + } + (Some(_), None) => panic!("CombinedMaximumDualIterator: Second iterator ended before first"), + (None, Some(_)) => panic!("CombinedMaximumDualIterator: First iterator ended before second"), + (None, None) => { + len = self.dual_seg1.end_point - self.segment_start; + + //println!("COMBINED END POINT {}", self.dual_seg1.end_point); + + result = self.generate_maximum_segments(len, self.dual_seg1.end_point); + + self.finished = true; + } + } + } + + Some(result) + } +} + +#[inline] +pub fn combined_maximum_of_dual_iterators( + mut iter1: DualIterator, + mut iter2: 
DualIterator, +) -> DualFullSegmentIterator { + assert!(iter1.start == iter2.start); + let start = iter1.start; + + let dual_seg1 = iter1 + .iter + .next() + .expect("First iterator should have at least one element"); + let dual_seg2 = iter2 + .iter + .next() + .expect("Second iterator should have at least one element"); + + DualFullSegmentIterator::<_> { + start: start, + iter: CombinedMaximumDualIterator::<_, _> { + stored_segment: None, + segment_start: start, + dual_seg1: dual_seg1, + dual_seg2: dual_seg2, + input_iter1: iter1.iter, + input_iter2: iter2.iter, + finished: false, + }, + } +} + +pub type RatingFullIterator = FullSegmentIterator; +impl RatingFullIterator { + #[inline] + pub fn annotate_with_position_info( + self, + position_generate: impl Fn(/* segment_start */ TimePoint) -> TimePoint, + ) -> DualFullSegmentIterator { + DualFullSegmentIterator::<_> { + start: self.start, + iter: self.iter.map( + #[inline] + move |rating_full_segment| DualFullSegment { + span: rating_full_segment.span, + data: DualInfo { + rating_info: rating_full_segment.data, + position_info: PositionInfo { + position: position_generate(rating_full_segment.span.start), + drag: true, + }, + }, + }, + ), + } + } +} + +#[inline] +pub fn zero_rating_iterator(start: TimePoint, end: TimePoint) -> RatingIterator { + RatingIterator::<_> { + start: start, + iter: once(Segment { + end_point: end, + data: RatingInfo { + rating: Rating::zero(), + delta: RatingDelta::zero(), + }, + }), + } +} + +impl RatingFullSegment { + #[inline] + pub fn start_rating(self) -> Rating { + self.data.rating + } + + #[inline] + pub fn end_rating(self) -> Rating { + Rating::add_mul(self.data.rating, self.data.delta, self.span.len() - TimeDelta::one()) + } + + #[inline] + pub fn exclusive_end_rating(&self) -> Rating { + Rating::add_mul(self.data.rating, self.data.delta, self.span.len()) + } +} + +struct RatingAdderIterator +where + I1: RI, + I2: RI, +{ + segment_start: TimePoint, + dual_seg1: RatingSegment, + 
dual_seg2: RatingSegment, + input_iter1: I1, + input_iter2: I2, + finished: bool, +} + +impl RatingAdderIterator { + #[inline] + fn generate_segment(&mut self, segment_end: TimePoint) -> RatingFullSegment { + let start_rating1 = self.dual_seg1.data.rating; + let start_rating2 = self.dual_seg2.data.rating; + + let delta1 = self.dual_seg1.data.delta; + let delta2 = self.dual_seg2.data.delta; + + /*println!( + "ADDER new segment {} to {}", + self.segment_start, segment_end + );*/ + + RatingFullSegment { + span: TimeSpan::new(self.segment_start, segment_end), + data: RatingInfo { + rating: start_rating1 + start_rating2, + delta: delta1 + delta2, + }, + } + } +} + +impl Iterator for RatingAdderIterator { + type Item = RatingFullSegment; + + #[inline] + fn next(&mut self) -> Option { + if self.finished { + return None; + } + + let len; + let result: RatingFullSegment; + + /*println!( + "ADDER start: {} ep1: {} ep2: {}", + self.segment_start, self.dual_seg1.end_point, self.dual_seg2.end_point + );*/ + + if self.dual_seg1.end_point < self.dual_seg2.end_point { + len = self.dual_seg1.end_point - self.segment_start; + + result = self.generate_segment(self.dual_seg1.end_point); + + self.segment_start = self.dual_seg1.end_point; + + self.dual_seg1 = self + .input_iter1 + .next() + .expect("RatingAdderIterator: First iterator ended before second"); + self.dual_seg2.advance(len); + } else if self.dual_seg2.end_point < self.dual_seg1.end_point { + len = self.dual_seg2.end_point - self.segment_start; + + result = self.generate_segment(self.dual_seg2.end_point); + + self.segment_start = self.dual_seg2.end_point; + + self.dual_seg1.advance(len); + self.dual_seg2 = self + .input_iter2 + .next() + .expect("RatingAdderIterator: Second iterator ended before first"); + } else { + // self.dual_seg2.end_point === self.dual_seg1.end_point + + match (self.input_iter1.next(), self.input_iter2.next()) { + (Some(dual_seg1), Some(dual_seg2)) => { + result = 
self.generate_segment(self.dual_seg1.end_point); + + self.segment_start = self.dual_seg1.end_point; + + self.dual_seg1 = dual_seg1; + self.dual_seg2 = dual_seg2; + } + (Some(new_dual_seg1), None) => { + panic!( + "RatingAdderIterator: Second iterator ended before first {}", + new_dual_seg1.end_point + ); + } + (None, Some(new_dual_seg2)) => { + panic!( + "RatingAdderIterator: First iterator ended before second {}", + new_dual_seg2.end_point + ); + } + (None, None) => { + result = self.generate_segment(self.dual_seg1.end_point); + + self.finished = true; + } + } + } + + Some(result) + } +} + +pub fn add_rating_iterators( + mut iter1: RatingIterator, + mut iter2: RatingIterator, +) -> RatingFullIterator { + assert!(iter1.start == iter2.start); + let start = iter1.start; + + //println!("ADDER segment start {}", start); + + let dual_seg1 = iter1 + .iter + .next() + .expect("First iterator should have at least one element"); + let dual_seg2 = iter2 + .iter + .next() + .expect("Second iterator should have at least one element"); + + RatingFullIterator::<_> { + start: start, + iter: RatingAdderIterator::<_, _> { + segment_start: start, + dual_seg1: dual_seg1, + dual_seg2: dual_seg2, + input_iter1: iter1.iter, + input_iter2: iter2.iter, + finished: false, + }, + } +} + +trait PushIterator { + type Item; + type Output; + + /// False means the called function requests the end of the stream. 
+ #[inline] + fn push(&mut self, item: Self::Item); + + #[inline] + fn finish(self) -> Self::Output; +} + +// ////////////////////////////////////////////////////////////////////////////////////////////////// + +struct AggressiveSimplifySegmentData { + seg: RatingFullSegment, + offset_interval: (RatingDelta, RatingDelta), + pivot: TimePoint, +} + +struct AggressiveSimplifyRatingPushIterator> { + start: TimePoint, + epsilon: RatingDelta, + current_segment: Option, + iter: I, +} + +impl> AggressiveSimplifyRatingPushIterator { + fn get_min_max_offset_for_target( + target_rating: Rating, + target: TimePoint, + pivot_rating: Rating, + pivot: TimePoint, + max_diff: RatingDelta, + ) -> (RatingDelta, RatingDelta) { + if target == pivot { + return (-max_diff, max_diff); + } // TODO: this works buy actually we want -inf and +inf + + let min_delta = (target_rating - max_diff - pivot_rating) / (target - pivot).as_i64(); // BUG TODO: ROUND UP + let max_delta = (target_rating + max_diff - pivot_rating) / (target - pivot).as_i64(); + + if min_delta <= max_delta { + (min_delta, max_delta) + } else { + (max_delta, min_delta) + } + } + + fn get_min_max_offset_for_segment( + seg: RatingFullSegment, + pivot_rating: Rating, + pivot: TimePoint, + max_diff: RatingDelta, + ) -> (RatingDelta, RatingDelta) { + let interval1 = + Self::get_min_max_offset_for_target(seg.data.start_rating(), seg.span.start, pivot_rating, pivot, max_diff); + let interval2 = Self::get_min_max_offset_for_target( + seg.data.end_rating(seg.span.len()), + seg.span.end - TimeDelta::one(), + pivot_rating, + pivot, + max_diff, + ); + + return Self::intersect_intervals(interval1, interval2); + } + + fn intersect_intervals(a: (RatingDelta, RatingDelta), b: (RatingDelta, RatingDelta)) -> (RatingDelta, RatingDelta) { + return (max(a.0, b.0), min(a.1, b.1)); + } + + fn create_segment(&self, seg: RatingFullSegment) -> AggressiveSimplifySegmentData { + let pivot = seg.span.half(); + let pivot_rating = seg.data.get_at(pivot - 
seg.span.start); + + AggressiveSimplifySegmentData { + seg: seg, + pivot: pivot, + offset_interval: Self::get_min_max_offset_for_segment(seg, pivot_rating, pivot, self.epsilon), + } + } +} + +impl> PushIterator for AggressiveSimplifyRatingPushIterator { + type Item = RatingSegment; + type Output = I::Output; + + #[inline] + fn finish(mut self) -> Self::Output { + if let Some(current_segment) = self.current_segment { + self.iter.push(current_segment.seg); + } + self.iter.finish() + } + + #[inline] + fn push(&mut self, next_segment: RatingSegment) { + let mut current_segment: AggressiveSimplifySegmentData; + + match self.current_segment.take() { + None => { + let seg = next_segment.with_start_point(self.start); + self.current_segment = Some(self.create_segment(seg)); + return; + } + Some(v) => current_segment = v, + } + + let seg = next_segment.with_start_point(current_segment.seg.span.end); + + let pivot_diff: TimeDelta = current_segment.pivot - current_segment.seg.span.start; + let pivot_rating = current_segment.seg.data.get_at(pivot_diff); + + let interval = Self::get_min_max_offset_for_segment(seg, pivot_rating, current_segment.pivot, self.epsilon); + + let next_interval = Self::intersect_intervals(current_segment.offset_interval, interval); + + if next_interval.0 <= next_interval.1 { + let new_delta = (next_interval.0 + next_interval.1) / 2; + let new_start_rating = Rating::add_mul(pivot_rating, new_delta, -pivot_diff); + + //println!("len1 {} len2 {} min {} max {} delta {} new delta {}", current_segment.seg.span.len(), seg.span.len(), next_interval.0.as_readable_f32(), next_interval.1.as_readable_f32(), current_segment.seg.data.delta.as_readable_f32(), new_delta.as_readable_f32()); + + current_segment.seg.span.end = next_segment.end_point; + current_segment.seg.data.delta = new_delta; + current_segment.seg.data.rating = new_start_rating; + current_segment.offset_interval = next_interval; + //println!("Simplified"); + } else { + //println!("push"); + 
self.iter.push(current_segment.seg); + current_segment = self.create_segment(seg); + } + + self.current_segment = Some(current_segment); + } +} + +fn aggressive_simplifiy_ratings_push_iter( + start: TimePoint, + epsilon: RatingDelta, + iter: I, +) -> impl PushIterator +where + I: PushIterator, +{ + AggressiveSimplifyRatingPushIterator { + current_segment: None, + epsilon: epsilon, + start: start, + iter: iter, + } +} + +// ////////////////////////////////////////////////////////////////////////////////////////////////// + +struct SimplifyPositionPushIterator> { + start: TimePoint, + current_segment: Option, + iter: I, +} + +impl> PushIterator for SimplifyPositionPushIterator { + type Item = PositionSegment; + type Output = I::Output; + + #[inline] + fn finish(mut self) -> Self::Output { + if let Some(current_segment) = self.current_segment { + self.iter.push(current_segment); + } + self.iter.finish() + } + + #[inline] + fn push(&mut self, next_segment: PositionSegment) { + let mut current_segment: PositionFullSegment; + + match self.current_segment { + None => { + self.current_segment = Some(next_segment.with_start_point(self.start)); + return; + } + Some(v) => current_segment = v, + } + + if current_segment.data.drag == next_segment.data.drag + && current_segment.exclusive_end_position() == next_segment.data.start_position() + { + current_segment.span.end = next_segment.end_point; + } else { + self.iter.push(current_segment); + + current_segment = next_segment.with_start_point(current_segment.span.end); + } + + self.current_segment = Some(current_segment); + } +} + +#[inline] +fn simplifiy_positions_push_iter( + start: TimePoint, + iter: I, +) -> impl PushIterator +where + I: PushIterator, +{ + SimplifyPositionPushIterator { + current_segment: None, + start: start, + iter: iter, + } +} + +// ////////////////////////////////////////////////////////////////////////////////////////////////// + +struct SimplifyRatingPushIterator> { + start: TimePoint, + 
current_segment: Option, + iter: I, +} + +impl> PushIterator for SimplifyRatingPushIterator { + type Item = RatingSegment; + type Output = I::Output; + + #[inline] + fn finish(mut self) -> Self::Output { + if let Some(current_segment) = self.current_segment { + self.iter.push(current_segment); + } + self.iter.finish() + } + + #[inline] + fn push(&mut self, next_segment: RatingSegment) { + let mut current_segment: RatingFullSegment; + + match self.current_segment { + None => { + self.current_segment = Some(next_segment.with_start_point(self.start)); + return; + } + Some(v) => current_segment = v, + } + + if current_segment.data.delta == next_segment.data.delta + && current_segment.exclusive_end_rating() == next_segment.data.start_rating() + { + current_segment.span.end = next_segment.end_point; + } else { + self.iter.push(current_segment); + current_segment = next_segment.with_start_point(current_segment.span.end); + } + + self.current_segment = Some(current_segment); + } +} + +fn simplifiy_ratings_push_iter( + start: TimePoint, + iter: I, +) -> impl PushIterator +where + I: PushIterator, +{ + SimplifyRatingPushIterator { + current_segment: None, + start: start, + iter: iter, + } +} + +// ////////////////////////////////////////////////////////////////////////////////////////////////// + +struct CollectToVecPushIterator { + v: Vec, +} + +impl PushIterator for CollectToVecPushIterator { + type Item = T; + type Output = Vec; + + #[inline] + fn finish(self) -> Vec { + self.v + } + + #[inline] + fn push(&mut self, item: T) { + self.v.push(item) + } +} + +struct DualPushIterator +where + T: Clone, + I1: PushIterator, + I2: PushIterator, +{ + iter1: I1, + iter2: I2, +} + +impl PushIterator for DualPushIterator +where + T: Clone, + I1: PushIterator, + I2: PushIterator, +{ + type Item = T; + type Output = (I1::Output, I2::Output); + + #[inline] + fn finish(self) -> (I1::Output, I2::Output) { + (self.iter1.finish(), self.iter2.finish()) + } + + #[inline] + fn push(&mut self, 
item: T) { + self.iter1.push(item.clone()); + self.iter2.push(item); + } +} + +fn dual_push_iter(i1: I1, i2: I2) -> DualPushIterator +where + T: Clone, + I1: PushIterator, + I2: PushIterator, +{ + DualPushIterator { iter1: i1, iter2: i2 } +} + +struct MapPushIterator +where + I: PushIterator, + F: Fn(A) -> B, +{ + iter: I, + f: F, + _marker: std::marker::PhantomData, +} + +impl PushIterator for MapPushIterator +where + I: PushIterator, + F: Fn(A) -> B, +{ + type Item = A; + type Output = I::Output; + + #[inline] + fn finish(self) -> I::Output { + self.iter.finish() + } + + #[inline] + fn push(&mut self, item: A) { + self.iter.push((self.f)(item)); + } +} + +fn into_push_iter(v: I, mut iter: O) -> O::Output +where + I: Iterator, + O: PushIterator, +{ + for item in v { + iter.push(item); + } + + iter.finish() +} + +fn map_push_iterator(iter: I, f: impl Fn(A) -> B) -> impl PushIterator +where + I: PushIterator, +{ + MapPushIterator { + iter: iter, + f: f, + _marker: Default::default(), + } +} + +fn only_ratings_push_iter(iter: I) -> impl PushIterator +where + I: PushIterator, +{ + map_push_iterator(iter, |dual_segment: DualSegment| dual_segment.as_rating_segment()) +} + +fn only_positions_push_iter(iter: I) -> impl PushIterator +where + I: PushIterator, +{ + map_push_iterator(iter, |dual_segment: DualSegment| dual_segment.as_position_segment()) +} + +fn collect_to_vec_push_iter() -> impl PushIterator> { + CollectToVecPushIterator { v: Vec::new() } +} + +fn discard_start_times_push_iter(iter: I) -> impl PushIterator, Output = I::Output> +where + I: PushIterator>, +{ + map_push_iterator(iter, |full_segment: FullSegment| { + assert!(full_segment.span.len() > TimeDelta::zero()); + full_segment.discard_start_time() + }) +} + +struct DualSimplifyIterator +where + I: DI, +{ + current_segment: Option, + iter: I, +} + +impl Iterator for DualSimplifyIterator { + type Item = DualFullSegment; + + #[inline] + fn next(&mut self) -> Option { + let mut current_segment: 
DualFullSegment; + + match self.current_segment.take() { + None => { + return None; + } + Some(v) => current_segment = v, + } + + loop { + match self.iter.next() { + None => { + self.current_segment = None; + return Some(current_segment); + } + Some(next_segment) => { + if next_segment.data.rating_info.delta == current_segment.data.rating_info.delta + && next_segment.data.position_info.drag == current_segment.data.position_info.drag + && current_segment.exclusive_end_position() == next_segment.start_position() + && current_segment.exclusive_end_rating() == next_segment.start_rating() + { + current_segment.span.end = next_segment.end_point; + } else { + self.current_segment = Some(next_segment.with_start_point(current_segment.span.end)); + return Some(current_segment); + } + } + } + } + } +} diff --git a/alass-core/src/statistics.rs b/alass-core/src/statistics.rs new file mode 100644 index 0000000..dbfcdea --- /dev/null +++ b/alass-core/src/statistics.rs @@ -0,0 +1,257 @@ +use crate::rating_type::RatingExt; +use crate::segments::{PositionBuffer, PositionFullSegment, RatingBuffer, RatingFullSegment}; +use crate::time_types::TimeDelta; + +use std::fs::File; +use std::io::prelude::*; +use std::iter::Iterator; +use std::path::PathBuf; + +#[derive(Debug)] +pub struct Statistics { + number: i64, + path: PathBuf, + filter_tags: Vec, +} + +#[derive(Clone, Copy)] +struct Color { + r: u8, + g: u8, + b: u8, +} + +impl Color { + fn from_hex(c: u32) -> Color { + let r = ((c >> 0) & 0xFF) as u8; + let g = ((c >> 8) & 0xFF) as u8; + let b = ((c >> 16) & 0xFF) as u8; + Color { r, g, b } + } +} + +enum GraphicalObject { + Line { + x1: f32, + y1: f32, + x2: f32, + y2: f32, + color: Color, + }, +} + +impl GraphicalObject { + fn min_x(&self) -> f32 { + match self { + GraphicalObject::Line { x1, x2, .. } => (*x1).min(*x2), + } + } + + fn max_x(&self) -> f32 { + match self { + GraphicalObject::Line { x1, x2, .. 
} => (*x1).max(*x2), + } + } + + fn min_y(&self) -> f32 { + match self { + GraphicalObject::Line { y1, y2, .. } => (*y1).min(*y2), + } + } + + fn max_y(&self) -> f32 { + match self { + GraphicalObject::Line { y1, y2, .. } => (*y1).max(*y2), + } + } +} + +impl Statistics { + pub fn new(path: impl AsRef, filter_tags: Vec) -> Statistics { + Statistics { + number: 1, + path: path.as_ref().to_path_buf(), + filter_tags, + } + } + + pub fn prepare_file(&self, filename: String) -> std::io::Result { + std::fs::create_dir(&self.path).or_else(|error| { + if error.kind() == std::io::ErrorKind::AlreadyExists { + return Ok(()); + } else { + Err(error) + } + })?; + + File::create(self.path.join(filename)) + } + + fn write_svg( + &mut self, + name: &str, + tags: &[&str], + objs_fn: impl Fn() -> Vec, + ) -> std::io::Result<()> { + self.number = self.number + 1; + + if !self.passing_tags_filter(tags) { + return Ok(()); + } + + let mut file = self.prepare_file(format!( + "{:03}{}.svg", + self.number, + name.to_lowercase().replace(' ', "-") + ))?; + + let objs: Vec = objs_fn(); + + if !objs.is_empty() { + let mut min_x = objs[0].min_x(); + let mut max_x = objs[0].max_x(); + let mut min_y = objs[0].min_y(); + let mut max_y = objs[0].max_y(); + + for obj in &objs { + min_x = min_x.min(obj.min_x()); + min_y = min_y.min(obj.min_y()); + max_x = max_x.max(obj.max_x()); + max_y = max_y.max(obj.max_y()); + } + + let target_width = 8000f32; + let target_height = 1000f32; + + let height = max_y - min_y + 1.; + let width = max_x - min_x + 1.; + + let scalex = target_width / width; + let scaley = -target_height / height * 0.8; + + let movex = -min_x * scalex; + let movey = -min_y * scaley + target_height - target_height * 0.1; + + file.write_all(format!("", target_height, target_width).as_bytes())?; + + let line = format!("Name: {}\n", name); + file.write_all(line.as_bytes())?; + + let line = format!( + "Tags: {}\n", + tags.join(", ") + ); + file.write_all(line.as_bytes())?; + + let line = 
format!("{}\n", max_y); + file.write_all(line.as_bytes())?; + + let line = format!( + "{}\n", + target_height - 5., + min_y + ); + file.write_all(line.as_bytes())?; + + for obj in &objs { + match obj { + GraphicalObject::Line { x1, x2, y1, y2, color } => { + let line = format!("\n", + x1 * scalex + movex, + y1 * scaley + movey, + x2 * scalex + movex, + y2 * scaley + movey, + color.r,color.g,color.b); + file.write_all(line.as_bytes())?; + } + } + } + + file.write_all(b"")?; + } + + Ok(()) + } + + pub fn save_rating_buffer(&mut self, name: &str, tags: &[&str], buffer: &RatingBuffer) -> std::io::Result<()> { + let mut tags2: Vec<&str> = tags.to_vec(); + tags2.push("rating"); + + let compute_data = || { + buffer + .iter() + .annotate_with_segment_start_times() + .into_iter() + .zip( + [Color::from_hex(0xFF00FF), Color::from_hex(0x008888)] + .into_iter() + .cloned() + .cycle(), + ) + .map(|(segment, color): (RatingFullSegment, Color)| { + let start = segment.span.start; + let end = segment.span.end; + let start_rating = segment.data.start_rating(); + let end_rating = segment.data.end_rating(end - start); + + let x1 = start.as_f32(); + let x2 = (end - TimeDelta::one()).as_f32(); + let y1 = start_rating.as_readable_f32(); + let y2 = end_rating.as_readable_f32(); + + return GraphicalObject::Line { x1, x2, y1, y2, color }; + }) + .collect() + }; + + self.write_svg(name, &tags2, compute_data)?; + + Ok(()) + } + + pub fn passing_tags_filter(&self, tags: &[&str]) -> bool { + for filter_tag in &self.filter_tags { + if !tags.contains(&filter_tag.as_str()) { + return false; + } + } + + true + } + + pub fn save_position_buffer(&mut self, name: &str, tags: &[&str], buffer: &PositionBuffer) -> std::io::Result<()> { + let mut tags2: Vec<&str> = tags.to_vec(); + tags2.push("position"); + + let compute_data = || { + buffer + .iter() + .annotate_with_segment_start_times() + .into_iter() + .zip( + [Color::from_hex(0xFF0000), Color::from_hex(0x00FF00)] + .into_iter() + .cloned() + 
.cycle(), + ) + .map(|(segment, color): (PositionFullSegment, Color)| { + let start = segment.span.start; + let end = segment.span.end; + let start_rating = segment.data.start_position(); + let end_rating = segment.data.end_position(end - start); + + let x1 = start.as_f32(); + let x2 = (end - TimeDelta::one()).as_f32(); + let y1 = start_rating.as_f32(); + let y2 = end_rating.as_f32(); + + return GraphicalObject::Line { x1, x2, y1, y2, color }; + }) + .collect() + }; + + self.write_svg(name, &tags2, compute_data)?; + + Ok(()) + } +} diff --git a/src/internal/time_types.rs b/alass-core/src/time_types.rs similarity index 86% rename from src/internal/time_types.rs rename to alass-core/src/time_types.rs index 1a801b9..1b41dc7 100644 --- a/src/internal/time_types.rs +++ b/alass-core/src/time_types.rs @@ -1,4 +1,4 @@ -// This file is part of the Rust library and binary `aligner`. +// This file is part of the Rust library and binary `alass`. // // Copyright (C) 2017 kaegi // @@ -15,16 +15,19 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . - use std; -use std::cmp::{Ordering, max, min}; +use std::cmp::{max, min, Ordering}; use std::ops::*; /// Implements conversion to integer variables for TimeDelta and TimePoint. macro_rules! impl_from { ($f:ty, $t:ty) => { - impl From<$f> for $t { fn from(t: $f) -> $t { t.0 as $t } } - } + impl From<$f> for $t { + fn from(t: $f) -> $t { + t.0 as $t + } + } + }; } /// This struct represents a time difference between two `TimePoints`. @@ -42,8 +45,27 @@ impl TimeDelta { pub fn one() -> TimeDelta { TimeDelta(1) } -} + /// Create time delta as "TimeDelta::one() * v". + pub fn from_i64(v: i64) -> TimeDelta { + TimeDelta(v) + } + + /// Return time difference as f64. + pub fn as_f64(&self) -> f64 { + self.0 as f64 + } + + /// Return time difference as f64. + pub fn as_f32(&self) -> f32 { + self.0 as f32 + } + + /// Return time difference as i64. 
+ pub fn as_i64(&self) -> i64 { + self.0 as i64 + } +} impl_from!(TimeDelta, i32); impl_from!(TimeDelta, u32); @@ -80,7 +102,6 @@ impl AddAssign for TimeDelta { } } - impl Sub for TimeDelta { type Output = TimeDelta; fn sub(self, rhs: TimeDelta) -> TimeDelta { @@ -140,7 +161,7 @@ impl Neg for TimeDelta { /// The only way to create a new `TimePoint` is with `TimePoint::from({i64})`. /// /// ``` -/// use aligner::TimePoint; +/// use alass_core::TimePoint; /// /// let p = TimePoint::from(10); /// @@ -152,6 +173,18 @@ impl Neg for TimeDelta { #[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq, Hash)] pub struct TimePoint(i64); +impl TimePoint { + /// Returns a f32 for the given time point. + pub fn as_f32(self) -> f32 { + self.0 as f32 + } + + /// Returns a i64 for the given time point. + pub fn as_i64(self) -> i64 { + self.0 as i64 + } +} + impl From for TimePoint { fn from(f: i64) -> TimePoint { TimePoint(f) @@ -159,7 +192,6 @@ impl From for TimePoint { } impl_from!(TimePoint, i64); - impl Sub for TimePoint { type Output = TimeDelta; fn sub(self, rhs: TimePoint) -> TimeDelta { @@ -203,10 +235,10 @@ impl SubAssign for TimePoint { #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] pub struct TimeSpan { /// The first time point of the time span (inclusive) - start: TimePoint, + pub start: TimePoint, /// The last time point of the time span (excluded) - end: TimePoint, + pub end: TimePoint, } impl TimeSpan { @@ -214,7 +246,7 @@ impl TimeSpan { /// /// # Examples /// ```rust - /// use aligner::{TimeSpan, TimePoint}; + /// use alass_core::{TimeSpan, TimePoint}; /// /// let t0 = TimePoint::from(0); /// let t10 = TimePoint::from(10); @@ -228,7 +260,7 @@ impl TimeSpan { /// This function asserts that `start` is less or equal `end`. 
/// /// ```rust,should_panic - /// use aligner::{TimeSpan, TimePoint}; + /// use alass_core::{TimeSpan, TimePoint}; /// /// let t0 = TimePoint::from(0); /// let t10 = TimePoint::from(10); @@ -236,22 +268,19 @@ impl TimeSpan { /// // this will case a panic /// let ts = TimeSpan::new(t10, t0); /// ``` + #[inline] pub fn new(start: TimePoint, end: TimePoint) -> TimeSpan { assert!(start <= end); - TimeSpan { - start: start, - end: end, - } + TimeSpan { start: start, end: end } } - /// Create a new TimeSpan with `start` and `end`. This function will not /// panic on `end < start`, but /// swap the values before calling `TimeSpan::new()`. /// /// # Examples /// ```rust - /// use aligner::{TimeSpan, TimePoint}; + /// use alass_core::{TimeSpan, TimePoint}; /// /// let t0 = TimePoint::from(0); /// let t10 = TimePoint::from(10); @@ -267,8 +296,6 @@ impl TimeSpan { } } - - /// Mutates a `TimeSpan`s end. /// /// # Panics @@ -300,11 +327,16 @@ impl TimeSpan { self.end } + /// Returns one (of the possibly two) points in the center of the `TimeSpan`. + pub fn half(self) -> TimePoint { + TimePoint::from((self.start.as_i64() + self.end.as_i64()) / 2) + } + /// Returns true if `self` contains `TimeSpan` `other`. /// /// # Examples /// ``` - /// use aligner::{TimeSpan, TimePoint}; + /// use alass_core::{TimeSpan, TimePoint}; /// ``` pub fn contains(self, other: TimeSpan) -> bool { other.start >= self.start && other.end <= self.end @@ -313,7 +345,7 @@ impl TimeSpan { /// Returns the smallest difference between two `TimeSpan`s. 
/// /// ``` - /// use aligner::{TimeSpan, TimePoint, TimeDelta}; + /// use alass_core::{TimeSpan, TimePoint, TimeDelta}; /// /// let p = TimePoint::from(0); /// let d = TimeDelta::one(); diff --git a/src/internal/timespan_ops.rs b/alass-core/src/timespan_ops.rs similarity index 83% rename from src/internal/timespan_ops.rs rename to alass-core/src/timespan_ops.rs index fe1a21b..5b3d79c 100644 --- a/src/internal/timespan_ops.rs +++ b/alass-core/src/timespan_ops.rs @@ -1,4 +1,4 @@ -// This file is part of the Rust library and binary `aligner`. +// This file is part of the Rust library and binary `alass`. // // Copyright (C) 2017 kaegi // @@ -15,8 +15,7 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . - -use internal::{TimeDelta, TimeSpan}; +use crate::{TimeDelta, TimeSpan}; use std; use std::cmp::max; @@ -37,10 +36,7 @@ fn prepare_spans_sorted(overlapping: Vec) -> (Vec, Vec) -> (Vec, Vec /// are grouped together with next or previous time spans. 
fn prepare_spans_nonzero(v: Vec) -> (Vec, Vec) { // list of non-zero spans - let non_zero_spans: Vec = v.iter() - .cloned() - .filter(|&ts| ts.len() > TimeDelta::zero()) - .collect(); + let non_zero_spans: Vec = v.iter().cloned().filter(|&ts| ts.len() > TimeDelta::zero()).collect(); if non_zero_spans.is_empty() { return (Vec::new(), Vec::new()); } @@ -120,12 +113,7 @@ fn prepare_spans_nonzero(v: Vec) -> (Vec, Vec) { (Some(p), Some(n)) => ts.fast_distance_to(p) <= ts.fast_distance_to(n), }; - - indices.push(if merge_with_prev { - new_index - 1 - } else { - new_index - }); + indices.push(if merge_with_prev { new_index - 1 } else { new_index }); } (non_zero_spans, indices) @@ -155,12 +143,11 @@ pub fn prepare_time_spans(v: Vec) -> (Vec, Vec) { (result, mapping) } - #[cfg(test)] mod tests { use super::*; - use internal::prepare_time_spans; - use tests::get_test_time_spans; + use crate::prepare_time_spans; + use crate::tests::get_test_time_spans; #[test] fn test_prepare_time_spans() { @@ -171,10 +158,11 @@ mod tests { // function will condense non-zero timespans into one -> vector of zero-length // timespans will turn into empty vector - let full_length: i64 = time_spans.iter() - .cloned() - .map(|time_spans| i64::from(time_spans.len())) - .sum(); + let full_length: i64 = time_spans + .iter() + .cloned() + .map(|time_spans| i64::from(time_spans.len())) + .sum(); if full_length == 0 { assert!(non_overlapping.is_empty()); continue; @@ -186,15 +174,16 @@ mod tests { assert!(non_overlapping.len() > 0); // test whether some spans overlap (they shouldn't) - non_overlapping.iter() - .cloned() - .zip(non_overlapping.iter().cloned().skip(1)) - .inspect(|&(last, current)| { - assert!(last.start() <= last.end()); - assert!(last.end() <= current.start()); - assert!(current.start() <= current.end()); - }) - .count(); + non_overlapping + .iter() + .cloned() + .zip(non_overlapping.iter().cloned().skip(1)) + .inspect(|&(last, current)| { + assert!(last.start() <= last.end()); + 
assert!(last.end() <= current.start()); + assert!(current.start() <= current.end()); + }) + .count(); // test mapping from "overlapping -> non-overlapping" assert!(time_spans.len() == indices.len()); @@ -202,13 +191,11 @@ mod tests { assert!(non_overlapping[indices[i]].contains(span) || span.len() == TimeDelta::zero()); } - // ----------------------------------------------------------- // apply `prepare_time_spans()` a second time which should now be a noop let (prepared_timespans2, indices2) = prepare_time_spans(non_overlapping.clone()); assert_eq!(non_overlapping, prepared_timespans2); assert_eq!(indices2, (0..indices2.len()).collect::>()); - } } } diff --git a/rustfmt.toml b/rustfmt.toml index af239d5..6953a45 100644 --- a/rustfmt.toml +++ b/rustfmt.toml @@ -1,9 +1,2 @@ -write-mode="overwrite" reorder_imports = true -format_strings = false -chain_overflow_last = false -chain_indent = "Visual" -single_line_if_else = true -same_line_if_else = false -fn_single_line = false -max_width = 150 +max_width = 120 diff --git a/src/binary/errors.rs b/src/binary/errors.rs deleted file mode 100644 index 4186da7..0000000 --- a/src/binary/errors.rs +++ /dev/null @@ -1,58 +0,0 @@ -// This file is part of the Rust library and binary `aligner`. -// -// Copyright (C) 2017 kaegi -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . 
- -use subparse; - -// see https://docs.rs/error-chain/0.8.1/error_chain/ -#[cfg_attr(rustfmt, rustfmt_skip)] -error_chain! { - foreign_links { - Io(::std::io::Error); - } - - links { - SubparseError(subparse::errors::Error, subparse::errors::ErrorKind); - } - - errors { - FileOperation(file: String) { - display("operation on file '{}' failed", file) - } - UnknownEncoding(enc: String) { - display("unknown file encoding '{}'", enc) - } - SsaFormattingInfoNotFound { - description("file did not have a `[Events]` section containing a line beginning with `Format: `") - } - ArgumentParseError(argument_name: &'static str, s: String) { - display("command line argument '{}' could not be parsed from string '{}'", argument_name, s) - } - InvalidArgument(argument_name: &'static str) { - display("command line argument '{}' has invalid value", argument_name) - } - ExpectedPositiveNumber(i: i64) { - display("expected positive number, got '{}'", i) - } - ValueNotInRange(v: f64, min: f64, max: f64) { - display("expected value in the range from '{}' to '{}', found value '{}'", min, max, v) - } - DifferentOutputFormat(input_file: String, output_file: String) { - description("the requested output file has a different format than the incorrect subtitle file (this program does not convert)") - display("the requested output '{}' file has a different format than the incorrect subtitle file '{}' (this program does not convert)", output_file, input_file) - } - } -} diff --git a/src/binary/mod.rs b/src/binary/mod.rs deleted file mode 100644 index 79c7c20..0000000 --- a/src/binary/mod.rs +++ /dev/null @@ -1,18 +0,0 @@ -// This file is part of the Rust library and binary `aligner`. -// -// Copyright (C) 2017 kaegi -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - -pub mod errors; diff --git a/src/internal/aligner.rs b/src/internal/aligner.rs deleted file mode 100644 index 435b51f..0000000 --- a/src/internal/aligner.rs +++ /dev/null @@ -1,645 +0,0 @@ -// This file is part of the Rust library and binary `aligner`. -// -// Copyright (C) 2017 kaegi -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - - -use arrayvec::ArrayVec; -use internal::{CombinedSegmentIterator, DeltaBufferBuilder, DeltaBufferReader, DeltaSegment, OptionSegment, Rating, RatingBuffer, RatingSegment, - TimeDelta, TimePoint, TimeSpan, TimepointBuffer, get_best_rating_segments_of_2, get_best_rating_segments_of_3, - get_overlapping_rating_changepoints}; -use std::cmp::min; -use std::iter::{FromIterator, once}; - -/// The main align algorithm uses a long buffer of different ratings. This -/// structure provides the information to -/// express that a `rating` is at a `specific timepoint` in that buffer. 
-struct RatingLocation { - pub rating: Rating, - pub location: TimePoint, -} - - -/// Use this trait if you want more detailed information about the progress of the align operation -/// (which might take some seconds). -pub trait ProgressHandler { - /// Will be called one time before `inc()` is called. `steps` is the - /// number of times `inc()` will be called. - /// - /// The number of steps is around the number of lines in the "incorrect" subtitle. - /// Be aware that this number can be zero! - #[allow(unused_variables)] - fn init(&mut self, steps: i64) {} - - /// We made (small) progress! - fn inc(&mut self) {} - - /// Will be called after the last `inc()`, when `inc()` was called `steps` times. - fn finish(&mut self) {} -} - -/// The "main" structure which holds the infomation needed to align the subtitles to each other. -pub struct Aligner { - /// List of incorrect subtitles which are aligned with this library. This - /// list will always be non-empty. - list: Vec, - - /// The fixed reference subtitles. This list will always be non-empty. - reference: Vec, - - /// Contains the range in which the incorrect subtitles can be moved. - buffer_timespan: TimeSpan, - - /// The bonus rating for two consecutive subtitles which preserve the - /// orginal space (which get - /// shifted by the same `TimeDelta`)/the penalty for introducing splits in - /// the alignment. - nosplit_bonus: Rating, - - /// Progress handler provided by the user of this crate. - progress_handler_opt: Option>, -} - -#[derive(Clone, Copy, PartialEq, Eq, Debug)] -/// In its core, the algorithm uses a dynamic programming approach and -/// generates a new value/rating in -/// the internal table by comparing already-existing ratings at three different -/// positions in the table. -/// It then chooses the highest rating and does some processing dependent on -/// the case. 
-/// -/// These three choices is where to put the/new current timespan: leave the -/// timespan on the position from the -/// previous iteration, repositon its start to the current timepoint, or -/// reposition it to the non-split -/// position (to get the nosplit bonus). -/// -/// This enumeration represents the three different choices the algorithm can -/// select from. -enum Choice { - /// Reposition start of timespan to current timepoint. - Reposition, - - /// Leave timespan on same position like in previous iteration. - Fixed, - - /// Reposition timespan so no split is introduced (gives `nosplit bonus`). - NosplitReposition, -} - - -impl Aligner { - /// In each list no time span should intersect any other and both list are - /// sorted by starting times. - pub fn new( - list: Vec, - reference: Vec, - nopsplit_bonus_normalized: f64, - mut progress_handler_opt: Option>, - ) -> Option { - if let Some(ref mut progress_handler) = progress_handler_opt { - progress_handler.init(list.len() as i64); - } - - if list.is_empty() || reference.is_empty() { - if let Some(ref mut progress_handler) = progress_handler_opt { - progress_handler.finish(); - } - return None; - } - - // this is the timespan length which can contain all incorrect subtitles - let list_timespan = list.last().unwrap().end() - list.first().unwrap().start(); - - // It might be possible that all corrected subtiles fit in the reference list - // timeframe. It they don't - // we need to provide extra space, so that the produting corrected subtitles - // still fit into the - // whole [start, end] timeframe. Because `list_timespan` is the length of the - // whole incorrect subtitle file, - // we can just extend the reference timeframe by `list_timespan` on both ends. - let start = reference.first().unwrap().start() - list_timespan; - let end = reference.last().unwrap().end() + list_timespan; - - // For each segment the full rating can only be 1. 
So the maximum rating - // without the nosplit bonus is `min(list.len(), reference.len())`. So to get - // from the normalized rating `[0, 1]` to a unnormalized rating (where only - // values between `[0, max_rating]` are interesting) we multiply by - // `min(list.len(), reference.len())`. - let nopsplit_bonus_unnormalized = min(list.len(), reference.len()) as f64 * nopsplit_bonus_normalized; - - // quick check for integrity - assert!(start < end); - - Some(Aligner { - list: list, - reference: reference, - buffer_timespan: TimeSpan::new(start, end), - nosplit_bonus: Rating::nosplit_bonus(nopsplit_bonus_unnormalized), - progress_handler_opt: progress_handler_opt, - }) - } - - pub fn get_start(&self) -> TimePoint { - self.buffer_timespan.start() - } - - pub fn get_end(&self) -> TimePoint { - self.buffer_timespan.end() - } - - pub fn get_buffer_length(&self) -> u64 { - u64::from(self.buffer_timespan.len()) - } - - pub fn align_all_spans(&mut self) -> Vec { - let mut all_spanstart_buffers: Vec = Vec::new(); - let mut last_rating_buffer: RatingBuffer = RatingBuffer::init_with_one_segment(Rating::zero(), Rating::zero(), self.get_buffer_length()); - - // iterator that removes the first element and adds a None value to the end -> - // provedes the "next" span - for (i, time_span) in self.list.iter().cloned().enumerate() { - // compute the space between this span and the next span - let next_span_opt = self.list.get(i + 1); - let optimal_startdiff_opt = next_span_opt.map(|next_span| next_span.start() - time_span.start()); - let (rating_buffer, span_positions_buffer) = self.align_new_span(last_rating_buffer, time_span, optimal_startdiff_opt); - - // the rating buffer is only needed for the next lane, but the last span - // position have to be remembered to get the deltas for each subtitle at the end - last_rating_buffer = rating_buffer; - all_spanstart_buffers.push(span_positions_buffer); - - // inform user we have done one step - if let Some(ref mut progress_handler) = 
self.progress_handler_opt { - progress_handler.inc(); - } - } - - // find the index in the last rating buffer (which represents all spans) with - // maximum rating - which is the last index because the ratings rise monotonous. - let mut best_end = self.get_end() - TimeDelta::one(); - - // because we can read each interval ends at the span start of next span, we - // just have to go backwards from span start to span start - let mut time_span_starts = Vec::new(); - for last_span_positions in all_spanstart_buffers.into_iter().rev() { - let mut reader = DeltaBufferReader::new(&last_span_positions, self.get_start()); - best_end = reader.read_by_timepoint(best_end); - time_span_starts.push(best_end); - } - - // inform user we are done with the work - if let Some(ref mut progress_handler) = self.progress_handler_opt { - progress_handler.finish(); - } - - time_span_starts = time_span_starts.into_iter().rev().collect(); - self.list - .iter() - .zip(time_span_starts.iter()) - .map(|(&original_time_span, &new_start)| { - new_start - original_time_span.start() - }) - .collect() - } - - /// Returns the align rating of n + 1 time spans from the align rating of n - /// time spans. 
- /// - /// total_timespan_time: the sum of all timespan lengths NOT including the - /// new span - fn align_new_span( - &self, - prev_rating_buffer: RatingBuffer, - new_span: TimeSpan, - optimal_startdiff_opt: Option, - ) -> (RatingBuffer, TimepointBuffer) { - - assert!(prev_rating_buffer.len() == self.get_buffer_length()); - - let overlapping_rating = self.get_compressed_overlapping_ratings(self.get_start(), self.get_end(), new_span.len()); - - // for an "end" the repositon rating is the best rating where the new span ends - // at that "end" - let rating_by_repositioning = RatingBuffer::combined_add(&overlapping_rating, &prev_rating_buffer); - - match optimal_startdiff_opt { - Some(optimal_startdiff) => self.get_next_lane_with_nosplit(&rating_by_repositioning, optimal_startdiff), - None => self.get_next_lane_without_nosplit(&rating_by_repositioning), - } - } - - /// The algorithm creates a NxU matrix where N is the number of incorrect - /// subtitles and U is the timespan all subtitles together. On a high - /// level, each N+1 row (giving the next N+1xU matrix) can be computed by - /// the NxU matrix and the previous values on the N+1 row. I call this new - /// matrix-row "lane". - fn get_next_lane( - &self, - // will be ignored if `get_maxrat_segments` never return Choice::NosplitReposition - optimal_startdiff: TimeDelta, - reposition_rating_iter: I1, - nosplit_rating_iter: I2, - get_maxrat_segments: F, - ) -> (RatingBuffer, TimepointBuffer) - where - I1: Iterator, - I2: Iterator>, - - // The three rating segemnts are the "reposition rating" the "fixed rating" and the "nosplit rating". - // - // The last parameter is the "absolute best choice" in the last loop. Because these absolute best (== one maxrat segment), - // choice is probably the same in the next loop, we can do optimizations with it. 
- F: Fn(RatingSegment, - RatingSegment, - Option, - Option) -> ArrayVec<[(RatingSegment, Choice); 3]>, - { - - let mut rating_builder = DeltaBufferBuilder::::new(); - let mut spanstart_builder = DeltaBufferBuilder::::new(); - let mut segstart_timepoint = self.get_start(); - let mut past_max: RatingLocation = RatingLocation { - rating: Rating::zero(), - location: segstart_timepoint, - }; - let mut last_absolute_best_choice: Option = None; - - for (reposition_rating_seg, nosplit_reposition_rating) in CombinedSegmentIterator::new(reposition_rating_iter, nosplit_rating_iter) { - - if reposition_rating_seg.is_decreasing() && reposition_rating_seg.first_value() > past_max.rating { - // the first rating value is the new maximum, but after that, the rating - // decreases - // -> the fixed spanstart should point to the first rating - past_max.rating = reposition_rating_seg.first_value(); - past_max.location = segstart_timepoint; - } - - let fixed_rating_seg = DeltaSegment::new(past_max.rating, Rating::zero(), reposition_rating_seg.len()); - - let maxrat_segs = get_maxrat_segments( - reposition_rating_seg, - fixed_rating_seg, - nosplit_reposition_rating, - last_absolute_best_choice, - ); - - last_absolute_best_choice = if maxrat_segs.len() == 1 { - Some(maxrat_segs[0].1) - } else { - None - }; - - - // = segment with maximal rating - for (maxrat_seg, choice) in maxrat_segs { - // depending on the best maxrat-choice (choice that leads to the segment with - // maximum rating), - // the spanstart has to set differently -> maximum rating comes into being with - // that spanstart - let (spanstart_seg_timepoint, spanstart_seg_delta) = match choice { - Choice::Reposition => (segstart_timepoint, TimeDelta::one()), - Choice::NosplitReposition => (segstart_timepoint - optimal_startdiff, TimeDelta::one()), - Choice::Fixed => (past_max.location, TimeDelta::zero()), - }; - - let new_spanstart_segment = DeltaSegment::new( - spanstart_seg_timepoint, - spanstart_seg_delta, - 
maxrat_seg.len(), - ); - - // add the best rating and the associated spanstarts to the buffer builders - rating_builder.add_segment(maxrat_seg); - spanstart_builder.add_segment(new_spanstart_segment); - - segstart_timepoint += TimeDelta::one() * maxrat_seg.len() as i64; - } - - // the reposition rating is constant or increases (decreasing case has already been handled - // at the start of the loop), so the next "past_max.rating" might be at the end of of the - // repositon rating segment - if past_max.rating < reposition_rating_seg.last_value() { - // segstart_timepoint points to beginning of next segment, so minus one and we have the - // end of the current segment - past_max.rating = reposition_rating_seg.last_value(); - past_max.location = segstart_timepoint - TimeDelta::one(); - } - - } - - (rating_builder.get_buffer(), spanstart_builder.get_buffer()) - } - - fn get_next_lane_without_nosplit(&self, reposition_rating_buffer: &RatingBuffer) -> (RatingBuffer, TimepointBuffer) { - self.get_next_lane( - TimeDelta::zero(), // ignored - reposition_rating_buffer.iter_segments().cloned(), - once(OptionSegment::NoneSeg::( - self.get_buffer_length(), - )), - - // do not compare with nosplit segment - Self::get_segments_and_choices_with_nosplit, - ) - - } - - fn get_segments_and_choices_without_nosplit( - reposition_rating_seg: RatingSegment, - fixed_rating_seg: RatingSegment, - _: Option, - ) -> ArrayVec<[(RatingSegment, Choice); 3]> { - - let compared_segs = [reposition_rating_seg, fixed_rating_seg]; - let corresponding_choices = [Choice::Reposition, Choice::Fixed]; - get_best_rating_segments_of_2(compared_segs, corresponding_choices) - } - - fn get_segments_and_choices_with_nosplit( - reposition_rating_seg: RatingSegment, - fixed_rating_seg: RatingSegment, - nosplit_rating_seg_opt: Option, - last_absolute_best_choice: Option, - ) -> ArrayVec<[(RatingSegment, Choice); 3]> { - - let nosplit_rating_seg = match nosplit_rating_seg_opt { - Some(x) => x, - None => { - return 
Self::get_segments_and_choices_without_nosplit( - reposition_rating_seg, - fixed_rating_seg, - last_absolute_best_choice, - ); - } - }; - - // Two consecutive segment parts often have the same result. So if we use the - // the result from the - // last loop, we can easily check for a single result instead of having to - // compute it expensively. - match last_absolute_best_choice { - Some(Choice::Reposition) => { - if reposition_rating_seg.is_greatequal(nosplit_rating_seg) && reposition_rating_seg.is_greatequal(fixed_rating_seg) { - return ArrayVec::from_iter( - [(reposition_rating_seg, Choice::Reposition)] - .into_iter() - .cloned(), - ); - } - } - Some(Choice::NosplitReposition) => { - if nosplit_rating_seg.is_greatequal(reposition_rating_seg) && nosplit_rating_seg.is_greatequal(fixed_rating_seg) { - return ArrayVec::from_iter( - [(nosplit_rating_seg, Choice::NosplitReposition)] - .into_iter() - .cloned(), - ); - } - } - Some(Choice::Fixed) => { - if fixed_rating_seg.is_greatequal(reposition_rating_seg) && fixed_rating_seg.is_greatequal(nosplit_rating_seg) { - return ArrayVec::from_iter([(fixed_rating_seg, Choice::Fixed)].into_iter().cloned()); - } - } - None => {} - } - - let compared_segs = [reposition_rating_seg, nosplit_rating_seg, fixed_rating_seg]; - let corresponding_choices = [Choice::Reposition, Choice::NosplitReposition, Choice::Fixed]; - get_best_rating_segments_of_3(compared_segs, corresponding_choices) - } - - fn get_next_lane_with_nosplit(&self, reposition_rating_buffer: &RatingBuffer, optimal_startdiff: TimeDelta) -> (RatingBuffer, TimepointBuffer) { - - // Get an iterator where each lookup at position - // "shifted_bonus_segments_iter[x]" is equivalent - // to "rating_by_repositioning[x - space_to_next] + bonus_time". - // This is the rating for a nosplit alignment. To achive that "x - - // space_to_next" we add a dummy segment to the front. 
- let dummy_segment = OptionSegment::NoneSeg(optimal_startdiff.into()); - let bonus_segments_iter = reposition_rating_buffer.iter_segments().map(|&x| { - OptionSegment::SomeSeg(x + self.nosplit_bonus) - }); - let shifted_bonus_segments_iter = once(dummy_segment).chain(bonus_segments_iter); - - self.get_next_lane( - optimal_startdiff, - reposition_rating_buffer.iter_segments().cloned(), - shifted_bonus_segments_iter, - Self::get_segments_and_choices_with_nosplit, - ) - } - - /// Requires "start1 <= start2". Returns the compressed rating vector for - /// the overlapping ratings of a timespan of length - /// "length" on all start position from "start1" to "start2". - /// - /// This function has O(n) runtime, where n is the number of spans in the - /// reference list. - - fn get_compressed_overlapping_ratings(&self, start1: TimePoint, start2: TimePoint, length: TimeDelta) -> RatingBuffer { - // If we fix one timespan and let an other timespan variable, we get such a - // curve for the rating: - // - // / --------- \ - // / \ - // ------- -------------------------- - // - // at first the rating be zero, then rise linearly, then it will be constant - // for a time and then fall to zero again - // - // The next function will return these special changepoints and their - // "delta"-change (delta-delta). - // Because the timespans in "self.reference" are sorted and non overlapping, - // the changepoints of a certain type (first rise, start of constant, ...) - // will also be sorted. That means we only have to compare the current first - // changepoints of each type to get the really first - // changepoint. We then apply this changepoint-delta to the current total delta - // and add the segment with the - // previous total delta to the buffer. This way we get the segments with the - // same delta very efficently in O(n). 
- assert!(start1 <= start2); - - let mut builder = DeltaBufferBuilder::new(); - let mut timepoints: [Vec<(Rating, TimePoint)>; 4] = [Vec::new(), Vec::new(), Vec::new(), Vec::new()]; - for &ref_ts in &self.reference { - let changepoints = get_overlapping_rating_changepoints(length, ref_ts); - timepoints[0].push(changepoints[0]); - timepoints[1].push(changepoints[1]); - timepoints[2].push(changepoints[2]); - timepoints[3].push(changepoints[3]); - } - - // this is a vector of 4 iterators, each iterating over the contents of - // "timepoints[0]" to "timepoints[3]" - let mut iterators: ArrayVec<[_; 4]> = timepoints.into_iter() - .cloned() - .map(|v| v.into_iter().peekable()) - .collect(); - let mut first_timepoint: Option = None; - let mut last_timepoint: Option = None; - let mut current_abs = Rating::zero(); - let mut current_delta = Rating::zero(); - loop { - // unpack the first value of each iterator - let next_timepoints: ArrayVec<[(usize, (Rating, TimePoint)); 4]> = - iterators.iter_mut() - .enumerate() - .map(|(i, iter)| iter.peek().map(|&v| (i, v))) - .filter_map(|opt| opt) - .collect(); - - // take the first next timepoint - let next_changepoint_opt = next_timepoints.into_iter().min_by_key::( - |a| (a.1).1, - ); - - // because each original array had the same length, all iterators should end at - // the same time - let (next_id, (next_rating_delta, next_timepoint)) = match next_changepoint_opt { - Some(next_changepoint) => next_changepoint, - None => break, - }; - - - if first_timepoint.is_none() { - first_timepoint = Some(next_timepoint) - }; - - // add the new segment with the current_delta - if let Some(last_timepoint) = last_timepoint { - let len: u64 = u64::from(next_timepoint - last_timepoint); - builder.add_segment(DeltaSegment::new(current_abs, current_delta, len)); - current_abs += current_delta * len as i64; - } - - current_delta += next_rating_delta; - last_timepoint = Some(next_timepoint); - - // "next_id" contains the index of the iterator which 
contains - // "next_changepoint" -> pop that from the front so we don't have a endless loop - iterators[next_id].next(); - } - - // the rating values are continuous, so the first value of a segment is the - // last value of the previous segment. - // To avoid having each of these segment-break values two times in the buffer, - // every segments stops one timepoint - // before the real segment end. The real segment end is then the first value of - // the next value. - // - // The last rating has to be 0, so we extend the last segment with the missing - // timepoint. - assert_eq!(current_abs, Rating::zero()); - assert_eq!(current_delta, Rating::zero()); - builder.add_segment(DeltaSegment::new(current_abs, current_delta, 1)); - - match (first_timepoint, last_timepoint) { - (Some(first_timepoint), Some(_)) => { - builder.get_buffer().with_new_borders( - i64::from(start1 - first_timepoint), - i64::from(start2 - start1), - ) - } - _ => unreachable!(), // lists in aligner should be non-empty - } - } - - - /// Computes the same buffer as the fast variant (hopefully) and is shorter and easier to debug, but slower. 
- #[cfg(test)] - fn get_compressed_overlapping_ratings_slow(&self, start1: TimePoint, start2: TimePoint, length: TimeDelta) -> RatingBuffer { - let istart1: i64 = start1.into(); - let istart2: i64 = start2.into(); - let mut rating_buffer_builder = DeltaBufferBuilder::new(); - - for istart in istart1..istart2 { - let start = TimePoint::from(istart); - let span = TimeSpan::new(start, start + length); - - // summation of ratings of current time span with each reference time span - let rating: Rating = self.reference - .iter() - .map(|ref_ts| { - let num_overlapping_segments: i64 = ref_ts.get_overlapping_length(span).into(); - let single_segment_rating = Rating::from_overlapping_spans(span.len(), ref_ts.len()); - single_segment_rating * num_overlapping_segments - }) - .sum(); - - rating_buffer_builder.add_segment(RatingSegment::new(rating, Rating::zero(), 1)); - } - - rating_buffer_builder.get_buffer() - } -} - - -#[cfg(test)] -mod tests { - use super::*; - use rand; - use rand::Rng; - use tests::get_random_prepared_test_time_spans; - - fn get_dummy_aligner() -> Aligner { - loop { - let reference_ts = get_random_prepared_test_time_spans(); - let incorrect_ts = get_random_prepared_test_time_spans(); - - // new will return None, if both lists are empty -> highly unlikely - if let Some(a) = Aligner::new(reference_ts, incorrect_ts, 0.03, None) { - return a; - } - } - } - - #[test] - /// Aligns random timespans to each other and calls aligner. General test whether any internal - /// assertions are invalidated. - fn run_aligner() { - for _ in 0..20 { - get_dummy_aligner().align_all_spans(); - } - } - - #[test] - /// `get_compressed_overlapping_ratings()` is highly optimized -> compare the results of slow and fast - /// implemntations. 
- fn get_compressed_overlapping_ratings() { - let mut rng = rand::thread_rng(); - - for _ in 0..30 { - let aligner = get_dummy_aligner(); - let len: i64 = (rng.next_u32() % 100) as i64; - let rating_buffer1 = aligner.get_compressed_overlapping_ratings( - aligner.get_start(), - aligner.get_end(), - TimeDelta::one() * len, - ); - let rating_buffer2 = aligner.get_compressed_overlapping_ratings_slow( - aligner.get_start(), - aligner.get_end(), - TimeDelta::one() * len, - ); - assert_eq!( - rating_buffer1.iter().collect::>(), - rating_buffer2.iter().collect::>() - ); - } - } - -} diff --git a/src/internal/delta_compression.rs b/src/internal/delta_compression.rs deleted file mode 100644 index 25167f8..0000000 --- a/src/internal/delta_compression.rs +++ /dev/null @@ -1,765 +0,0 @@ -// This file is part of the Rust library and binary `aligner`. -// -// Copyright (C) 2017 kaegi -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . 
- - -use internal::{Rating, TimeDelta, TimePoint}; - -use std::fmt::Display; -use std::iter::Peekable; -use std::ops::{Mul, Sub}; -use std::ops::Add; -use std::slice::Iter; - -pub type TimepointBuffer = DeltaBuffer; -pub type RatingBuffer = DeltaBuffer; - -// pub type TimepointSegment = DeltaSegment; -pub type RatingSegment = DeltaSegment; - -// ////////////////////////////////////////////////////////////////////////////////////////////////// -// ZERO TRAIT - -pub trait Zero { - fn zero() -> Self; -} - -impl Zero for TimeDelta { - fn zero() -> TimeDelta { - TimeDelta::zero() - } -} - -impl Zero for Rating { - fn zero() -> Rating { - Rating::zero() - } -} - - -// ///////////////////////////////////////////////////////////////////////////////////////////////// -// DELTA SEGMENT -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -/// T is the actual value, D is the delta type -pub struct DeltaSegment { - start: T, - delta: D, - len: u64, -} - -impl DeltaSegment { - pub fn new(start: T, delta: D, len: u64) -> DeltaSegment { - DeltaSegment { - delta: delta, - start: start, - len: len, - } - } - - pub fn with_new_length(seg: DeltaSegment, len: u64) -> DeltaSegment { - Self::new(seg.start, seg.delta, len) - } - - - pub fn delta(&self) -> D - where - D: Copy, - { - self.delta - } - - pub fn len(&self) -> u64 { - self.len - } - - pub fn is_decreasing(&self) -> bool - where - D: Copy + Zero + PartialOrd, - { - self.delta() < D::zero() - } -} - -impl DeltaSegment -where - T: Add + Copy, - D: Mul + Copy, -{ - pub fn first_value(&self) -> T { - self.start - } - - pub fn last_value(&self) -> T { - self.value_at_index(self.len as i64 - 1) - } - - pub fn value_at_index(&self, i: i64) -> T { - self.start + self.delta * i - } - - /// Create a new delta segment containing all elements in `(from, to)` - /// where `from` is included and `to` excluded. 
- pub fn subseg(&self, from: u64, to: u64) -> DeltaSegment { - assert!(from < to); - assert!(to <= self.len()); - DeltaSegment { - start: self.value_at_index(from as i64), - delta: self.delta(), - len: to - from, - } - } - - pub fn is_greatequal(&self, other: DeltaSegment) -> bool - where - T: Ord, - { - self.first_value() >= other.first_value() && self.last_value() >= other.last_value() - } - - pub fn split_to_end(&self, from: u64) -> DeltaSegment { - assert!(from <= self.len()); - DeltaSegment::new( - self.value_at_index(from as i64), - self.delta(), - self.len() - from as u64, - ) - } - - pub fn split_from_begin_to(&self, to: u64) -> DeltaSegment { - assert!(to <= self.len()); - DeltaSegment::new(self.first_value(), self.delta(), to) - } -} - -impl Add for DeltaSegment -where - T: Add + Copy, - D: Mul + Copy, -{ - type Output = DeltaSegment; - fn add(self, rhs: D) -> DeltaSegment { - DeltaSegment::new(self.first_value() + rhs, self.delta(), self.len()) - } -} - -// ////////////////////////////////////////////////////////////////////////////////////////////////// -// DELTA COMPRESSION BUFFER -#[derive(Clone, Debug)] -pub struct DeltaBuffer { - data: Vec>, - length: u64, -} - -impl From> for DeltaBuffer -where - T: Add + Sub + Eq + Copy, - D: Mul + Copy + Eq, -{ - fn from(seg: DeltaSegment) -> DeltaBuffer { - DeltaBuffer::init_with_one_segment(seg.first_value(), seg.delta(), seg.len()) - } -} - -impl>> From for DeltaBuffer -where - T: Add + Sub + Eq + Copy, - D: Mul + Copy + Eq, -{ - fn from(i: I) -> DeltaBuffer { - let mut builder = DeltaBufferBuilder::new(); - for seg in i { - builder.add_segment(seg); - } - builder.get_buffer() - } -} - -impl DeltaBuffer -where - T: Add + Sub + Eq + Copy, - D: Mul + Copy + Eq, -{ - pub fn new() -> DeltaBuffer { - DeltaBuffer { - data: Vec::new(), - length: 0, - } - } - - pub fn init_with_one_segment(start: T, delta: D, len: u64) -> DeltaBuffer { - DeltaBuffer { - data: vec![DeltaSegment::new(start, delta, len)], - length: len, 
- } - } - - pub fn len(&self) -> u64 { - self.length - } - - - /// This function will return a new buffer from "new_start" to "new_end" - /// from the current data, which get - /// filled left and right with the first/last value of the entire buffer. - pub fn with_new_borders(&self, new_start: i64, new_length: i64) -> DeltaBuffer - where - D: Zero, - { - // XXX: do not use intermediate buffer? (measure performance impact first) - assert!(new_length >= 0); - assert!(!self.data.is_empty()); - let first_value = self.first_value().unwrap(); - let last_value = self.last_value().unwrap(); - - let mut buffer = if new_start < 0 { - self.extended_front(DeltaSegment::new(first_value, D::zero(), -new_start as u64)) - } else { - self.truncated_front(new_start as u64) - }; - - - buffer = buffer.fixed_length(DeltaSegment::new(last_value, D::zero(), new_length as u64)); - - buffer - } - - pub fn first_value(&self) -> Option { - self.data.first().map(|&first_segment| { - first_segment.first_value() - }) - } - - pub fn last_value(&self) -> Option { - self.data.last().map( - |&last_segment| last_segment.last_value(), - ) - } - - pub fn extended_front(&self, seg: DeltaSegment) -> DeltaBuffer { - let mut builder = DeltaBufferBuilder::new(); - builder.add_segment(seg); - builder.add_buffer(self); - builder.get_buffer() - } - - pub fn truncated_front(&self, num_entries: u64) -> DeltaBuffer { - let mut builder = DeltaBufferBuilder::new(); - builder.add_buffer_from(num_entries, self); - builder.get_buffer() - } - - /// The default value will be used to create a vector with segments from an - /// empty self.data. - /// For "new_length >= length" this will just create a copy. - pub fn truncated(&self, new_length: u64) -> DeltaBuffer { - let mut builder = DeltaBufferBuilder::new(); - builder.add_buffer_until(new_length, self); - builder.get_buffer() - } - - /// Returns the buffer with an additional segment. 
- pub fn extended_with(&self, seg: DeltaSegment) -> DeltaBuffer { - let mut builder = DeltaBufferBuilder::new(); - builder.add_buffer(self); - builder.add_segment(seg); - builder.get_buffer() - } - - /// Set the length exactly. Truncate segments or extend the missing part - /// with new segment - /// with given values. - pub fn fixed_length(&self, seg: DeltaSegment) -> DeltaBuffer { - if self.len() > seg.len() { - self.truncated(seg.len()) - } else if self.len() < seg.len() { - self.extended_with(seg.split_to_end(self.len())) - } else { - self.clone() - } - } - - #[cfg(test)] - pub fn iter(&self) -> DeltaBufferIter - where - T: Add + Copy, - D: Mul + Copy, - { - DeltaBufferIter { reader: DeltaBufferReader::new(self, TimePoint::from(0)) } - } - - pub fn iter_segments(&self) -> Iter> { - self.data.iter() - } - - /// Both Buffers have to have same total length. - pub fn combine_fast(&self, other: &DeltaBuffer, mut f: F) -> DeltaBuffer - where - F: FnMut(T, D, T, D) -> (T, D), - { - let mut builder = DeltaBufferBuilder::new(); - for (seg1, seg2) in CombinedSegmentIterator::new( - self.iter_segments().cloned(), - other.iter_segments().cloned(), - ) - { - let seglen = seg1.len(); - assert!(seg2.len() == seglen); - - let (new_t, new_d) = f( - seg1.first_value(), - seg1.delta(), - seg2.first_value(), - seg2.delta(), - ); - - // this will handle optimizations where we join two segments - builder.add_segment(DeltaSegment::new(new_t, new_d, seglen)); - } - - builder.get_buffer() - } - - pub fn combined_add(&self, other: &DeltaBuffer) -> DeltaBuffer - where - T: Add, - D: Add, - { - self.combine_fast(other, |t1: T, d1: D, t2: T, d2: D| (t1 + t2, d1 + d2)) - } - - #[allow(dead_code)] - pub fn write_to_file(&self, path: String) -> ::std::io::Result<()> - where - D: Display, - T: Display, - { - use std::fs::File; - use std::io::prelude::*; - let mut f = File::create(path)?; - for segments in &self.data { - f.write_all( - format!( - "{}, {}, {}\n", - segments.first_value(), - 
segments.delta(), - segments.len() - ) - .as_bytes(), - )?; - } - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use internal::Rating; - use rand; - use rand::Rng; - use std::cmp::min; - use std::convert::From; - - fn random_buffer() -> RatingBuffer { - let mut rng = rand::thread_rng(); - - let mut builder = DeltaBufferBuilder::new(); - let len = rng.next_u32() % 10; - let mut fulllen = 0; - for _ in 0..len { - let seglen = rng.next_u32() as u64 % 100 + 1; - builder.add_segment(DeltaSegment::new( - Rating::from(rng.next_u32() as i64 % 2000 - 1000), - Rating::from(rng.next_u32() as i64 % 2000 - 1000), - seglen, - )); - fulllen += seglen; - } - let buffer = builder.get_buffer(); - - assert_eq!(buffer.len(), fulllen); - assert_eq!(buffer.iter().count() as u64, fulllen); - - buffer - } - - #[test] - fn combine_add() { - for _ in 0..100 { - let buffer1 = random_buffer(); - let buffer2 = random_buffer(); - let buffer3 = buffer1.combined_add(&buffer2); - assert!(buffer3.len() == min(buffer1.len(), buffer2.len())); - for (v1, (v2, v3)) in buffer1.iter().zip(buffer2.iter().zip(buffer3.iter())) { - assert!(v1 + v2 == v3); - } - } - } - - fn test_truncation(buffer: &RatingBuffer, len: i64) { - if len < 0 { - return; - } - let len = len as u64; - let new_buffer = buffer.truncated(len); - let new_len = min(len, buffer.len()); - - assert_eq!(new_buffer.len(), new_len); - assert_eq!(new_buffer.iter().count() as u64, new_len); - - for (v1, v2) in buffer.iter().zip(new_buffer.iter()) { - assert_eq!(v1, v2); - } - } - - #[test] - fn truncate() { - for _ in 0..100 { - let buffer = random_buffer(); - test_truncation(&buffer, 0); - test_truncation(&buffer, buffer.len() as i64 / 2); - test_truncation(&buffer, buffer.len() as i64 - 1); - test_truncation(&buffer, buffer.len() as i64); - test_truncation(&buffer, buffer.len() as i64 + 1); - test_truncation(&buffer, buffer.len() as i64 * 2); - } - } -} - -// 
///////////////////////////////////////////////////////////////////////////////////////////////// -// DELTA COMPRESSION BUILDER - -pub struct DeltaBufferBuilder { - inner: DeltaBuffer, -} - -impl DeltaBufferBuilder -where - T: Add + Sub + Eq + Copy, - D: Mul + Copy + Eq, -{ - pub fn new() -> DeltaBufferBuilder { - DeltaBufferBuilder { inner: DeltaBuffer::new() } - } - - pub fn add_segment(&mut self, seg: DeltaSegment) { - self.add_segment_intern(seg.start, seg.delta, seg.len); - } - - fn add_segment_intern(&mut self, t: T, d: D, len: u64) { - if len == 0 { - return; - } - - self.inner.length += len; - - if let Some(last_segment) = self.inner.data.last_mut() { - let anticipated_timepoint = last_segment.start + last_segment.delta * last_segment.len as i64; - if (last_segment.delta == d || len == 1) && t == anticipated_timepoint { - last_segment.len += len; - return; // extend only - } else if last_segment.len == 1 { - let new_delta = t - last_segment.start; - if len == 1 || new_delta == d { - last_segment.delta = new_delta; - last_segment.len += len; - return; // extend only - } - } - } - - self.inner.data.push(DeltaSegment { - delta: d, - start: t, - len: len, - }); - } - - pub fn add_buffer_from(&mut self, index: u64, buffer: &DeltaBuffer) { - let mut index = index as i64; - for &segment in &buffer.data { - if index <= 0 { - self.add_segment(segment) - } else if index > 0 && index < segment.len as i64 { - self.add_segment(segment.split_to_end(index as u64)); - } else { - // index >= segment.len - } - - // change start delta so it is relative to next segment - index -= segment.len as i64; - } - } - - - pub fn add_buffer(&mut self, buffer: &DeltaBuffer) { - for &segment in &buffer.data { - self.add_segment(segment) - } - } - - pub fn add_buffer_until(&mut self, index: u64, buffer: &DeltaBuffer) { - let mut index: i64 = index as i64; - for &segment in &buffer.data { - if index <= 0 { - return; - } else if index > 0 && index < segment.len as i64 { - 
self.add_segment(segment.split_from_begin_to(index as u64)); - } else { - // index >= segment.len - self.add_segment(segment); - } - - // change start delta so it is relative to next segment - index -= segment.len as i64; - } - } - - pub fn get_buffer(self) -> DeltaBuffer { - self.inner - } -} - -// ///////////////////////////////////////////////////////////////////////////////////////////////// -// DELTA COMPRESSION READER - -pub struct DeltaBufferReader<'a, T: 'a, D: 'a> { - iter: Peekable>>, - last_query: TimePoint, - query_rest: u64, -} - -impl<'a, T, D> DeltaBufferReader<'a, T, D> -where - T: Add + Copy, - D: Mul + Copy, -{ - pub fn new(buffer: &DeltaBuffer, first_timepoint: TimePoint) -> DeltaBufferReader { - let iter = buffer.data.iter(); - DeltaBufferReader { - iter: iter.peekable(), - last_query: first_timepoint, - query_rest: 0, - } - } - - pub fn read_by_timepoint(&mut self, t: TimePoint) -> T { - let delta = t - self.last_query; - self.read_by_delta(delta) - } - - pub fn read_by_delta(&mut self, d: TimeDelta) -> T { - assert!(d >= TimeDelta::zero()); - self.read_by_delta_safe(d).unwrap_or_else(|| { - panic!( - "DeltaBuffer::read_by_delta(): out of bounds access (delta is {})", - d - ) - }) - } - - fn read_by_delta_safe(&mut self, d: TimeDelta) -> Option { - self.last_query += d; - self.query_rest += u64::from(d); - loop { - { - let segment = match self.iter.peek() { - Some(segment) => segment, - None => return None, - }; - - if self.query_rest < segment.len { - return Some(segment.value_at_index(self.query_rest as i64)); - } - - self.query_rest -= segment.len; - } - self.iter.next(); - } - } - - #[cfg(test)] - pub fn read_current_safe(&mut self) -> Option { - let query_rest = self.query_rest; - self.iter.peek().map(|segment| { - segment.start + segment.delta * query_rest as i64 - }) - } -} - -// ///////////////////////////////////////////////////////////////////////////////////////////////// -// BUFFER ITERATOR - -#[cfg(test)] -pub struct 
DeltaBufferIter<'a, T: 'a, D: 'a> { - reader: DeltaBufferReader<'a, T, D>, -} - -#[cfg(test)] -impl<'a, T, D> Iterator for DeltaBufferIter<'a, T, D> -where - T: Add + Copy, - D: Mul + Copy, -{ - type Item = T; - - fn next(&mut self) -> Option { - let result = self.reader.read_current_safe(); - self.reader.read_by_delta_safe(TimeDelta::one()); - result - } -} - -// ///////////////////////////////////////////////////////////////////////////////////////////////// -// COMBINED SEGMENT ITERATOR - -pub trait Segment { - type Item; - fn len(self) -> u64; - fn split_from(self, start_index: u64, len: u64) -> Self::Item; -} - -impl Segment for DeltaSegment -where - T: Add + Copy, - D: Mul + Copy, -{ - type Item = DeltaSegment; - - #[inline] - fn len(self) -> u64 { - DeltaSegment::len(&self) - } - - #[inline] - fn split_from(self, start_index: u64, len: u64) -> DeltaSegment { - DeltaSegment::new(self.value_at_index(start_index as i64), self.delta(), len) - } -} - -/// Iterator that steps through two buffers simultanously. Each step goes until -/// the next beginning/end -/// of a segment in either buffer. 
-pub struct CombinedSegmentIterator -where - I1: Iterator, - I2: Iterator, - K1: Segment, - K2: Segment, -{ - pos1: u64, - pos2: u64, - segment_iter_1: Peekable, - segment_iter_2: Peekable, -} - -impl CombinedSegmentIterator -where - I1: Iterator, - I2: Iterator, - K1: Segment + Copy, - K2: Segment + Copy, -{ - pub fn new(i1: I1, i2: I2) -> CombinedSegmentIterator { - CombinedSegmentIterator { - pos1: 0, - pos2: 0, - segment_iter_1: i1.peekable(), - segment_iter_2: i2.peekable(), - } - } -} - -impl Iterator for CombinedSegmentIterator -where - I1: Iterator, - I2: Iterator, - K1: Segment + Copy, - K2: Segment + Copy, -{ - type Item = (K1::Item, K2::Item); - - #[inline] - fn next(&mut self) -> Option<(K1::Item, K2::Item)> { - let (segment1, segment2) = { - let segment1_opt = self.segment_iter_1.peek(); - let segment2_opt = self.segment_iter_2.peek(); - - match (segment1_opt, segment2_opt) { - (Some(a), Some(b)) => (*a, *b), - _ => return None, - } - }; - - let rest1 = ::len(segment1) - self.pos1; - let rest2 = ::len(segment2) - self.pos2; - let orig_pos1 = self.pos1; - let orig_pos2 = self.pos2; - - let step = if rest1 < rest2 { - self.segment_iter_1.next(); - self.pos1 = 0; - self.pos2 += rest1; - rest1 - } else if rest2 < rest1 { - self.segment_iter_2.next(); - self.pos1 += rest2; - self.pos2 = 0; - rest2 - } else { - // rest2 == rest1 - self.segment_iter_1.next(); - self.segment_iter_2.next(); - self.pos1 = 0; - self.pos2 = 0; - rest1 - }; - - let t1 = ::split_from(segment1, orig_pos1, step); - let t2 = ::split_from(segment2, orig_pos2, step); - - Some((t1, t2)) - } -} - -#[derive(Clone, Copy)] -pub enum OptionSegment { - NoneSeg(u64), - SomeSeg(K), -} - -impl Segment for OptionSegment -where - K: Segment, -{ - type Item = Option; - - #[inline] - fn len(self) -> u64 { - match self { - OptionSegment::NoneSeg(len) => len, - OptionSegment::SomeSeg(seg) => seg.len(), - } - } - - #[inline] - fn split_from(self, start_index: u64, len: u64) -> Option { - match self { - 
OptionSegment::NoneSeg(_) => None, - OptionSegment::SomeSeg(seg) => Some(seg.split_from(start_index, len)), - } - } -} diff --git a/src/internal/mod.rs b/src/internal/mod.rs deleted file mode 100644 index 554f12a..0000000 --- a/src/internal/mod.rs +++ /dev/null @@ -1,31 +0,0 @@ -// This file is part of the Rust library and binary `aligner`. -// -// Copyright (C) 2017 kaegi -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - - -mod delta_compression; -mod time_types; -mod rating_type; -mod aligner; -mod timespan_ops; -mod utils; - -pub use self::aligner::*; -pub use self::delta_compression::*; -pub use self::rating_type::*; -pub use self::time_types::*; -pub use self::timespan_ops::*; -pub use self::utils::*; diff --git a/src/internal/rating_type.rs b/src/internal/rating_type.rs deleted file mode 100644 index 637a9c2..0000000 --- a/src/internal/rating_type.rs +++ /dev/null @@ -1,132 +0,0 @@ -// This file is part of the Rust library and binary `aligner`. -// -// Copyright (C) 2017 kaegi -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - - -use internal::TimeDelta; -use std; -use std::cmp::max; -use std::ops::*; - -// these objects determine the precision/length of the rating (i32/i64) - lower -// values take less space and time, higher values have higher precision -type RatingIntern = i64; -const RATING_PRECISION: RatingIntern = (1 << 32); - -/// Use an integer for internal rating, because we add MANY small values which -/// lead to precision issues for floats -#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq)] -pub struct Rating(RatingIntern); -impl std::fmt::Display for Rating { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{}", self.0 as f64 / RATING_PRECISION as f64) - } -} - - -impl Rating { - pub fn zero() -> Rating { - Rating(0) - } - pub fn from_overlapping_spans(a: TimeDelta, b: TimeDelta) -> Rating { - let max: RatingIntern = max(a, b).into(); - // if min == 0 || max == 0 { return Self::zero() } - - // the score is "score := min/max" so it ges a score of 1 if both spans are equally long. - // The score is added in all overlapping segments, so if we don't devide by a length value, we - // end up with "score * num_overlapping_segments" which is dependent on the choosen resolution - // and will priorize longer time spans. - // - // By deviding this value by "min", which is maximum number of overlapping segments, we can - // "normalize" the score and get the final rating. But now we have "score/min == (min/max)/min == 1/max". - // - // The total score is now "overlapping_percentage_in_min / 100 * score". 
- let x = RATING_PRECISION / max; - - Rating(x as RatingIntern) - } - - pub fn nosplit_bonus(unnormalized: f64) -> Rating { - Rating((RATING_PRECISION as f64 * unnormalized) as RatingIntern) - } -} - -// There is no absolute `Rating` so, there should be no way to construct one. -// This is pretty nifty for testing though. -#[cfg(test)] -impl From for Rating { - fn from(f: i64) -> Rating { - Rating(f as RatingIntern) - } -} - - -impl Add for Rating { - type Output = Rating; - fn add(self, c: Rating) -> Rating { - Rating(self.0 + c.0) - } -} -impl Sub for Rating { - type Output = Rating; - fn sub(self, c: Rating) -> Rating { - Rating(self.0 - c.0) - } -} -impl AddAssign for Rating { - fn add_assign(&mut self, c: Rating) { - self.0 += c.0; - } -} -impl SubAssign for Rating { - fn sub_assign(&mut self, c: Rating) { - self.0 -= c.0; - } -} -impl std::iter::Sum for Rating { - fn sum(iter: I) -> Rating - where - I: Iterator, - { - Rating(iter.map(|c| c.0).sum()) - } -} -impl Mul for Rating { - type Output = Rating; - fn mul(self, rhs: u64) -> Rating { - Rating(self.0 * rhs as RatingIntern) - } -} -impl Mul for Rating { - type Output = Rating; - fn mul(self, rhs: i64) -> Rating { - Rating(self.0 * rhs as RatingIntern) - } -} -impl Div for Rating { - type Output = i64; - fn div(self, rhs: Rating) -> i64 { - (self.0 / rhs.0) as i64 - } -} - -impl Neg for Rating { - type Output = Rating; - - fn neg(self) -> Rating { - Rating(-self.0) - } -} diff --git a/src/internal/utils.rs b/src/internal/utils.rs deleted file mode 100644 index c9cfc31..0000000 --- a/src/internal/utils.rs +++ /dev/null @@ -1,381 +0,0 @@ -// This file is part of the Rust library and binary `aligner`. -// -// Copyright (C) 2017 kaegi -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. 
-// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . - - -use arrayvec::ArrayVec; -use internal::{DeltaSegment, Rating, RatingSegment, TimeDelta, TimePoint, TimeSpan}; -use std::iter::FromIterator; - -/// Returns the timepoints at which the rating delta changes if we move one -/// subtitle compared to -/// an other. -/// -/// If we fix one timespan and let an other timespan variable, we get such a -/// curve for the rating: -/// -/// ```text -/// -/// / --------- \ -/// / \ -/// ------- -------------------------- -/// ``` -/// -/// At first the rating be zero, then rise linearly, then it will be constant -/// for a time and then fall to zero again. This function computes these 4 -/// special timepoints. -pub fn get_overlapping_rating_changepoints(length: TimeDelta, constspan: TimeSpan) -> [(Rating /* Delta */, TimePoint); 4] { - - let start_of_rise = constspan.start() - length; - let end_of_rise = constspan.end() - length; - let start_of_fall = constspan.start(); - let end_of_fall = constspan.end(); - - let timepoints: [TimePoint; 4] = if end_of_rise <= start_of_fall { - [start_of_rise, end_of_rise, start_of_fall, end_of_fall] - } else { - [start_of_rise, start_of_fall, end_of_rise, end_of_fall] - }; - - assert!(timepoints[0] <= timepoints[1]); - assert!(timepoints[1] <= timepoints[2]); - assert!(timepoints[2] <= timepoints[3]); - - let rise_delta = Rating::from_overlapping_spans(length, constspan.len()); - - [ - (rise_delta, timepoints[0]), - (-rise_delta, timepoints[1]), - (-rise_delta, timepoints[2]), - (rise_delta, timepoints[3]), - ] -} - -/// Creates a new vector with `prev` as first element and the sorted elements -/// of `a`. 
-pub fn sort_arrayvec3(prev: u64, a: ArrayVec<[u64; 3]>) -> ArrayVec<[u64; 4]> { - #[allow(collapsible_if)] - match a.len() { - 0 => ArrayVec::from_iter([prev].into_iter().cloned()), - 1 => ArrayVec::from_iter([prev].into_iter().cloned().chain(a.into_iter())), - 2 => { - if a[0] <= a[1] { - ArrayVec::from_iter([prev].into_iter().cloned().chain(a.into_iter())) - } else { - ArrayVec::from_iter([prev].into_iter().cloned().chain(a.into_iter().rev())) - } - } - 3 => { - if a[0] <= a[1] { - if a[1] <= a[2] { - ArrayVec::from([prev, a[0], a[1], a[2]]) - } else if a[0] <= a[2] { - ArrayVec::from([prev, a[0], a[2], a[1]]) - } else { - ArrayVec::from([prev, a[2], a[0], a[1]]) - } - } else { - // here: a[1] <= a[0] - - if a[0] <= a[2] { - ArrayVec::from([prev, a[1], a[0], a[2]]) - } else if a[1] <= a[2] { - ArrayVec::from([prev, a[1], a[2], a[0]]) - } else { - ArrayVec::from([prev, a[2], a[1], a[0]]) - } - } - } - _ => panic!("ArrayVec<[T; 3]>::len() is greater than 3"), - } -} - -/// Removes any duplicate elements from the vector. -/// -/// Requires a sorted non-empty array vector. -fn dedup_arrayvec4(a: ArrayVec<[u64; 4]>) -> ArrayVec<[u64; 4]> { - let mut last = a[0]; - let mut result = ArrayVec::new(); - result.push(last); - for elem in a { - if last != elem { - result.push(elem); - last = elem; - } - } - result -} - -pub fn subseg_by_max_start(segs: [DeltaSegment; 2], ids: [ID; 2], start: u64, end: u64) -> (RatingSegment, ID) -where - ID: Copy + Eq, -{ - if (segs[0].value_at_index(start as i64), segs[0].delta()) >= (segs[1].value_at_index(start as i64), segs[1].delta()) { - (segs[0].subseg(start, end), ids[0]) - } else { - (segs[1].subseg(start, end), ids[1]) - } -} - - -/// Given two segments, it computes the subsegments which have the best rating -/// in their span. -/// -/// The parameter `ids` is there to identify which segment the subsegment came -/// from. 
The length -/// of all given spans have to be same, the sum of all returned segments is the -/// orginal length. -pub fn get_best_rating_segments_of_2(segs: [DeltaSegment; 2], ids: [ID; 2]) -> ArrayVec<[(DeltaSegment, ID); 3]> -where - ID: Copy + Eq, -{ - let len = segs[0].len(); - assert!(len == segs[1].len()); - - match get_switch_point(segs[0], segs[1]) { - None => ArrayVec::from_iter([subseg_by_max_start(segs, ids, 0, len)].iter().cloned()), - Some(point) => { - ArrayVec::from_iter( - [ - subseg_by_max_start(segs, ids, 0, point), - subseg_by_max_start(segs, ids, point, len), - ] - .iter() - .cloned(), - ) - } - } -} - - -/// Given three segments, it computes the subsegments which have the best -/// rating in their span. -/// -/// The parameter `ids` is there to identify which segment the subsegment came -/// from. The length -/// of all given spans have to be same, the sum of all returned segments is the -/// orginal length. -pub fn get_best_rating_segments_of_3(segs: [DeltaSegment; 3], ids: [ID; 3]) -> ArrayVec<[(DeltaSegment, ID); 3]> -where - ID: Copy + Eq, -{ - let seg1 = segs[0]; - let seg2 = segs[1]; - let seg3 = segs[2]; - - let id1 = ids[0]; - let id2 = ids[1]; - let id3 = ids[2]; - - - let len = seg1.len(); - assert!(len == seg2.len()); - assert!(len == seg3.len()); - - // get best ratings segments of first 2 segments - let mut switch_points: ArrayVec<[u64; 3]> = ArrayVec::new(); - if let Some(split_point) = get_switch_point(seg1, seg2) { - switch_points.push(split_point); - } - if let Some(split_point) = get_switch_point(seg1, seg3) { - switch_points.push(split_point); - } - if let Some(split_point) = get_switch_point(seg2, seg3) { - switch_points.push(split_point); - } - - let switch_points = sort_arrayvec3(0, switch_points); - let switch_points = dedup_arrayvec4(switch_points); - - let mut segments: ArrayVec<[(RatingSegment, ID); 3]> = ArrayVec::new(); - - let next_points = switch_points.iter().cloned().skip(1).chain( - Some(len).into_iter(), - ); - 
for (switch_point, next_point) in switch_points.iter().cloned().zip(next_points) { - // get the best segment for the current index - // -> first compare their rating, in case they are the same, compare the deltas - let value1 = (seg1.value_at_index(switch_point as i64), seg1.delta()); - let value2 = (seg2.value_at_index(switch_point as i64), seg2.delta()); - let value3 = (seg3.value_at_index(switch_point as i64), seg3.delta()); - - #[allow(collapsible_if)] - let (current_best_segment, current_id) = if value1 < value2 { - if value2 < value3 { - (seg3, id3) - } else { - (seg2, id2) - } - } else { - if value1 < value3 { - (seg3, id3) - } else { - (seg1, id1) - } - }; - - if let Some(last_ref) = segments.last_mut() { - if last_ref.1 == current_id { - last_ref.0 = RatingSegment::with_new_length((*last_ref).0, last_ref.0.len() + next_point - switch_point); - continue; - } - } - - segments.push(( - current_best_segment.subseg(switch_point, next_point), - current_id, - )); - } - - assert!(segments.len() < 4); - - segments -} - -/// Requires 'seg1.len() == seg2.len()'. Returns the split point index if it is -/// within "(0, len)" (borders excluded), otherwise `None`. -/// The switch point is the first index where one segment overtakes the other. 
-#[allow(if_same_then_else)] -fn get_switch_point(seg1: DeltaSegment, seg2: DeltaSegment) -> Option { - let len = seg1.len(); - assert!(len == seg2.len()); - let (f1, f2, l1, l2) = ( - seg1.first_value(), - seg2.first_value(), - seg1.last_value(), - seg2.last_value(), - ); - - if f1 <= f2 && l1 <= l2 { - // segment1 is always smaller than segment2 - None - } else if f2 <= f1 && l2 <= l1 { - // segment2 is always smaller than segment1 - None - } else { - let (d1, d2) = (seg1.delta(), seg2.delta()); - - // solve "t1 + d1 * x = t2 + d2 * x" for x - // => "x = (t1 - t2) / (d2 - d1)" - // - // because this is a interger division (giving us x_int): x_int <= x < x_int + 1 - // - // switch point is then "(x_int + 1)" which is the first index where the second - // segment is better than the - // original - let switch_point: u64 = ((f1 - f2) / (d2 - d1) + 1) as u64; - assert!(0 < switch_point); - assert!(switch_point <= len); - - Some(switch_point) - } - -} - -#[cfg(test)] -mod tests { - use arrayvec::ArrayVec; - use internal::*; - use rand; - use rand::Rng; - use std::ops::Range; - - fn get_random_rating_segment(s: Range, d: Range, len: Range) -> RatingSegment { - let mut rng = rand::thread_rng(); - let vs = rng.gen_range(s.start, s.end + 1); - let vd = rng.gen_range(d.start, d.end + 1); - let vlen = rng.gen_range(len.start, len.end + 1); - DeltaSegment::new(Rating::from(vs), Rating::from(vd), vlen) - } - - // Test `get_best_rating_segments_of_3` by validating it with `validate_best_segments`. 
- #[test] - fn test_get_best_segments3() { - // genrate test data - let gen_segment = || get_random_rating_segment(-100..100, -100..100, 100..100); - let data_vec: Vec<_> = (0..2000) - .map(|_| [gen_segment(), gen_segment(), gen_segment()]) - .collect(); - - for test_segs in data_vec { - let ids = [0, 1, 2]; - let best_segments: ArrayVec<[(RatingSegment, i32); 3]> = get_best_rating_segments_of_3(test_segs, ids); - validate_best_segments( - test_segs.into_iter() - .map(|&seg| RatingBuffer::from(seg)) - .collect(), - best_segments, - ); - } - } - - // Test `get_best_rating_segments_of_2` by validating it with `validate_best_segments`. - #[test] - fn test_get_best_segments2() { - - // genrate test data - let gen_segment = || get_random_rating_segment(-100..100, -100..100, 100..100); - let data_vec: Vec<_> = (0..2000).map(|_| [gen_segment(), gen_segment()]).collect(); - - for test_segs in data_vec { - let ids = [0, 1]; - let best_segments: ArrayVec<[(RatingSegment, i32); 3]> = get_best_rating_segments_of_2(test_segs, ids); - println!("Test segments: {:?}", test_segs); - println!("Best segments: {:?}", best_segments); - println!(); - validate_best_segments( - test_segs.into_iter() - .map(|&seg| RatingBuffer::from(seg)) - .collect(), - best_segments, - ); - } - } - - /// Checks whether the current segment from `best_segs` always holds the maximum value at that position from all buffers in `segs`. 
- fn validate_best_segments(segs: Vec, best_segs: ArrayVec<[(RatingSegment, i32); 3]>) { - assert!(segs.len() > 0); - let len = segs[0].len(); - for seg in &segs { - assert!(seg.len() == len); - } - assert_eq!( - len, - best_segs.iter() - .map(|&(ref rating_buffer, _)| rating_buffer.len()) - .sum() - ); - - // a vector of iterators (each representing a rating buffer) - let mut iters: Vec<_> = segs.iter().map(|seg_ref| seg_ref.iter()).collect(); - - for (best_seg, id) in best_segs { - // go through all values in this supposedly "best" segment - for best_value_by_best_segment in RatingBuffer::from(best_seg).iter() { - // compute the maxium value by comparing all raings at the current position - let separate_ratings: Vec = iters.iter_mut().map(|iter| iter.next().unwrap()).collect(); - let real_max: Rating = separate_ratings.iter().cloned().max().unwrap(); - - // assert that the maximum rating really is the maximum rating - assert_eq!(real_max, best_value_by_best_segment); - - // require that the - assert_eq!(real_max, separate_ratings[id as usize]); - } - } - } - -} diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index f7f57bc..0000000 --- a/src/main.rs +++ /dev/null @@ -1,460 +0,0 @@ -// This file is part of the Rust library and binary `aligner`. -// -// Copyright (C) 2017 kaegi -// -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, either version 3 of the License, or -// (at your option) any later version. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// You should have received a copy of the GNU General Public License -// along with this program. If not, see . 
- - -#![allow(unknown_lints)] // for clippy - -#[macro_use] -extern crate error_chain; -extern crate aligner; -extern crate clap; -extern crate pbr; -extern crate subparse; -extern crate encoding; - -const PKG_VERSION: Option<&'static str> = option_env!("CARGO_PKG_VERSION"); -const PKG_NAME: Option<&'static str> = option_env!("CARGO_PKG_NAME"); -const PKG_DESCRIPTION: Option<&'static str> = option_env!("CARGO_PKG_DESCRIPTION"); - -// Alg* stands for algorithm (the internal aligner algorithm types) - -use aligner::{ProgressHandler, TimeDelta as AlgTimeDelta, TimePoint as AlgTimePoint, TimeSpan as AlgTimeSpan, align}; -use clap::{App, Arg}; -use pbr::ProgressBar; -use std::cmp::{max, min}; -use std::fs::File; -use std::io::{Read, Write}; -use std::str::FromStr; - -mod binary; - -pub use binary::errors; -pub use binary::errors::*; -pub use binary::errors::ErrorKind::*; - -// subparse -use subparse::{SubtitleEntry, SubtitleFormat, get_subtitle_format_err, parse_bytes}; -use subparse::timetypes::*; - -#[derive(Default)] -struct ProgressInfo { - progress_bar: Option>, -} - -impl ProgressHandler for ProgressInfo { - fn init(&mut self, steps: i64) { - self.progress_bar = Some(ProgressBar::new(steps as u64)); - } - fn inc(&mut self) { - self.progress_bar.as_mut().unwrap().inc(); - } - fn finish(&mut self) { - self.progress_bar.as_mut().unwrap().finish_println("\n"); - } -} - -fn read_file_to_bytes(path: &str) -> Result> { - let mut file = File::open(path).map_err(|e| Error::from(Io(e))).chain_err( - || { - FileOperation(path.to_string()) - }, - )?; - let mut v = Vec::new(); - file.read_to_end(&mut v) - .map_err(|e| Error::from(Io(e))) - .chain_err(|| FileOperation(path.to_string()))?; - Ok(v) -} - -fn write_data_to_file(path: &str, d: Vec) -> Result<()> { - let mut file = File::create(path) - .map_err(|e| Error::from(Io(e))) - .chain_err(|| FileOperation(path.to_string()))?; - file.write_all(&d) - .map_err(|e| Error::from(Io(e))) - .chain_err(|| 
FileOperation(path.to_string()))?; - Ok(()) -} - -fn timing_to_alg_timepoint(t: TimePoint, interval: i64) -> AlgTimePoint { - assert!(interval > 0); - AlgTimePoint::from(t.msecs() / interval) -} - -fn alg_delta_to_delta(t: AlgTimeDelta, interval: i64) -> TimeDelta { - assert!(interval > 0); - let time_int: i64 = t.into(); - TimeDelta::from_msecs(time_int * interval) -} - -fn timings_to_alg_timespans(v: &[TimeSpan], interval: i64) -> Vec { - v.iter() - .cloned() - .map(|timespan| { - AlgTimeSpan::new_safe( - timing_to_alg_timepoint(timespan.start, interval), - timing_to_alg_timepoint(timespan.end, interval), - ) - }) - .collect() -} -fn alg_deltas_to_timing_deltas(v: &[AlgTimeDelta], interval: i64) -> Vec { - v.iter() - .cloned() - .map(|x| alg_delta_to_delta(x, interval)) - .collect() -} - -/// Groups consecutive timespans with the same delta together. -fn get_subtitle_delta_groups(mut v: Vec<(AlgTimeDelta, TimeSpan)>) -> Vec<(AlgTimeDelta, Vec)> { - v.sort_by_key(|t| min((t.1).start, (t.1).end)); - - let mut result: Vec<(AlgTimeDelta, Vec)> = Vec::new(); - - for (delta, original_timespan) in v { - let mut new_block = false; - - if let Some(last_tuple_ref) = result.last_mut() { - if delta == last_tuple_ref.0 { - last_tuple_ref.1.push(original_timespan); - } else { - new_block = true; - } - } else { - new_block = true; - } - - if new_block { - result.push((delta, vec![original_timespan])); - } - } - - result -} - - -/// Will return an array where the start time of an subtitle is always less than the end time (will switch incorrect ones). -fn corrected_timings(v: Vec) -> Vec { - v.into_iter() - .map(|timespan| { - TimeSpan::new( - min(timespan.start, timespan.end), - max(timespan.start, timespan.end), - ) - }) - .collect() -} - -/// Every delta that where `start + delta`, is negative will be adjusted so that `start + delta` is zero. This avoids -/// invalid files for formats that don't support negative timestamps. 
-fn get_truncated_deltas(timespans: &[TimeSpan], deltas: Vec) -> Vec { - deltas.into_iter() - .zip(timespans.iter().cloned()) - .map(|(delta, timespan)| if (delta + timespan.start) - .is_negative() - { - TimePoint::from_msecs(0) - timespan.start - } else { - delta - }) - .collect() -} - -/// Prints warning. -fn pwarning<'a, T: Into>>(s: T) { - println!("WW: {}", s.into()); -} - -/// Prints info. -fn pinfo<'a, T: Into>>(s: T) { - println!("II: {}", s.into()); -} - -/// Prints error. -fn perror<'a, T: Into>>(s: T) { - println!("EE: {}", s.into()); -} - -/// Does reading, parsing and nice error handling for a f64 clap parameter. -fn unpack_clap_number_f64(matches: &clap::ArgMatches, parameter_name: &'static str) -> Result { - let paramter_value_str: &str = matches.value_of(parameter_name).unwrap(); - FromStr::from_str(paramter_value_str).chain_err(|| { - ArgumentParseError(parameter_name, paramter_value_str.to_string()) - }) -} - -/// Does reading, parsing and nice error handling for a f64 clap parameter. 
-fn unpack_clap_number_i64(matches: &clap::ArgMatches, parameter_name: &'static str) -> Result { - let paramter_value_str: &str = matches.value_of(parameter_name).unwrap(); - FromStr::from_str(paramter_value_str).chain_err(|| { - ArgumentParseError(parameter_name, paramter_value_str.to_string()) - }) -} - -fn run() -> Result<()> { - let matches = App::new(PKG_NAME.unwrap_or("unkown (not compiled with cargo)")) - .version(PKG_VERSION.unwrap_or("unknown (not compiled with cargo)")) - .about(PKG_DESCRIPTION.unwrap_or("unknown (not compiled with cargo)")) - .arg(Arg::with_name("reference-sub-file") - .help("Path to the reference subtitle file") - .required(true)) - .arg(Arg::with_name("incorrect-sub-file") - .help("Path to the incorrect subtitle file") - .required(true)) - .arg(Arg::with_name("output-file-path") - .help("Path to corrected subtitle file") - .required(true)) - .arg(Arg::with_name("split-penalty") - .short("p") - .long("split-penalty") - .value_name("floating point number from 0 to 100") - .help("Determines how eager the algorithm is to avoid splitting of the subtitles. 100 means that all lines will be shifted by the same offset, while 0 will produce MANY segments with different offsets. Values from 0.1 to 20 are the most useful.") - .default_value("4")) - .arg(Arg::with_name("interval") - .short("i") - .long("interval") - .value_name("integer in milliseconds") - .help("The smallest recognized time interval, smaller numbers make the alignment more accurate, greater numbers make aligning faster.") - .default_value("1")) - .arg(Arg::with_name("allow-negative-timestamps") - .short("n") - .long("allow-negative-timestamps") - .help("Negative timestamps can lead to problems with the output file, so by default 0 will be written instead. 
This option allows you to disable this behavior.")) - .arg(Arg::with_name("sub-fps-ref") - .long("sub-fps-ref") - .value_name("floating-point number in frames-per-second") - .default_value("30") - .help("Specifies the frames-per-second for the accompanying video of MicroDVD `.sub` files (MicroDVD `.sub` files store timing information as frame numbers). Only affects the reference subtitle file.")) - .arg(Arg::with_name("sub-fps-inc") - .long("sub-fps-inc") - .value_name("floating-point number in frames-per-second") - .default_value("30") - .help("Specifies the frames-per-second for the accompanying video of MicroDVD `.sub` files (MicroDVD `.sub` files store timing information as frame numbers). Only affects the incorrect subtitle file.")) - .arg(Arg::with_name("encoding-ref") - .long("encoding-ref") - .default_value("utf-8") - .help("Charset encoding of the reference subtitle file.")) - .arg(Arg::with_name("encoding-inc") - .long("encoding-inc") - .default_value("utf-8") - .help("Charset encoding of the incorrect subtitle file.")) - .after_help("This program works with .srt, .ass/.ssa, .idx and .sub files. 
The corrected file will have the same format as the incorrect file.") - .get_matches(); - - let incorrect_file_path = matches.value_of("incorrect-sub-file").unwrap(); - let reference_file_path = matches.value_of("reference-sub-file").unwrap(); - let output_file_path = matches.value_of("output-file-path").unwrap(); - - let interval: i64 = unpack_clap_number_i64(&matches, "interval")?; - if interval < 1 { - return Err(Error::from(ExpectedPositiveNumber(interval))).chain_err(|| Error::from(InvalidArgument("interval"))); - } - - let split_penalty: f64 = unpack_clap_number_f64(&matches, "split-penalty")?; - if split_penalty < 0.0 || split_penalty > 100.0 { - return Err(Error::from(ValueNotInRange(split_penalty, 0.0, 100.0))).chain_err(|| Error::from(InvalidArgument("split-penalty"))); - } - - let sub_fps_ref: f64 = unpack_clap_number_f64(&matches, "sub-fps-ref")?; - let sub_fps_inc: f64 = unpack_clap_number_f64(&matches, "sub-fps-inc")?; - - let allow_negative_timestamps = matches.is_present("allow-negative-timestamps"); - - let encoding_label_ref = matches.value_of("encoding-ref").unwrap(); - let encoding_label_inc = matches.value_of("encoding-inc").unwrap(); - - let encoding_ref = encoding::label::encoding_from_whatwg_label(encoding_label_ref) - .ok_or_else(|| { - Error::from(UnknownEncoding(encoding_label_ref.to_string())) - })?; - let encoding_inc = encoding::label::encoding_from_whatwg_label(encoding_label_inc) - .ok_or_else(|| { - Error::from(UnknownEncoding(encoding_label_inc.to_string())) - })?; - - let reference_sub_data = read_file_to_bytes(reference_file_path)?; - let incorrect_sub_data = read_file_to_bytes(incorrect_file_path)?; - - let reference_file_format = get_subtitle_format_err(reference_file_path, &reference_sub_data) - .chain_err(|| ErrorKind::FileOperation(reference_file_path.to_string()))?; - let incorrect_file_format = get_subtitle_format_err(incorrect_file_path, &incorrect_sub_data) - .chain_err(|| 
ErrorKind::FileOperation(incorrect_file_path.to_string()))?; - let output_file_format = get_subtitle_format_err(output_file_path, &incorrect_sub_data) - .chain_err(|| ErrorKind::FileOperation(output_file_path.to_string()))?; // HACK: to hint the right output format, the input data is provided - - // this program internally stores the files in a non-destructable way (so - // formatting is preserved) but has no abilty to convert between formats - if incorrect_file_format != output_file_format { - return Err( - DifferentOutputFormat( - incorrect_file_path.to_string(), - output_file_path.to_string(), - ) - .into(), - ); - } - - let timed_reference_file = parse_bytes( - reference_file_format, - &reference_sub_data, - encoding_ref, - sub_fps_inc, - ) - .chain_err(|| FileOperation(reference_file_path.to_string()))?; - let timed_incorrect_file = parse_bytes( - incorrect_file_format, - &incorrect_sub_data, - encoding_inc, - sub_fps_ref, - ) - .chain_err(|| FileOperation(incorrect_file_path.to_string()))?; - - let timings_reference = corrected_timings( - timed_reference_file.get_subtitle_entries()? - .into_iter() - .map(|subentry| subentry.timespan) - .collect(), - ); - let timings_incorrect = corrected_timings( - timed_incorrect_file.get_subtitle_entries()? 
- .into_iter() - .map(|subentry| subentry.timespan) - .collect(), - ); - - let alg_reference_timespans = timings_to_alg_timespans(&timings_reference, interval); - let alg_incorrect_timespans = timings_to_alg_timespans(&timings_incorrect, interval); - - let alg_deltas = align( - alg_incorrect_timespans.clone(), - alg_reference_timespans, - split_penalty / 100.0, - Some(Box::new(ProgressInfo::default())), - ); - let mut deltas = alg_deltas_to_timing_deltas(&alg_deltas, interval); - - // list of original subtitles lines which have the same timings - let shift_groups: Vec<(AlgTimeDelta, Vec)> = get_subtitle_delta_groups( - alg_deltas.iter() - .cloned() - .zip(timings_incorrect.iter().cloned()) - .collect(), - ); - - for (shift_group_delta, shift_group_lines) in shift_groups { - // computes the first and last timestamp for all lines with that delta - // -> that way we can provide the user with an information like - // "100 subtitles with 10min length" - let min_max_opt = shift_group_lines.iter().fold(None, |last_opt, subline| { - let new_min = subline.start; - let new_max = subline.end; - if let Some((last_min, last_max)) = last_opt { - Some((min(last_min, new_min), max(last_max, new_max))) - } else { - Some((new_min, new_max)) - } - }); - - let (min, max) = match min_max_opt { - Some(v) => v, - None => unreachable!(), - }; - - pinfo(format!( - "shifted block of {} subtitles with length {} by {}", - shift_group_lines.len(), - max - min, - alg_delta_to_delta(shift_group_delta, interval) - )); - } - - - if timings_reference.is_empty() { - println!(""); - pwarning("reference file has no subtitle lines"); - } - if timings_incorrect.is_empty() { - println!(""); - pwarning("file with incorrect subtitles has no lines"); - } - - let writing_negative_timespans = deltas.iter().zip(timings_incorrect.iter()).any(|(&delta, - ×pan)| { - (delta + timespan.start).is_negative() - }); - if writing_negative_timespans { - println!(""); - pwarning( - "some subtitles now have negative 
timings, which can cause invalid subtitle files", - ); - if allow_negative_timestamps { - pwarning( - "negative timestamps will be written to file, because you passed '-n' or '--allow-negative-timestamps'", - ); - } else { - pwarning( - "negative subtitles will therefore be set to zero by default; pass '-n' or '--allow-negative-timestamps' to disable this behavior", - ); - deltas = get_truncated_deltas(&timings_incorrect, deltas); - } - } - - // .idx only has start timepoints (the subtitle is shown until the next subtitle starts) - so retiming with gaps might - // produce errors - if output_file_format == SubtitleFormat::VobSubIdx { - println!(""); - pwarning( - "writing to an '.idx' file can lead to unexpected results due to restrictions of this format", - ); - } - - // incorrect file -> correct file - let shifted_timespans: Vec = timings_incorrect.iter() - .zip(deltas.iter()) - .map(|(×pan, &delta)| SubtitleEntry::from(timespan + delta)) - .collect(); - - // write corrected files - let mut correct_file = timed_incorrect_file.clone(); - correct_file.update_subtitle_entries(&shifted_timespans)?; - write_data_to_file(output_file_path, correct_file.to_data()?)?; - - Ok(()) -} - -fn main() { - match run() { - Ok(_) => std::process::exit(0), - Err(e) => { - perror(format!("error: {}", e)); - - for e in e.iter().skip(1) { - perror(format!("caused by: {}", e)); - } - - // The backtrace is not always generated. Try to this with `RUST_BACKTRACE=1`. 
#!/usr/bin/python3
"""Interactively download OpenSubtitles subtitles for a movie.

Usage: download_subtitles_for_movie.py <movie name>

Searches IMDB (through the OpenSubtitles XML-RPC API) for the given movie
name, asks the user which hit is the right one, then downloads every English
subtitle for that movie into ``../database/<normalized-movie-name>/``.
"""

import base64
import json
import os
import pprint
import sys
import zlib

# Kept for ad-hoc debugging of raw API responses (see the commented-out
# pp.pprint() hook in main()).
pp = pprint.PrettyPrinter(indent=4)

# Map the 'SubFormat' value reported by OpenSubtitles to the file extension
# we write the subtitle under.
SUB_FORMAT_TO_ENDING = {
    'srt': 'srt',
    'ssa': 'ass',
    'ass': 'ass',
}


def make_parents(filename):
    """Create the parent directory of *filename* if it does not exist yet."""
    parent = os.path.dirname(filename)
    if parent:
        # exist_ok avoids the check-then-create race. (The original caught
        # OSError and compared exc.errno against errno.EEXIST, but never
        # imported the errno module, so that path raised a NameError.)
        os.makedirs(parent, exist_ok=True)


def decompress(data, encoding):
    """Convert a base64-encoded, gzip-compressed subtitle file to a string.

    :param data: the base64 blob as returned by DownloadSubtitles
    :param encoding: the encoding of the original file (e.g. utf-8, latin1)
    :return: the decoded text, or None if it is not valid in *encoding*
    """
    try:
        # wbits = 16 + MAX_WBITS tells zlib to expect a gzip header.
        return zlib.decompress(base64.b64decode(data),
                               16 + zlib.MAX_WBITS).decode(encoding)
    except UnicodeDecodeError as e:
        print(e)
        return None


def download_subtitles(ost, ids, encoding, override_filenames=None,
                       output_directory='.', override_directories=None,
                       extension='srt',
                       return_decoded_data=False):
    """Download subtitle files by their OpenSubtitles file IDs.

    :param ost: a logged-in OpenSubtitles client
    :param ids: list of subtitle file IDs (at most 20 are sent)
    :param encoding: text encoding used to decode the downloaded data
    :param override_filenames: optional {id: filename} overrides
    :param output_directory: directory files are written to by default
    :param override_directories: optional {id: directory} overrides
    :param extension: extension used when no filename override exists
    :param return_decoded_data: return the decoded text instead of writing files
    :return: {id: path-or-text} of successful downloads, or None if nothing
             succeeded
    """
    override_filenames = override_filenames or {}
    override_directories = override_directories or {}
    successful = {}

    # OpenSubtitles will accept a maximum of 20 IDs for download.
    if len(ids) > 20:
        print("Cannot download more than 20 files at once.",
              file=sys.stderr)
        ids = ids[:20]

    response = ost.xmlrpc.DownloadSubtitles(ost.token, ids)
    # Status looks like '200 OK'; only the numeric part matters here.
    status = response.get('status').split()[0]
    encoded_data = response.get('data') if '200' == status else None

    if not encoded_data:
        return None

    for item in encoded_data:
        subfile_id = item['idsubtitlefile']

        decoded_data = decompress(item['data'], encoding)

        if not decoded_data:
            print("An error occurred while decoding subtitle "
                  "file ID {}.".format(subfile_id), file=sys.stderr)
        elif return_decoded_data:
            successful[subfile_id] = decoded_data
        else:
            fname = override_filenames.get(subfile_id,
                                           subfile_id + '.' + extension)
            directory = override_directories.get(subfile_id,
                                                 output_directory)
            fpath = os.path.join(directory, fname)
            make_parents(fpath)

            try:
                # 'with' guarantees the file handle is closed even if the
                # write fails partway through.
                with open(fpath, 'w', encoding='utf-8') as f:
                    f.write(decoded_data)
                successful[subfile_id] = fpath
            except IOError as e:
                print("There was an error writing file {}.".format(fpath),
                      file=sys.stderr)
                print(e)

    return successful or None


def query_yes_no(question, default="yes"):
    """Ask a yes/no question via input() and return the answer.

    :param question: string presented to the user
    :param default: presumed answer if the user just hits <Enter>; must be
        "yes" (the default), "no", or None (an explicit answer is required)
    :return: True for "yes", False for "no"
    :raises ValueError: if *default* is none of the accepted values

    Entering one of ``q``/``quit``/``exit`` terminates the whole program.
    """
    valid = {"yes": True, "y": True, "ye": True,
             "no": False, "n": False}
    exit_cmd = ['q', 'Q', 'Quit', 'quit', 'exit']
    if default is None:
        prompt = " [y/n/q] "
    elif default == "yes":
        prompt = " [Y/n/q] "
    elif default == "no":
        prompt = " [y/N/q] "
    else:
        raise ValueError("invalid default answer: '%s'" % default)

    while True:
        choice = input(question + prompt).lower()
        if default is not None and choice == '':
            return valid[default]
        elif choice in exit_cmd:
            sys.exit(0)
        elif choice in valid:
            return valid[choice]
        else:
            sys.stdout.write("Please respond with 'yes' or 'no' "
                             "(or 'y' or 'n').\n")


def main():
    # Imported lazily so the pure helpers above stay usable (and testable)
    # without the third-party dependency installed.
    from pythonopensubtitles.opensubtitles import OpenSubtitles

    if len(sys.argv) < 2:
        print('Expected video name as command line argument!')
        sys.exit(1)

    film_name = sys.argv[1]
    film_name_normalized = film_name.lower().replace(" ", "-")

    ost = OpenSubtitles()
    token = ost.login('', '')  # anonymous login
    print('OpenSubtitles token: %s' % token)
    print()

    data = ost.search_movies_on_imdb(film_name)
    for film in data['data']:
        # Skip results the API marks as not coming from its cache.
        if 'from_redis' in film and film['from_redis'] == 'false':
            continue
        print('%s [IMDB-ID: %s]' % (film['title'], film['id']))
        answer = query_yes_no('Download subtitles for this movie?')
        print()
        if answer is True:
            imdb_id = film['id']
            subtitles = ost.search_subtitles(
                [{'imdbid': imdb_id, 'sublanguageid': 'eng'}])
            for subtitle_idx, subtitle in enumerate(subtitles):
                # pp.pprint(subtitle)

                # Find the subtitle file ending (srt, ass, ...).
                sub_format = subtitle['SubFormat']
                if sub_format not in SUB_FORMAT_TO_ENDING:
                    print(json.dumps(subtitle, indent=4))
                    print('Unrecognized subtitle format \'%s\'! '
                          'Skipping this subtitle!' % sub_format)
                    continue
                # The original did a double lookup
                # (map[map[sub_format]]), which only worked by accident
                # because every value of the map is also a key.
                sub_ending = SUB_FORMAT_TO_ENDING[sub_format]

                print(json.dumps(subtitle, indent=4))

                sub_filename = '{}-{:04}.{}'.format(
                    film_name_normalized, subtitle_idx, sub_ending)
                sub_id = subtitle['IDSubtitleFile']
                result = download_subtitles(
                    ost, [sub_id],
                    subtitle['SubEncoding'],
                    override_filenames={sub_id: sub_filename},
                    output_directory='../database/%s' % film_name_normalized
                )
                print(result)

            # Stop after the first movie the user confirmed.
            # NOTE(review): the flattened source makes the exact indentation
            # of this exit ambiguous — placed inside the confirmed branch so
            # answering "no" moves on to the next search hit; confirm.
            sys.exit(0)


if __name__ == '__main__':
    main()