From f7ec5ad500ef82c65fe71c1ef296835695d760bf Mon Sep 17 00:00:00 2001 From: Andrei Horak Date: Wed, 10 Jul 2024 18:30:05 +0200 Subject: [PATCH 1/6] More improvements on WAV parser --- lib/parsers/wav_parser.rb | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/lib/parsers/wav_parser.rb b/lib/parsers/wav_parser.rb index 2c1e4cfa..7c238353 100644 --- a/lib/parsers/wav_parser.rb +++ b/lib/parsers/wav_parser.rb @@ -21,30 +21,27 @@ def call(io) # The specification does not require the Format chunk to be the first chunk # after the RIFF header. # https://www.mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html + fmt_processed = false + data_processed = false fmt_data = {} data_size = 0 - total_sample_frames = nil loop do chunk_type, chunk_size = safe_read(io, 8).unpack('a4l') case chunk_type when 'fmt ' # watch out: the chunk ID of the format chunk ends with a space fmt_data = unpack_fmt_chunk(io, chunk_size) + fmt_processed = true when 'data' data_size = chunk_size - when 'fact' - total_sample_frames = safe_read(io, 4).unpack('l').first - safe_skip(io, chunk_size - 4) + data_processed = true else # Skip this chunk until a known chunk is encountered safe_skip(io, chunk_size) end - rescue FormatParser::IOUtils::InvalidRead - # We've reached EOF, so it's time to make the most out of the metadata we - # managed to parse - break + break if fmt_processed && data_processed end - file_info(fmt_data, data_size, total_sample_frames) + file_info(fmt_data, data_size) end def unpack_fmt_chunk(io, chunk_size) @@ -70,9 +67,9 @@ def unpack_fmt_chunk(io, chunk_size) } end - def file_info(fmt_data, data_size, sample_frames) + def file_info(fmt_data, data_size) # NOTE: Each sample includes information for each channel - sample_frames ||= data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8) if fmt_data[:channels] > 0 && fmt_data[:bits_per_sample] > 0 + sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8) if fmt_data[:channels] > 0 && fmt_data[:bits_per_sample] > 0 duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f if fmt_data[:sample_rate] > 0 FormatParser::Audio.new( format: :wav, From a696652d735b03276f8ec71d68afeac3c7410c89 Mon Sep 17 00:00:00 2001 From: Andrei Horak Date: Thu, 11 Jul 2024 11:33:32 +0200 Subject: [PATCH 2/6] Calculate duration based on byte rate --- lib/parsers/wav_parser.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/parsers/wav_parser.rb b/lib/parsers/wav_parser.rb index 7c238353..87894c61 100644 --- a/lib/parsers/wav_parser.rb +++ b/lib/parsers/wav_parser.rb @@ -70,7 +70,7 @@ def unpack_fmt_chunk(io, chunk_size) def file_info(fmt_data, data_size) # NOTE: Each sample includes information for each channel sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8) if fmt_data[:channels] > 0 && fmt_data[:bits_per_sample] > 0 - duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f if fmt_data[:sample_rate] > 0 + duration_in_seconds = data_size / fmt_data[:byte_rate].to_f if fmt_data[:byte_rate] > 0 FormatParser::Audio.new( format: :wav, num_audio_channels: fmt_data[:channels], From 9db8b734094a4e7c3ece247a65fc5714a965575d Mon Sep 17 00:00:00 2001 From: Andrei Horak Date: Thu, 11 Jul 2024 11:38:34 +0200 Subject: [PATCH 3/6] Adjust test --- spec/parsers/wav_parser_spec.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/spec/parsers/wav_parser_spec.rb b/spec/parsers/wav_parser_spec.rb index 1dc7da71..336bf7fc 100644 --- a/spec/parsers/wav_parser_spec.rb +++ b/spec/parsers/wav_parser_spec.rb @@ -20,7 +20,8 @@ expect(parse_result.format).to eq(:wav) expect(parse_result.num_audio_channels).to eq(1) expect(parse_result.audio_sample_rate_hz).to eq(8000) - expect(parse_result.media_duration_frames).to eq(110488) + # Fixture does not define bits_per_sample in the fmt chunk + expect(parse_result.media_duration_frames).to be_nil expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81) end From ce1d58653277180151ac986489c47e3974c45c33 Mon Sep 17 00:00:00 2001 From: Andrei Horak Date: Thu, 11 Jul 2024 12:02:34 +0200 Subject: [PATCH 4/6] Use sample_rate for determining duration --- lib/parsers/wav_parser.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/parsers/wav_parser.rb b/lib/parsers/wav_parser.rb index 87894c61..42c53e93 100644 --- a/lib/parsers/wav_parser.rb +++ b/lib/parsers/wav_parser.rb @@ -70,7 +70,7 @@ def unpack_fmt_chunk(io, chunk_size) def file_info(fmt_data, data_size) # NOTE: Each sample includes information for each channel sample_frames = data_size / (fmt_data[:channels] * fmt_data[:bits_per_sample] / 8) if fmt_data[:channels] > 0 && fmt_data[:bits_per_sample] > 0 - duration_in_seconds = data_size / fmt_data[:byte_rate].to_f if fmt_data[:byte_rate] > 0 + duration_in_seconds = sample_frames / fmt_data[:sample_rate].to_f if sample_frames && fmt_data[:byte_rate] > 0 FormatParser::Audio.new( format: :wav, num_audio_channels: fmt_data[:channels], From cc4b07eca6bbb3d037021e734d5aa0b2b331b9e4 Mon Sep 17 00:00:00 2001 From: Andrei Horak Date: Thu, 11 Jul 2024 12:11:33 +0200 Subject: [PATCH 5/6] Adjust test --- spec/parsers/wav_parser_spec.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/parsers/wav_parser_spec.rb b/spec/parsers/wav_parser_spec.rb index 336bf7fc..56feee09 100644 --- a/spec/parsers/wav_parser_spec.rb +++ b/spec/parsers/wav_parser_spec.rb @@ -22,7 +22,7 @@ expect(parse_result.audio_sample_rate_hz).to eq(8000) # Fixture does not define bits_per_sample in the fmt chunk expect(parse_result.media_duration_frames).to be_nil - expect(parse_result.media_duration_seconds).to be_within(0.01).of(13.81) + expect(parse_result.media_duration_seconds).to be_nil end it 'returns correct info about pcm files with more channels' do From c379942de917f807ec07087eaec8ce04fdf3a95b Mon Sep 17 00:00:00 2001 From: Andrei Horak Date: Thu, 11 Jul 2024 13:40:34 +0200 Subject: [PATCH 6/6] Bump version --- CHANGELOG.md | 3 +++ lib/format_parser/version.rb | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc6d9ef2..3826e8ff 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,6 @@ +## 2.10.0 +* Improve WAV parser by focusing on performance rather than on attempting a best-effort when extracting metadata from files that do not strictly follow the format spec. + ## 2.9.0 * Improve WAV parser by performing a best-effort when extracting metadata from files that do not strictly follow the format spec. diff --git a/lib/format_parser/version.rb b/lib/format_parser/version.rb index f65176ac..68a98111 100644 --- a/lib/format_parser/version.rb +++ b/lib/format_parser/version.rb @@ -1,3 +1,3 @@ module FormatParser - VERSION = '2.9.0' + VERSION = '2.10.0' end