Commit

REW CSV parsing with tab as separator. Added unknown manufacturer errors to prompt UI. Fixed SquigCrawler's source_name detection and raw data file names.
jaakkopasanen committed Nov 5, 2023
1 parent 2a60e0e commit 5ce8767
Showing 7 changed files with 68 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -111,7 +111,7 @@ my_results/
 **/data/ignore
 measurements/crinacle/*data*/
 measurements/crinacle/Raw measurements (Patreon)/
-measurements/crinacle/phone_book*.json
+measurements/*/phone_book*.json
 results/crinacle/*/*/*.csv
 measurements/Rtings/crawl_graph_data_urls.json
 measurements/*/raw_data/
11 changes: 3 additions & 8 deletions autoeq/csv.py
@@ -12,8 +12,8 @@
 float_pattern = r'-?\d+(?:\.\d+)?'
 autoeq_pattern = re.compile(rf'{header_pattern}(?:\n{float_pattern}(?:,{float_pattern})+)+')
 rew_float_pattern = rf'(?:{float_pattern}|\?)'
-rew_pattern = re.compile(rf'^(?:\*.*\n)*\* Freq\(Hz\), SPL\(dB\), Phase\(degrees\)\n(?:{rew_float_pattern}, {rew_float_pattern}, {rew_float_pattern})+\n*')
-rew_space_pattern = re.compile(rf'^(?:\*.*\n)*\* Freq\(Hz\) SPL\(dB\) Phase\(degrees\)(?:\n{rew_float_pattern} {rew_float_pattern} {rew_float_pattern})+')
+rew_pattern = re.compile(rf'^(?:\*.*\n)*\* Freq\(Hz\)(?:, ?| |\t)SPL\(dB\)(?:, ?| |\t)Phase\(degrees\)\n(?:{rew_float_pattern}(?:, ?| |\t){rew_float_pattern}(?:, ?| |\t){rew_float_pattern})+\n*')
+#rew_space_pattern = re.compile(rf'^(?:\*.*\n)*\* Freq\(Hz\) SPL\(dB\) Phase\(degrees\)(?:\n{rew_float_pattern} {rew_float_pattern} {rew_float_pattern})+')
 crinacle_pattern = re.compile(rf'[\s\n]?Frequency\tdB\tUnweighted(?:\n{float_pattern}\t{float_pattern})+[.\n]?')


@@ -86,12 +86,7 @@ def parse_csv(csv):
 
     if rew_pattern.match(csv) or crinacle_pattern.match(csv):
         # These two have all sort of junk in them but the first column is frequency and the second SPL, so all good
-        csv = '\n'.join([line for line in lines if numeric_start.match(line)])
-        lines = csv.split('\n')
-
-    if rew_space_pattern.match(csv):
-        csv = '\n'.join([line for line in lines if numeric_start.match(line) and '?' not in line])
-        csv = csv.replace(' ', '\t')
+        csv = '\n'.join([re.sub(r'(?:, ?| |\t)', '\t', line) for line in lines if numeric_start.match(line) and '?' not in line])
         lines = csv.split('\n')
 
     column_separator, decimal_separator = find_csv_separators(csv)
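A note on the parsing change above: rew_pattern now accepts comma-, space-, or tab-separated REW column headers, and the separate rew_space_pattern branch is gone because every numeric line is normalized to tab separators (and rows with unknown '?' values dropped) before find_csv_separators runs. Below is a minimal standalone sketch of that normalization step; the numeric_start pattern and the sample lines are illustrative stand-ins, not copied from the repository.

import re

# Replace each ', ', ',', ' ' or tab separator with a tab, as the re.sub in parse_csv does.
separator_sub = re.compile(r'(?:, ?| |\t)')
numeric_start = re.compile(r'^\s*-?\d')  # assumption: matches lines that begin with a number

sample_lines = [
    '* Freq(Hz)\tSPL(dB)\tPhase(degrees)',  # header comment, dropped because it is not numeric
    '20.000000\t96.774\t36.7401',           # tab-separated REW data row, kept
    '20.25, 68.335, ?',                     # row with an unknown phase value, dropped because of '?'
]
normalized = [separator_sub.sub('\t', line) for line in sample_lines
              if numeric_start.match(line) and '?' not in line]
print(normalized)  # ['20.000000\t96.774\t36.7401']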
4 changes: 4 additions & 0 deletions dbtools/crawler.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import sys
+import urllib
 from pathlib import Path
 import pandas as pd
 from rapidfuzz import fuzz
@@ -240,6 +241,9 @@ def create_prompts(self, max_prompts=100):
         for item in crawled_items:
             if not self.is_prompt_needed(item):
                 continue
+            name = item.source_name or urllib.parse.unquote(item.url.split('/')[-1])
+            if self.manufacturers.find(name)[0] is None:
+                print(f'Cannot detect manufacturer for: {name}')
             self.prompts.append(PromptListItem(NamePrompt(item, self.prompt_callback), self.switch_prompt))
             if len(self.prompts) >= max_prompts:
                 break
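Roughly how the new prompt-UI warning above behaves: when an item has no source_name yet, the percent-encoded file name at the end of its URL is decoded and used for the manufacturer lookup, and a message is printed if nothing matches. The sketch below is illustrative; only urllib.parse.unquote and the URL split mirror the actual change, while find_manufacturer, the manufacturer list, and the URL are made-up stand-ins for self.manufacturers.find().

import urllib.parse

# Hypothetical stand-in for the manufacturers NameIndex used by the crawler.
known_manufacturers = ['7th Acoustics', 'AFUL', 'Juzear']

def find_manufacturer(name):
    # Return the first known manufacturer the name starts with, else None.
    return next((m for m in known_manufacturers if name.lower().startswith(m.lower())), None)

item_source_name = None  # the crawler has not resolved a source name for this item yet
item_url = 'https://example.squig.link/data/Some%20Unknown%20Brand%20X1.txt'  # made-up URL

name = item_source_name or urllib.parse.unquote(item_url.split('/')[-1])  # 'Some Unknown Brand X1.txt'
if find_manufacturer(name) is None:
    print(f'Cannot detect manufacturer for: {name}')  # printed here, since no known manufacturer matches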
27 changes: 26 additions & 1 deletion dbtools/db.ipynb
@@ -292,7 +292,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 35,
+"execution_count": 55,
 "id": "8d357b06-b94d-4719-8aa9-ad4c17edfce4",
 "metadata": {},
 "outputs": [],
@@ -311,6 +311,31 @@
 "display(crawler.widget)"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": 57,
+"id": "acadfe6d-2b93-4b25-b8e2-c9058f416c33",
+"metadata": {},
+"outputs": [
+{
+"data": {
+"application/vnd.jupyter.widget-view+json": {
+"model_id": "5ec0a3548e164d5fa3fe0d19ff914a15",
+"version_major": 2,
+"version_minor": 0
+},
+"text/plain": [
+" 0%| | 0/37 [00:00<?, ?it/s]"
+]
+},
+"metadata": {},
+"output_type": "display_data"
+}
+],
+"source": [
+"crawler.process(new_only=True)"
+]
+},
 {
 "cell_type": "code",
 "execution_count": 14,
5 changes: 4 additions & 1 deletion dbtools/manufacturers.tsv
@@ -2,6 +2,7 @@
 1MORE
 64 Audio 64audio
 7Hz 7Hertz
+7th Acoustics
 A-Audio A Audio
 AAW Advanced AcousticsWerkes AAW (Advanced AcousticsWerkes)
 Abyss
@@ -13,6 +14,7 @@ Adam
 Advanced ADV Advanced Sound Group Advanced Sound
 Aedle
 AfterShokz
+AFUL
 Aiaiai
 AirBuds
 Akai
@@ -169,7 +171,7 @@ Ferrari
 Fidue
 FIIL
 FiiO
-Final Audio Final Audio Design Final Audio Design (FAD)
+Final Audio Final Audio Design Final Audio Design (FAD) Final
 fineEars
 Fir Audio FiR
 First Harmonic
@@ -247,6 +249,7 @@ JLab Audio
 Jomo Audio Jomo
 JQ
 Just Ear Justear
+Juzear
 JVC Massdrop x JVC
 Kaldas Research
 KBEAR KBEar
6 changes: 3 additions & 3 deletions dbtools/squig_crawler.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 
 import sys
+import urllib
 from pathlib import Path
 import numpy as np
 import json
@@ -59,7 +60,6 @@ def parse_books(self):
         Returns:
             NameIndex
         """
-        self.measurements_path.joinpath('phone_books').mkdir(parents=True, exist_ok=True)
         book_maps = {}
         for db in self.dbs:
             # 4620 measurements name index
@@ -88,7 +88,7 @@ def crawl(self):
                 anchor = row.find('a')
                 form = 'in-ear' if db['type'] == 'IEMs' else 'over-ear'
                 book = self.book_maps[form]
-                normalized_file_name = self.normalize_file_name(anchor['text'])
+                normalized_file_name = self.normalize_file_name(urllib.parse.unquote(anchor['href']))
                 item = NameItem(
                     url=f'{self.db_url(db)}/{anchor["href"]}',
                     source_name=book[normalized_file_name] if normalized_file_name in book else None,
@@ -100,7 +100,7 @@ def crawl(self):
         return self.crawl_index
 
     def raw_data_path(self, item):
-        return self.measurements_path.joinpath('raw_data', item.form, item.url.split('/')[-1])
+        return self.measurements_path.joinpath('raw_data', item.form, urllib.parse.unquote(item.url.split('/')[-1]))
 
     def get_item_from_url(self, url):
         index_item = self.name_index.find_one(url=url)
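On the unquoting above: anchor hrefs on squig.link pages are percent-encoded, so matching them against the phone book and using the URL tail as a raw-data file name only works after decoding. A small sketch of the decoded path construction; the base path, form, and URL below are made up for illustration.

import urllib.parse
from pathlib import Path

measurements_path = Path('measurements/squig')  # illustrative base path
item_form = 'in-ear'
item_url = 'https://example.squig.link/data/AFUL%20Performer%205.txt'  # illustrative crawled URL

encoded_name = item_url.split('/')[-1]             # 'AFUL%20Performer%205.txt'
decoded_name = urllib.parse.unquote(encoded_name)  # 'AFUL Performer 5.txt'

raw_data_path = measurements_path.joinpath('raw_data', item_form, decoded_name)
print(raw_data_path)  # measurements/squig/raw_data/in-ear/AFUL Performer 5.txt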
28 changes: 27 additions & 1 deletion tests/test_csv.py
@@ -102,12 +102,31 @@
 19999.498 ? 0
 """
 
+csv11 = """* Measurement data measured by REW V5.20.3
+* Source: USB-C to 3.5mm Headphone Jack Adapter, USB-C to 3.5mm Headphone Jack Adapter, 0, volume: 0.138. Timing signal peak level -19.8 dBFS, measurement signal peak level -19.3 dBFS
+* Format: 256k Log Swept Sine, 1 sweep at -12.0 dBFS using an acoustic timing reference
+* Dated: Jul 10, 2023 9:36:37 PM
+* REW Settings:
+* C-weighting compensation: Off
+* Target level: 75.0 dB
+* Note: Delay -0.1027 ms (-35 mm, -1.39 in) using estimated IR delay relative to Acoustic reference on USB-C to 3.5mm Headphone Jack Adapter L with no timing offset
+* Measurement: Duo L Jul 10
+* Smoothing: 1/12 octave
+* Frequency Step: 1/48 octave
+* Start Frequency: 20.000 Hz
+*
+* Freq(Hz) SPL(dB) Phase(degrees)
+20.000000 96.774 36.7401
+20.299999 96.813 36.0714
+20.600000 96.843 35.3904
+"""
+
 
 class TestCsv(unittest.TestCase):
     def test_regex(self):
         pattern_asserts = [
             (csv1, autoeq_pattern), (csv2, None), (csv3, None), (csv4, None), (csv5, rew_pattern), (csv6, None),
-            (csv7, None), (csv8, autoeq_pattern), (csv9, None),
+            (csv7, None), (csv8, autoeq_pattern), (csv9, None), (csv10, rew_pattern), (csv11, rew_pattern)
         ]
         for s, pattern in pattern_asserts:
             if pattern:
@@ -192,3 +211,10 @@ def test_parse_csv10(self):
         self.assertEqual([20.0, 20.25, 19998.498], d['frequency'], )
         self.assertIn('raw', d)
         self.assertEqual([68.334, 68.335, 27.402], d['raw'])
+
+    def test_parse_csv11(self):
+        d = parse_csv(csv11)
+        self.assertIn('frequency', d)
+        self.assertEqual([20.0, 20.299999, 20.600000], d['frequency'], )
+        self.assertIn('raw', d)
+        self.assertEqual([96.774, 96.813, 96.843], d['raw'])
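For reference, a usage sketch built from the expectations in test_parse_csv11 above, with a shortened tab-separated REW export; the import path and the exact fixture whitespace are assumptions, not copied from the test file.

from autoeq.csv import parse_csv

rew_export = (
    '* Measurement data measured by REW V5.20.3\n'
    '* Freq(Hz)\tSPL(dB)\tPhase(degrees)\n'
    '20.000000\t96.774\t36.7401\n'
    '20.299999\t96.813\t36.0714\n'
)
d = parse_csv(rew_export)
print(d['frequency'])  # [20.0, 20.299999]
print(d['raw'])        # [96.774, 96.813]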
