-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathalign_score_audios.py
278 lines (242 loc) · 9.87 KB
/
align_score_audios.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
"""
NAME
===============================
Align Score Audios (align_score_audios.py)
BY
===============================
Matthew Blessing
LICENCE:
===============================
Creative Commons Attribution-ShareAlike 4.0 International License
https://creativecommons.org/licenses/by-sa/4.0/
ABOUT:
===============================
This script aligns a set of audio files to a score.
It requires a score's MuseScore or MusicXML file and a set of audio
files (.mp3, .wav, or .flac).
There are two ways to provide arguments when running this script from
the command line:
1. Providing audio file paths with -a:
'python3 align_score_audios.py {score file} -a {audio file 1} ...'
2. Providing audio file data with -f:
'python3 align_score_audios.py {score file} -f {audio data file}',
where the audio data file contains, e.g.:
'''
UK ldn_symph_orc.mp3 00:00:07 00:05:50
US ptsbrg_symph_orc.mp3
GER brln_symph_orc.mp3 00:12:23
'''
Notes:
Each line contains audio info separated by tabs.
Audio files have:
- A unique ID for the alignment table.
- A file path or URL.
- (Optional) A start timestamp.
- (Optional) An end timestamp.
Audio files can either have no specified time range (e.g., US
in the above example), only a start timestamp (e.g., GER), or
both start and end timestamps (e.g., UK).
"""
from __future__ import annotations
import argparse
import time
import datetime
from pathlib import Path
from hauptstimme.alignment.score_audio_alignment import *
from hauptstimme.utils import get_compressed_measure_map, ms3_convert
from hauptstimme.types import AudioData
from typing import Tuple, List
def _parse_timestamp(timestamp: str) -> datetime.time:
    """
    Convert an hh:mm:ss string into a `datetime.time` object.

    Args:
        timestamp: The timestamp string to parse.

    Returns:
        The parsed time.

    Raises:
        ValueError: If the timestamp is not given in hh:mm:ss format.
    """
    try:
        return datetime.datetime.strptime(timestamp, "%H:%M:%S").time()
    except ValueError as err:
        raise ValueError(
            f"Error: {timestamp} is not in hh:mm:ss format."
        ) from err


def validate_args(
    args: argparse.Namespace
) -> Tuple[Path, Path, Path, List[AudioData]]:
    """
    Validate the arguments parsed from the command line.

    The score path is validated and its counterpart file (MusicXML for
    a MuseScore input, or MuseScore for a MusicXML input) is created
    with `ms3_convert` if it does not exist yet. A measure map is
    created with `get_compressed_measure_map` if it is missing. Each
    audio entry is then parsed; entries without an identifier trigger
    an interactive prompt. A summary of the resolved files is printed
    before returning.

    Args:
        args: An object holding the arguments parsed from the command
            line. The attributes `score`, `audios` and `audios_file`
            are read.

    Returns:
        score_mscz: The score's MuseScore file path.
        score_mxl: The score's MusicXML file path.
        score_mm: The score's measure map file path.
        audios: A 2D list containing a list for each audio
            file that contains:
                audio_id: An identifier for the audio file.
                audio_path: The path to or URL for the audio file.
                A time range to extract from the audio file for
                alignment, specified by:
                    start: A start timestamp (or None).
                    end: An end timestamp (or None).
                desc: A description of which portion of the audio is to
                    be used.

    Raises:
        ValueError: If the score file provided is not a .mscz or .mxl
            file.
        ValueError: If both the -a and -f arguments are missing.
        ValueError: If the start or end timestamp for an audio file is
            not given in hh:mm:ss format.
        ValueError: If no valid audio files were provided.
    """
    score_file = validate_path(args.score)
    score_file_dir = score_file.parent

    if score_file.suffix == ".mscz":
        score_mscz = score_file
        # Get MusicXML file
        score_mxl = score_file.with_suffix(".mxl")
        if not score_mxl.exists():
            print("Warning: The provided score has no MusicXML file.")
            print("Creating MusicXML file...")
            ms3_convert(
                score_file_dir, "mscz", "mxl", score_file.stem
            )
    elif score_file.suffix == ".mxl":
        score_mxl = score_file
        # Get MuseScore file
        score_mscz = score_file.with_suffix(".mscz")
        if not score_mscz.exists():
            print("Warning: The provided score has no MuseScore file.")
            print("Creating MuseScore file...")
            ms3_convert(
                score_file_dir, "mxl", "mscz", score_file.stem
            )
    else:
        raise ValueError("Error: The score file provided requires a " +
                         ".mscz or .mxl extension.")

    # Get measure map
    score_mm = score_file.with_suffix(".mm.json")
    if not score_mm.exists():
        print("Warning: The provided score has no measure map.")
        print("Creating measure map...")
        get_compressed_measure_map(score_mscz)

    if args.audios:
        audios_data = args.audios
    elif args.audios_file:
        # Use a context manager so the file handle is always released
        with open(args.audios_file, "r") as audios_file:
            audios_data = audios_file.readlines()
    else:
        raise ValueError("Error: Both the -a and -f arguments are missing.")

    audios = []
    for audio_data in audios_data:
        audio_data = audio_data.strip()
        if not audio_data:
            # Blank lines carry no audio information
            continue

        # Audio data from -f will contain info separated by tabs
        fields = audio_data.split("\t")
        num_args = len(fields)

        audio_id = None
        start = None
        end = None
        if num_args == 1:
            audio_path = fields[0]
            desc = "full audio"
        elif num_args == 2:
            audio_id, audio_path = fields
            desc = "full audio"
        elif num_args <= 4:
            audio_id, audio_path, start = fields[:3]
            desc = f"{start} onwards"
            if num_args == 4:
                end = fields[3]
                desc = f"{start}-{end}"
        else:
            # More than ID, path, start and end: the entry is malformed
            print(f"Warning: Excluding '{fields[1]}' due to invalid data " +
                  f"in '{args.audios_file}'. Please run with argument -h " +
                  "for more information.")
            continue

        if not audio_path.endswith((".mp3", ".wav", ".flac")):
            print(f"Warning: Excluding '{audio_path}' as it is not a " +
                  ".mp3, .wav or .flac file.")
            continue

        if audio_id is None:
            # Audio has no identifier, but this is needed
            audio_id = input("Enter a string to represent the " +
                             f"audio file '{audio_path}' in the " +
                             "alignment table (e.g., 'Ldn_Symph_Orc'" +
                             " or 'Karajan1950').\n")
        if start is not None:
            start = _parse_timestamp(start)
        if end is not None:
            end = _parse_timestamp(end)

        audios.append([audio_id, audio_path, start, end, desc])

    if not audios:
        raise ValueError("Error: No valid audio files were provided.")

    audios_string = ", ".join(
        [f"{audio[0]} - '{audio[1]}' ({audio[4]})" for audio in audios]
    )

    print(f"MuseScore file: '{score_mscz}', \n" +
          f"MusicXML file: '{score_mxl}', \n" +
          f"Measure map file: '{score_mm}', \n" +
          f"Audio files: {audios_string}.")

    # Brief pause after printing the summary
    time.sleep(1)

    return score_mscz, score_mxl, score_mm, audios
def get_args() -> Tuple[Path, Path, Path, List[AudioData]]:
    """
    Obtain the validated set of arguments parsed from the command line.

    Builds the argument parser (a positional score path plus the
    optional -a/--audios and -f/--audios_file arguments), parses the
    command line and passes the result to `validate_args`.

    Returns:
        score_mscz: The score's MuseScore file path.
        score_mxl: The score's MusicXML file path.
        score_mm: The score's measure map file path.
        audios: A 2D list containing a list for each audio file that
            contains:
                audio_id: An identifier for the audio file.
                audio_path: The path to or URL for the audio file.
                A time range to extract from the audio file for
                alignment, specified by:
                    start: A start timestamp.
                    end: An end timestamp.
                desc: A description of which portion of the audio is to
                    be used.
    """
    parser = argparse.ArgumentParser(
        description=("Align one or more audio files to a score, producing " +
                     "an alignment table.\nMust provide the score's " +
                     "MuseScore file (.mscz) or MusicXML (.mxl) file, as " +
                     "well as either the -a or -f argument."),
        # Keep the explicit newlines written in the help strings
        formatter_class=argparse.RawTextHelpFormatter)

    parser.add_argument(
        "score",
        help=("The path to the score's MuseScore file (.mscz) or MusicXML " +
              "file (.mxl).")
    )
    parser.add_argument(
        "-a",
        "--audios",
        nargs="+",
        help=("The paths (or URLs) to the audio files to align to the score" +
              ". Either .mp3, .wav or .flac.")
    )
    parser.add_argument(
        "-f",
        "--audios_file",
        help=("The path to a .txt file containing the following information" +
              " for each audio file on a separate line:\n" +
              "- A unique string to identify the audio file in the " +
              "alignment table (e.g., 'Ldn_Symph_Orc' or 'Karajan1950').\n" +
              "- The audio file path or URL.\n" +
              "- (Optional) Start and end timestamps (hh:mm:ss) " +
              "indicating that only a subset of the audio should be " +
              "aligned to the score. " +
              "Either only the start timestamp should be provided, or both " +
              "the start and end.")
    )

    args = parser.parse_args()

    return validate_args(args)
if __name__ == "__main__":
    # Script entry point: greet the user, gather the validated command
    # line arguments, then run the alignment.
    print("\nWelcome to score-audio alignment!")
    parsed = get_args()
    score_mscz, score_mxl, score_mm, audios = parsed
    # The MuseScore path is not passed on to the alignment call
    align_score_audios(score_mxl, score_mm, audios)