Skip to content

Commit

Permalink
Violin pitch extraction and few improvements (#98)
Browse files Browse the repository at this point in the history
* kwargs for load model, violin pitch

* wrongly added model files :)

* set max numpy versions

* remove sample rate from diffsep config

* several fixes, mini-bug, and typos around

* use proper arg names for essentia melodia

* fix issues in READMEs, add violin model to docs

* sampleRate-->sample_rate in deepsrgm

* sampleRate-->sample_rate again...
  • Loading branch information
genisplaja authored Oct 16, 2024
1 parent c848f7d commit 7bed0df
Show file tree
Hide file tree
Showing 30 changed files with 771 additions and 154 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ compiam/models/structure/dhrupad_bandish_segmentation/annotations/section_bounda
compiam/models/structure/dhrupad_bandish_segmentation/annotations/cycle_boundaries/
tests/resources/mir_datasets/*
compiam/models/separation/
compiam/models/structure/
compiam/models/melody/
compiam/models/rhythm/

# For next release
compiam/models/rhythm/4wayTabla/
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ Please check the [contribution guidelines](https://mtg.github.io/compIAM/source/
We include, in this repo, [example notebooks](https://github.com/MTG/compIAM/tree/master/notebooks) for users to better understand how to use `compiam` and also showcase

## License
compIAM is Copyright 2023 Music Technology Group - Universitat Pompeu Fabra
compIAM is Copyright 2024 Music Technology Group - Universitat Pompeu Fabra

compIAM is released under the terms of the GNU Affero General Public License (v3 or later). See the COPYING file for more information. For the case of a particular tool or implementation that has a specific different licence, this is explicitly specified in the files related to this tool, and these terms must be followed.

Expand Down
5 changes: 3 additions & 2 deletions compiam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from compiam.exceptions import ModelNotDefinedError


def load_model(model_name, data_home=None, models_dict=models_dict):
def load_model(model_name, data_home=None, models_dict=models_dict, **kwargs):
"""Wrapper for loading pre-trained models.
:param model_name: name of the model, extractors, or algorithm to load.
Expand Down Expand Up @@ -41,6 +41,7 @@ def load_model(model_name, data_home=None, models_dict=models_dict):
)

module = getattr(import_module(m_dict["module_name"]), m_dict["class_name"])
m_dict["kwargs"].update(kwargs)
return module(**m_dict["kwargs"])


Expand Down Expand Up @@ -103,7 +104,7 @@ def get_model_info(model_key):
"""Get complete info in data/models_dict for a particular pre-trained model
:param model_key: model key from models_dict
:returns: infomation about a particular model.
:returns: information about a particular model.
"""
if model_key not in list(models_dict.keys()):
raise ValueError(
Expand Down
12 changes: 11 additions & 1 deletion compiam/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@
"sample_rate": 8000,
},
},
"melody:ftaresnet-carnatic-violin": {
"module_name": "compiam.melody.pitch_extraction.ftaresnet_carnatic",
"class_name": "FTAResNetCarnatic",
"kwargs": {
"model_path": os.path.join(
"models", "melody", "ftanet", "fta_carnatic_violin", "FTA-ResNet_best_version.pth"
),
"sample_rate": 44100,
},
},
"melody:cae-carnatic": {
"module_name": "compiam.melody.pattern.sancara_search",
"class_name": "CAEWrapper",
Expand Down Expand Up @@ -114,7 +124,7 @@
"saraga-8",
"saraga-8.ckpt-1",
),
"sample_rate": 22050,
#"sample_rate": 22050, # Already contained in the model config
},
},
}
Expand Down
2 changes: 1 addition & 1 deletion compiam/dunya/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

Built within the framework of the [CompMusic project](https://compmusic.upf.edu/), the Dunya corpora cover several World music traditions and can be accessed in different ways. In compIAM we provide tools to easily access the Carnatic and Hindustani corpora in Dunya.

When initializing the corpora access through compIAM you will be asked to provide an *access token*. To get the *access token* you need to register to the [Dunya web portal](https://dunya.compmusic.upf.edu/). Once your registration is validation by the CompMusic team, you can access your user account information where an access token is provided. Use this token to intialize the ``compiam.Corpora`` objects and access the corpora.
When initializing the corpora access through compIAM you will be asked to provide an *access token*. To get the *access token* you need to register to the [Dunya web portal](https://dunya.compmusic.upf.edu/). Once your registration is validated by the CompMusic team, you can access your user account information where an access token is provided. Use this token to initialize the ``compiam.Corpora`` objects and access the corpora.
4 changes: 2 additions & 2 deletions compiam/dunya/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ def save_annotation(
)

def download_mp3(self, recording_id, output_dir):
"""Download the mp3 of a document and save it to the specificed directory.
"""Download the mp3 of a document and save it to the specified directory.
:param recording_id: The MBID of the recording.
:param output_dir: Where to save the mp3 to.
Expand All @@ -338,7 +338,7 @@ def download_mp3(self, recording_id, output_dir):
return name

def download_concert(self, concert_id, output_dir):
"""Download the mp3s of all recordings in a concert and save them to the specificed directory.
"""Download the mp3s of all recordings in a concert and save them to the specified directory.
:param concert_id: The MBID of the concert.
:param output_dir: Where to save the mp3s to.
Expand Down
2 changes: 2 additions & 0 deletions compiam/dunya/conn.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def set_token(token):


def _dunya_url_query(url, extra_headers=None):
"""Use requests to query from a given url in Dunya."""
logger.debug("query to '%s'" % url)
if not TOKEN:
raise ConnectionError("You need to authenticate with `set_token`")
Expand All @@ -71,6 +72,7 @@ def _dunya_url_query(url, extra_headers=None):


def _make_url(path, **kwargs):
"""Create URL from path."""
if "://" in HOSTNAME:
protocol, hostname = HOSTNAME.split("://")
else:
Expand Down
12 changes: 8 additions & 4 deletions compiam/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,18 @@ def read_csv(file_path):
return output[~np.isnan(output)]


def save_object(obj, filename):
with open(filename, "wb") as outp: # Overwrites any existing file.
def save_object(obj, file_path):
    """Serialize an object to disk with pickle.

    :param obj: the object to be pickled.
    :param file_path: destination path; an existing file is overwritten.
    """
    with open(file_path, "wb") as pickle_file:
        pickle.dump(obj, pickle_file, pickle.HIGHEST_PROTOCOL)


def write_json(j, path):
"""
Write json, <j>, to <path>
"""Write json, <j>, to <path>
:param j: json
:type j: json
Expand Down
16 changes: 9 additions & 7 deletions compiam/melody/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@
|---------------------------|----------------------------------|-----------|
| Melodia | Predominant pitch extraction | [1] |
| FTANet-Carnatic | Vocal pitch extraction | [2] |
| TonicIndianArtMusic | Tonic idendification | [3] |
| CAE + Matrix profile | Melodic pattern discovery | [4] |
| DEEPSRGM | DL-based raga recognition | [5] |
| FTAResNet-Carnatic-Violin | Violin pitch extraction | [3] |
| TonicIndianArtMusic       | Tonic identification             | [4]       |
| CAE + Matrix profile | Melodic pattern discovery | [5] |
| DEEPSRGM | DL-based raga recognition | [6] |


[1] J. Salamon and E. Gómez, "Melody extraction from polyphonic music signals using pitch contour characteristics", IEEE Transactions on Audio, Speech, and Language Processing, vol. 20, no. 6, pp. 1759–1770, 2012.

[2] G. Plaja-Roglans, T. Nuttall, L. Pearson, X. Serra, M. Miron, "Repertoire-Specific Vocal Pitch Data Generation for Improved Melodic Analysis of Carnatic Music", Transactions of the International Society for Music Information Retrieval, vol. 6, no. 1, pp. 13–26, 2023.
[2, 3] G. Plaja-Roglans, T. Nuttall, L. Pearson, X. Serra, M. Miron, "Repertoire-Specific Vocal Pitch Data Generation for Improved Melodic Analysis of Carnatic Music", Transactions of the International Society for Music Information Retrieval, vol. 6, no. 1, pp. 13–26, 2023.

[3] J. Salamon, S. Gulati, and X. Serra, "A Multipitch Approach to Tonic Identification in Indian Classical Music", in International Society for Music Information Retrieval Conference (ISMIR 12), 2012.
[4] J. Salamon, S. Gulati, and X. Serra, "A Multipitch Approach to Tonic Identification in Indian Classical Music", in International Society for Music Information Retrieval Conference (ISMIR 12), 2012.

[4] T. Nuttall, G. Plaja-Roglans, L. Pearson, and X. Serra, "In search of sañcaras: tradition-informed repeated melodic pattern recognition in Carnatic Music", in International Society for Music Information Retrieval Conference (ISMIR 22), 2022.
[5] T. Nuttall, G. Plaja-Roglans, L. Pearson, and X. Serra, "In search of sañcaras: tradition-informed repeated melodic pattern recognition in Carnatic Music", in International Society for Music Information Retrieval Conference (ISMIR 22), 2022.

[5] S. Madhusudhan and G. Chowdhary, "DeepSRGM - Sequence Classification and Ranking in Indian Classical Music Via Deep Learning", in International Society for Music Information Retrieval Conference (ISMIR 20), 2020.
[6] S. Madhusudhan and G. Chowdhary, "DeepSRGM - Sequence Classification and Ranking in Indian Classical Music Via Deep Learning", in International Society for Music Information Retrieval Conference (ISMIR 20), 2020.
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@


class Complex(nn.Module):
"""CAE model."""
def __init__(self, n_in, n_out, dropout=0.5, learn_norm=False):
super(Complex, self).__init__()

Expand Down
1 change: 1 addition & 0 deletions compiam/melody/pitch_extraction/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from compiam.melody.pitch_extraction.melodia import Melodia
from compiam.melody.pitch_extraction.ftanet_carnatic import FTANetCarnatic
from compiam.melody.pitch_extraction.ftaresnet_carnatic import FTAResNetCarnatic


# Show user the available tools
Expand Down
3 changes: 2 additions & 1 deletion compiam/melody/pitch_extraction/ftanet_carnatic/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def __init__(self, model_path=None, sample_rate=8000, gpu="-1"):
:param model_path: path to file to the model weights.
:param sample_rate: Sample rate to which the audio is sampled for extraction.
:param gpu: Id of the available GPU to use (-1 by default, to run on CPU), use string: '0', '1', etc.
"""
### IMPORTING OPTIONAL DEPENDENCIES
try:
Expand Down Expand Up @@ -276,7 +277,7 @@ def predict(
(defaulted to 5, increase if enough computational power, reduce if
needed).
:param out_step: particular time-step duration if needed at output
:param gpu: Id of the available GPU to use (-1 by default, to run on CPU)
:param gpu: Id of the available GPU to use (-1 by default, to run on CPU), use string: '0', '1', etc.
:returns: a 2-D list with time-stamps and pitch values per timestamp.
"""
## Setting up GPU if any
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,3 +112,18 @@ def get_est_arr(model, x_list, y_list, batch_size):
est_arr = est(preds, CenFreq, y)

return est_arr


def std_normalize(data):
    """Standardize the input data to zero mean and unit variance.

    The mean and standard deviation are computed over all elements of
    ``data`` (no axis is given) in float64, and the result is cast to
    float32.

    :param data: input data (numpy array or array-like, e.g. a list).
    :returns: standardized data as a float32 array. If the standard
        deviation is zero, the data is only mean-centered.
    """
    # asanyarray accepts plain sequences while preserving ndarray subclasses;
    # astype returns a new array, so the caller's input is never modified.
    data = np.asanyarray(data).astype(np.float64)
    if data.size == 0:
        # Nothing to normalize; avoids mean/std warnings on empty input.
        return data.astype(np.float32)
    mean = np.mean(data)
    std = np.std(data)
    data = data - mean
    if std != 0.0:
        data = data / std
    return data.astype(np.float32)
Loading

0 comments on commit 7bed0df

Please sign in to comment.