MMDS: A general-purpose multimodal dataset wrapper.
Project description
MMDS: A general-purpose multimodal dataset wrapper
This project is under construction; the API may change from time to time.
Installation
Latest
pip install mmds --pre
Example Usage
from mmds import MultimodalDataset, MultimodalSample
from mmds.modalities import RgbsModality, WavModality, MelModality, F0Modality
from mmds.utils.spectrogram import LogMelSpectrogram
from pathlib import Path
# The demo depends on three optional third-party packages. Fail early with an
# actionable message when any of them is missing. Only ImportError is caught so
# that unrelated failures (e.g. KeyboardInterrupt, or an error raised inside one
# of the packages while it is being imported) are not mislabelled as a missing
# dependency, and the original error is chained for debugging.
try:
    import youtube_dl
    import ffmpeg
    from torchvision import transforms
except ImportError as err:
    raise ImportError(
        "This demo requires youtube_dl, ffmpeg-python and torchvision, "
        "install them now: pip install youtube_dl ffmpeg-python torchvision"
    ) from err
def download():
    """Download the demo video and prepare its audio and frames under ``data/``.

    Steps:

    1. Create the ``data`` directory if it does not exist yet.
    2. Download the YouTube video ``BaW_jenozKc`` with ``youtube_dl``, running
       the ``FFmpegExtractAudio`` post-processor (mp3, quality "192", extra
       ffmpeg arguments ``-ar 16000``) and keeping the video file as well.
    3. If ``data/BaW_jenozKc`` does not exist, create it and dump the video's
       frames into it as zero-padded, zero-based PNG files at 25 fps.

    Returns:
        None. The function only produces files on disk.
    """
    Path("data").mkdir(exist_ok=True)

    ydl_opts = {
        "postprocessors": [
            {
                "key": "FFmpegExtractAudio",
                "preferredcodec": "mp3",
                "preferredquality": "192",
            }
        ],
        # Passed through to the post-processor; 16000 matches the sample rate
        # the "wav" modality is configured with in main().
        "postprocessor_args": ["-ar", "16000"],
        "outtmpl": "data/%(id)s.%(ext)s",
        # Keep the video after audio extraction: the frames are cut from it below.
        "keepvideo": True,
    }

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download(["https://www.youtube.com/watch?v=BaW_jenozKc"])

    path = Path("data/BaW_jenozKc")
    # Frame extraction is skipped entirely when the frame directory already
    # exists, so re-running the demo does not redo the work.
    if not path.exists():
        path.mkdir(exist_ok=True)
        # NOTE(review): assumes the kept video was saved with an .mp4
        # extension -- confirm for other videos/formats.
        (
            ffmpeg.input("data/BaW_jenozKc.mp4")
            .filter("fps", fps="25")
            .output("data/BaW_jenozKc/%06d.png", start_number=0)
            .overwrite_output()
            .run(quiet=True)
        )
class MyMultimodalSample(MultimodalSample):
    """Demo sample type that derives its info from the "wav" and "rgbs" modalities."""

    def generate_info(self):
        """Build the info dict for this sample.

        Looks up the modalities named "wav" and "rgbs" and returns:

        * ``t0``: always ``0``.
        * ``t1``: one tenth of the wav modality's ``duration``.
        * ``original_wav_seconds``: the wav modality's ``duration``.
        * ``original_rgbs_seconds``: the rgbs modality's ``duration``.

        NOTE(review): ``t0``/``t1`` presumably delimit the time window (in
        seconds) that is loaded for the sample -- confirm against
        ``MultimodalSample``.
        """
        wav_modality = self.get_modality_by_name("wav")
        rgbs_modality = self.get_modality_by_name("rgbs")
        return dict(
            t0=0,
            t1=wav_modality.duration / 10,
            original_wav_seconds=wav_modality.duration,
            original_rgbs_seconds=rgbs_modality.duration,
        )
class MyMultimodalDataset(MultimodalDataset):
    """Demo dataset whose ``Sample`` attribute is set to ``MyMultimodalSample``."""

    # NOTE(review): presumably the class MultimodalDataset instantiates for
    # each item -- confirm against the base class.
    Sample = MyMultimodalSample
def main():
    """Run the demo: fetch the data, build the dataset and print its first sample.

    The dataset contains the single id "BaW_jenozKc" and is configured with
    four modality factories, all rooted at ``data``:

    * "rgbs": PNG frames at 25 fps, resized to 28x28, converted to tensors
      and normalized with ``Normalize(0.5, 1)``.
    * "wav": the ``.mp3`` file at 16 kHz.
    * "mel" and "f0": ``.mel.npz`` / ``.f0.npz`` files, each given a 16 kHz
      ``LogMelSpectrogram`` as ``mel_fn`` and "wav" as the base modality.
      NOTE(review): presumably computed from the wav modality and cached
      under these suffixes -- confirm against ``mmds.modalities``.
    """
    download()

    dataset = MyMultimodalDataset(
        ["BaW_jenozKc"],
        modality_factories=[
            RgbsModality.create_factory(
                name="rgbs",
                root="data",
                suffix="*.png",
                sample_rate=25,
                transform=transforms.Compose(
                    [
                        transforms.Resize((28, 28)),
                        transforms.ToTensor(),
                        transforms.Normalize(0.5, 1),
                    ],
                ),
            ),
            WavModality.create_factory(
                name="wav",
                root="data",
                suffix=".mp3",
                sample_rate=16_000,
            ),
            MelModality.create_factory(
                name="mel",
                root="data",
                suffix=".mel.npz",
                mel_fn=LogMelSpectrogram(sample_rate=16_000),
                base_modality_name="wav",
            ),
            F0Modality.create_factory(
                name="f0",
                root="data",
                suffix=".f0.npz",
                mel_fn=LogMelSpectrogram(sample_rate=16_000),
                base_modality_name="wav",
            ),
        ],
    )

    sample = dataset[0]
    print(sample)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
Close
Hashes for mmds-0.0.1.dev20211003151540.tar.gz
| Algorithm | Hash digest |
|---|---|
| SHA256 | 8094f5fbb1cf03bdbf323499dcf1295942ca580a54afcd11ad240ccecade55d0 |
| MD5 | 8719d79dd800301a4a28abf299b933d6 |
| BLAKE2b-256 | 81b64ecd057d4b79140536307e5ea7fb1d11c9f014b916a2c50d4e39b1b4b78a |
Close
Hashes for mmds-0.0.1.dev20211003151540-py3-none-any.whl
| Algorithm | Hash digest |
|---|---|
| SHA256 | a25f3e27bd6391bdd28851654d9c238cb2772c55a5d7007f23e54e11f8d1b165 |
| MD5 | ae88ba494596459047de9dc67ccff60c |
| BLAKE2b-256 | 4d2f3f4faa89a4a7fc414d5596346c518cf875d3ed08cdfb89fac23104a60c31 |