MMDS: A general-purpose multimodal dataset wrapper.
Project description
MMDS: A general-purpose multimodal dataset wrapper
This project is under construction, API may change from time to time.
Installation
Stable (not stable yet though)
pip install mmds
Latest
pip install mmds --pre
Example Usage
# example.py
import timeit
from mmds import MultimodalDataset, MultimodalSample
from mmds.modalities import RgbsModality, WavModality, MelModality, F0Modality
from mmds.utils.spectrogram import LogMelSpectrogram
from pathlib import Path
from multiprocessing import Manager
try:
import youtube_dl
import ffmpeg
from torchvision import transforms
except:
raise ImportError(
"This demo requires youtube_dl, ffmpeg-python and torchvision, "
"install them now: pip install youtube_dl ffmpeg-python torchvision"
)
def download():
Path("data").mkdir(exist_ok=True)
ydl_opts = {
"postprocessors": [
{
"key": "FFmpegExtractAudio",
"preferredcodec": "mp3",
"preferredquality": "192",
}
],
"postprocessor_args": ["-ar", "16000"],
"outtmpl": "data/%(id)s.%(ext)s",
"keepvideo": True,
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download(["https://www.youtube.com/watch?v=BaW_jenozKc"])
path = Path("data/BaW_jenozKc")
if not path.exists():
path.mkdir(exist_ok=True)
(
ffmpeg.input("data/BaW_jenozKc.mp4")
.filter("fps", fps="25")
.output("data/BaW_jenozKc/%06d.png", start_number=0)
.overwrite_output()
.run(quiet=True)
)
class MyMultimodalSample(MultimodalSample):
def generate_info(self):
wav_modality = self.get_modality_by_name("wav")
rgbs_modality = self.get_modality_by_name("rgbs")
return dict(
t0=0,
t1=wav_modality.duration / 10,
original_wav_seconds=wav_modality.duration,
original_rgbs_seconds=rgbs_modality.duration,
)
class MyMultimodalDataset(MultimodalDataset):
Sample = MyMultimodalSample
def main():
download()
# optional multiprocessing cache manager
manager = Manager()
dataset = MyMultimodalDataset(
["BaW_jenozKc"],
modality_factories=[
RgbsModality.create_factory(
name="rgbs",
root="data",
suffix="*.png",
sample_rate=25,
transform=transforms.Compose(
[
transforms.Resize((28, 28)),
transforms.ToTensor(),
transforms.Normalize(0.5, 1),
],
),
cache=manager.dict(),
),
WavModality.create_factory(
name="wav",
root="data",
suffix=".mp3",
sample_rate=16_000,
cache=manager.dict(),
),
MelModality.create_factory(
name="mel",
root="data",
suffix=".mel.npz",
mel_fn=LogMelSpectrogram(sample_rate=16_000),
base_modality_name="wav",
cache=manager.dict(),
),
F0Modality.create_factory(
name="f0",
root="data",
suffix=".f0.npz",
mel_fn=LogMelSpectrogram(sample_rate=16_000),
base_modality_name="wav",
cache=manager.dict(),
),
],
)
# first load
print(timeit.timeit(lambda: dataset[0], number=1))
# second load
print(timeit.timeit(lambda: dataset[0], number=1))
print(dataset[0])
if __name__ == "__main__":
main()
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
Built Distribution
File details
Details for the file mmds-0.0.1.dev20211003155625.tar.gz
.
File metadata
- Download URL: mmds-0.0.1.dev20211003155625.tar.gz
- Upload date:
- Size: 12.6 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/3.4.2 importlib_metadata/4.8.1 pkginfo/1.7.1 requests/2.26.0 requests-toolbelt/0.9.1 tqdm/4.62.3 CPython/3.9.7
File hashes
Algorithm | Hash digest | |
---|---|---|
SHA256 | bfbafd98f76f6d6192a258e595c93fa339d445ed36e8e52e42a73506fec161c6 |
|
MD5 | 6f9be84bc5ceb263b30c4eb5035c5f40 |
|
BLAKE2b-256 | da80da88d96dac8ca03bd82520661096ca32cd615dfa4809e2bf037c2c3be22f |
File details
Details for the file mmds-0.0.1.dev20211003155625-py3-none-any.whl
.
File metadata
- Download URL: mmds-0.0.1.dev20211003155625-py3-none-any.whl
- Upload date:
- Size: 15.2 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/3.4.2 importlib_metadata/4.8.1 pkginfo/1.7.1 requests/2.26.0 requests-toolbelt/0.9.1 tqdm/4.62.3 CPython/3.9.7
File hashes
Algorithm | Hash digest | |
---|---|---|
SHA256 | cabfd8c9dabae609d77d1e3eca18cf78148660721ded78e6012306ea46265370 |
|
MD5 | a2de62ea783a040543e7f8a3098342ee |
|
BLAKE2b-256 | f02341f0dbe1a182a7cbf15514c7672cc8e7da435299737de514da4c49ef4591 |