MLBOM documentation tool
Project description
MLBOMDoc
MLBOMDoc is a human-readable document generator for an ML-BOM (ML Bill of Materials). MLBOMs document Machine Learning model components which are typically contained within an SBOM (Software Bill of Materials). MLBOMs are supported for CycloneDX.
Installation
To install use the following command:
pip install mlbomdoc
Alternatively, just clone the repo and install dependencies using the following command:
pip install -U -r requirements.txt
The tool requires Python 3 (3.8+). It is recommended to use a virtual Python environment, especially
if you are using different versions of Python. virtualenv is a tool for setting up virtual Python environments, which
allows you to have all the dependencies for the tool set up in a single environment, or to have different environments set
up for testing using different versions of Python.
Usage
usage: mlbomdoc [-h] [-i INPUT_FILE] [--debug] [-f {console,json,markdown,pdf}] [-o OUTPUT_FILE] [-V]
MLBOMdoc generates documentation for a MLBOM.
options:
-h, --help show this help message and exit
-V, --version show program's version number and exit
Input:
-i INPUT_FILE, --input-file INPUT_FILE
Name of MLBOM file
Output:
--debug add debug information
-f {console,json,markdown,pdf}, --format {console,json,markdown,pdf}
Output format (default: output to console)
-o OUTPUT_FILE, --output-file OUTPUT_FILE
output filename (default: output to stdout)
Operation
The --input-file option is used to specify the MLBOM to be processed. The format of the MLBOM is determined according to
the following filename conventions.
SBOM | Format | Filename extension |
---|---|---|
CycloneDX | JSON | .json |
The --output-file option is used to control the destination of the output generated by the tool. The
default is to report to the console, but the output can also be stored in a file (specified using the --output-file option).
Example
Given the following MLBOM (test.json), the following output is produced to the console.
NOTE that the data is purely fictitious in order to demonstrate the capability of the tool.
{
"$schema": "http://cyclonedx.org/schema/bom-1.5.schema.json",
"bomFormat": "CycloneDX",
"specVersion": "1.5",
"serialNumber": "urn:uuid:997191f5-6c2b-4572-9a73-5e0f2d03cedd",
"version": 1,
"metadata": {
"timestamp": "2024-01-02T11:02:22Z",
"tools": {
"components": [
{
"name": "lib4sbom",
"version": "0.6.0",
"type": "application"
}
]
},
"component": {
"type": "application",
"bom-ref": "CDXRef-DOCUMENT",
"name": "MLApp"
}
},
"components": [
{
"type": "library",
"bom-ref": "1-glibc",
"name": "glibc",
"version": "2.15",
"supplier": {
"name": "gnu"
},
"cpe": "cpe:/a:gnu:glibc:2.15",
"licenses": [
{
"license": {
"id": "GPL-3.0-only",
"url": "https://www.gnu.org/licenses/gpl-3.0-standalone.html"
}
}
]
},
{
"type": "operating-system",
"bom-ref": "2-almalinux",
"name": "almalinux",
"version": "9.0",
"supplier": {
"name": "alma"
},
"cpe": "cpe:/o:alma:almalinux:9.0",
"licenses": [
{
"license": {
"id": "Apache-2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
}
}
]
},
{
"type": "library",
"bom-ref": "3-glibc",
"name": "glibc",
"version": "2.29",
"supplier": {
"name": "gnu"
},
"cpe": "cpe:/a:gnu:glibc:2.29",
"licenses": [
{
"license": {
"id": "GPL-3.0-only",
"url": "https://www.gnu.org/licenses/gpl-3.0-standalone.html"
}
}
],
"properties": [
{
"name": "language",
"value": "C"
}
]
},
{
"type": "library",
"bom-ref": "4-tomcat",
"name": "tomcat",
"version": "9.0.46",
"supplier": {
"name": "apache"
},
"cpe": "cpe:/a:apache:tomcat:9.0.46",
"licenses": [
{
"license": {
"id": "Apache-2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
}
}
]
},
{
"type": "machine-learning-model",
"bom-ref": "5-resnet-50",
"name": "resnet-50",
"version": "1.5",
"supplier": {
"name": "microsoft"
},
"description": "ResNet (Residual Network) is a convolutional neural network that democratized the concepts of residual learning and skip connections. This enables to train much deeper models.",
"licenses": [
{
"license": {
"id": "Apache-2.0",
"url": "https://www.apache.org/licenses/LICENSE-2.0"
}
}
],
"modelCard": {
"bom-ref": "5-resnet-50-model",
"modelParameters": {
"approach": {
"type": "supervised"
},
"task": "classification",
"architectureFamily": "Convolutional neural network",
"modelArchitecture": "ResNet-50",
"datasets": [
{
"type": "dataset",
"name": "ImageNet",
"contents": {
"url": "https://huggingface.co/datasets/imagenet-1k"
},
"classification": "public",
"sensitiveData": "no personal data",
"description": "ILSVRC 2012, commonly known as \"ImageNet\" is an image dataset organized according to the WordNet hierarchy. Each meaningful concept in WordNet, possibly described by multiple words or word phrases, is called a \"synonym set\" or \"synset\". There are more than 100,000 synsets in WordNet, majority of them are nouns (80,000+). ImageNet aims to provide on average 1000 images to illustrate each synset. Images of each concept are quality-controlled and human-annotated.",
"governance": {
"owners": [
{
"organization": {
"name": "microsoft"
},
"contact": {
"email": "sales@microsoft.com"
}
},
{
"organization": {
"name": "microsoft"
},
"contact": {
"email": "consulting@microsoft.com"
}
}
]
}
}
],
"inputs": [
{
"format": "image"
}
],
"outputs": [
{
"format": "image class"
}
]
},
"quantitativeAnalysis": {
"performanceMetrics": [
{
"type": "CPU",
"value": "10%",
"confidenceInterval": {
"lowerBound": "8",
"upperBound": "12"
}
}
],
"graphics": {
"description": "Test data",
"collection": [
{
"name": "cat",
"image": {
"contentType": "text/plain",
"encoding": "base64",
"content": "cat.jpg"
}
},
{
"name": "dog",
"image": {
"contentType": "text/plain",
"encoding": "base64",
"content": "dog.jpg"
}
}
]
}
},
"considerations": {
"users": [
"Researcher"
],
"technicalLimitations": [
"To be used in the EU.",
"To be used in the UK."
],
"ethicalConsiderations": [
{
"name": "User from prohibited location",
"mitigationStrategy": "Use geolocation to validate source of request."
}
]
},
"properties": [
{
"name": "num_channels",
"value": "3"
}
]
}
}
]
}
The following command will generate a summary of the contents of the MLBOM to the console.
mlbomdoc --input-file test.json
╭───────────────╮
│ MLBOM Summary │
╰───────────────╯
┏━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Item ┃ Details ┃
┡━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ MLBOM File │ test.json │
│ MLBOM Type │ cyclonedx │
│ Version │ 1.5 │
│ Name │ MLApp │
│ Creator │ tool:lib4sbom#0.6.0 │
│ Created │ 2024-01-02T11:02:22Z │
└────────────┴──────────────────────────────────────────────────────────────┘
╭───────────────────────────╮
│ Model Details - resnet-50 │
╰───────────────────────────╯
┏━━━━━━━━━━┳━━━━━━━━━━━━┓
┃ Item ┃ Value ┃
┡━━━━━━━━━━╇━━━━━━━━━━━━┩
│ Version │ 1.5 │
│ Supplier │ microsoft │
│ License │ Apache-2.0 │
└──────────┴────────────┘
╭──────────────────╮
│ Model Parameters │
╰──────────────────╯
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Parameter ┃ Value ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ Approach │ supervised │
│ Task │ classification │
│ Architecture Family │ Convolutional neural network │
│ Model Architecture │ ResNet-50 │
│ Input │ image │
│ Output │ image class │
└─────────────────────┴──────────────────────────────┘
╭───────────────╮
│ Model Dataset │
╰───────────────╯
┏━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Parameter ┃ Value ┃
┡━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ Type │ dataset │
│ Contents URL │ https://huggingface.co/datasets/imagenet-1k │
│ Classification │ public │
│ Sensitive Data │ no personal data │
│ Description │ ILSVRC 2012, commonly known as "ImageNet" is an image dataset organized according to the WordNet hierarchy. Each meaningful concept in WordNet, possibly described by multiple words or │
│ │ word phrases, is called a "synonym set" or "synset". There are more than 100,000 synsets in WordNet, majority of them are nouns (80,000+). ImageNet aims to provide on average 1000 │
│ │ images to illustrate each synset. Images of each concept are quality-controlled and human-annotated. │
└────────────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
╭────────────────────╮
│ Dataset Governance │
╰────────────────────╯
┏━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Category ┃ Organization ┃ Contact ┃
┡━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ Owner │ microsoft │ sales@microsoft.com │
│ Owner │ microsoft │ consulting@microsoft.com │
└──────────┴──────────────┴──────────────────────────┘
╭───────────────────────╮
│ Quantitative Analysis │
╰───────────────────────╯
╭─────────────────────╮
│ Performance Metrics │
╰─────────────────────╯
┏━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
┃ Type ┃ Value ┃ Slice ┃ Lower Bound ┃ Upper Bound ┃
┡━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
│ CPU │ 10% │ │ 8 │ 12 │
└──────┴───────┴───────┴─────────────┴─────────────┘
╭──────────────────────╮
│ Graphics - Test data │
╰──────────────────────╯
┏━━━━━━┳━━━━━━━━━┓
┃ Name ┃ Content ┃
┡━━━━━━╇━━━━━━━━━┩
│ cat │ cat.jpg │
│ dog │ dog.jpg │
└──────┴─────────┘
╭────────────────╮
│ Considerations │
╰────────────────╯
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Category ┃ Value ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ Users │ Researcher │
│ Technical Limitations │ To be used in the EU. │
│ Technical Limitations │ To be used in the UK. │
│ Ethical Considerations │ User from prohibited location │
│ Ethical Considerations - Mitigation Strategy │ Use geolocation to validate source of request. │
└──────────────────────────────────────────────┴────────────────────────────────────────────────┘
╭────────────╮
│ Properties │
╰────────────╯
┏━━━━━━━━━━━━━━┳━━━━━━━┓
┃ Name ┃ Value ┃
┡━━━━━━━━━━━━━━╇━━━━━━━┩
│ num_channels │ 3 │
└──────────────┴───────┘
Licence
Licensed under the Apache 2.0 Licence.
Limitations
The tool has the following limitations:
- Invalid MLBOMs will produce unpredictable results.
Feedback and Contributions
Bugs and feature requests can be made via GitHub Issues.
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distributions
Built Distribution
File details
Details for the file mlbomdoc-0.1.0-py2.py3-none-any.whl.
File metadata
- Download URL: mlbomdoc-0.1.0-py2.py3-none-any.whl
- Upload date:
- Size: 13.4 kB
- Tags: Python 2, Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/4.0.1 CPython/3.10.8
File hashes
Algorithm | Hash digest |
---|---|
SHA256 | 16bc4c37e4a0bb927542e064472681332cd22cf3c064b070af846e2a3b00c5a3 |
MD5 | 878e450ec0e397187238c40da3539389 |
BLAKE2b-256 | 2485c521018e35223951485b3ebe5d6af67551de729f391a804a6069e89365fc |