minimind

MiniMind: Lightweight and flexible AI generation library

These details have not been verified by PyPI

Project links

Homepage

Project description

MiniMind Example Code

from minimind import GPMGenerator, Sampler, SimpleTokenizer

def main():
    print("MiniMind GPMGenerator 테스트 시작!")

    # 샘플 데이터
    pairs = [
        ("안녕하세요", "안녕하세요 반갑습니다"),
        ("오늘 날씨 어때?", "오늘은 맑고 따뜻해요"),
        ("뭐 먹을래?", "저는 김치찌개 좋아해요"),
    ]

    # 샘플러 생성 (top-k 예시)
    sampler = Sampler(method='top_k', k=3)
    tokenizer = SimpleTokenizer()

    # 생성기 초기화 시 sampler 연결
    gpm = GPMGenerator(sampler=sampler, tokenizer=tokenizer)
    gpm.fit(pairs)

    # 생성 테스트
    prompt = "안녕하세요"
    response = gpm.chat(prompt, max_tokens=10)

    print("입력 프롬프트:", prompt)
    print("생성된 텍스트:", response)

if __name__ == "__main__":
    main()

from minimind import SAPGenerator
from minimind import SimpleTokenizer
def main():
    print("MiniMind SAPGenerator 테스트 시작!")

    # 간단한 데이터 샘플 (입력-출력 쌍)
    pairs = [
        ("안녕하세요", "안녕하세요"),
        ("오늘 날씨 어때?", "날씨가 좋아요"),
        ("밥 먹었어?", "네, 잘 먹었어요"),
        ("영화 볼래?", "좋아요 같이 보자"),
        ("잘 자요", "안녕히 주무세요"),
    ]

    # SAPGenerator 인스턴스 생성 및 학습
    tokenizer = SimpleTokenizer()
    sap_gen = SAPGenerator(tokenizer=tokenizer)
    sap_gen.fit(pairs)

    # 생성 테스트
    prompt = "오늘"
    print(f"입력: {prompt}")
    generated = sap_gen.chat(prompt, max_tokens=10)
    print(f"생성 결과: {generated}")

if __name__ == "__main__":
    main()

def main():
    print("MiniMind 패키지 실행 - 테스트 시작!")
    
    # 여기서 간단히 NeuralGenerator 테스트 예시 실행
    from minimind import NeuralGenerator
    from minimind import Sampler

    sampler = Sampler(method='temperature', temperature=0.8)
    
    # 더미 데이터 (토큰 인덱스 배열) 예시
    import numpy as np
    vocab_size = 100
    X_dummy = np.random.randint(0, vocab_size-1, size=(50, 10))  # 50샘플, 길이10 시퀀스
    y_dummy = np.zeros((50, vocab_size))
    for i in range(50):
        y_dummy[i, np.random.randint(0, vocab_size)] = 1.0  # 랜덤 원핫 출력
    
    ng = NeuralGenerator(vocab_size=vocab_size, epochs=3, verbose=True, sampler=sampler)
    ng.fit(X_dummy, y_dummy)
    
    prompt = np.array([1, 2, 3])  # 시작 토큰 시퀀스 예시
    generated_seq = ng.generate(prompt, max_tokens=10)
    print("생성된 시퀀스:", generated_seq)

if __name__ == "__main__":
    main()

# test_sampling.py

import numpy as np
from minimind import top_k_sampling, top_p_sampling, temperature_sampling, Sampler

def dummy_probs(size=100):
    probs = np.random.rand(size)
    return probs / probs.sum()

def test_sampling_functions():
    probs = dummy_probs()

    print("top_k_sampling:", top_k_sampling(probs, k=5))
    print("top_p_sampling:", top_p_sampling(probs, p=0.8))
    print("temperature_sampling (temp=0.5):", temperature_sampling(probs, temperature=0.5))
    print("temperature_sampling (temp=2.0):", temperature_sampling(probs, temperature=2.0))

def test_sampler_class():
    probs = dummy_probs()
    sampler = Sampler(method='top_p', p=0.9)
    print("Sampler top_p:", sampler.sample(probs))

    sampler.method = 'top_k'
    sampler.k = 3
    print("Sampler top_k:", sampler.sample(probs))

    sampler.method = 'temperature'
    sampler.temperature = 0.7
    print("Sampler temperature:", sampler.sample(probs))

if __name__ == "__main__":
    test_sampling_functions()
    test_sampler_class()

from minimind import SimpleTokenizer

tokenizer = SimpleTokenizer()

text = "Hello, 안녕하세요! Let's test the tokenizer 123."
tokens = tokenizer.tokenize(text)
print("토큰:", tokens)

reconstructed = tokenizer.detokenize(tokens)
print("복원된 문장:", reconstructed)

import os
import numpy as np
from minimind import set_seed, save_json, load_json, save_model_weights, load_model_weights, simple_logger


if __name__ == "__main__":
    # 테스트 함수들

    def test_set_seed():
        set_seed(123)
        a = np.random.rand(3)
        set_seed(123)
        b = np.random.rand(3)
        assert np.allclose(a, b), "set_seed 실패!"
        print("set_seed 테스트 통과!")

    def test_save_load_json():
        data = {'name': 'MiniMind', 'version': 1.0}
        filepath = 'test.json'
        save_json(data, filepath)
        loaded = load_json(filepath)
        assert data == loaded, "JSON 저장/로드 실패!"
        os.remove(filepath)
        print("save_json & load_json 테스트 통과!")

    def test_save_load_weights_multi_format():
        weights = {
            'W1': np.array([1, 2, 3]),
            'b1': np.array([0.1, 0.2, 0.3])
        }
        for fmt in ['npz', 'joblib', 'json']:
            filepath = f"weights_test.{fmt}"
            save_model_weights(weights, filepath, format=fmt)
            loaded = load_model_weights(filepath, format=fmt)
            for k in weights:
                assert np.allclose(weights[k], loaded[k]), f"{fmt} {k} 가중치 저장/로드 실패!"
            os.remove(filepath)
        print("멀티 포맷 가중치 저장/로드 테스트 통과!")

    def test_logger():
        simple_logger("테스트 로그 메시지")

    # 실행 테스트 모음
    test_set_seed()
    test_save_load_json()
    test_save_load_weights_multi_format()
    test_logger()

import numpy as np
from minimind import Radec # 네가 만든 클래스 파일명에 맞게 바꿔!
from minimind import Sampler

# 간단한 샘플용 토크나이저 (공백 기준)
def simple_tokenizer(text):
    return text.strip().split()

# 아주 단순 샘플 샘플러 (확률분포에서 랜덤 샘플링)

def main():

    import csv
    csv_path = "MLdata.csv"

    pairs = []
    with open(csv_path, encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for row in reader:
            pairs.append((row['input_text'].strip(), row['output_text'].strip()))


    # 생성기 초기화
    generator = Radec(n_models=2, sampler=Sampler(), tokenizer=simple_tokenizer)

    # 학습
    print("학습 시작...")
    generator.fit(pairs[:200])
    print("학습 완료!")

    # 생성 테스트
    prompt = "오늘 날씨 어때?"
    print(f"'{prompt}'에 대한 생성 결과:")
    generated_tokens = generator.generate(prompt, max_tokens=10)
    print(" ".join(generated_tokens))

if __name__ == "__main__":
    main()

# minimind/seprod.py

import csv
import re
import numpy as np
import autograd.numpy as anp
from autograd import grad
from minimind import SeProD

# --- 토크나이저 (공백 단위) ---
def simple_tokenizer(text):
    return re.findall(r'\b\w+\b', text.lower())

# --- vocab 빌드 ---
def build_vocab(tokens, min_freq=2):
    from collections import Counter
    counter = Counter(tokens)
    vocab = [w for w, c in counter.items() if c >= min_freq]
    vocab = sorted(vocab)
    stoi = {w: i for i, w in enumerate(vocab)}
    itos = {i: w for i, w in enumerate(vocab)}
    return stoi, itos

# --- 인코딩 ---
def encode(tokens, stoi):
    return [stoi[t] for t in tokens if t in stoi]

# --- 데이터셋 생성 (패딩 + max_len) ---
def pad_seq(seq, max_len, pad_idx):
    return seq[:max_len] + [pad_idx]*(max_len - len(seq))

def load_dataset(csv_path, max_len=20, min_freq=2, max_samples=1000):
    inputs = []
    outputs = []
    all_tokens = []

    with open(csv_path, encoding='utf-8') as f:
        reader = csv.DictReader(f)
        for i, row in enumerate(reader):
            if i >= max_samples:
                break
            inp_tokens = simple_tokenizer(row['input_text'])
            out_tokens = simple_tokenizer(row['output_text']) + ["<EOS>"]
            all_tokens.extend(inp_tokens)
            all_tokens.extend(out_tokens)
            inputs.append(inp_tokens)
            outputs.append(out_tokens)

    stoi, itos = build_vocab(all_tokens, min_freq)
    pad_idx = len(stoi)  # 패딩 토큰은 vocab 끝에 추가

    X_enc = []
    X_dec = []
    Y = []

    for inp_tokens, out_tokens in zip(inputs, outputs):
        enc_encoded = encode(inp_tokens, stoi)
        dec_encoded = encode(out_tokens[:-1], stoi)  # 디코더 입력 (out_tokens - 마지막)
        y_encoded = encode(out_tokens[1:], stoi)      # 타깃 (out_tokens shifted)

        enc_padded = pad_seq(enc_encoded, max_len, pad_idx)
        dec_padded = pad_seq(dec_encoded, max_len, pad_idx)
        y_padded = pad_seq(y_encoded, max_len, pad_idx)

        X_enc.append(enc_padded)
        X_dec.append(dec_padded)
        Y.append(y_padded)

    vocab_size = len(stoi) + 1  # 패딩 포함
    return (np.array(X_enc), np.array(X_dec), np.array(Y), stoi, itos, pad_idx, vocab_size)



# --- 텍스트 생성 (단순 greedy) ---
def generate_text(model, stoi, itos, prompt, max_len=20, pad_idx=None):
    prompt_tokens = simple_tokenizer(prompt)
    enc_input = encode(prompt_tokens, stoi)
    enc_input = enc_input[:max_len]
    pad = pad_idx if pad_idx is not None else 0
    enc_input = enc_input + [pad]*(max_len - len(enc_input))
    enc_input = np.array([enc_input])

    generated = []

    # 디코더 입력 처음은 <BOS> 대신 빈 배열 or 패딩으로 시작
    dec_input = [pad] * max_len
    dec_input = np.array([dec_input])

    for _ in range(max_len):
        probs = model.predict(enc_input, dec_input)[0]  # (seq_len, vocab_size)
        next_token = np.argmax(probs[len(generated)])
        if next_token == pad:
            break
        generated.append(next_token)
        dec_input[0, len(generated)-1] = next_token

    return " ".join([itos.get(tok, "<UNK>") for tok in generated])

# --- 메인 실행 ---
if __name__ == "__main__":
    csv_path = "C:\\Users\\yuchan\\Code\\MLdata.csv"  # 경로 조정

    max_len = 20
    X_enc, X_dec, Y, stoi, itos, pad_idx, vocab_size = load_dataset(csv_path, max_len=max_len)

    model = SeProD(vocab_size=vocab_size, embed_dim=64, hidden_dim=128, max_len=max_len, pad_idx=pad_idx)

    model.fit(X_enc, X_dec, Y, epochs=10, batch_size=64, lr=0.001)

    prompt = "안녕하세요"
    generated_text = generate_text(model, stoi, itos, prompt, max_len=max_len, pad_idx=pad_idx)

    print("Generated Text:")
    print(generated_text)

from minimind import NeuralGenerator

model = NeuralGenerator(vocab_size=100, embed_dim=32, hidden_layer_sizes=(64, 32))
model.summary()

Project details

These details have not been verified by PyPI

Project links

Homepage

Release history Release notifications | RSS feed

0.6.0

Jul 18, 2025

0.5.0

Jul 18, 2025

0.4.0

Jul 18, 2025

0.3.312

Jul 15, 2025

0.3.31

Jul 15, 2025

0.3.8

Jul 16, 2025

0.3.7

Jul 15, 2025

0.3.6

Jul 15, 2025

0.3.5

Jul 15, 2025

0.3.4

Jul 15, 2025

0.3.3

Jul 15, 2025

0.3.2

Jul 15, 2025

0.3.1

Jul 15, 2025

0.3.0

Jul 15, 2025

0.2.9

Jul 15, 2025

0.2.8

Jul 15, 2025

0.2.7

Jul 15, 2025

0.2.6

Jul 15, 2025

0.2.5

Jul 14, 2025

0.2.4

Jul 14, 2025

0.2.3

Jul 13, 2025

0.2.2

Jul 13, 2025

This version

0.2.1

Jul 13, 2025

0.2.0

Jul 13, 2025

0.1.8

Jul 13, 2025

0.1.7

Jul 13, 2025

0.1.5

Jul 13, 2025

0.1.4

Jul 13, 2025

0.1.3

Jul 13, 2025

0.1.2

Jul 13, 2025

0.1.1

Jul 13, 2025

0.1.0

Jul 13, 2025

Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

minimind-0.2.1.tar.gz (3.8 MB view details)

Uploaded Jul 13, 2025 Source

Built Distribution

If you're not sure about the file name format, learn more about wheel file names.

The dropdown lists show the available interpreters, ABIs, and platforms. Enable javascript to be able to filter the list of wheel files.

minimind-0.2.1-py3-none-any.whl (3.8 MB view details)

Uploaded Jul 13, 2025 Python 3

File details

Details for the file minimind-0.2.1.tar.gz.

File metadata

Download URL: minimind-0.2.1.tar.gz
Upload date: Jul 13, 2025
Size: 3.8 MB
Tags: Source
Uploaded using Trusted Publishing? No
Uploaded via: twine/6.1.0 CPython/3.12.2

File hashes

Hashes for minimind-0.2.1.tar.gz
Algorithm	Hash digest
SHA256	`13cb8dde5c137697daf55052ad1dd28a15ab56853409dc8ab7d6fd2f400e9061`
MD5	`71001efeb7f6abc55d5c98afb4389db7`
BLAKE2b-256	`28c2975505167705f12e2b7ff21098215243fbbbd3a203bc2e3c34a4fc32aaca`

See more details on using hashes here.

File details

Details for the file minimind-0.2.1-py3-none-any.whl.

File metadata

Download URL: minimind-0.2.1-py3-none-any.whl
Upload date: Jul 13, 2025
Size: 3.8 MB
Tags: Python 3
Uploaded using Trusted Publishing? No
Uploaded via: twine/6.1.0 CPython/3.12.2

File hashes

Hashes for minimind-0.2.1-py3-none-any.whl
Algorithm	Hash digest
SHA256	`76f00dae3981c4b3a2db4ac6058a614ffef87b5b48bed05bd4aa2eb98a3fd858`
MD5	`0fddfa136730245046e9f76bce8f7eca`
BLAKE2b-256	`07ef024c9f049f2f644a56ed960e34b0f2ced0fe05a2d488997a53a1725e2957`

See more details on using hashes here.

minimind 0.2.1

Navigation

Verified details

Maintainers

Unverified details

Project links

Meta

Classifiers

Project description

Project details

Verified details

Maintainers

Unverified details

Project links

Meta

Classifiers

Release history Release notifications | RSS feed

Download files

Source Distribution

Built Distribution

File details

File metadata

File hashes

File details

File metadata

File hashes