Tools to easily generate features using ftable
Project description
ffeature
下の方に日本語の説明があります
Overview
- Tools to easily generate features using ftable
Example usage
import ffeature
# Feature definition [ffeature]
@ffeature.add_feature("player_score")
def player_score_feature(rec, ftable_dic):
    """Look up the score of the player named in ``rec``.

    Args:
        rec: Record being featurized; its "name" value selects the player.
        ftable_dic: Tables available to feature functions; "player_ft" is
            the one read here.

    Returns:
        The "player_score" value of the single matching player row.

    Raises:
        Exception: If "player_ft" does not contain exactly one row for the
            player name.
    """
    one_player = ftable_dic["player_ft"].cfilter("name", rec["name"])
    # Exactly one row per player name is required; anything else is an error.
    if len(one_player.data) != 1:
        raise Exception("[error] player unique constraint error")
    return one_player.data[0]["player_score"]
# Feature definition [ffeature]
@ffeature.add_feature("field")
def field_feature(rec, ftable_dic):
    """Return the record's "field" value unchanged.

    Args:
        rec: Record being featurized.
        ftable_dic: Tables available to feature functions (unused here).
    """
    return rec["field"]
# When defining several similar features at once, do not put the `def` directly inside a `for` loop: Python closures bind the loop variable late, so the functions defined in the loop can all end up using its final value. Instead, write a "function that defines a feature" (as below) and call it from the loop, so that each feature gets its own binding.
def gen_simple_feature(key):
    """Register a feature named "simple_feature_<key>" that returns ``rec[key]``.

    Defining the feature inside this factory gives each registered function
    its own binding of ``key``.

    Args:
        key: Record key whose value the new feature returns.
    """
    @ffeature.add_feature("simple_feature_%s" % key)
    def field_feature(rec, ftable_dic):
        return rec[key]
# Register one pass-through feature for each listed record key.
for key in ["field", "game_difficulty"]:
    gen_simple_feature(key)
# List of tables used to create features
# FTable comes from the separate ftable package, which this example has not
# imported so far; without this import the statement below raises NameError.
import ftable

ftable_dic = {
    # One row per game record; these rows drive the output feature table.
    "game_ft": ftable.FTable([
        {"name": "taro", "game_difficulty": 1.2, "field": "A"},
        {"name": "yusuke", "game_difficulty": 1.5, "field": "A"},
        {"name": "yusuke", "game_difficulty": 1.3, "field": "B"},
        {"name": "taro", "game_difficulty": 1.3, "field": "B"},
        {"name": "taro", "game_difficulty": 1.0, "field": None},
    ]),
    # One row per player; looked up by name in player_score_feature.
    "player_ft": ftable.FTable([
        {"name": "taro", "player_score": 120},
        {"name": "yusuke", "player_score": 150},
    ]),
}
# Create a feature table for the entire dataset (created according to the add_feature decorator) [ffeature]
feature_ft = ffeature.gen_feature_table(
    # Tables used to create the features.
    ftable_dic=ftable_dic,
    # Table whose records define the row unit of the output ftable.
    rec_table=ftable_dic["game_ft"],
    # sorted_keys setting for the output ftable.
    sorted_keys=[],
)
print(feature_ft)
# Process missing values [ffeature]
feature_ft = ffeature.handle_missing(
    feature_ft,
    # "delete" mode: skip rows that contain any missing value.
    mode="delete",
    # Values to be treated as missing.
    missing_values=[None],
)
print(feature_ft)
# Split data
def rec_filter(rec):
    """Return True when the record's "field" value is "A"."""
    return rec["field"] == "A"
# Extract the records of feature_ft that satisfy rec_filter. [ffeature]
partial_ft = ffeature.data_filter(feature_ft, rec_filter)
print(partial_ft)
概要
- ftableを使って特徴量を簡単に生成できるツール
使用例
import ffeature
# 特徴量定義 [ffeature]
@ffeature.add_feature("player_score")
def player_score_feature(rec, ftable_dic):
    """Look up the score of the player named in ``rec``.

    Args:
        rec: Record being featurized; its "name" value selects the player.
        ftable_dic: Tables available to feature functions; "player_ft" is
            the one read here.

    Returns:
        The "player_score" value of the single matching player row.

    Raises:
        Exception: If "player_ft" does not contain exactly one row for the
            player name.
    """
    one_player = ftable_dic["player_ft"].cfilter("name", rec["name"])
    # Exactly one row per player name is required; anything else is an error.
    if len(one_player.data) != 1:
        raise Exception("[error] player unique constraint error")
    return one_player.data[0]["player_score"]


# Feature definition [ffeature]
# NOTE(review): the rec_filter step at the end of this example reads
# rec["field"], and the English version of the example registers this
# feature, so it is registered here as well -- confirm this matches intent.
@ffeature.add_feature("field")
def field_feature(rec, ftable_dic):
    """Return the record's "field" value unchanged."""
    return rec["field"]
# 複数種類の特徴量をまとめて定義する際は、defをfor文にくるんでしまうと関数定義時の外部変数束縛機能 (closure機能) で不具合が起きやすいので、下記のように「特徴量を定義する関数」の形で定義して外から呼ぶ方法が推奨です。
def gen_simple_feature(key):
    """Register a feature named "simple_feature_<key>" that returns ``rec[key]``.

    Defining the feature inside this factory gives each registered function
    its own binding of ``key``.

    Args:
        key: Record key whose value the new feature returns.
    """
    @ffeature.add_feature("simple_feature_%s" % key)
    def field_feature(rec, ftable_dic):
        return rec[key]
# Register one pass-through feature for each listed record key.
for key in ["field", "game_difficulty"]:
    gen_simple_feature(key)
# 特徴量作成に利用するテーブルの一覧
# FTable comes from the separate ftable package, which this example has not
# imported so far; without this import the statement below raises NameError.
import ftable

ftable_dic = {
    # One row per game record; these rows drive the output feature table.
    "game_ft": ftable.FTable([
        {"name": "taro", "game_difficulty": 1.2, "field": "A"},
        {"name": "yusuke", "game_difficulty": 1.5, "field": "A"},
        {"name": "yusuke", "game_difficulty": 1.3, "field": "B"},
        {"name": "taro", "game_difficulty": 1.3, "field": "B"},
        {"name": "taro", "game_difficulty": 1.0, "field": None},
    ]),
    # One row per player; looked up by name in player_score_feature.
    "player_ft": ftable.FTable([
        {"name": "taro", "player_score": 120},
        {"name": "yusuke", "player_score": 150},
    ]),
}
# 全量に対する特徴量テーブルを作成 (add_feature デコレータに従って作成) [ffeature]
feature_ft = ffeature.gen_feature_table(
    # Tables used to create the features.
    ftable_dic=ftable_dic,
    # Table that defines the record unit of the data being created.
    rec_table=ftable_dic["game_ft"],
    # sorted_keys setting for the ftable.
    sorted_keys=[],
)
print(feature_ft)
# 欠損値を処理 [ffeature]
feature_ft = ffeature.handle_missing(
    feature_ft,
    # "delete" mode: skip rows that contain even one missing value.
    mode="delete",
    # Values to be treated as missing.
    missing_values=[None],
)
print(feature_ft)
# データの分割
def rec_filter(rec):
    """Return True when the record's "field" value is "A"."""
    return rec["field"] == "A"
# Extract the records of feature_ft that satisfy rec_filter. [ffeature]
partial_ft = ffeature.data_filter(feature_ft, rec_filter)
print(partial_ft)
Project details
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
ffeature-0.0.4.tar.gz (5.0 kB, view details)
Built Distribution
ffeature-0.0.4-py3-none-any.whl (6.3 kB, view details)
File details
Details for the file ffeature-0.0.4.tar.gz.
File metadata
- Download URL: ffeature-0.0.4.tar.gz
- Upload date:
- Size: 5.0 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/3.4.2 importlib_metadata/3.10.0 pkginfo/1.7.0 requests/2.22.0 requests-toolbelt/0.9.1 tqdm/4.64.1 CPython/3.8.8
File hashes
Algorithm | Hash digest
---|---
SHA256 | ae7c3ec211dec47c719b7d529d3948a5d4252a19658c4b9b59bdf431ac21547e
MD5 | 96d478a447ade56bb6c7ba6dec671b9f
BLAKE2b-256 | 711d746cf14c391ed819a1389444b7aae1a8b8e5e14de0547b55925dbdf6bb69
File details
Details for the file ffeature-0.0.4-py3-none-any.whl.
File metadata
- Download URL: ffeature-0.0.4-py3-none-any.whl
- Upload date:
- Size: 6.3 kB
- Tags: Python 3
- Uploaded using Trusted Publishing? No
- Uploaded via: twine/3.4.2 importlib_metadata/3.10.0 pkginfo/1.7.0 requests/2.22.0 requests-toolbelt/0.9.1 tqdm/4.64.1 CPython/3.8.8
File hashes
Algorithm | Hash digest
---|---
SHA256 | f09b071e0811e7e5bc2b167efdad81a13aed8645a8f9840c20c3353f82cd288a
MD5 | e8c9724b45d40e4d80cc51e2c9a34cb9
BLAKE2b-256 | 5f8cb18d0dbc66766dfd381f840c18069b91b5ddb13a5b6156734dc1dd725ec6