Skip to main content

Tools to easily generate features using ftable

Project description

ffeature

下の方に日本語の説明があります

Overview

  • Tools to easily generate features using ftable

Example usage

import ffeature

# Feature definition [ffeature]: the score of the player named in the record
@ffeature.add_feature("player_score")
def player_score_feature(rec, ftable_dic):
	"""Return the "player_score" of the single row that
	ftable_dic["player_ft"].cfilter("name", rec["name"]) yields.

	Raises:
		Exception: if that lookup does not yield exactly one row.
	"""
	matched_rows = ftable_dic["player_ft"].cfilter("name", rec["name"]).data
	if len(matched_rows) == 1:
		return matched_rows[0]["player_score"]
	# Zero rows or several rows: the name does not identify one player
	raise Exception("[error] player unique constraint error")

# Feature definition [ffeature]: expose the record's own "field" value as a feature
@ffeature.add_feature("field")
def field_feature(rec, ftable_dic):
	"""Return rec["field"] unchanged; ftable_dic is accepted but not used."""
	field_value = rec["field"]
	return field_value

# To define several similar features at once, do not put the decorated `def`
# directly inside a `for` loop: the function would capture the loop variable
# itself (closure binding), which tends to cause problems. The recommended
# approach is to write a "function that defines a feature" and call it from
# the loop, so each call gets its own `key`.
def gen_simple_feature(key):
	"""Register a feature named "simple_feature_<key>" whose value is rec[key]."""
	feature_name = "simple_feature_%s"%key

	@ffeature.add_feature(feature_name)
	def field_feature(rec, ftable_dic):
		return rec[key]

for key in ("field", "game_difficulty"):
	gen_simple_feature(key)

# ftable has to be imported explicitly: `import ffeature` alone does not bind
# the name `ftable`, so ftable.FTable below would raise NameError without this.
import ftable

# Tables used to create features, keyed by the names the feature functions look up
ftable_dic = {
	# One record per game played
	"game_ft": ftable.FTable([
		{"name": "taro", "game_difficulty": 1.2, "field": "A"},
		{"name": "yusuke", "game_difficulty": 1.5, "field": "A"},
		{"name": "yusuke", "game_difficulty": 1.3, "field": "B"},
		{"name": "taro", "game_difficulty": 1.3, "field": "B"},
		{"name": "taro", "game_difficulty": 1.0, "field": None},	# "field" is missing here
	]),
	# One record per player
	"player_ft": ftable.FTable([
		{"name": "taro", "player_score": 120},
		{"name": "yusuke", "player_score": 150},
	]),
}

# Build the feature table for the entire dataset, following the features
# declared with the add_feature decorator [ffeature]
feature_ft = ffeature.gen_feature_table(
	# Tables used to create features
	ftable_dic=ftable_dic,
	# Table that defines the record unit (rows) of the output ftable
	rec_table=ftable_dic["game_ft"],
	# sorted_keys setting of the output ftable
	sorted_keys=[],
)
print(feature_ft)

# Handle missing values [ffeature]
feature_ft = ffeature.handle_missing(
	feature_ft,
	missing_values=[None],	# values to be treated as missing
	mode="delete",	# "delete": skip every row that has at least one missing value
)
print(feature_ft)

# Split data: select the records whose "field" is "A"
def rec_filter(rec):
	"""Return True when the record's "field" value equals "A"."""
	return rec["field"] == "A"

# Extract the records of feature_ft that satisfy rec_filter [ffeature]
partial_ft = ffeature.data_filter(feature_ft, rec_filter)
print(partial_ft)

概要

  • ftableを使って特徴量を簡単に生成できるツール

使用例

import ffeature

# Feature definition [ffeature]: the score of the player named in the record
@ffeature.add_feature("player_score")
def player_score_feature(rec, ftable_dic):
	"""Return the "player_score" of the single row that
	ftable_dic["player_ft"].cfilter("name", rec["name"]) yields.

	Raises:
		Exception: if that lookup does not yield exactly one row.
	"""
	one_player = ftable_dic["player_ft"].cfilter("name", rec["name"])
	if len(one_player.data) != 1:
		raise Exception("[error] player unique constraint error")
	return one_player.data[0]["player_score"]

# Feature definition [ffeature]: the record's own "field" value.
# The filter at the end of this example reads rec["field"] from the generated
# feature table, so a feature with that name has to be registered.
# NOTE(review): assumes feature-table columns are named after the registered
# features — confirm against the ffeature implementation.
@ffeature.add_feature("field")
def field_feature(rec, ftable_dic):
	"""Return rec["field"] unchanged; ftable_dic is accepted but not used."""
	return rec["field"]

# To define several similar features at once, do not put the decorated `def`
# directly inside a `for` loop: the function would capture the loop variable
# itself (closure binding), which tends to cause problems. The recommended
# approach is to write a "function that defines a feature" and call it from
# the loop, so each call gets its own `key`.
def gen_simple_feature(key):
	"""Register a feature named "simple_feature_<key>" whose value is rec[key]."""
	feature_name = "simple_feature_%s"%key

	@ffeature.add_feature(feature_name)
	def field_feature(rec, ftable_dic):
		return rec[key]

for key in ("field", "game_difficulty"):
	gen_simple_feature(key)

# ftable has to be imported explicitly: `import ffeature` alone does not bind
# the name `ftable`, so ftable.FTable below would raise NameError without this.
import ftable

# Tables used to create features, keyed by the names the feature functions look up
ftable_dic = {
	# One record per game played
	"game_ft": ftable.FTable([
		{"name": "taro", "game_difficulty": 1.2, "field": "A"},
		{"name": "yusuke", "game_difficulty": 1.5, "field": "A"},
		{"name": "yusuke", "game_difficulty": 1.3, "field": "B"},
		{"name": "taro", "game_difficulty": 1.3, "field": "B"},
		{"name": "taro", "game_difficulty": 1.0, "field": None},	# "field" is missing here
	]),
	# One record per player
	"player_ft": ftable.FTable([
		{"name": "taro", "player_score": 120},
		{"name": "yusuke", "player_score": 150},
	]),
}

# Build the feature table for the entire dataset, following the features
# declared with the add_feature decorator [ffeature]
feature_ft = ffeature.gen_feature_table(
	# Tables used to create features
	ftable_dic=ftable_dic,
	# Table that defines the record unit (rows) of the data being created
	rec_table=ftable_dic["game_ft"],
	# sorted_keys setting of the output ftable
	sorted_keys=[],
)
print(feature_ft)

# Handle missing values [ffeature]
feature_ft = ffeature.handle_missing(
	feature_ft,
	missing_values=[None],	# values to be treated as missing
	mode="delete",	# "delete": skip every row that has at least one missing value
)
print(feature_ft)

# Split data: select the records whose "field" is "A"
def rec_filter(rec):
	"""Return True when the record's "field" value equals "A"."""
	return rec["field"] == "A"

# Extract the records of feature_ft that satisfy rec_filter [ffeature]
partial_ft = ffeature.data_filter(feature_ft, rec_filter)
print(partial_ft)

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Source Distribution

ffeature-0.0.4.tar.gz (5.0 kB view hashes)

Uploaded Source

Built Distribution

ffeature-0.0.4-py3-none-any.whl (6.3 kB view hashes)

Uploaded Python 3

Supported by

AWS (Cloud computing and Security Sponsor), Datadog (Monitoring), Fastly (CDN), Google (Download Analytics), Microsoft (PSF Sponsor), Pingdom (Monitoring), Sentry (Error logging), StatusPage (Status page)