MongoDB document and Python dict similarity
Project description
This package now also supports computing closeness among Python dicts.
This project is used to find relationships between MongoDB documents.
This will be the initial version of the project
How to use:
Install the package with pip:
pip install closeness
See the example below:
from closeness.closeness_aggregation import ClosenessAggregation
from pymongo import MongoClient
# Open a client with PyMongo's default connection settings
# (presumably a local mongod on the default host/port -- confirm for your setup).
client = MongoClient()
# Database and collection that hold the sample user documents.
db = client.test_database
user_collection = db.user_collection
# Three sample user documents used throughout the example.
# user1 is the reference document. user2 has the same age, gender, tags and
# friends as user1; user3 differs in age and gender and shares only one tag
# and one friend with user1.
user1 = {
    "name": "User 1",
    "age": 25,
    "gender": "male",
    "tags": ["tag1", "tag2", "tag3"],
    "friends": [
        {"user_id": "friend1", "name": "name1"},
        {"user_id": "friend2", "name": "name2"},
        {"user_id": "friend3", "name": "name3"},
    ],
}

user2 = {
    "name": "User 2",
    "age": 25,
    "gender": "male",
    "tags": ["tag1", "tag2", "tag3"],
    "friends": [
        {"user_id": "friend1", "name": "name1"},
        {"user_id": "friend2", "name": "name2"},
        {"user_id": "friend3", "name": "name3"},
    ],
}

user3 = {
    "name": "User 3",
    "age": 30,
    "gender": "female",
    "tags": ["tag1"],
    "friends": [
        {"user_id": "friend3", "name": "name3"},
    ],
}
# Store all three sample documents in the collection.
# NOTE(review): Collection.insert() is the legacy PyMongo insert call; newer
# PyMongo releases provide insert_many() instead -- confirm against the
# PyMongo version you have installed.
user_collection.insert([user1, user2, user3])
# $match stage that excludes the reference user (matched by name) so that
# user1 is not compared with its own stored document.
query_stage = {'$match': {'name': {'$ne': user1['name']}}}
# Comparison settings, one list per kind of field. Each entry names the
# document field to compare and the weight given to it.
# NOTE(review): the exact meaning of 'weight', 'unique', 'from' and 'to' is
# defined by the closeness package; presumably 'unique' is the key that
# identifies an embedded dict and 'from'/'to' bound the accepted numeric
# difference -- confirm against the package source.

# Fields holding arrays of plain values.
ARRAY_CMP_FIELDS = [
    {"field": "tags", "weight": 3},
]

# Fields holding arrays of embedded dicts.
ARRAY_DICT_CMP_FIELDS = [
    {"field": "friends", "unique": "user_id", "weight": 0.5},
]

# String-valued fields.
STRING_CMP_FIELDS = [
    {"field": "gender", "weight": 0.5},
]

# Numeric fields.
NUM_CMP_FIELDS = [
    {"field": "age", "from": -1, "to": 1, "weight": 0.3},
]

# Document fields passed to ClosenessAggregation as its output-field list.
OUT_PUT_FIELDS = ["name", "age"]
# Build the aggregation helper for the reference document user1, using the
# $match stage, the output fields and the per-type comparison settings
# defined above.
# The instance is bound to `closeness_obj` (not `test`) because the
# SIMPLE-mode call later in this example refers to it by that name; with the
# old name that later call raised NameError.
closeness_obj = ClosenessAggregation(
    user1,
    query_stage,
    OUT_PUT_FIELDS,
    limit=10,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)
# Generate the pipeline in FUZZY mode and run it against the collection;
# sample output is shown in the comment that follows.
aggregation_query = closeness_obj.get_aggregation_pipeline(
    mode=ClosenessAggregation.FUZZY
)
result = user_collection.aggregate(aggregation_query)
# {u'ok': 1.0,
# u'result': [{u'age': 25,
# u'_id': ObjectId('55c894dcb67e20612cd6ddf0'),
# u'weights': [{u'gender': 11.627906976744187,
# u'age': 6.9767441860465125,
# u'friends': 11.626615417599819,
# u'tags': 69.75969250559892}],
# u'name': u'User 2',
# u'rank': 99.99095908598945},
# {u'age': 30,
# u'_id': ObjectId('55c894dcb67e20612cd6ddf1'),
# u'weights': [{u'gender': 0,
# u'age': 0,
# u'friends': 6.456076223518085,
# u'tags': 38.73645734110851}],
# u'name': u'User 3',
# u'rank': 45.1925335646266}]}
# Same comparison in SIMPLE mode: regenerate the pipeline from the
# ClosenessAggregation instance and run it again. `closeness_obj` must be the
# ClosenessAggregation instance built earlier in this script. Sample output
# is shown in the comment that follows.
aggregation_query = closeness_obj.get_aggregation_pipeline(
mode=ClosenessAggregation.SIMPLE
)
result = user_collection.aggregate(aggregation_query)
# {u'ok': 1.0,
# u'result': [{u'age': 25,
# u'_id': ObjectId('55c894dcb67e20612cd6ddf3'),
# u'weights': [{u'gender': 11.627906976744187,
# u'age': 6.9767441860465125,
# u'friends': 11.627906976744187,
# u'tags': 69.76744186046513}],
# u'name': u'User 2',
# u'rank': 100.00000000000001},
# {u'age': 30,
# u'_id': ObjectId('55c894dcb67e20612cd6ddf4'),
# u'weights': [{u'gender': 0,
# u'age': 0,
# u'friends': 3.8759689922480622,
# u'tags': 23.255813953488374}],
# u'name': u'User 3',
# u'rank': 27.131782945736436}]}
# By using python dicts
# Compare the reference dict user1 against a plain list of dicts, with no
# MongoDB involved, using the same comparison settings as above.
# NOTE(review): ClosenessDict is not imported anywhere in this example;
# presumably it ships in the closeness package -- confirm the module path
# and add the import next to ClosenessAggregation.
users = [user2, user3]
closeness_dict_obj = ClosenessDict(
    user1,
    users,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)
result = closeness_dict_obj.execute(
    mode=ClosenessDict.SIMPLE
)
# Plain asserts instead of self.assertEqual: this is a script, not a
# unittest.TestCase method, so `self` is undefined here. The expected ranks
# are the values shown in the sample output that follows.
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 27.131782945736436
# [{'name': 'User 2',
# 'tags': ['tag1',
# 'tag2',
# 'tag3'],
# 'gender': 'male',
# 'age': 25,
# 'closeness': {'weightages': {'gender': 11.627906976744187,
# 'age': 6.9767441860465125,
# 'friends': 11.627906976744187,
# 'tags': 69.76744186046513},
# 'rank': 100.00000000000001},
# 'friends': ['friend1',
# 'friend2',
# 'friend3']},
# {'name': 'User 3',
# 'tags': ['tag1'],
# 'gender': 'female',
# 'age': 30,
# 'closeness': {'weightages': {'gender': 0.0,
# 'age': 0.0,
# 'friends': 3.8759689922480622,
# 'tags': 23.255813953488374},
# 'rank': 27.131782945736436},
# 'friends': ['friend3']}]
# Re-run the dict comparison in FUZZY mode on the same ClosenessDict object.
result = closeness_dict_obj.execute(
    mode=ClosenessDict.FUZZY
)
# Plain asserts instead of self.assertEqual: this is a script, not a
# unittest.TestCase method, so `self` is undefined here. The expected ranks
# are the values shown in the sample output that follows.
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 45.21963824289406
# [{'name': 'User 2',
# 'tags': ['tag1',
# 'tag2',
# 'tag3'],
# 'gender': 'male',
# 'age': 25,
# 'closeness': {'weightages': {'gender': 11.627906976744187,
# 'age': 6.9767441860465125,
# 'friends': 11.627906976744187,
# 'tags': 69.76744186046513},
# 'rank': 100.00000000000001},
# 'friends': ['friend1',
# 'friend2',
# 'friend3']},
# {'name': 'User 3',
# 'tags': ['tag1'],
# 'gender': 'female',
# 'age': 30,
# 'closeness': {'weightages': {'gender': 0.0,
# 'age': 0.0,
# 'friends': 6.459948320413436,
# 'tags': 38.75968992248062},
# 'rank': 45.21963824289406},
# 'friends': ['friend3']}]
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
closeness-2.0.2.tar.gz
(6.5 kB
view details)
File details
Details for the file closeness-2.0.2.tar.gz
.
File metadata
- Download URL: closeness-2.0.2.tar.gz
- Upload date:
- Size: 6.5 kB
- Tags: Source
- Uploaded using Trusted Publishing? No
File hashes
Algorithm | Hash digest | |
---|---|---|
SHA256 | 480d2672aea21f0627c444521bb333235be3f825d3c4e492cb23f63ee4f21d95 |
|
MD5 | c6affbe4a5c43f8b29e7181626f0665f |
|
BLAKE2b-256 | 199027ab51cce581e4706453871049a525ab677ca0ab04db652ea2ef61b43b13 |