Skip to main content

MongoDB document and Python dict similarity

Project description

This package now supports closeness among Python dicts.

This project is used to find relationships between MongoDB documents.

This will be the initial version of the project

How to use:

Install package with pip

pip install closeness

See the example:

from closeness.closeness_aggregation import ClosenessAggregation
from pymongo import MongoClient
# Connect to MongoDB using MongoClient's default connection settings.
client = MongoClient()
# Database and collection that hold the example user documents.
db = client.test_database
user_collection = db.user_collection
# user1 is the document passed as the first argument to
# ClosenessAggregation and ClosenessDict further down.
user1 = {
    "name": "User 1",
    "age": 25,
    "gender": "male",
    "tags": ["tag1", "tag2", "tag3"],
    "friends": [
        {"user_id": "friend1", "name": "name1"},
        {"user_id": "friend2", "name": "name2"},
        {"user_id": "friend3", "name": "name3"},
    ],
}

# Same age, gender, tags and friends as user1; only the name differs.
user2 = {
    "name": "User 2",
    "age": 25,
    "gender": "male",
    "tags": ["tag1", "tag2", "tag3"],
    "friends": [
        {"user_id": "friend1", "name": "name1"},
        {"user_id": "friend2", "name": "name2"},
        {"user_id": "friend3", "name": "name3"},
    ],
}

# Different age and gender; shares only "tag1" and "friend3" with user1.
user3 = {
    "name": "User 3",
    "age": 30,
    "gender": "female",
    "tags": ["tag1"],
    "friends": [{"user_id": "friend3", "name": "name3"}],
}
# Store the three example users. insert_many() is used because
# Collection.insert() was deprecated in PyMongo 3 and removed in PyMongo 4.
user_collection.insert_many([user1, user2, user3])
# $match stage selecting every document whose name differs from user1's,
# so user1 itself is left out of the documents being ranked.
query_stage = {'$match': {'name': {'$ne': user1['name']}}}
# Comparison settings passed to ClosenessAggregation / ClosenessDict.
# Every entry names a document field and the weight assigned to it.

# Fields whose value is a plain list (here: 'tags').
ARRAY_CMP_FIELDS = [{'field': 'tags', 'weight': 3}]

# Fields whose value is a list of dicts (here: 'friends'); 'unique' names
# a key inside each dict -- presumably the one that identifies an element.
ARRAY_DICT_CMP_FIELDS = [
    {'field': 'friends', 'unique': 'user_id', 'weight': 0.5},
]

# Fields whose value is a string (here: 'gender').
STRING_CMP_FIELDS = [{'field': 'gender', 'weight': 0.5}]

# Numeric fields (here: 'age'). NOTE(review): 'from'/'to' look like an
# allowed offset range around the compared value -- confirm in the package.
NUM_CMP_FIELDS = [{'field': 'age', 'from': -1, 'to': 1, 'weight': 0.3}]

# Field names handed to ClosenessAggregation as its output fields.
OUT_PUT_FIELDS = ['name', 'age']
# Build the aggregation helper: user1, the $match stage and the output
# fields are positional; the comparison settings go in by keyword.
test = ClosenessAggregation(
    user1, query_stage, OUT_PUT_FIELDS,
    limit=10,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)

# Request the FUZZY-mode pipeline and run it against the collection.
aggregation_query = test.get_aggregation_pipeline(
    mode=ClosenessAggregation.FUZZY,
)
result = user_collection.aggregate(aggregation_query)

# {u'ok': 1.0,
#  u'result': [{u'age': 25,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf0'),
#               u'weights': [{u'gender': 11.627906976744187,
#                             u'age': 6.9767441860465125,
#                             u'friends': 11.626615417599819,
#                             u'tags': 69.75969250559892}],
#               u'name': u'User 2',
#               u'rank': 99.99095908598945},
#              {u'age': 30,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf1'),
#               u'weights': [{u'gender': 0,
#                             u'age': 0,
#                             u'friends': 6.456076223518085,
#                             u'tags': 38.73645734110851}],
#               u'name': u'User 3',
#               u'rank': 45.1925335646266}]}


# Reuse the ClosenessAggregation instance `test` created above, this time
# requesting the SIMPLE-mode pipeline, and run it against the collection.
aggregation_query = test.get_aggregation_pipeline(
    mode=ClosenessAggregation.SIMPLE
)

result = user_collection.aggregate(aggregation_query)

# {u'ok': 1.0,
#  u'result': [{u'age': 25,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf3'),
#               u'weights': [{u'gender': 11.627906976744187,
#                             u'age': 6.9767441860465125,
#                             u'friends': 11.627906976744187,
#                             u'tags': 69.76744186046513}],
#               u'name': u'User 2',
#               u'rank': 100.00000000000001},
#              {u'age': 30,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf4'),
#               u'weights': [{u'gender': 0,
#                             u'age': 0,
#                             u'friends': 3.8759689922480622,
#                             u'tags': 23.255813953488374}],
#               u'name': u'User 3',
#               u'rank': 27.131782945736436}]}



# Using plain Python dicts instead of a MongoDB collection


# NOTE(review): ClosenessDict is not imported anywhere in this example;
# import it from the closeness package before running this part (the
# module path is not shown here -- confirm against the package).

# Dicts to be ranked; user1 is passed separately as the first argument.
users = [user2, user3]

closeness_dict_obj = ClosenessDict(
    user1,
    users,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)

result = closeness_dict_obj.execute(
    mode=ClosenessDict.SIMPLE
)

# Plain asserts are used because this is a module-level script: there is
# no TestCase instance, so `self.assertEqual` would raise NameError.
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 27.131782945736436

# [{'name': 'User 2',
#   'tags': ['tag1',
#            'tag2',
#            'tag3'],
#   'gender': 'male',
#   'age': 25,
#   'closeness': {'weightages': {'gender': 11.627906976744187,
#                                'age': 6.9767441860465125,
#                                'friends': 11.627906976744187,
#                                'tags': 69.76744186046513},
#                 'rank': 100.00000000000001},
#   'friends': ['friend1',
#               'friend2',
#               'friend3']},
#  {'name': 'User 3',
#   'tags': ['tag1'],
#   'gender': 'female',
#   'age': 30,
#   'closeness': {'weightages': {'gender': 0.0,
#                                'age': 0.0,
#                                'friends': 3.8759689922480622,
#                                'tags': 23.255813953488374},
#                 'rank': 27.131782945736436},
#     'friends': ['friend3']}]


# Run the same dict comparison again, this time in FUZZY mode.
result = closeness_dict_obj.execute(
    mode=ClosenessDict.FUZZY
)

# Plain asserts are used because this is a module-level script: there is
# no TestCase instance, so `self.assertEqual` would raise NameError.
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 45.21963824289406

# [{'name': 'User 2',
#   'tags': ['tag1',
#            'tag2',
#            'tag3'],
#   'gender': 'male',
#   'age': 25,
#   'closeness': {'weightages': {'gender': 11.627906976744187,
#                                'age': 6.9767441860465125,
#                                'friends': 11.627906976744187,
#                                'tags': 69.76744186046513},
#                 'rank': 100.00000000000001},
#   'friends': ['friend1',
#               'friend2',
#               'friend3']},
#  {'name': 'User 3',
#   'tags': ['tag1'],
#   'gender': 'female',
#   'age': 30,
#   'closeness': {'weightages': {'gender': 0.0,
#                                'age': 0.0,
#                                'friends': 6.459948320413436,
#                                'tags': 38.75968992248062},
#                 'rank': 45.21963824289406},
#     'friends': ['friend3']}]

Project details


Download files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

Files for closeness, version 2.0.2
Filename, size File type Python version Upload date Hashes
Filename, size closeness-2.0.2.tar.gz (6.5 kB) File type Source Python version None Upload date Hashes View

Supported by

AWS AWS Cloud computing Datadog Datadog Monitoring Facebook / Instagram Facebook / Instagram PSF Sponsor Fastly Fastly CDN Google Google Object Storage and Download Analytics Huawei Huawei PSF Sponsor Microsoft Microsoft PSF Sponsor NVIDIA NVIDIA PSF Sponsor Pingdom Pingdom Monitoring Salesforce Salesforce PSF Sponsor Sentry Sentry Error logging StatusPage StatusPage Status page