Skip to main content
This is a pre-production deployment of Warehouse. Changes made here affect the production instance of PyPI (pypi.python.org).
Help us improve Python packaging - Donate today!

MongoDB document and Python dict similarity

Project Description

This package now supports closeness among Python dicts.

This project is used to find relationships between MongoDB documents.

This will be the initial version of the project

How to use:

Install package with pip

pip install closeness

See the example,

from closeness.closeness_aggregation import ClosenessAggregation
from pymongo import MongoClient
# --- Comparison configuration ---------------------------------------------
# Each list names the fields compared for one kind of value, together with
# the weight assigned to that field.

# Fields holding plain arrays.
ARRAY_CMP_FIELDS = [{'field': 'tags', 'weight': 3}]

# Fields holding arrays of dicts; 'unique' names the key used inside each
# element ('user_id' for the entries of 'friends').
ARRAY_DICT_CMP_FIELDS = [{'field': 'friends', 'unique': 'user_id', 'weight': 0.5}]

# Fields holding strings.
STRING_CMP_FIELDS = [{'field': 'gender', 'weight': 0.5}]

# Fields holding numbers.
# NOTE(review): 'from'/'to' presumably bound the accepted offset around the
# reference value -- confirm against the package documentation.
NUM_CMP_FIELDS = [{'field': 'age', 'from': -1, 'to': 1, 'weight': 0.3}]

# Document fields passed to ClosenessAggregation as the output fields.
OUT_PUT_FIELDS = ['name', 'age']

# --- Sample data ------------------------------------------------------------
client = MongoClient()
db = client['test_database']
user_collection = db['user_collection']

# user1 is the reference document; user2 shares every compared field with
# it, user3 shares only one tag and one friend.
user1 = {
    'name': 'User 1',
    'age': 25,
    'gender': 'male',
    'tags': ["tag1", "tag2", "tag3"],
    'friends': [
        {"user_id": "friend1", 'name': "name1"},
        {"user_id": "friend2", 'name': "name2"},
        {"user_id": "friend3", 'name': "name3"},
    ],
}
user2 = {
    'name': 'User 2',
    'age': 25,
    'gender': 'male',
    'tags': ["tag1", "tag2", "tag3"],
    'friends': [
        {"user_id": "friend1", 'name': "name1"},
        {"user_id": "friend2", 'name': "name2"},
        {"user_id": "friend3", 'name': "name3"},
    ],
}
user3 = {
    'name': 'User 3',
    'age': 30,
    'gender': 'female',
    'tags': ["tag1"],
    'friends': [{"user_id": "friend3", 'name': "name3"}],
}
user_collection.insert([user1, user2, user3])

# --- FUZZY-mode aggregation ---------------------------------------------------
# Leave the reference user itself out of the candidates.
query_stage = {'$match': {'name': {'$ne': user1['name']}}}

test = ClosenessAggregation(
    user1,
    query_stage,
    OUT_PUT_FIELDS,
    limit=10,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)

aggregation_query = test.get_aggregation_pipeline(mode=ClosenessAggregation.FUZZY)
result = user_collection.aggregate(aggregation_query)

# {u'ok': 1.0,
#  u'result': [{u'age': 25,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf0'),
#               u'weights': [{u'gender': 11.627906976744187,
#                             u'age': 6.9767441860465125,
#                             u'friends': 11.626615417599819,
#                             u'tags': 69.75969250559892}],
#               u'name': u'User 2',
#               u'rank': 99.99095908598945},
#              {u'age': 30,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf1'),
#               u'weights': [{u'gender': 0,
#                             u'age': 0,
#                             u'friends': 6.456076223518085,
#                             u'tags': 38.73645734110851}],
#               u'name': u'User 3',
#               u'rank': 45.1925335646266}]}


# Build the pipeline again in SIMPLE mode. The ClosenessAggregation instance
# created earlier in this example is bound to the name `test`; the previously
# used name `closeness_obj` was never defined and raised a NameError.
aggregation_query = test.get_aggregation_pipeline(
    mode=ClosenessAggregation.SIMPLE
)

result = user_collection.aggregate(aggregation_query)

# {u'ok': 1.0,
#  u'result': [{u'age': 25,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf3'),
#               u'weights': [{u'gender': 11.627906976744187,
#                             u'age': 6.9767441860465125,
#                             u'friends': 11.627906976744187,
#                             u'tags': 69.76744186046513}],
#               u'name': u'User 2',
#               u'rank': 100.00000000000001},
#              {u'age': 30,
#               u'_id': ObjectId('55c894dcb67e20612cd6ddf4'),
#               u'weights': [{u'gender': 0,
#                             u'age': 0,
#                             u'friends': 3.8759689922480622,
#                             u'tags': 23.255813953488374}],
#               u'name': u'User 3',
#               u'rank': 27.131782945736436}]}



# By using python dicts


# NOTE(review): ClosenessDict is not imported anywhere in this example; it
# has to be imported from the closeness package before this part runs (the
# module path is not shown here -- confirm against the package).

# Candidate dicts that are compared against the reference dict user1.
users = [user2, user3]

closeness_dict_obj = ClosenessDict(
    user1,
    users,
    ARRAY_CMP_FIELDS=ARRAY_CMP_FIELDS,
    STRING_CMP_FIELDS=STRING_CMP_FIELDS,
    NUM_CMP_FIELDS=NUM_CMP_FIELDS,
    ARRAY_DICT_CMP_FIELDS=ARRAY_DICT_CMP_FIELDS,
)

result = closeness_dict_obj.execute(
    mode=ClosenessDict.SIMPLE
)

# Plain asserts: this example runs as a script, so there is no TestCase
# `self` on which assertEqual could be called.
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 27.131782945736436

# [{'name': 'User 2',
#   'tags': ['tag1',
#            'tag2',
#            'tag3'],
#   'gender': 'male',
#   'age': 25,
#   'closeness': {'weightages': {'gender': 11.627906976744187,
#                                'age': 6.9767441860465125,
#                                'friends': 11.627906976744187,
#                                'tags': 69.76744186046513},
#                 'rank': 100.00000000000001},
#   'friends': ['friend1',
#               'friend2',
#               'friend3']},
#  {'name': 'User 3',
#   'tags': ['tag1'],
#   'gender': 'female',
#   'age': 30,
#   'closeness': {'weightages': {'gender': 0.0,
#                                'age': 0.0,
#                                'friends': 3.8759689922480622,
#                                'tags': 23.255813953488374},
#                 'rank': 27.131782945736436},
#     'friends': ['friend3']}]


# Run the same dict comparison in FUZZY mode.
result = closeness_dict_obj.execute(
    mode=ClosenessDict.FUZZY
)

# Plain asserts: this example runs as a script, so there is no TestCase
# `self` on which assertEqual could be called.
assert result[0]['closeness']['rank'] == 100.00000000000001
assert result[1]['closeness']['rank'] == 45.21963824289406

# [{'name': 'User 2',
#   'tags': ['tag1',
#            'tag2',
#            'tag3'],
#   'gender': 'male',
#   'age': 25,
#   'closeness': {'weightages': {'gender': 11.627906976744187,
#                                'age': 6.9767441860465125,
#                                'friends': 11.627906976744187,
#                                'tags': 69.76744186046513},
#                 'rank': 100.00000000000001},
#   'friends': ['friend1',
#               'friend2',
#               'friend3']},
#  {'name': 'User 3',
#   'tags': ['tag1'],
#   'gender': 'female',
#   'age': 30,
#   'closeness': {'weightages': {'gender': 0.0,
#                                'age': 0.0,
#                                'friends': 6.459948320413436,
#                                'tags': 38.75968992248062},
#                 'rank': 45.21963824289406},
#     'friends': ['friend3']}]
Release History

Release History

This version
History Node

2.0.2

History Node

2.0.1

History Node

2.0.0

History Node

1.5.2

History Node

1.5.1

History Node

1.5.0

History Node

1.3.0

History Node

1.2.95

History Node

1.2.94

History Node

1.2.93

History Node

1.2.92

History Node

1.2.91

History Node

1.2.9

History Node

1.2.8

History Node

1.2.7

History Node

1.2.6

History Node

1.2.5

History Node

1.2.4

History Node

1.2.3

History Node

1.2.2

History Node

1.2.1

History Node

1.2.0

History Node

1.1.1

History Node

1.1.0

History Node

1.0.0

Download Files

Download Files

Download the file for your platform. If you're not sure which to choose, learn more about installing packages.

File Name & Checksum SHA256 Checksum Help Version File Type Upload Date
closeness-2.0.2.tar.gz (6.5 kB) Copy SHA256 Checksum SHA256 Source May 18, 2016

Supported By

WebFaction WebFaction Technical Writing Elastic Elastic Search Pingdom Pingdom Monitoring Dyn Dyn DNS Sentry Sentry Error Logging CloudAMQP CloudAMQP RabbitMQ Heroku Heroku PaaS Kabu Creative Kabu Creative UX & Design Fastly Fastly CDN DigiCert DigiCert EV Certificate Rackspace Rackspace Cloud Servers DreamHost DreamHost Log Hosting