
A library for implementing projects with Case-Based Reasoning

Project description

Case Based Reasoning library for Python - cbrlib

Installation

pip install cbrlib
The examples below use the following imports:

import matplotlib.pyplot as plt
%matplotlib inline

from cbrlib.reasoning.Reasoner import InferenceOptions
from cbrlib.utils.serializer import to_json_string
from cbrlib.evaluation.AssemblyAverageEvaluator import AssemblyAverageEvaluator
from cbrlib.evaluation.LookupTableEvaluator import LookupTableEvaluator
from cbrlib.evaluation.NumberInterpolationEvaluator import NumberInterpolationMetrics, NumberInterpolation, \
    NumberInterpolationEvaluator
from cbrlib.model.AssemblyClass import AssemblyClass
from cbrlib.model.Attribute import Attribute
from cbrlib.model.EnumerationPredicate import EnumerationPredicate
from cbrlib.model.FloatClass import FloatClass
from cbrlib.model.IntegerClass import IntegerClass
from cbrlib.model.RangePredicate import RangePredicate

from cbrlib.model.StringClass import StringClass
from cbrlib.reasoning.LinearReasoner import LinearReasoner

from functools import reduce

from random import choice, randint
import timeit

#plt.style.use('ggplot')

NumberInterpolationEvaluator

We create a price_class from cbrlib.model.FloatClass and set its predicate to a cbrlib.model.RangePredicate, which bounds the value between a minimum of 10 and a maximum of 100000. We also define different number interpolation evaluators.

# Define the price class with minimum value of 10 and maximum value of 100000
price_class = FloatClass('Price')
min_ = price_class.create_object(10)
max_ = price_class.create_object(100000)
price_class.set_predicate(RangePredicate(min_, max_))

# Define interpolation metrics with a tolerance of 0 if the case is greater than the query
# -> Every case greater than the query will have 0 similarity
metrics = NumberInterpolationMetrics()
metrics.tolerance_if_more = 0.0
less_is_good = NumberInterpolationEvaluator('LessIsGood', min_.get_value(), max_.get_value(), metrics)

# Define the same as above, but use an origin value
# The lower the query, the smaller the vicinity
metrics = NumberInterpolationMetrics()
metrics.tolerance_if_more = 0.0
metrics.origin = 10
metrics.use_origin = True
less_is_good_with_origin = NumberInterpolationEvaluator('LessIsGoodUseOrigin',
                                                        min_.get_value(), max_.get_value(), metrics)

# Define metrics with a more realistic behaviour using sigmoid interpolation
metrics = NumberInterpolationMetrics()
metrics.origin = min_.get_value()
metrics.use_origin = True
metrics.tolerance_if_less = 0.3
metrics.tolerance_if_more = 0.1
metrics.linearity_if_less = 0.5
metrics.linearity_if_more = 3
metrics.set_interpolation_if_less(NumberInterpolation.Sigmoid)
metrics.set_interpolation_if_more(NumberInterpolation.Sigmoid)
real_behaviour = NumberInterpolationEvaluator('RealBehaviour',
                                              min_.get_value(), max_.get_value(), metrics)
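
As a quick check of the asymmetry before plotting, we can evaluate a case below and a case above the query. This is only a sketch; the exact value for the lower case depends on the interpolation defaults:

q = price_class.read_object(500)
print(less_is_good.evaluate(q, price_class.read_object(400)))  # case below the query
print(less_is_good.evaluate(q, price_class.read_object(600)))  # case above the query, expected to be 0.0 since tolerance_if_more is 0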

Let's generate lists of similarity values from the different evaluators.

query = price_class.read_object(500)

less_is_good_values = list()
less_is_good_with_origin_values = list()
real_behaviour_values = list()
steps = list()
for i in range(300, 600):
    steps.append(i)
    case = price_class.read_object(i)
    less_is_good_values.append(
        less_is_good.evaluate(query, case))
    less_is_good_with_origin_values.append(
        less_is_good_with_origin.evaluate(query, case))
    real_behaviour_values.append(real_behaviour.evaluate(query, case))

To understand the evaluators, we will now visualize them with the help of matplotlib.

figure = plt.figure(2, figsize=(20, 9.6))
plt1 = figure.add_subplot(221)
plt2 = figure.add_subplot(222)
plt3 = figure.add_subplot(223)

plt1.plot(
    steps, less_is_good_values
)
plt1.set_title('Less is good')
plt1.set_xlabel(price_class.get_id())
plt1.set_ylabel('Similarity')
plt2.plot(
    steps, less_is_good_with_origin_values
)
plt2.set_title('Less is good with use of an origin')
plt2.set_xlabel(price_class.get_id())
plt2.set_ylabel('Similarity')

plt3.plot(
    steps, real_behaviour_values
)
plt3.set_title('Real behaviour')
plt3.set_xlabel(price_class.get_id())
plt3.set_ylabel('Similarity')

plt.show()

(Figure: similarity curves for 'Less is good', 'Less is good with use of an origin' and 'Real behaviour')

Next we add the class mileage_class, which is of type IntegerClass and bounded between 0 and 1000000.

mileage_class = IntegerClass('Mileage')
mileage_min = 0
mileage_max = 1000000
mileage_class.set_predicate(RangePredicate(mileage_class.create_object(mileage_min),
                                           mileage_class.create_object(mileage_max)))

mileage_evaluator = NumberInterpolationEvaluator('MileageDefault', mileage_min, mileage_max)

query = mileage_class.read_object(mileage_max // 2)  # Query value is 500000

mileage_values = list()
steps = list()
for i in range(mileage_min, mileage_max, int(mileage_max / 1000)):
    steps.append(i)
    mileage_values.append(mileage_evaluator.evaluate(query, mileage_class.read_object(i)))

Since mileage_evaluator was created without explicit metrics, we can take a look at the default behaviour of a NumberInterpolationMetrics object.
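
Before plotting, here is a minimal way to inspect those defaults directly, assuming the metric fields used above (tolerance_if_less, tolerance_if_more, linearity_if_less, linearity_if_more) are plain readable attributes:

default_metrics = NumberInterpolationMetrics()
print(default_metrics.tolerance_if_less, default_metrics.tolerance_if_more)   # default tolerances
print(default_metrics.linearity_if_less, default_metrics.linearity_if_more)   # default linearities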

plt.plot(steps, mileage_values)
plt.show()

(Figure: similarity over Mileage with the default interpolation metrics)

LookupTableEvaluator

We can also define a brand_class of type cbrlib.model.StringClass.

brand_class = StringClass('Brand')
brands = ['Opel', 'Daimler Benz', 'BMW', 'VW']
brand_class.set_predicate(EnumerationPredicate(
                            map(lambda b: brand_class.create_object(b), brands)))

brand_similarity = {
    'Opel': {
        'VW': 0.6,
        'BMW': 0.3,
        'Daimler Benz': 0.1
    },
    'Daimler Benz': {
        'BMW': 0.8,
        'VW': 0.4,
        'Opel': 0.1
    },
    'BMW': {
        'Daimler Benz': 0.8,
        'VW': 0.6,
        'Opel': 0.1
    },
    'VW': {
        'BMW': 0.8,
        'Daimler Benz': 0.8,
        'Opel': 0.6,
    }
}
brand_evaluator = LookupTableEvaluator('Brand', brand_similarity)

If we want to know how similar the brand BMW is to VW, we can now ask the brand evaluator:

print(f'BMW is {brand_evaluator.evaluate(brand_class.read_object("BMW"), brand_class.read_object("VW")) * 100}% similar to VW')
BMW is 60.0% similar to VW
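
Note that the lookup table does not have to be symmetric. Assuming the first argument of evaluate() is the query and selects the row of the table (as the 60% result above suggests), swapping the arguments uses the 'VW' row instead:

print(f'VW is {brand_evaluator.evaluate(brand_class.read_object("VW"), brand_class.read_object("BMW")) * 100}% similar to BMW')
# -> VW is 80.0% similar to BMW with the table above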

AssemblyAverageEvaluator

Or we can use cbrlib.model.AssemblyClass to define a UsedCar descriptor and find a used car with a cbrlib.reasoning.LinearReasoner.

used_car_class = AssemblyClass('UsedCar', [
        Attribute('price', price_class),
        Attribute('mileage', mileage_class),
        Attribute('brand', brand_class)
    ])

used_car_evaluator = AssemblyAverageEvaluator('UsedCarEvaluator',
                             {
                                'price': {
                                    'weight': 2, # The price is twice as important as the rest
                                    'evaluator': real_behaviour
                                    },
                                'mileage': {
                                    'weight': 1,
                                    'evaluator': mileage_evaluator
                                },
                                'brand': {
                                    'weight': 1,
                                    'evaluator': brand_evaluator
                                }
                            })
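
The assembly evaluator can also be applied to a single query/case pair. The following is only a sketch, assuming it shares the evaluate(query, case) interface of the attribute evaluators; the result is presumably the weighted average of the attribute similarities, i.e. (2 * sim_price + sim_mileage + sim_brand) / 4:

query_car = used_car_class.read_object({'price': 7000, 'mileage': 120000, 'brand': 'Opel'})
case_car = used_car_class.read_object({'price': 6600, 'mileage': 68000, 'brand': 'Opel'})
print(used_car_evaluator.evaluate(query_car, case_car))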

cars = [
    {
        'brand': 'Opel',
        'price': 6600,
        'mileage': 68000
    },
    {
        'brand': 'BMW',
        'price': 8600,
        'mileage': 110000
    },
    {
        'brand': 'Daimler Benz',
        'price': 11000,
        'mileage': 21000
    },
    {
        'brand': 'VW',
        'price': 2799,
        'mileage': 550000
    },
]
reasoner = LinearReasoner(
    map(lambda c: used_car_class.read_object(c), cars), used_car_evaluator)

# Ask the reasoner now for a used car with a price of 7000 and a mileage of 120000

candidates = reasoner.infer(used_car_class.read_object({
    'price': 7000,
    'mileage': 120000
}))
print(to_json_string(candidates, indent=4))
[
    {
        "similarity": 0.9168199170617668,
        "case": {
            "brand": "Opel",
            "mileage": 68000,
            "price": 6600.0
        }
    },
    {
        "similarity": 0.32666666666666666,
        "case": {
            "brand": "BMW",
            "mileage": 110000,
            "price": 8600.0
        }
    },
    {
        "similarity": 0.26733333333333337,
        "case": {
            "brand": "Daimler Benz",
            "mileage": 21000,
            "price": 11000.0
        }
    },
    {
        "similarity": 0.04666666666666667,
        "case": {
            "brand": "VW",
            "mileage": 550000,
            "price": 2799.0
        }
    }
]
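
If only the best matches are needed, the result can be restricted with InferenceOptions, the same skip/limit options used in the performance test below. A minimal sketch:

best_match = reasoner.infer(
    used_car_class.read_object({'price': 7000, 'mileage': 120000}),
    InferenceOptions({'skip': 0, 'limit': 1}))
print(to_json_string(best_match, indent=4))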

Performance

Let us now create a cbrlib.reasoning.Reasoner from cbrlib.reasoning.LinearReasoner with a case base of 1000000 random used cars.

used_car_class = AssemblyClass('UsedCar', [
        Attribute('price', price_class),
        Attribute('mileage0', mileage_class), # We add multiple mileage attributes
        Attribute('mileage1', mileage_class), # to make a more 'complex' description
        Attribute('mileage2', mileage_class), # for the performance test.
        Attribute('mileage3', mileage_class),
        Attribute('mileage4', mileage_class),
        Attribute('brand', brand_class)
    ])
cars = []
for c in range(1000000):
    cars.append({
        'price': randint(10, 100000),
        'mileage0': randint(0, 1000000),
        'mileage1': randint(0, 1000000),
        'mileage2': randint(0, 1000000),
        'mileage3': randint(0, 1000000),
        'mileage4': randint(0, 1000000),
        'brand': choice(brands)
    })
reasoner = LinearReasoner(
    map(lambda c: used_car_class.read_object(c), cars),
    AssemblyAverageEvaluator('UsedCarEvaluator',
                             {
                                'price': {
                                    'weight': 2,
                                    'evaluator': real_behaviour
                                    },
                                'mileage': {
                                    'weight': 1,
                                    'evaluator': mileage_evaluator
                                },
                                'brand': {
                                    'weight': 1,
                                    'evaluator': brand_evaluator
                                }
                            }))

price = randint(10, 100000)      # Also let's create
mileage0 = randint(0, 1000000)   # random query values
mileage1 = randint(0, 1000000)
mileage2 = randint(0, 1000000)
mileage3 = randint(0, 1000000)
mileage4 = randint(0, 1000000)
brand = choice(brands)

loops = 100000
repetitions = 5
task = timeit.Timer(lambda: reasoner.infer(
    used_car_class.read_object({
        'price': price,
        'mileage0': mileage0,
        'mileage1': mileage1,
        'mileage2': mileage2,
        'mileage3': mileage3,
        'mileage4': mileage4,
        'brand': brand
    }), InferenceOptions({
        'skip': 0,
        'limit': 10
    })
    ))

measures = task.repeat(repetitions, loops)
best = reduce(lambda c, n: min(c, n), measures)

# Caution: it runs for about a minute!
print(f'{loops} loops, best of {repetitions}: {best*1000} ms')
100000 loops, best of 5: 1851.9436290080193 ms
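
timeit.Timer.repeat returns the total time of all executions per repetition, so the time of a single inference in the best run can be derived like this:

per_loop_ms = best / loops * 1000
print(f'{per_loop_ms} ms per inference')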

