# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals

import jubatus
import jubatus.embedded

from .base import GenericSchema, BaseDataset, BaseService, GenericConfig
from .compat import *


class Schema(GenericSchema):
  """
  Schema for Recommender service.
  """

  ID = 'i'

  def __init__(self, mapping, fallback=None):
    self._id_key = self._get_unique_mapping(mapping, fallback, self.ID, 'ID', True)
    super(Schema, self).__init__(mapping, fallback)
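
# Example (sketch): a schema that maps the `name` column to the row ID and
# treats every other column as a numeric feature.  `Schema.NUMBER` is assumed
# to be provided by `GenericSchema`; the column name is illustrative.
#
#   schema = Schema({
#     'name': Schema.ID,
#   }, Schema.NUMBER)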


class Dataset(BaseDataset):
  """
  Dataset for Recommender service.
  """

  @classmethod
  def _predict(cls, row):
    return Schema.predict(row, False)
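
# Example (sketch): building a Dataset by combining a loader with the schema
# above.  `CSVLoader` is assumed to live in `jubakit.loader.csv`, and
# `data.csv` is an illustrative file name.
#
#   from jubakit.loader.csv import CSVLoader
#
#   loader = CSVLoader('data.csv')
#   dataset = Dataset(loader, schema)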


class Recommender(BaseService):
  """
  Recommender service.
  """

  @classmethod
  def name(cls):
    return 'recommender'

  @classmethod
  def _client_class(cls):
    return jubatus.recommender.client.Recommender

  @classmethod
  def _embedded_class(cls):
    return jubatus.embedded.Recommender

  def clear_row(self, dataset):
    """
    Removes the given rows from the recommendation table.
    """
    cli = self._client()
    for (idx, (row_id, d)) in dataset:
      if row_id is None:
        raise RuntimeError('dataset must have `id`')
      result = cli.clear_row(row_id)
      yield (idx, row_id, result)

  def update_row(self, dataset):
    """
    Updates the recommender model with the data points in the given dataset.
    """
    cli = self._client()
    for (idx, (row_id, d)) in dataset:
      if row_id is None:
        raise RuntimeError('dataset must have `id`')
      result = cli.update_row(row_id, d)
      yield (idx, row_id, result)

  def complete_row_from_id(self, dataset):
    """
    Returns the data point for each row ID in the recommender model,
    with missing values completed by predicted values.
    """
    cli = self._client()
    for (idx, (row_id, d)) in dataset:
      if row_id is None:
        raise RuntimeError('Non ID-based datasets must use `complete_row_from_datum`')
      result = cli.complete_row_from_id(row_id)
      yield (idx, row_id, result)

  def complete_row_from_datum(self, dataset):
    """
    Returns a data point for each datum in the dataset, with missing
    values completed by values predicted by the recommender model.
    """
    cli = self._client()
    for (idx, (row_id, d)) in dataset:
      result = cli.complete_row_from_datum(d)
      yield (idx, row_id, result)

  def similar_row_from_id(self, dataset, size=10):
    """
    Returns similar data points from the row id in the recommender model.
    """
    cli = self._client()
    for (idx, (row_id, d)) in dataset:
      if row_id is None:
        raise RuntimeError('Non ID-based datasets must use `similar_row_from_datum`')
      result = cli.similar_row_from_id(row_id, size)
      yield (idx, row_id, result)

  def similar_row_from_id_and_score(self, dataset, score=0.8):
    """
    Returns rows which are most similar to the given row ID and have a
    similarity score greater than the given score.
    """
    cli = self._client()
    for (idx, (row_id, d)) in dataset:
      if row_id is None:
        raise RuntimeError('Non ID-based datasets must use `similar_row_from_datum_and_score`')
      result = cli.similar_row_from_id_and_score(row_id, score)
      yield (idx, row_id, result)

  def similar_row_from_id_and_rate(self, dataset, rate=0.1):
    """
    Returns the top rate of all rows which are most similar to the given row ID.
    For example, when 0.1 is specified as rate, the top 10% of all rows are returned.
    The rate must be in (0, 1].
    """
    if rate <= 0.0 or 1.0 < rate:
      raise ValueError('rate must be in (0, 1], but {}'.format(rate))
    cli = self._client()
    for (idx, (row_id, d)) in dataset:
      if row_id is None:
        raise RuntimeError('Non ID-based datasets must use `similar_row_from_datum_and_rate`')
      result = cli.similar_row_from_id_and_rate(row_id, rate)
      yield (idx, row_id, result)

  def similar_row_from_datum(self, dataset, size=10):
    """
    Returns similar data points from the datum in the recommender model.
    """
    cli = self._client()
    for (idx, (row_id, d)) in dataset:
      result = cli.similar_row_from_datum(d, size)
      yield (idx, row_id, result)

  def similar_row_from_datum_and_score(self, dataset, score=0.8):
    """
    Returns rows which are most similar to the given datum and have a
    similarity score greater than the given score.
    """
    cli = self._client()
    for (idx, (row_id, d)) in dataset:
      result = cli.similar_row_from_datum_and_score(d, score)
      yield (idx, row_id, result)

  def similar_row_from_datum_and_rate(self, dataset, rate=0.1):
    """
    Returns the top rate of all rows which are most similar to the given datum.
    For example, when 0.1 is specified as rate, the top 10% of all rows are returned.
    The rate must be in (0, 1].
    """
    if rate <= 0.0 or 1.0 < rate:
      raise ValueError('rate must be in (0, 1], but {}'.format(rate))
    cli = self._client()
    for (idx, (row_id, d)) in dataset:
      result = cli.similar_row_from_datum_and_rate(d, rate)
      yield (idx, row_id, result)

  def decode_row(self, dataset):
    """
    Returns the data point registered for the given row ID.
    """
    cli = self._client()
    for (idx, (row_id, d)) in dataset:
      if row_id is None:
        raise RuntimeError('dataset must have `id`')
      result = cli.decode_row(row_id)
      yield (idx, row_id, result)
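
# Example (sketch): feeding a dataset to a running service and querying for
# similar rows.  `Recommender.run()` and `stop()` are assumed to be provided
# by `BaseService`; each `result` yielded by `similar_row_from_id` is assumed
# to be a list of Jubatus `IdWithScore` entries.
#
#   recommender = Recommender.run(Config())
#   for _ in recommender.update_row(dataset):
#     pass  # push every row into the model
#   for (idx, row_id, result) in recommender.similar_row_from_id(dataset, size=3):
#     print(row_id, [(r.id, r.score) for r in result])
#   recommender.stop()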


class Config(GenericConfig):
  """
  Configuration to run Recommender service.
  """

  @classmethod
  def methods(cls):
    return ['lsh', 'euclid_lsh', 'minhash', 'inverted_index',
            'inverted_index_euclid', 'nearest_neighbor_recommender']

  @classmethod
  def _default_method(cls):
    return 'lsh'

  @classmethod
  def _default_parameter(cls, method):
    if method in ('inverted_index', 'inverted_index_euclid'):
      return None
    elif method in ('minhash',):
      return {
        'hash_num': 128,
      }
    elif method in ('lsh', 'euclid_lsh'):
      return {
        'hash_num': 128,
        'threads': -1,  # use number of logical CPU cores
      }
    elif method in ('nearest_neighbor_recommender',):
      return {
        'method': 'euclid_lsh',
        'parameter': {
          'threads': -1,  # use number of logical CPU cores
          'hash_num': 128,
        },
      }
    else:
      raise RuntimeError('unknown method: {0}'.format(method))
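
# Example (sketch): selecting a non-default method and overriding its
# parameters.  `Config` is assumed to accept `method` and `parameter`
# keyword arguments via `GenericConfig`.
#
#   cfg = Config(method='inverted_index_euclid')
#   cfg2 = Config(method='minhash', parameter={'hash_num': 256})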