# Source code for jubakit.recommender

# -*- coding: utf-8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals

import jubatus
import jubatus.embedded

from .base import GenericSchema, BaseDataset, BaseService, GenericConfig
from .compat import *

class Schema(GenericSchema):
    """
    Schema for Recommender service.

    In addition to the generic schema behavior, this schema tracks a
    single key of type ``ID`` whose value is used as the row ID.
    """

    # Mapping type marker identifying the row-ID column.
    ID = 'i'

    def __init__(self, mapping, fallback=None):
        """
        Resolve the key mapped to ``ID`` and initialize the generic schema.

        The ID key is resolved through ``_get_unique_mapping`` before the
        base-class initializer runs; the original ordering is preserved.
        """
        id_key = self._get_unique_mapping(mapping, fallback, self.ID, 'ID', True)
        self._id_key = id_key
        super(Schema, self).__init__(mapping, fallback)

    def transform(self, row):
        """
        Transform the row into a ``(row_id, datum)`` pair.

        ``row_id`` is the value stored under the ID key converted with
        ``unicode_t``, or ``None`` when the row has no (non-None) ID value.
        The ID key is passed to ``_transform_as_datum`` in its third
        argument (presumably a list of keys to exclude from the datum --
        confirm against ``GenericSchema``).
        """
        id_key = self._id_key
        raw_id = row.get(id_key, None)
        row_id = None if raw_id is None else unicode_t(raw_id)
        datum = self._transform_as_datum(row, None, [id_key])
        return (row_id, datum)
class Dataset(BaseDataset):
    """
    Dataset for Recommender service.
    """

    @classmethod
    def _predict(cls, row):
        """
        Predict a schema for the given row.

        Delegates to ``Schema.predict`` with ``False`` as the second
        positional argument and returns its result unchanged.
        """
        predicted = Schema.predict(row, False)
        return predicted
class Recommender(BaseService):
    """
    Recommender service.

    Every public query/update method below is a generator: it iterates over
    the dataset, which yields ``(idx, (row_id, datum))`` entries, performs
    one client call per entry and yields ``(idx, row_id, result)``.  Because
    they are generators, no work (including argument validation) happens
    until the returned iterator is first advanced.
    """

    @classmethod
    def name(cls):
        """Return the service name, ``'recommender'``."""
        return 'recommender'

    @classmethod
    def _client_class(cls):
        return jubatus.recommender.client.Recommender

    @classmethod
    def _embedded_class(cls):
        return jubatus.embedded.Recommender

    @staticmethod
    def _validate_rate(rate):
        """
        Raise ``ValueError`` unless ``rate`` lies in the interval (0, 1].

        The chained comparison is written in the positive form so that NaN,
        for which every comparison is False, is rejected as well.
        """
        if not (0.0 < rate <= 1.0):
            raise ValueError('rate must be in (0, 1], but {}'.format(rate))

    def clear_row(self, dataset):
        """
        Removes the given rows from the recommendation table.

        Yields ``(idx, row_id, result)`` for each entry of the dataset.
        Raises ``RuntimeError`` when an entry has no row ID.
        """
        cli = self._client()
        for (idx, (row_id, _datum)) in dataset:
            if row_id is None:
                raise RuntimeError('dataset must have `id`.')
            result = cli.clear_row(row_id)
            yield (idx, row_id, result)

    def update_row(self, dataset):
        """
        Update data points to the recommender model using the given dataset.

        Yields ``(idx, row_id, result)`` for each entry of the dataset.
        Raises ``RuntimeError`` when an entry has no row ID.
        """
        cli = self._client()
        for (idx, (row_id, d)) in dataset:
            if row_id is None:
                raise RuntimeError('datasets must have `id`')
            result = cli.update_row(row_id, d)
            yield (idx, row_id, result)

    def complete_row_from_id(self, dataset):
        """
        Returns data points from the row id in the recommender model,
        with missing value completed by predicted value.

        Yields ``(idx, row_id, result)`` for each entry of the dataset.
        Raises ``RuntimeError`` when an entry has no row ID.
        """
        cli = self._client()
        for (idx, (row_id, _datum)) in dataset:
            if row_id is None:
                raise RuntimeError('Non ID-based datasets must use `complete_row_from_datum`')
            result = cli.complete_row_from_id(row_id)
            yield (idx, row_id, result)

    def complete_row_from_datum(self, dataset):
        """
        Returns data points from the datum in the recommender model,
        with missing value completed by predicted value.

        Yields ``(idx, row_id, result)``; ``row_id`` may be ``None``.
        """
        cli = self._client()
        for (idx, (row_id, d)) in dataset:
            result = cli.complete_row_from_datum(d)
            yield (idx, row_id, result)

    def similar_row_from_id(self, dataset, size=10):
        """
        Returns similar data points from the row id in the recommender model.

        ``size`` is forwarded unchanged to the client call.
        Yields ``(idx, row_id, result)`` for each entry of the dataset.
        Raises ``RuntimeError`` when an entry has no row ID.
        """
        cli = self._client()
        for (idx, (row_id, _datum)) in dataset:
            if row_id is None:
                raise RuntimeError('Non ID-based datasets must use `similar_row_from_datum`')
            result = cli.similar_row_from_id(row_id, size)
            yield (idx, row_id, result)

    def similar_row_from_id_and_score(self, dataset, score=0.8):
        """
        Returns rows which are most similar to the row id and have a greater
        similarity score than score.

        Yields ``(idx, row_id, result)`` for each entry of the dataset.
        Raises ``RuntimeError`` when an entry has no row ID.
        """
        cli = self._client()
        for (idx, (row_id, _datum)) in dataset:
            if row_id is None:
                raise RuntimeError('Non ID-based datasets must use `similar_row_from_datum_and_score`')
            result = cli.similar_row_from_id_and_score(row_id, score)
            yield (idx, row_id, result)

    def similar_row_from_id_and_rate(self, dataset, rate=0.1):
        """
        Returns the top rate of all the rows which are most similar to the
        row id.  For example, return the top 10% of all the rows when 0.1 is
        specified as rate.  The rate must be in (0, 1].

        Yields ``(idx, row_id, result)`` for each entry of the dataset.
        Raises ``ValueError`` when ``rate`` is outside (0, 1] (NaN included)
        and ``RuntimeError`` when an entry has no row ID; both are raised
        during iteration, not at call time.
        """
        self._validate_rate(rate)
        cli = self._client()
        for (idx, (row_id, _datum)) in dataset:
            if row_id is None:
                raise RuntimeError('Non ID-based datasets must use `similar_row_from_datum_and_rate`')
            result = cli.similar_row_from_id_and_rate(row_id, rate)
            yield (idx, row_id, result)

    def similar_row_from_datum(self, dataset, size=10):
        """
        Returns similar data points from the datum in the recommender model.

        ``size`` is forwarded unchanged to the client call.
        Yields ``(idx, row_id, result)``; ``row_id`` may be ``None``.
        """
        cli = self._client()
        for (idx, (row_id, d)) in dataset:
            result = cli.similar_row_from_datum(d, size)
            yield (idx, row_id, result)

    def similar_row_from_datum_and_score(self, dataset, score=0.8):
        """
        Returns rows which are most similar to row and have a greater
        similarity score than score.

        Yields ``(idx, row_id, result)``; ``row_id`` may be ``None``.
        """
        cli = self._client()
        for (idx, (row_id, d)) in dataset:
            result = cli.similar_row_from_datum_and_score(d, score)
            yield (idx, row_id, result)

    def similar_row_from_datum_and_rate(self, dataset, rate=0.1):
        """
        Returns the top rate of all the rows which are most similar to row.
        For example, return the top 10% of all the rows when 0.1 is specified
        as rate.  The rate must be in (0, 1].

        Yields ``(idx, row_id, result)``; ``row_id`` may be ``None``.
        Raises ``ValueError`` (during iteration) when ``rate`` is outside
        (0, 1], NaN included.
        """
        self._validate_rate(rate)
        cli = self._client()
        for (idx, (row_id, d)) in dataset:
            result = cli.similar_row_from_datum_and_rate(d, rate)
            yield (idx, row_id, result)

    def decode_row(self, dataset):
        """
        Returns data points in the row id.

        Yields ``(idx, row_id, result)`` for each entry of the dataset.
        Raises ``RuntimeError`` when an entry has no row ID.
        """
        cli = self._client()
        for (idx, (row_id, _datum)) in dataset:
            if row_id is None:
                raise RuntimeError('Each data in datasets must has `row_id`')
            result = cli.decode_row(row_id)
            yield (idx, row_id, result)
class Config(GenericConfig):
    """
    Configuration to run Recommender service.
    """

    @classmethod
    def methods(cls):
        """Return the list of recommender method names handled by this class."""
        return [
            'lsh',
            'euclid_lsh',
            'minhash',
            'inverted_index',
            'inverted_index_euclid',
            'nearest_neighbor_recommender',
        ]

    @classmethod
    def _default_method(cls):
        """Return the method used by default, ``'lsh'``."""
        return 'lsh'

    @classmethod
    def _default_parameter(cls, method):
        """
        Return the default parameter set for ``method``.

        Returns ``None`` for the inverted-index methods and a fresh dict for
        the others.  Raises ``RuntimeError`` for any other value.

        Single-name branches use ``==``: a parenthesized string such as
        ``('minhash')`` is a plain string, not a tuple, so ``in`` would do
        substring matching and accept values like ``''`` or ``'hash'``.
        """
        if method in ('inverted_index', 'inverted_index_euclid'):
            return None
        elif method == 'minhash':
            return {
                'hash_num': 128,
            }
        elif method in ('lsh', 'euclid_lsh'):
            return {
                'hash_num': 128,
                'threads': -1,
            }
        elif method == 'nearest_neighbor_recommender':
            return {
                'method': 'euclid_lsh',
                'parameter': {
                    'threads': -1,  # use number of logical CPU cores
                    'hash_num': 128,
                },
            }
        else:
            raise RuntimeError('unknown method: {0}'.format(method))