Source code for rsd.graph

#
# graph.py
#
# Copyright (c) 2016 Junpei Kawamoto
#
# This file is part of rgmining-rsd.
#
# rgmining-rsd is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# rgmining-rsd is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Foobar.  If not, see <http://www.gnu.org/licenses/>.
#
"""Implementation of RSD.
"""
from __future__ import absolute_import
import networkx as nx
import numpy as np

from common import memoized


def _scale(v):
    """Scaling a given value.

    The output is defined by

    .. math::

        {\\rm scale}(v) = \\frac{2}{1 + \\exp(-v)} - 1

    Args:
      v: Input value.

    Returns:
      Output value defined above.
    """
    e = np.exp(-v)
    return 2. / (1. + e) - 1.


class _Node(object):
    """Abstract class of review graph.

    Args:
      graph: the graph object this node will belong to.
      name: name of this node.

    Attributes:
      name: name of this node.
    """
    __slots__ = ("_g", "name")

    def __init__(self, graph, name=None):
        if not isinstance(graph, ReviewGraph):
            raise ValueError(
                "Given graph isn't an instance of ReviewGraph:", graph)

        self._g = graph
        if name:
            self.name = name
        else:
            self.name = super(_Node, self).__str__()

    def __eq__(self, other):
        if not isinstance(other, type(self)):
            return False
        return self.name == other.name

    def __hash__(self):
        return 13 * hash(type(self)) + 17 * hash(self.name)


[docs]class Reviewer(_Node): """A node class representing a reviewer. Args: graph: Graph object this reviewer belongs to. name: Name of this reviewer. anomalous: Initial anomalous score (default: None). Attributes: trustiness: a float value in [0, 1] which represents trustiness of this reviewer. """ def __init__(self, graph, name=None, anomalous=None): super(Reviewer, self).__init__(graph, name) # If an initial anomalous score is given, use it. self.trustiness = 1.0 - anomalous if anomalous else 0.5 @property def anomalous_score(self): """Returns the anomalous score of this reviewer. The anomalous score is defined by 1 - trustiness. """ return 1.0 - self.trustiness
[docs] def update_trustiness(self): """Update trustiness of this reviewer. The updated trustiness of a reviewer :math:`u` is defined by .. math:: {\\rm trustiness}(u) = \\frac{2}{1 + \\exp(-\\sum_{r \\in R(u)} {\\rm honesty(r)} )} - 1 where :math:`R(u)` is a set of reviews the reviewer :math:`u` posts. Returns; absolute difference between the old trustiness and updated one. """ sum_h = 0. for re in self._g.retrieve_reviews_by_reviewer(self): sum_h += re.honesty new = _scale(sum_h) diff = abs(self.trustiness - new) self.trustiness = new return diff
def __str__(self): return "{0}: {1}".format(self.name, self.anomalous_score)
[docs]class Product(_Node): """A node class representing a product. Args: graph: Graph object this product belongs to. name: Name of this product. Attributes: reliability: a float value in [0, 1], which represents reliability of this product. """ def __init__(self, graph, name=None): super(Product, self).__init__(graph, name) self.reliability = 0.5 @property def summary(self): """Summary of reviews. This value is same as reliability. Original algorithm uses *reliability* but our algorithm uses *summary*. For convenience, both properties remain. """ return self.reliability
[docs] def update_reliability(self): """Update product's reliability. The new reliability is defined by .. math:: {\\rm reliability}(p) = \\frac{2}{1 + e^{-\\theta}} - 1, \\quad \\theta = \\sum_{r \\in R(p)} {\\rm trustiness}(r)({\\rm review}(r, p) - \\hat{s}), where :math:`R(p)` is a set of reviewers product *p* receives, trustiness is defined in :meth:`Reviewer.trustiness`, review(*r*, *p*) is the review score reviewer *r* has given to product *p*, and :math:`\\hat{s}` is the median of review scores. Returns: absolute difference between old reliability and new one. """ res = 0. reviews = self._g.retrieve_reviews_by_product(self) s = np.median([re.rating for re in reviews]) for re in reviews: for r in self._g.retrieve_reviewers(re): res += r.trustiness * (re.rating - s) new = _scale(res) diff = abs(self.reliability - new) self.reliability = new return diff
def __str__(self): return "{0}: {1}".format(self.name, self.summary)
[docs]class Review(object): """A graph entity representing a review. Attributes: rating: rating score of this review. honesty: honesty score. aggreement: aggreement score. time: time this review posted. """ def __init__(self, graph, time, rating): self._g = graph self.time = time self.rating = rating self.honesty = 0.5 self.agreement = 0.5
[docs] def update_honesty(self): """Update honesty of this review. The updated honesty of this review :math:`r` is defined by .. math:: {\\rm honesty}(r) = |{\\rm reliability}(P(r))| \\times {\\rm agreement}(r) where :math:`P(r)` is the product this review posted. Returns: absolute difference between old honesty and new one. """ res = 0. for p in self._g.retrieve_products(self): res += abs(p.reliability) * self.agreement diff = abs(self.honesty - res) self.honesty = res return diff
[docs] def update_agreement(self, delta): """Update agreement of this review. This process considers reviews posted in a close time span of this review. More precisely, let :math:`t` be the time when this review posted and :math:`\\delta` be the time span, only reviews of which posted times are in :math:`[t - \\delta, t+\\delta]` are considered. The updated agreement of a review :math:`r` will be computed with such reviews by .. math:: {\\rm agreement}(r) = \\frac{2}{1 + \\exp( \\sum_{v \\in R_{+}} {\\rm trustiness}(v) - \\sum_{v \\in R_{-}} {\\rm trustiness}(v) )} - 1 where :math:`R_{+}` is a set of reviews close to the review :math:`r`, i.e. the difference between ratings are smaller than or equal to delta, :math:`R_{-}` is the other reviews. The trustiness of a review means the trustiness of the reviewer who posts the review. Args: delta: a time span :math:`\\delta`. Only reviews posted in the span will be considered for this update. Returns: absolute difference between old agreement and new one. """ score_diff = 1. / 5. agree, disagree = self._g.retrieve_reviews(self, delta, score_diff) res = 0. for re in agree: for r in self._g.retrieve_reviewers(re): res += r.trustiness for re in disagree: for r in self._g.retrieve_reviewers(re): res -= r.trustiness new = _scale(res) diff = abs(self.agreement - new) self.agreement = new return diff
def __str__(self): return "Review (time={0}, rating={1}, agreement={2}, honesty={3})".format( self.time, self.rating, self.agreement, self.honesty)
[docs]class ReviewGraph(object): """Review graph is a bipartite graph of which one set of nodes represent reviewers and the other set of nodes represent products. Each edge has a label representing a review. Attributes: graph: graph object of networkx. reviewers: a collection of reviewers. products: a collection of products. reviews: a collection of reviews. """ def __init__(self, theta): """ Construct bipartite graph. Args: theta: A parameter for updating. """ self.graph = nx.DiGraph() self.reviewers = [] self.products = [] self.reviews = [] self._theta = theta self._delta = None @property def delta(self): """Time delta. This value is defined by :math:`\\delta = (t_{\\rm max} - t_{\\rm min}) \\times \\theta`, where :math:`t_{\\rm max}, t_{\\rm min}` are the maximum time, minimum time of all reviews, respectively, :math:`\\theta` is the given parameter defining time ratio. """ if not self._delta: min_time = min([r.time for r in self.reviews]) max_time = max([r.time for r in self.reviews]) self._delta = (max_time - min_time) * self._theta return self._delta
[docs] def new_reviewer(self, name=None, anomalous=None): """ Create a new reviewer. Args: name: the name of the new review. Returns: A new reviewer instance. """ n = Reviewer(self, name=name, anomalous=anomalous) self.graph.add_node(n) self.reviewers.append(n) return n
[docs] def new_product(self, name=None): """ Create a new product. Args: name: The name of the new product. Returns: A new product instance. """ n = Product(self, name) self.graph.add_node(n) self.products.append(n) return n
[docs] def add_review(self, reviewer, product, review, time=None): """ Add a new review. Args: reviewer: An instance of Reviewer. product: An instance of Product. review: A real number representing review score. time: An integer representing reviewing time. (optional) Returns: the new review object. """ if not isinstance(reviewer, Reviewer): raise ValueError( "Given reviewer isn't an instance of Reviewer:", reviewer) elif not isinstance(product, Product): raise ValueError( "Given product isn't an instance of Product:", product) if not time: re = Review(self, len(self.reviews), review) else: re = Review(self, time, review) self.graph.add_node(re) self.reviews.append(re) self.graph.add_edge(reviewer, re) self.graph.add_edge(re, product) return re
[docs] @memoized def retrieve_reviewers(self, review): """ Find reviewers associated with a review. Args: review: A review instance. Returns: A list of reviewers associated with the review. """ if not isinstance(review, Review): raise ValueError( "Given review isn't an instance of Review:", review) return self.graph.predecessors(review)
[docs] @memoized def retrieve_products(self, review): """Find products associated with a review. Args: review: A review instance. Returns: A list of products associated with the given review. """ if not isinstance(review, Review): raise ValueError( "Given review isn't an instance of Review:", review) return self.graph.successors(review)
[docs] @memoized def retrieve_reviews_by_reviewer(self, reviewer): """ Find reviews given by a reviewer. Args: reviewer: Reviewer Returns: A list of reviews given by the reviewer. """ if not isinstance(reviewer, Reviewer): raise ValueError( "Given reviewer isn't an instance of Reviewer:", reviewer) return self.graph.successors(reviewer)
[docs] @memoized def retrieve_reviews_by_product(self, product): """ Find reviews to a product. Args: product: Product Returns: A list of reviews to the product. """ if not isinstance(product, Product): raise ValueError( "Given product isn't an instance of Product:", product) return self.graph.predecessors(product)
[docs] def retrieve_reviews(self, review, time_diff=None, score_diff=0.25): """Find agree and disagree reviews. This method retrieve two groups of reviews. Agree reviews have similar scores to a given review. On the other hands disagree reviews have different scores. Args: review: A review instance. time_diff: An integer. score_diff: An float value. Returns: A tuple consists of (a list of agree reviews, a list of disagree reviews) """ if not time_diff: time_diff = float("inf") agree, disagree = [], [] for p in self.retrieve_products(review): for re in self.retrieve_reviews_by_product(p): if re == review: continue if abs(re.time - review.time) < time_diff: if abs(re.rating - review.rating) < score_diff: agree.append(re) else: disagree.append(re) return agree, disagree
[docs] def update(self): """ Update reviewers' anomalous scores and products' summaries. This update process consists of four steps; 1. Update honesties of reviews (See also :meth:`Review.update_honesty`), 2. Update trustinesses of reviewers (See also :meth:`Reviewer.update_trustiness`), 3. Update reliablities of products (See also :meth:`Product.update_reliability`), 4. Update agreements of reviews (See also :meth:`Review.update_agreement`). Returns: summation of maximum absolute updates for the above four steps. """ diff = max(re.update_honesty() for re in self.reviews) diff += max(r.update_trustiness() for r in self.reviewers) diff += max(p.update_reliability() for p in self.products) diff += max(re.update_agreement(self.delta) for re in self.reviews) return diff