Source code for rsd.graph

#
#  graph.py
#
#  Copyright (c) 2016-2022 Junpei Kawamoto
#
#  This file is part of rgmining-rsd.
#
#  rgmining-rsd is free software: you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  rgmining-rsd is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with Foobar.  If not, see <http://www.gnu.org/licenses/>.
#
"""Implementation of RSD.
"""
from collections.abc import Collection
from functools import cache
from typing import Final, NamedTuple, Optional

import networkx as nx
import numpy as np


def _scale(v: float) -> float:
    """Scaling a given value.

    The output is defined by

    .. math::

        {\\rm scale}(v) = \\frac{2}{1 + \\exp(-v)} - 1

    Args:
      v: Input value.

    Returns:
      Output value defined above.
    """
    e = np.exp(-v)
    return float(2.0 / (1.0 + e) - 1.0)


[docs]class Node: """Abstract class of review graph. Args: graph: the graph object this node will belong to. name: name of this node. """ name: Final[str] """Name of this node.""" _g: Final["ReviewGraph"] """Graph object this node belongs to.""" __slots__ = ("name", "_g") def __init__(self, graph: "ReviewGraph", name: Optional[str] = None) -> None: self._g = graph self.name = name if name else super().__str__() def __eq__(self, other: object) -> bool: if not isinstance(other, type(self)): return False return self.name == other.name def __hash__(self) -> int: return 13 * hash(type(self)) + 17 * hash(self.name)
[docs]class Reviewer(Node): """A node class representing a reviewer. Args: graph: Graph object this reviewer belongs to. name: Name of this reviewer. anomalous: Initial anomalous score (default: None). """ trustiness: float """A float value in [0, 1] which represents trustiness of this reviewer.""" __slots__ = ("trustiness",) def __init__(self, graph: "ReviewGraph", name: Optional[str] = None, anomalous: Optional[float] = None) -> None: super().__init__(graph, name) # If an initial anomalous score is given, use it. self.trustiness = 1.0 - anomalous if anomalous else 0.5 @property def anomalous_score(self) -> float: """Returns the anomalous score of this reviewer. The anomalous score is defined by 1 - trustiness. """ return 1.0 - self.trustiness
[docs] def update_trustiness(self) -> float: """Update trustiness of this reviewer. The updated trustiness of a reviewer :math:`u` is defined by .. math:: {\\rm trustiness}(u) = \\frac{2}{1 + \\exp(-\\sum_{r \\in R(u)} {\\rm honesty(r)} )} - 1 where :math:`R(u)` is a set of reviews the reviewer :math:`u` posts. Returns; absolute difference between the old trustiness and updated one. """ sum_h = 0.0 for re in self._g.retrieve_reviews_by_reviewer(self): sum_h += re.honesty new = _scale(sum_h) diff = abs(self.trustiness - new) self.trustiness = new return diff
def __str__(self) -> str: return f"{self.name}: {self.anomalous_score}"
[docs]class Product(Node): """A node class representing a product. Args: graph: Graph object this product belongs to. name: Name of this product. """ reliability: float """A float value in [0, 1], which represents reliability of this product.""" __slots__ = ("reliability",) def __init__(self, graph: "ReviewGraph", name: Optional[str] = None): super(Product, self).__init__(graph, name) self.reliability = 0.5 @property def summary(self) -> float: """Summary of reviews. This value is same as reliability. Original algorithm uses *reliability* but our algorithm uses *summary*. For convenience, both properties remain. """ return self.reliability
[docs] def update_reliability(self) -> float: """Update product's reliability. The new reliability is defined by .. math:: {\\rm reliability}(p) = \\frac{2}{1 + e^{-\\theta}} - 1, \\quad \\theta = \\sum_{r \\in R(p)} {\\rm trustiness}(r)({\\rm review}(r, p) - \\hat{s}), where :math:`R(p)` is a set of reviewers product *p* receives, trustiness is defined in :meth:`Reviewer.trustiness`, review(*r*, *p*) is the review score reviewer *r* has given to product *p*, and :math:`\\hat{s}` is the median of review scores. Returns: absolute difference between old reliability and new one. """ res = 0.0 reviews = self._g.retrieve_reviews_by_product(self) s = float(np.median([re.rating for re in reviews])) for re in reviews: for r in self._g.retrieve_reviewers(re): res += r.trustiness * (re.rating - s) new = _scale(res) diff = abs(self.reliability - new) self.reliability = new return diff
def __str__(self) -> str: return f"{self.name}: {self.summary}"
[docs]class Review: """A graph entity representing a review. Args: graph: Graph object this product belongs to. time: When this review is posted. rating: Rating of this review. """ rating: Final[float] """Rating score of this review.""" honesty: float """Honesty score.""" agreement: float """Agreement score.""" time: Final[int] """Time when this review posted.""" _g: Final["ReviewGraph"] __slots__ = ("rating", "honesty", "agreement", "time", "_g") def __init__(self, graph: "ReviewGraph", time: int, rating: float) -> None: self._g = graph self.time = time self.rating = rating self.honesty = 0.5 self.agreement = 0.5
[docs] def update_honesty(self) -> float: """Update honesty of this review. The updated honesty of this review :math:`r` is defined by .. math:: {\\rm honesty}(r) = |{\\rm reliability}(P(r))| \\times {\\rm agreement}(r) where :math:`P(r)` is the product this review posted. Returns: absolute difference between old honesty and new one. """ res = 0.0 for p in self._g.retrieve_products(self): res += abs(p.reliability) * self.agreement diff = abs(self.honesty - res) self.honesty = res return diff
[docs] def update_agreement(self, delta: float) -> float: """Update agreement of this review. This process considers reviews posted in a close time span of this review. More precisely, let :math:`t` be the time when this review posted and :math:`\\delta` be the time span, only reviews of which posted times are in :math:`[t - \\delta, t+\\delta]` are considered. The updated agreement of a review :math:`r` will be computed with such reviews by .. math:: {\\rm agreement}(r) = \\frac{2}{1 + \\exp( \\sum_{v \\in R_{+}} {\\rm trustiness}(v) - \\sum_{v \\in R_{-}} {\\rm trustiness}(v) )} - 1 where :math:`R_{+}` is a set of reviews close to the review :math:`r`, i.e. the difference between ratings are smaller than or equal to delta, :math:`R_{-}` is the other reviews. The trustiness of a review means the trustiness of the reviewer who posts the review. Args: delta: a time span :math:`\\delta`. Only reviews posted in the span will be considered for this update. Returns: absolute difference between old agreement and new one. """ score_diff = 1.0 / 5.0 agree, disagree = self._g.retrieve_reviews(self, delta, score_diff) res = 0.0 for re in agree: for r in self._g.retrieve_reviewers(re): res += r.trustiness for re in disagree: for r in self._g.retrieve_reviewers(re): res -= r.trustiness new = _scale(res) diff = abs(self.agreement - new) self.agreement = new return diff
def __str__(self) -> str: return f"Review (time={self.time}, rating={self.rating}, agreement={self.agreement}, honesty={self.honesty})"
[docs]class ReviewSet(NamedTuple): """Pair of agreed reviews and disagreed reviews.""" agree: Collection[Review] """Collection of agreed reviews.""" disagree: Collection[Review] """Collection of disagreed reviews."""
[docs]class ReviewGraph: """A bipartite graph of which one set of nodes represent reviewers and the other set of nodes represent products. Each edge has a label representing a review. Args: theta: A parameter for updating. See `the paper <https://ieeexplore.ieee.org/document/6137345?arnumber=6137345>`__ for more details. """ graph: Final[nx.DiGraph] """Graph object of networkx.""" reviewers: Final[list[Reviewer]] """Collection of reviewers.""" products: Final[list[Product]] """Collection of products.""" reviews: Final[list[Review]] """Collection of reviews.""" _theta: Final[float] """Parameter of the algorithm.""" _delta: Optional[float] """Cached time delta.""" def __init__(self, theta: float) -> None: self.graph = nx.DiGraph() self.reviewers = [] self.products = [] self.reviews = [] self._theta = theta self._delta = None @property def delta(self) -> float: """Time delta. This value is defined by :math:`\\delta = (t_{\\rm max} - t_{\\rm min}) \\times \\theta`, where :math:`t_{\\rm max}, t_{\\rm min}` are the maximum time, minimum time of all reviews, respectively, :math:`\\theta` is the given parameter defining time ratio. """ if not self._delta: min_time = min([r.time for r in self.reviews]) max_time = max([r.time for r in self.reviews]) self._delta = (max_time - min_time) * self._theta return self._delta
[docs] def new_reviewer(self, name: Optional[str] = None, anomalous: Optional[float] = None) -> Reviewer: """Create a new reviewer. Args: name: the name of the new reviewer. anomalous: the anomalous score of the new reviewer. Returns: A new reviewer instance. """ n = Reviewer(self, name=name, anomalous=anomalous) self.graph.add_node(n) self.reviewers.append(n) return n
[docs] def new_product(self, name: Optional[str] = None) -> Product: """Create a new product. Args: name: The name of the new product. Returns: A new product instance. """ n = Product(self, name) self.graph.add_node(n) self.products.append(n) return n
[docs] def add_review(self, reviewer: Reviewer, product: Product, review: float, time: Optional[int] = None) -> Review: """Add a new review. Args: reviewer: An instance of Reviewer. product: An instance of Product. review: A real number representing review score. time: An integer representing reviewing time. (optional) Returns: the new review object. """ if not time: re = Review(self, len(self.reviews), review) else: re = Review(self, time, review) self.graph.add_node(re) self.reviews.append(re) self.graph.add_edge(reviewer, re) self.graph.add_edge(re, product) self._delta = None return re
[docs] @cache def retrieve_reviewers(self, review: Review) -> Collection[Reviewer]: """Find reviewers associated with a review. Args: review: A review instance. Returns: A list of reviewers associated with the review. """ return list(self.graph.predecessors(review))
[docs] @cache def retrieve_products(self, review: Review) -> Collection[Product]: """Find products associated with a review. Args: review: A review instance. Returns: A list of products associated with the given review. """ return list(self.graph.successors(review))
[docs] @cache def retrieve_reviews_by_reviewer(self, reviewer: Reviewer) -> Collection[Review]: """Find reviews given by a reviewer. Args: reviewer: Reviewer Returns: A list of reviews given by the reviewer. """ return list(self.graph.successors(reviewer))
[docs] @cache def retrieve_reviews_by_product(self, product: Product) -> Collection[Review]: """Find reviews to a product. Args: product: Product Returns: A list of reviews to the product. """ return list(self.graph.predecessors(product))
[docs] def retrieve_reviews( self, review: Review, time_diff: Optional[float] = None, score_diff: float = 0.25 ) -> ReviewSet: """Find agree and disagree reviews. This method retrieve two groups of reviews. Agree reviews have similar scores to a given review. On the other hands disagree reviews have different scores. Args: review: A review instance. time_diff: An integer. score_diff: An float value. Returns: A tuple consists of (a list of agree reviews, a list of disagree reviews) """ if not time_diff: time_diff = float("inf") agree, disagree = [], [] for p in self.retrieve_products(review): for re in self.retrieve_reviews_by_product(p): if re == review: continue if abs(re.time - review.time) < time_diff: if abs(re.rating - review.rating) < score_diff: agree.append(re) else: disagree.append(re) return ReviewSet(agree, disagree)
[docs] def update(self) -> float: """Update reviewers' anomalous scores and products' summaries. This update process consists of four steps; 1. Update honesty of reviews (See also :meth:`Review.update_honesty`), 2. Update rustiness of reviewers (See also :meth:`Reviewer.update_trustiness`), 3. Update reliability of products (See also :meth:`Product.update_reliability`), 4. Update agreements of reviews (See also :meth:`Review.update_agreement`). Returns: summation of maximum absolute updates for the above four steps. """ diff = max(re.update_honesty() for re in self.reviews) diff += max(r.update_trustiness() for r in self.reviewers) diff += max(p.update_reliability() for p in self.products) diff += max(re.update_agreement(self.delta) for re in self.reviews) return diff