#
# helper.py
#
# Copyright (c) 2016-2017 Junpei Kawamoto
#
# This file is part of rgmining-dataset-io.
#
# rgmining-dataset-io is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# rgmining-dataset-io is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rgmining-dataset-io. If not, see <http://www.gnu.org/licenses/>.
#
"""Provide helper functions and classes.
"""
# pylint: disable=invalid-name
from __future__ import absolute_import
try:
from itertools import imap, ifilter
except ImportError:
imap = map
ifilter = filter
import json
import sys
from functools import wraps
from collections import namedtuple
from dataset_io.constants import PRODUCT_ID
from dataset_io.constants import REVIEWER_ID
Reviewer = namedtuple("Reviewer", (REVIEWER_ID, "score"))
"""Named tuple to access reviewer's attributes easily.
"""
Product = namedtuple("Product", (PRODUCT_ID, "summary"))
"""Named tuple to access product's attribute easily.
"""
[docs]def quiet(f):
"""Decorator ignoring ValueError.
Args:
f: A function
Returns:
A decorated function which ignores ValueError and returns None
when such exceptions happen.
"""
@wraps(f)
def _(*args, **kwargs):
"""Decorated function returns None when ValueError occurs.
"""
try:
return f(*args, **kwargs)
except ValueError:
return None
return _
[docs]def convert_date(date):
"""Convert data-type data to int.
For example, date `2016-01-02` is converted to integer `20160102`.
Args:
data: data to convert
Returns:
Int-type date data.
"""
return int(str(date).replace("-", ""))
[docs]def normalize_rating(v):
"""Normalize five star ratings between 0 to 1.
Args:
v: rating which is between 1 to 5
Returns:
Normalized rating data between 0 to 1
"""
return (v - 1.) / 4.
[docs]def print_state(g, i, output=sys.stdout):
"""Print a current state of a given graph.
This method outputs a current of a graph as a set of json objects.
Graph objects must have two properties; `reviewers` and `products`.
Those properties returns a set of reviewers and products respectively.
In this output format, each line represents a reviewer or product object.
Reviewer objects are defined as ::
{
"iteration": <the iteration number given as i>
"reviewer":
{
"reviewer_id": <Reviewer's ID>
"score": <Anomalous score of the reviewer>
}
}
Product objects are defined as ::
{
"iteration": <the iteration number given as i>
"reviewer":
{
"product_id": <Product's ID>
"sumarry": <Summary of the reviews for the product>
}
}
Args:
g: Graph instance.
i: Iteration number.
output: A writable object (default: sys.stdout).
"""
for r in g.reviewers:
json.dump({
"iteration": i,
"reviewer": {
REVIEWER_ID: r.name,
"score": r.anomalous_score
}
}, output)
output.write("\n")
for p in g.products:
json.dump({
"iteration": i,
"product": {
PRODUCT_ID: p.name,
"summary": float(str(p.summary)) * 4. + 1
}
}, output)
output.write("\n")
[docs]def parse_state(fp, reviewer_handler=None, product_handler=None, iteration="final"):
"""Parse a state of a graph from an iterable.
Parse a state outputted from print_state and call callback functions.
The callback for reviewer must receive two arguments;
*iteration* and *review object*.
The review object has two attributes; *reviewer_id* and *score*.
The callback for product must Recife's two arguments;
*iteration* and *product object*.
The product object has two attributes; *product_id* and *summary*.
See print_state for more detail.
If the callback is set None, associated objects are not parsed.
Args:
fp: An iterable object containing state data.
reviewer_handler: A callback for reviewer (default: None).
product_handler: A callback for product (default: None).
iteration: Choose iteration to be parsed (default: 'final').
"""
for item in ifilter(bool, imap(quiet(json.loads), fp)):
if not isinstance(item, dict) or not "iteration" in item:
continue
if str(item["iteration"]) == str(iteration):
if reviewer_handler and "reviewer" in item:
reviewer_handler(iteration, Reviewer(**item["reviewer"]))
elif product_handler and "product" in item:
p = item["product"]
product_handler(iteration, Product(
p["product_id"], normalize_rating(p["summary"])))