Source code for dataset_io.helper

#
# helper.py
#
# Copyright (c) 2016-2017 Junpei Kawamoto
#
# This file is part of rgmining-dataset-io.
#
# rgmining-dataset-io is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# rgmining-dataset-io is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rgmining-dataset-io. If not, see <http://www.gnu.org/licenses/>.
#
"""Provide helper functions and classes.
"""
# pylint: disable=invalid-name
from __future__ import absolute_import
try:
    from itertools import imap, ifilter
except ImportError:
    imap = map
    ifilter = filter
import json
import sys
from functools import wraps
from collections import namedtuple

from dataset_io.constants import PRODUCT_ID
from dataset_io.constants import REVIEWER_ID

# Record type for a reviewer entry: two fields, the first named by the
# REVIEWER_ID constant and the second named "score".  parse_state builds one
# of these from each parsed "reviewer" mapping and passes it to the reviewer
# callback.
Reviewer = namedtuple("Reviewer", (REVIEWER_ID, "score"))
"""Named tuple to access reviewer's attributes easily.
"""

# Record type for a product entry: two fields, the first named by the
# PRODUCT_ID constant and the second named "summary".  parse_state fills the
# second field with the summary value after passing it through
# normalize_rating.
Product = namedtuple("Product", (PRODUCT_ID, "summary"))
"""Named tuple to access product's attribute easily.
"""


def quiet(f):
    """Wrap a function so that ValueError is swallowed.

    Args:
      f: The callable to wrap.

    Returns:
      A function with the same metadata as ``f`` which forwards every
      argument to ``f`` and returns its result, or returns None if ``f``
      raises ValueError. Any other exception propagates unchanged.
    """
    @wraps(f)
    def wrapper(*args, **kwargs):
        """Call the wrapped function, mapping ValueError to None.
        """
        try:
            outcome = f(*args, **kwargs)
        except ValueError:
            # Only ValueError (and its subclasses) is suppressed.
            outcome = None
        return outcome

    return wrapper
def convert_date(date):
    """Convert date-type data to int.

    The value is turned into its string form, every ``-`` is removed, and
    the remaining text is parsed as an integer.  For example, date
    `2016-01-02` is converted to integer `20160102`.

    Args:
      date: date to convert; anything whose ``str()`` form becomes a valid
        integer literal once the hyphens are removed.

    Returns:
      Int-type date data.

    Raises:
      ValueError: if the text left after removing hyphens is not an integer.
    """
    pieces = str(date).split("-")
    digits = "".join(pieces)
    return int(digits)
def normalize_rating(v):
    """Map a five star rating onto the interval from 0 to 1.

    A rating of 1 becomes 0 and a rating of 5 becomes 1; values in between
    are scaled linearly.  The input is not range-checked.

    Args:
      v: rating which is between 1 to 5

    Returns:
      Normalized rating data between 0 to 1
    """
    # Shift so the lowest rating sits at zero, then divide by the span (5 - 1).
    shifted = v - 1.
    return shifted / 4.
def parse_state(fp, reviewer_handler=None, product_handler=None,
                iteration="final"):
    """Parse a state of a graph from an iterable.

    Parse a state outputted from print_state and call callback functions.
    Each element of ``fp`` is decoded as JSON; elements that fail to decode,
    decode to a falsy value, are not JSON objects, or have no ``iteration``
    key are skipped.  Only objects whose ``iteration`` value equals the
    requested one (compared as strings) are dispatched.

    The callback for reviewer must receive two arguments; *iteration* and
    *review object*. The review object has two attributes; *reviewer_id* and
    *score*.
    The callback for product must receive two arguments; *iteration* and
    *product object*. The product object has two attributes; *product_id*
    and *summary*, where the summary has been passed through
    normalize_rating.
    See print_state for more detail.

    If the callback is set None, associated objects are not parsed.

    Args:
      fp: An iterable object containing state data.
      reviewer_handler: A callback for reviewer (default: None).
      product_handler: A callback for product (default: None).
      iteration: Choose iteration to be parsed (default: 'final').
    """
    decode = quiet(json.loads)

    for line in fp:
        record = decode(line)

        # Undecodable lines come back as None; other falsy values carry no
        # state either.
        if not record:
            continue
        if not isinstance(record, dict) or "iteration" not in record:
            continue
        if str(record["iteration"]) != str(iteration):
            continue

        # A record is dispatched to at most one handler, reviewer first.
        if reviewer_handler and "reviewer" in record:
            reviewer_handler(iteration, Reviewer(**record["reviewer"]))
            continue

        if product_handler and "product" in record:
            product = record["product"]
            product_id = product["product_id"]
            summary = normalize_rating(product["summary"])
            product_handler(iteration, Product(product_id, summary))