Source code for dataset_io.loader

# Copyright (c) 2016-2017 Junpei Kawamoto
# This file is part of rgmining-dataset-io.
# rgmining-dataset-io is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# rgmining-dataset-io is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with rgmining-dataset-io. If not, see <>.
"""Load review data formatted in JSON to a graph object.
# pylint: disable=invalid-name
from __future__ import absolute_import
import json
    from itertools import imap, ifilter
except ImportError:
    imap = map
    ifilter = filter
from dataset_io.helper import convert_date
from dataset_io.helper import normalize_rating
from dataset_io.helper import quiet
from dataset_io.constants import PRODUCT_ID
from dataset_io.constants import MEMBER_ID

[docs]def load(g, fp, anomalous=None, normalize=normalize_rating): """Load a review dataset to a given graph object. The graph object must implement the :ref:`graph-interface` i.e. it must have the following methods: new_reviewer(name, anomalous) create and register a new reviewer which has a given `name` and be initialized by a given `anomalous` score, new_product(name) create and register a new product which has a given `name`, find_reviewer(name) find and return a reviewer which has given `name`, find_product(name) find and return a product which has given `name`, add_review(self, reviewer, product, review, date) add a new review from `reviewer` to `product` issued in `date`, in which the review is a float value. and must have the following properties: reviewers (readable) a set of reviewers, products (readable) a set of products. `fp` is an iterative object which yields a JSON string representing a review. Each review must have the following elements:: { "member_id": "A1AF30H2MPOO9", "product_id": "0001056530", "rating": 4.0, "date": "2000-08-21" } where `member_id` is a reviewer's id, i.e. name, `product_id` is a product's id which the reviewer posts a review. Rating is a five-star score for the product. Date is the date the review issued. Args: g: graph object where loaded review data are stored. fp: readable object containing JSON data of a loading table. anomalous: default anomalous scores (Default: None). normalize: normalize function of rating scores; if set Nont, scores are not normalized. Returns: The graph instance, which is as same as *g*. """ if not normalize: normalize = lambda v: v reviewers = { r for r in g.reviewers} products = { p for p in g.products} for review in ifilter(bool, imap(quiet(json.loads), fp)): member_id = review[MEMBER_ID] product_id = review[PRODUCT_ID] rating = normalize(review["rating"]) date = convert_date(review["date"]) if member_id in reviewers: r = reviewers[member_id] else: r = g.new_reviewer(name=member_id, anomalous=anomalous) reviewers[member_id] = r if product_id in products: p = products[product_id] else: p = g.new_product(name=product_id.strip()) products[product_id] = p g.add_review(r, p, rating, date) return g