# Source code for synthetic.loader

# Copyright (c) 2016-2017 Junpei Kawamoto
# This file is part of rgmining-synthetic-dataset.
# rgmining-synthetic-dataset is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# rgmining-synthetic-dataset is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with rgmining-synthetic-dataset. If not, see <http://www.gnu.org/licenses/>.
"""Provide a loading method of synthetic dataset.
"""
from __future__ import division
from os import path

# Names of the data files read by load(); each is resolved relative to this
# module's directory by _fullpath().
# Reviewer records: one "<reviewer id> <name>" pair per line, space separated.
_REVIEWER_FILE = "reviewer.dat"
# Product records: one "<product id> <name>" pair per line, space separated.
_PRODUCT_FILE = "product.dat"
# Review records: one "<reviewer id> <product id> <score>" triple per line,
# space separated.
_REVIEW_FILE = "review.dat"

def _fullpath(filename):
    """Compute the full path of a given filename.

    The filename is joined onto the directory that contains this module.

    Args:
      filename: Filename.

    Returns:
      The full path of the given filename.
    """
    return path.join(path.dirname(__file__), filename)

def load(g):
    """Load synthetic dataset.

    Reads the reviewer, product, and review data files located next to this
    module and registers their contents in the given graph. Reviews that
    refer to a reviewer id or product id not present in the corresponding
    file are skipped, as are blank lines.

    Args:
      g: an instance of bipartite graph. It must provide
        ``new_reviewer(name=...)``, ``new_product(name=...)`` and
        ``add_review(reviewer, product, score)``.

    Returns:
      The graph instance *g*.
    """
    reviewers = {}  # Reviewer id -> object returned by g.new_reviewer.
    products = {}  # Product id -> object returned by g.new_product.

    # Load reviewers
    with open(_fullpath(_REVIEWER_FILE)) as fp:
        for line in fp:
            record = line.strip()
            if not record:
                # A blank line holds no record; unpacking it would raise.
                continue
            rid, name = record.split(" ")
            reviewers[rid] = g.new_reviewer(name=name)

    # Load products
    with open(_fullpath(_PRODUCT_FILE)) as fp:
        for line in fp:
            record = line.strip()
            if not record:
                continue
            pid, name = record.split(" ")
            products[pid] = g.new_product(name=name)

    # Load review
    with open(_fullpath(_REVIEW_FILE)) as fp:
        for line in fp:
            record = line.strip()
            if not record:
                continue
            rid, pid, score = record.split(" ")
            if rid not in reviewers or pid not in products:
                # Ignore reviews that reference unknown reviewers/products.
                continue
            # The stored score is divided by 5 before being added
            # (presumably ratings are on a 5-point scale -- TODO confirm).
            g.add_review(reviewers[rid], products[pid], float(score) / 5)

    return g