Source code for helper.datasets

#
# helper/datasets.py
#
# Copyright (c) 2017 Junpei Kawamoto
#
# This file is part of rgmining-script.
#
# rgmining-script is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# rgmining-script is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rgmining-script. If not, see <http://www.gnu.org/licenses/>.
#
# pylint: disable=import-error
"""Check which datasets are installed.

This module exports a constant variable, :data:`DATASETS`.
``DATASETS`` is a dictionary whose keys are the names of the installed
datasets and whose values are the loading functions of those datasets.

You can get a set of installed dataset names by ``DATASETS.keys()``, and
load a dataset by ``DATASETS["name"](graph)``.
"""
import functools
from logging import getLogger

import dataset_io

# Module-level logger, named after this module via __name__.
_LOGGER = getLogger(__name__)
"""Logger.
"""

# Registry of dataset loaders keyed by dataset name. It starts empty and is
# filled by the registration statements that follow in this module; load()
# looks loaders up here by name.
DATASETS = {}
"""Dictionary of installed datasets.

Keys are the names of the installed datasets, and the associated values are
load function of that dataset.
"""

def ignore_kwargs(func):
    """Wrap ``func`` so that every keyword argument is discarded.

    Args:
      func: callable to be wrapped.

    Returns:
      A callable which forwards only its positional arguments to ``func``
      and returns whatever ``func`` returns. The metadata of ``func`` (name,
      docstring, and so on) is copied onto the returned callable.
    """
    def _positional_only(*positional, **_dropped):
        """Invoke the wrapped function with the positional arguments only.
        """
        return func(*positional)

    # update_wrapper returns the wrapper it was given, after copying the
    # metadata of func onto it.
    return functools.update_wrapper(_positional_only, func)


# Load and register the synthetic dataset. try: import synthetic # pylint: disable=wrong-import-position except ImportError: _LOGGER.info("rgmining-synthetic-dataset is not installed.") else: DATASETS["synthetic"] = ignore_kwargs(synthetic.load) # Load and register Amazon dataset. # The loader of the Amazon dataset takes one argument, categories, # to specify which reviews will be loaded. try: import amazon # pylint: disable=wrong-import-position except ImportError: _LOGGER.info("rgmining-amazon-dataset is not installed.") else: DATASETS["amazon"] = amazon.load # Load and register Trip Advisor dataset. try: import tripadvisor # pylint: disable=wrong-import-position except ImportError: _LOGGER.info("rgmining-tripadvisor-dataset is not installed.") else: DATASETS["tripadvisor"] = tripadvisor.load # Register the loader which load a dataset from a file. # The loder function takes kwargument, fp, where it loads the dataset. DATASETS["file"] = dataset_io.load
def load(graph, dataset, dataset_param):
    """Load a dataset and return a review graph.

    Every item of ``dataset_param`` is a ``key=value`` string. The items are
    turned into keyword arguments for the loader registered under ``dataset``
    in :data:`DATASETS`. Only the first ``=`` separates the key from the
    value, so a value may itself contain ``=``.

    The key ``file`` is special: its value is treated as a path, the file is
    opened, and the opened file object is passed to the loader as ``fp``
    instead of ``file``. That file is closed before this function returns,
    whether or not the loader raised.

    Args:
      graph: review graph object which the dataset is loaded to.
      dataset: name of the dataset to be loaded; a key of :data:`DATASETS`.
      dataset_param: iterable of ``key=value`` strings; ``None`` or an empty
        iterable means no parameters.

    Returns:
      Review graph object, which is the same object as the parameter graph.

    Raises:
      ValueError: if an item of ``dataset_param`` does not contain ``=``.
      KeyError: if ``dataset`` is not a key of :data:`DATASETS`.
    """
    _LOGGER.info("Prepare options for the selected dataset.")
    params = {}
    for item in dataset_param or ():
        # partition splits on the first "=" only, keeping any later "="
        # as part of the value.
        key, separator, value = item.partition("=")
        if not separator:
            raise ValueError(
                "dataset parameter must be of the form key=value: "
                "{0!r}".format(item))
        params[key] = value

    # Track the file opened here separately from params["fp"]: a caller may
    # pass its own "fp=..." string, which must not be closed by this function.
    opened = None
    if "file" in params:
        opened = open(params.pop("file"))
        params["fp"] = opened

    try:
        _LOGGER.info("Load the dataset.")
        DATASETS[dataset](graph, **params)
    finally:
        if opened is not None:
            opened.close()
    return graph