Source code for example

#! /usr/bin/env python
#
# example.py
#
# Copyright (c) 2017 Junpei Kawamoto
#
# This file is part of rgmining-amazon-dataset.
#
# rgmining-amazon-dataset is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# rgmining-amazon-dataset is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with rgmining-amazon-dataset.  If not, see <http://www.gnu.org/licenses/>.
#
"""Evaluate a review graph mining algorithm with the amazon dataset.
"""
# pylint: disable=invalid-name
from __future__ import absolute_import, division
import logging
from logging import getLogger
import sys

import dsargparse
import amazon

LOGGER = getLogger(__name__)

#--------------------------
# Loading algorithms
#--------------------------
ALGORITHMS = {}
"""Dictionary of graph loading functions associated with installed algorithms.
"""

# Load and register RIA.
try:
    import ria
except ImportError:
    LOGGER.info("rgmining-ria is not installed.")
else:
    def ignore_args(func):
        """Returns a wrapped function which ignore given arguments."""
        def _(*_args):
            """The function body."""
            return func()
        return _
    ALGORITHMS["ria"] = ria.ria_graph
    ALGORITHMS["one"] = ignore_args(ria.one_graph)
    ALGORITHMS["onesum"] = ignore_args(ria.one_sum_graph)
    ALGORITHMS["mra"] = ignore_args(ria.mra_graph)


# Load and register RSD.
try:
    import rsd  # pylint: disable=wrong-import-position
except ImportError:
    LOGGER.info("rgmining-rsd is not installed.")
else:
    ALGORITHMS["rsd"] = rsd.ReviewGraph


# Load and register Fraud Eagle.
try:
    import fraud_eagle  # pylint: disable=wrong-import-position
except ImportError:
    LOGGER.info("rgmining-fraud-eagle is not installed.")
else:
    ALGORITHMS["feagle"] = fraud_eagle.ReviewGraph


# Load and register FRAUDAR.
try:
    import fraudar  # pylint: disable=wrong-import-position
except ImportError:
    LOGGER.info("rgmining-fraudar is not installed.")
else:
    def create_fraudar_graph(nblock=1):
        """Create a review graph defined in Fraud Eagle package.
        """
        return fraudar.ReviewGraph(int(nblock))
    ALGORITHMS["fraudar"] = create_fraudar_graph
#--------------------------


[docs]def run(method, loop, threshold, output, param): """Run a given algorithm with the Amazon dataset. Runs a given algorithm and outputs anomalous scores and summaries after each iteration finishes. The function will ends if a given number of loops ends or the update of one iteration becomes smaller than a given threshold. Some algorithm requires a set of parameters. For example, feagle requires parameter `epsilon`. Argument `param` specifies those parameters, and if you want to set 0.1 to the `epsilon`, pass `epsilon=0.1` via the argument. Args: method: name of algorithm. loop: the number of iteration (default: 20). threshold: threshold to judge an update is negligible (default: 10^-3). output: writable object where the output will be written. param: list of key and value pair which are connected with "=". """ kwargs = {key: float(value) for key, value in [v.split("=") for v in param]} g = ALGORITHMS[method](**kwargs) amazon.load(g) amazon.print_state(g, 0, output) # Updates logging.info("Start iterations.") for i in xrange(loop if not method.startswith("one") else 1): diff = g.update() if diff is not None and diff < threshold: break # Current summary logging.info("Iteration %d ends. (diff=%s)", i + 1, diff) amazon.print_state(g, i + 1, output) # Print final state. amazon.print_state(g, "final", output)
[docs]def main(): """The main function. """ if not ALGORITHMS: logging.error("No algorithms are installed.") sys.exit(1) parser = dsargparse.ArgumentParser(main=main) parser.add_argument("method", choices=sorted(ALGORITHMS.keys())) parser.add_argument( "--output", default=sys.stdout, type=dsargparse.FileType("w"), # pylint: disable=no-member help="file path to store results (Default: stdout).") parser.add_argument("--loop", type=int, default=20) parser.add_argument("--threshold", type=float, default=10^-3) parser.add_argument( "--param", action="append", default=[], help=( "key and value pair which are connected with '='.\n" "This option can be set multiply.")) run(**vars(parser.parse_args()))
if __name__ == "__main__": logging.basicConfig(level=logging.INFO, stream=sys.stderr) try: main() except KeyboardInterrupt: pass except Exception: # pylint: disable=broad-except logging.exception("Untracked exception occurred.") finally: logging.shutdown()