See here for a run-through of this example.
The code for this example is in the test subdirectory of the ruffus module:
ruffus/test/simpler_with_shared_logging.py
- --log_file_name, -L
  - Shared log file location.
- --help, -h
  - Show this help message.
- --target_tasks TARGET_TASK, -t TARGET_TASK
  - Target task(s) of the pipeline. TARGET_TASK can be one of:
    - task1
    - task2
    - task3
    - task4
    For example:
    simpler_with_shared_logging.py -t task1 -t task4
- --forced_tasks FORCED_TASK, -f FORCED_TASK
  - Pipeline task(s) which will be run even if they are up to date.
    See above for the list of pipeline tasks.
- --jobs N, -j N
  - N specifies the number of processes used to run jobs in parallel.
- --minimal_rebuild_mode, -M
  - Rebuild the minimum set of tasks necessary for the target. Ignore upstream
    out-of-date tasks if the intervening tasks are up to date.
- --dependency FILE, -d FILE
  - Print a dependency graph of the pipeline that would be executed to FILE,
    but do not execute it.
- --dependency_graph_format FORMAT, -F FORMAT
  - Format of the dependency graph file. Can be:
    - 'ps' (PostScript)
    - 'svg', 'svgz' (Scalable Vector Graphics)
    - 'png', 'gif' (bitmap graphics)
- --just_print, -n
  - Print a description of the jobs that would be executed, but do not execute them.
- --no_key_legend_in_graph, -K
  - Do not print out the legend and key for the dependency graph.
- --draw_graph_horizontally, -H
  - Draw a horizontal (left to right) dependency graph.
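The heart of this example is the shared logging machinery: a multiprocessing SyncManager subclass starts its own server process, the real logger is created there, and each job talks to it through a proxy whose method calls are forwarded across process boundaries. Below is a minimal, stand-alone sketch of that pattern, assuming nothing beyond the Python standard library; the names make_logger, LogManager and /tmp/demo.log are illustrative only and do not appear in the example itself.

    import logging
    import multiprocessing.managers

    def make_logger(name, filename):
        # Runs inside the manager's server process and builds the real logger
        logger = logging.getLogger(name)
        logger.setLevel(logging.DEBUG)
        logger.addHandler(logging.FileHandler(filename))
        return logger

    class LogManager(multiprocessing.managers.SyncManager):
        pass

    # Expose only the logging methods we need; calls on the returned proxy
    # are forwarded to the real logger living in the manager process
    LogManager.register('make_logger', make_logger, exposed=('debug', 'info'))

    if __name__ == '__main__':
        manager = LogManager()
        manager.start()
        log = manager.make_logger('demo', '/tmp/demo.log')
        mutex = manager.Lock()
        with mutex:                 # serialise log writes from concurrent jobs
            log.info('hello from the parent process')

The full script, which wraps the same pattern in command line options and a four-task ruffus pipeline, follows.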
#!/usr/bin/env python
"""
test_tasks.py
"""
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
# options
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
from optparse import OptionParser
import sys, os
import os.path
import StringIO
# add self to search path for testing
exe_path = os.path.split(os.path.abspath(sys.argv[0]))[0]
sys.path.insert(0,os.path.abspath(os.path.join(exe_path,"..", "..")))
if __name__ == '__main__':
    module_name = os.path.split(sys.argv[0])[1]
    module_name = os.path.splitext(module_name)[0];
else:
    module_name = __name__
parser = OptionParser(version="%prog 1.0")
parser.add_option("-t", "--target_tasks", dest="target_tasks",
action="append",
default = list(),
metavar="JOBNAME",
type="string",
help="Target task(s) of pipeline.")
parser.add_option("-f", "--forced_tasks", dest="forced_tasks",
action="append",
default = list(),
metavar="JOBNAME",
type="string",
help="Pipeline task(s) which will be included even if they are up to date.")
parser.add_option("-j", "--jobs", dest="jobs",
default=1,
metavar="jobs",
type="int",
help="Specifies the number of jobs (commands) to run simultaneously.")
parser.add_option("-v", "--verbose", dest = "verbose",
action="store_true", default=False,
help="Do not echo to shell but only print to log.")
parser.add_option("-d", "--dependency", dest="dependency_file",
#default="simple.svg",
metavar="FILE",
type="string",
help="Print a dependency graph of the pipeline that would be executed "
"to FILE, but do not execute it.")
parser.add_option("-F", "--dependency_graph_format", dest="dependency_graph_format",
metavar="FORMAT",
type="string",
default = 'svg',
help="format of dependency graph file. Can be 'ps' (PostScript), "+
"'svg' 'svgz' (Structured Vector Graphics), " +
"'png' 'gif' (bitmap graphics) etc ")
parser.add_option("-n", "--just_print", dest="just_print",
action="store_true", default=False,
help="Print a description of the jobs that would be executed, "
"but do not execute them.")
parser.add_option("-M", "--minimal_rebuild_mode", dest="minimal_rebuild_mode",
action="store_true", default=False,
help="Rebuild a minimum of tasks necessary for the target. "
"Ignore upstream out of date tasks if intervening tasks are fine.")
parser.add_option("-K", "--no_key_legend_in_graph", dest="no_key_legend_in_graph",
action="store_true", default=False,
help="Do not print out legend and key for dependency graph.")
parser.add_option("-H", "--draw_graph_horizontally", dest="draw_horizontally",
action="store_true", default=False,
help="Draw horizontal dependency graph.")
parser.add_option("-L", "--log_file_name", dest="log_file_name",
default="/tmp/simple.log",
metavar="FILE",
type="string",
help="log file.")
parameters = [
]
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
# imports
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
import StringIO
import re
import operator
import sys,os
from collections import defaultdict
sys.path.append(os.path.abspath(os.path.join(exe_path,"..", "..")))
from ruffus import *
# use simplejson in place of json for python < 2.6
try:
    import json
except ImportError:
    import simplejson
    json = simplejson
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
# Shared logging
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
import multiprocessing
import multiprocessing.managers
import logging
import logging.handlers
#
# setup_logger
#
def setup_shared_logger(LOGGING_LEVEL, LOG_FILENAME):
"""
Function to setup logger shared between all processes
The logger object will be created within a separate (special) process
run by multiprocessing.BaseManager.start()
See "LoggingManager" below
"""
#
# Log file name with logger level
#
my_ruffus_logger = logging.getLogger('simpler_example_logger')
my_ruffus_logger.setLevel(LOGGING_LEVEL)
#
# Add handler to print to file, with the specified format
#
handler = logging.handlers.RotatingFileHandler(
LOG_FILENAME, maxBytes=100000, backupCount=5)
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)6s - %(message)s")
handler.setFormatter(formatter)
my_ruffus_logger.addHandler(handler)
#
# This log object will be wrapped in proxy
#
return my_ruffus_logger
#
# Proxy object for logging
# Logging messages will be marshalled (forwarded) to the process where the
# shared log lives
#
class LoggerProxy(multiprocessing.managers.BaseProxy):
    def debug(self, message):
        return self._callmethod('debug', [message])
    def info(self, message):
        return self._callmethod('info', [message])
    def __str__ (self):
        return "Logging proxy"
#
# Register the setup_logger function as a proxy for setup_logger
#
# We use SyncManager as a base class so we can get a lock proxy for synchronising
# logging later on
#
class LoggingManager(multiprocessing.managers.SyncManager):
"""
Logging manager sets up its own process and will create the real Log object there
We refer to this (real) log via proxies
"""
pass
LoggingManager.register('setup_logger', setup_shared_logger, proxytype=LoggerProxy, exposed = ('info', 'debug', '__str__'))
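# Note: because setup_shared_logger is registered on LoggingManager,
# manager.setup_logger(...) (called in the main block below) runs the setup
# function inside the manager's server process and returns a LoggerProxy;
# only the exposed methods ('info', 'debug', '__str__') can be called on it.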
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
# Functions
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
def create_custom_file_func(params):
"""
creates function which can be used as input to @files_func
"""
def cust_func ():
for job_param in params:
yield job_param
return cust_func
def is_job_uptodate (infiles, outfiles, *extra_params):
"""
assumes first two parameters are files, checks if they are up to date
"""
return task.needs_update_check_modify_time (infiles, outfiles, *extra_params)
def test_post_task_function ():
print "Hooray"
import time
def test_job_io(infiles, outfiles, extra_params):
"""
cat input files content to output files
after writing out job parameters
"""
# dump parameters
params = (infiles, outfiles)# + extra_params[0:-3]
logger_proxy, logging_mutex = extra_params
with logging_mutex:
logger_proxy.debug("job = %s, process name = %s" %
(json.dumps(params),
multiprocessing.current_process().name))
sys.stdout.write(' job = %s\n' % json.dumps(params))
if isinstance(infiles, str):
infiles = [infiles]
elif infiles == None:
infiles = []
if isinstance(outfiles, str):
outfiles = [outfiles]
output_text = list()
for f in infiles:
output_text.append(open(f).read())
output_text = "".join(sorted(output_text))
output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
for f in outfiles:
open(f, "w").write(output_text)
time.sleep(1)
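# Note: every task below delegates to test_job_io(), which logs its parameters
# through the shared logger (holding logging_mutex while it does so) and then
# concatenates the contents of its input files, plus a record of the job, into
# each output file.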
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
# Main logic
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
if __name__ == '__main__':
    # get help string
    f = StringIO.StringIO()
    parser.print_help(f)
    helpstr = f.getvalue()
    #
    # Get options
    #
    (options, remaining_args) = parser.parse_args()
    #
    # make shared log and proxy
    #
    manager = LoggingManager()
    manager.register('setup_logger', setup_shared_logger, proxytype=LoggerProxy, exposed = ('info', 'debug'))
    manager.start()
    LOG_FILENAME = options.log_file_name
    LOGGING_LEVEL = logging.DEBUG
    logger_proxy = manager.setup_logger(LOGGING_LEVEL, LOG_FILENAME)
    #
    # make sure we are not logging at the same time in different processes
    #
    logging_mutex = manager.Lock()
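# logger_proxy and logging_mutex are handed to each task below as extra
# parameters via the @files / @files_re decorators, so every job, in whichever
# process it runs, writes to the same shared log file.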
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
# Tasks
#88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
#
# task1
#
@files(None, 'a.1', logger_proxy, logging_mutex)
def task1(infiles, outfiles, *extra_params):
    """
    First task
    """
    test_job_io(infiles, outfiles, extra_params)
#
# task2
#
@files_re('*.1', '(.*).1', r'\1.1', r'\1.2', logger_proxy, logging_mutex)
@follows(task1)
def task2(infiles, outfiles, *extra_params):
    """
    Second task
    """
    test_job_io(infiles, outfiles, extra_params)
#
# task3
#
@files_re('*.1', '(.*).1', r'\1.2', r'\1.3', logger_proxy, logging_mutex)
@follows(task2)
def task3(infiles, outfiles, *extra_params):
    """
    Third task
    """
    test_job_io(infiles, outfiles, extra_params)
#
# task4
#
@files_re('*.1', '(.*).1', r'\1.3', r'\1.4', logger_proxy, logging_mutex)
@follows(task3)
def task4(infiles, outfiles, *extra_params):
    """
    Fourth task
    """
    test_job_io(infiles, outfiles, extra_params)
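# The four tasks form a simple chain: task1 writes a.1, task2 turns *.1 files
# into *.2 files, task3 turns *.2 into *.3, and task4 turns *.3 into *.4.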
#
# Necessary to protect the "entry point" of the program under windows.
# see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
#
if __name__ == '__main__':
    try:
        if options.just_print:
            pipeline_printout(sys.stdout, options.target_tasks, options.forced_tasks,
                              long_winded=True,
                              gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode)
        elif options.dependency_file:
            pipeline_printout_graph (open(options.dependency_file, "w"),
                                     options.dependency_graph_format,
                                     options.target_tasks,
                                     options.forced_tasks,
                                     draw_vertically = not options.draw_horizontally,
                                     gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
                                     no_key_legend = options.no_key_legend_in_graph)
        else:
            pipeline_run(options.target_tasks, options.forced_tasks, multiprocess = options.jobs,
                         gnu_make_maximal_rebuild_mode = not options.minimal_rebuild_mode,
                         logger = logger_proxy)
    except Exception, e:
        print e.args
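The shared logging is easiest to see by running the pipeline with multiprocess jobs enabled and then inspecting the log file, for example (assuming the script is run from ruffus/test):

    python simpler_with_shared_logging.py -t task4 -j 4

Each job records its parameters and process name in /tmp/simple.log (or the file given with --log_file_name), and logging_mutex ensures that entries from different processes do not interleave.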