This example shows how to log messages from within each of your pipelined functions. Because jobs may run in separate processes, the script does not share a logging.Logger object directly; instead, make_shared_logger_and_proxy() from ruffus.proxy_logger returns a proxy that can be passed to each job, plus a mutex which each job should hold while logging so that messages from concurrently running jobs do not interleave. The full script:
    from ruffus import *
    from ruffus.proxy_logger import *
    import logging
    import multiprocessing
    import sys

    # use simplejson in place of json for python < 2.6
    try:
        import json
    except ImportError:
        import simplejson
        json = simplejson

    #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
    #
    #   Shared logging
    #
    #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
    logger_args = {}
    logger_args["file_name"]    = "pipeline.log"
    logger_args["level"]        = logging.DEBUG
    logger_args["rotating"]     = True
    logger_args["maxBytes"]     = 20000
    logger_args["backupCount"]  = 10
    logger_args["formatter"]    = "%(asctime)s - %(name)s - %(levelname)6s - %(message)s"

    (logger_proxy, logging_mutex) = make_shared_logger_and_proxy(setup_std_shared_logger,
                                                                 "my_logger", logger_args)

    #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
    #
    #   Helper function which writes to a shared log
    #
    #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
    def test_job_io(infiles, outfiles, extra_params):
        """
        cat the contents of the input files to the output files,
        after writing out the job parameters
        """
        #
        #   dump parameters to the shared log,
        #   holding the mutex for the duration of the logging call
        #
        params = (infiles, outfiles)
        logger_proxy, logging_mutex = extra_params
        with logging_mutex:
            logger_proxy.debug("job = %s, process name = %s"
                               % (json.dumps(params),
                                  multiprocessing.current_process().name))

        sys.stdout.write('    job = %s\n' % json.dumps(params))

        if isinstance(infiles, str):
            infiles = [infiles]
        elif infiles is None:
            infiles = []
        if isinstance(outfiles, str):
            outfiles = [outfiles]
        output_text = list()
        for f in infiles:
            output_text.append(open(f).read())
        output_text = "".join(sorted(output_text))
        output_text += json.dumps(infiles) + " -> " + json.dumps(outfiles) + "\n"
        for f in outfiles:
            open(f, "w").write(output_text)

    #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
    #
    #   Tasks
    #
    #88888888888888888888888888888888888888888888888888888888888888888888888888888888888888888
    #
    #   task1
    #
    @files(None, 'a.1', logger_proxy, logging_mutex)
    def task1(infiles, outfiles, *extra_params):
        """
        First task
        """
        test_job_io(infiles, outfiles, extra_params)

    #
    #   task2
    #
    @transform(task1, regex('.1'), '.2', logger_proxy, logging_mutex)
    def task2(infiles, outfiles, *extra_params):
        """
        Second task
        """
        test_job_io(infiles, outfiles, extra_params)

    #
    #   task3
    #
    @transform(task2, regex('.2'), '.3', logger_proxy, logging_mutex)
    def task3(infiles, outfiles, *extra_params):
        """
        Third task
        """
        test_job_io(infiles, outfiles, extra_params)

    #
    #   task4
    #
    @transform(task3, regex('.3'), '.4', logger_proxy, logging_mutex)
    def task4(infiles, outfiles, *extra_params):
        """
        Fourth task
        """
        test_job_io(infiles, outfiles, extra_params)

    #
    #   Necessary to protect the "entry point" of the program under Windows.
    #   see: http://docs.python.org/library/multiprocessing.html#multiprocessing-programming
    #
    if __name__ == '__main__':
        pipeline_run([task4], multiprocess = 10, logger = logger_proxy)
Running the pipeline writes each job's parameters to stdout:

    >>> pipeline_run([task4], multiprocess = 10, logger = logger_proxy)
        job = [null, "a.1"]
        job = ["a.1", "a.2"]
        job = ["a.2", "a.3"]
        job = ["a.3", "a.4"]

pipeline.log will contain our unimaginative log messages:
    2009-11-15 03:04:55,884 - my_logger -  DEBUG - job = [null, "a.1"], process name = PoolWorker-2
    2009-11-15 03:04:56,941 - my_logger -   INFO - Job = [None -> a.1, <LoggingProxy>, <thread.lock>] completed
    2009-11-15 03:04:56,942 - my_logger -   INFO - Completed Task = task1
    2009-11-15 03:04:56,945 - my_logger -  DEBUG - job = ["a.1", "a.2"], process name = PoolWorker-4
    2009-11-15 03:04:57,962 - my_logger -   INFO - Job = [a.1 -> a.2, <LoggingProxy>, <thread.lock>] completed
    2009-11-15 03:04:57,962 - my_logger -   INFO - Completed Task = task2
    2009-11-15 03:04:57,965 - my_logger -  DEBUG - job = ["a.2", "a.3"], process name = PoolWorker-3
    2009-11-15 03:04:59,009 - my_logger -   INFO - Job = [a.2 -> a.3, <LoggingProxy>, <thread.lock>] completed
    2009-11-15 03:04:59,010 - my_logger -   INFO - Completed Task = task3
    2009-11-15 03:04:59,013 - my_logger -  DEBUG - job = ["a.3", "a.4"], process name = PoolWorker-5
    2009-11-15 03:05:00,024 - my_logger -   INFO - Job = [a.3 -> a.4, <LoggingProxy>, <thread.lock>] completed
    2009-11-15 03:05:00,025 - my_logger -   INFO - Completed Task = task4
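The <LoggingProxy> and <thread.lock> entries in the "Job = [...] completed" lines are simply the logger_proxy and logging_mutex objects, which every task receives as extra parameters.

Stripped of the pipeline scaffolding, the shared-logging pattern is small: build the proxy and mutex once at module level, hand both to each job, and always take the mutex before writing through the proxy so that records from concurrently running processes do not interleave. A minimal sketch of that pattern (the file name and message are illustrative, and it assumes the rotating-file options used above are optional):

    from ruffus.proxy_logger import *
    import logging

    # create the shared logger once, at module level, so that
    # the proxy and mutex are available to every worker process
    (logger_proxy, logging_mutex) = make_shared_logger_and_proxy(
            setup_std_shared_logger, "my_logger",
            {"file_name": "pipeline.log", "level": logging.DEBUG})

    def some_job():
        # hold the mutex only for the duration of the logging call
        with logging_mutex:
            logger_proxy.info("message from a worker process")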