Given a Python script with print()
statements, I'd like to be able to run through the script and insert a comment after each statement that shows the output from each. To demonstrate, take this script named example.py
:
a, b = 1, 2
print('a + b:', a + b)
c, d = 3, 4
print('c + d:', c + d)
The desired output would be:
a, b = 1, 2
print('a + b:', a + b)
# a + b: 3
c, d = 3, 4
print('c + d:', c + d)
# c + d: 7
Here's my attempt, which works for simple examples like the one above:
import sys
from io import StringIO
def intercept_stdout(func):
"redirect stdout from a target function"
def wrapper(*args, **kwargs):
"wrapper function for intercepting stdout"
# save original stdout
original_stdout = sys.stdout
# set up StringIO object to temporarily capture stdout
capture_stdout = StringIO()
sys.stdout = capture_stdout
# execute wrapped function
func(*args, **kwargs)
# assign captured stdout to value
func_output = capture_stdout.getvalue()
# reset stdout
sys.stdout = original_stdout
# return captured value
return func_output
return wrapper
@intercept_stdout
def exec_target(name):
"execute a target script"
with open(name, 'r') as f:
exec(f.read())
def read_target(name):
"read source code from a target script & return it as a list of lines"
with open(name) as f:
source = f.readlines()
# to properly format last comment, ensure source ends in a newline
if len(source[-1]) >= 1 and source[-1][-1] != '\n':
source[-1] += '\n'
return source
def annotate_source(target):
"given a target script, return the source with comments under each print()"
target_source = read_target(target)
# find each line that starts with 'print(' & get indices in reverse order
print_line_indices = [i for i, j in enumerate(target_source)
if len(j) > 6 and j[:6] == 'print(']
print_line_indices.reverse()
# execute the target script and get each line output in reverse order
target_output = exec_target(target)
printed_lines = target_output.split('\n')
printed_lines.reverse()
# iterate over the source and insert commented target output line-by-line
annotated_source = []
for i, line in enumerate(target_source):
annotated_source.append(line)
if print_line_indices and i == print_line_indices[-1]:
annotated_source.append('# ' + printed_lines.pop() + '\n')
print_line_indices.pop()
# return new annotated source as a string
return ''.join(annotated_source)
if __name__ == '__main__':
target_script = 'example.py'
with open('annotated_example.py', 'w') as f:
f.write(annotate_source(target_script))
However, it fails for scripts with print()
statements that span multiple lines, as well as for print()
statements that aren't at the start of a line. In a best-case scenario, it would even work for print()
statements inside a function. Take the following example:
print('''print to multiple lines, first line
second line
third line''')
print('print from partial line, first part') if True else 0
1 if False else print('print from partial line, second part')
print('print from compound statement, first part'); pass
pass; print('print from compound statement, second part')
def foo():
print('bar')
foo()
Ideally, the output would look like this:
print('''print to multiple lines, first line
second line
third line''')
# print to multiple lines, first line
# second line
# third line
print('print from partial line, first part') if True else 0
# print from partial line, first part
1 if False else print('print from partial line, second part')
# print from partial line, second part
print('print from compound statement, first part'); pass
# print from compound statement, first part
pass; print('print from compound statement, second part')
# print from compound statement, second part
def foo():
print('bar')
foo()
# bar
But the script above mangles it like so:
print('''print to multiple lines, first line
# print to multiple lines, first line
second line
third line''')
print('print from partial line, first part') if True else 0
# second line
1 if False else print('print from partial line, second part')
print('print from compound statement, first part'); pass
# third line
pass; print('print from compound statement, second part')
def foo():
print('bar')
foo()
What approach would make this process more robust?