Like blhsing, I use a single `takewhile` with a stateful predicate. But I use a generator, so Python keeps track of the state for me, as the position reached in my code. That's faster, at least for longer cases.
def pred():
    """Two-phase predicate written as a generator driven through ``send``.

    Each value sent in is answered with ``True`` while it belongs to the
    current phase: first values below 12000, then values of at least 12000.
    The first value that fits neither remaining phase is answered with
    ``False``.
    """
    x = yield  # reached by the priming next(); receives the first value
    while x < 12000:
        x = yield True
    while x >= 12000:
        x = yield True
    yield False


pred = pred()
next(pred)  # advance to the first bare ``yield`` so that send() is allowed
result = takewhile(pred.send, lcg())
print(*result)
Output (Attempt This Online!):
1 149 11249 57305 38044 35283 24819 26463 18689 25472
Easy to extend if you want more than two phases: just add more loops.
Benchmark results for your small example:
lcg()
2.63 ± 0.01 μs Stefan_selfmade
3.33 ± 0.02 μs Stefan_nonlocal
3.39 ± 0.02 μs Stefan_generator
3.53 ± 0.03 μs blhsing
3.83 ± 0.03 μs blhsing_old
`Stefan_generator` is my above solution, `blhsing` is their current solution. The others are included for curiosity (`Stefan_nonlocal` instead uses a `bool` flag; I wanted to see how that compares to blhsing's `set` flag. `Stefan_selfmade` is without `takewhile`, fast but not very nice. `blhsing_old` is an older version with multiple `clear()` calls).
Much of the time is spent in your `lcg` generator, so I also tried running the solutions on a precomputed list of its results, to better compare the time spent in our solutions:
list(islice(lcg(), 20))
1.13 ± 0.01 μs Stefan_selfmade
1.66 ± 0.02 μs Stefan_nonlocal
1.76 ± 0.01 μs blhsing
1.78 ± 0.02 μs Stefan_generator
2.12 ± 0.01 μs blhsing_old
And I tried longer cases, where my generator's slightly higher setup costs pay off by having faster usage. With 100 to 10000 elements in each of the two phases:
100 and 100
12.06 ± 0.07 μs Stefan_selfmade
14.11 ± 0.13 μs Stefan_generator
18.83 ± 0.12 μs Stefan_nonlocal
19.93 ± 0.21 μs blhsing
24.82 ± 0.31 μs blhsing_old
1000 and 1000
112.88 ± 0.41 μs Stefan_selfmade
129.49 ± 0.55 μs Stefan_generator
179.32 ± 2.37 μs Stefan_nonlocal
192.87 ± 3.03 μs blhsing
239.37 ± 4.70 μs blhsing_old
10000 and 10000
1.11 ± 0.01 ms Stefan_selfmade
1.27 ± 0.01 ms Stefan_generator
1.79 ± 0.01 ms Stefan_nonlocal
1.89 ± 0.02 ms blhsing
2.36 ± 0.01 ms blhsing_old
Full code (Attempt This Online!):
from timeit import timeit
from time import time
from statistics import mean, stdev
from collections import deque
from itertools import takewhile, islice, product
def Stefan_generator(iterable):
    """Take the leading run of values < 12000, then the run of values >= 12000.

    The phase state lives in a generator: its ``send`` method is used as the
    ``takewhile`` predicate, so the position inside the generator body records
    which phase is active.

    Returns a lazy ``takewhile`` iterator over *iterable*.
    """
    def pred():
        x = yield  # reached by the priming next(); receives the first value
        while x < 12000:
            x = yield True
        while x >= 12000:
            x = yield True
        yield False

    phases = pred()
    next(phases)  # advance to the first bare ``yield`` so that send() works
    return takewhile(phases.send, iterable)
def Stefan_nonlocal(iterable):
    """Take the leading run of values < 12000, then the run of values >= 12000.

    Same result as the generator-based variant, but the phase is tracked with
    a boolean flag in the enclosing scope.

    Returns a lazy ``takewhile`` iterator over *iterable*.
    """
    first = True  # True while still in the "< 12000" phase

    def pred(x):
        nonlocal first
        if first:
            if x < 12000:
                return True
            # First value that is not below the limit: switch phases and let
            # the same value be judged by the second-phase condition below.
            first = False
        return x >= 12000

    return takewhile(pred, iterable)
def Stefan_selfmade(iterable):
    """Yield the leading run of values < 12000, then the run of values >= 12000.

    Hand-written generator that does not use ``takewhile``. Both loops pull
    from the same iterator, so the inner loop continues where the outer one
    stopped.
    """
    it = iter(iterable)
    for x in it:
        if x < 12000:
            yield x
        elif x >= 12000:
            # Second phase starts with this value.
            yield x
            for x in it:
                if x >= 12000:
                    yield x
                else:
                    return
        else:
            # Neither comparison holds (e.g. NaN): stop immediately.
            return
def blhsing(iterable):
    """Take the leading run of values < 12000, then the run of values >= 12000.

    The lambda's default argument ``f`` is a one-element set used as a phase
    flag: while it is non-empty the "< 12000" test applies; the first value
    failing that test pops the (falsy) element, after which only the
    ">= 12000" test is evaluated. A new lambda, and thus a new set, is created
    on every call of this function.
    """
    return takewhile(
        lambda x, f={0}: f and (x < 12000 or f.pop()) or x >= 12000,
        iterable,
    )
def blhsing_old(iterable):
    """Take the leading run of values < 12000, then the run of values >= 12000.

    Older flag-set variant: whenever the "< 12000" branch does not succeed,
    ``f.clear()`` is called (it returns ``None``, so evaluation falls through
    to the ">= 12000" test). A new lambda, and thus a new set, is created on
    every call of this function.
    """
    return takewhile(
        lambda x, f={1}: f and x < 12000 or f.clear() or x >= 12000,
        iterable,
    )
# Every implementation under comparison; the first entry is the one whose
# output the others are compared against.
funcs = (
    Stefan_generator,
    Stefan_nonlocal,
    Stefan_selfmade,
    blhsing,
    blhsing_old,
)
def lcg(a=75, c=74, m=2 ** 16 + 1, x0=1):
    """Endless linear congruential generator.

    Yields ``x0`` first and then keeps applying ``x -> (a * x + c) % m``.
    """
    xn = x0
    yield xn
    while True:
        xn = (a * xn + c) % m
        yield xn
### Correctness
def check(iterable):
    """Assert that every function in ``funcs`` agrees with ``funcs[0]``.

    *iterable* is consumed once per function, so it has to be re-iterable
    (the callers pass lists).

    Raises AssertionError carrying the input, the expected and actual output
    and the offending function's name.
    """
    expect = list(funcs[0](iterable))
    for f in funcs:
        result = list(f(iterable))
        assert result == expect, (iterable, expect, result, f.__name__)
# Every combination of 0-4 values just below the limit, 0-4 values exactly at
# the limit, and 0-4 values just below it again.
for a, b, c in product(range(5), repeat=3):
    check([11999] * a + [12000] * b + [11999] * c)

# NaN is neither < 12000 nor >= 12000; all solutions must treat it alike.
check([6000, float('nan'), 6000])
check([6000, float('nan'), 18000])
### Speed
def test(title, iterable, number, unit, scale):
    """Benchmark every function in ``funcs`` on *iterable* and print a ranking.

    Runs 100 rounds; in each round every function is timed with ``timeit``
    over *number* calls and the per-call time is recorded. For each function
    the ten fastest rounds are multiplied by *scale* and reported as
    ``mean ± stdev`` followed by *unit*, fastest function first. Finally the
    total wall-clock time of the whole test is printed.
    """
    print()
    print(title)
    t0 = time()
    times = {f: [] for f in funcs}

    def best(f):
        # The ten fastest rounds of f, converted to the display unit.
        return [t * scale for t in sorted(times[f])[:10]]

    def stats(f):
        ts = best(f)
        return f'{mean(ts):6.2f} ± {stdev(ts):4.2f} {unit} '

    for _ in range(100):
        for f in funcs:
            # deque(..., 0) exhausts the iterator without storing anything.
            t = timeit(lambda: deque(f(iterable), 0), number=number) / number
            times[f].append(t)

    # Rank by the numeric mean: ordering by the formatted text goes wrong as
    # soon as a value needs more than the six padded characters.
    for f in sorted(funcs, key=lambda f: mean(best(f))):
        print(stats(f), f.__name__)
    print(time() - t0)
class Lcg:
    """Re-iterable wrapper: every ``iter()`` on an instance calls ``lcg()``."""

    # ``lcg.__call__`` is already bound to the function object, so it is not
    # re-bound to the instance and ends up being called without arguments.
    __iter__ = lcg.__call__
# The small example from the question, first including the cost of the lcg
# generator itself, then on a precomputed list of its first 20 values.
test('lcg()', Lcg(), 2500, 'μs', 1e6)
test('list(islice(lcg(), 20))', list(islice(lcg(), 20)), 5000, 'μs', 1e6)

# Longer inputs: n values of each phase followed by one terminating value.
# Raise the range() argument to repeat these runs.
for _ in range(1):
    test('100 and 100', [6000] * 100 + [18000] * 100 + [6000], 500, 'μs', 1e6)
    test('1000 and 1000', [6000] * 1000 + [18000] * 1000 + [6000], 50, 'μs', 1e6)
    test('10000 and 10000', [6000] * 10000 + [18000] * 10000 + [6000], 5, 'ms', 1e3)