8

Given a multiline string with some blank lines, how can I iterate over lines in Lua including the blank lines?

-- Desired usage: `magiclines` is the iterator this question asks for.
-- Empty lines are shown as "(blank)" so they are visible in the output.
local s = "foo\nbar\n\njim"
for line in magiclines(s) do
  if line == "" then
    print("(blank)")
  else
    print(line)
  end
end
--> foo
--> bar
--> (blank)
--> jim

This code does not include blank lines:

-- `+` needs at least one non-newline character per match, so an empty line
-- can never be matched and is silently skipped.
for line in string.gmatch(s, '[^\r\n]+') do
  print(line)
end
--> foo
--> bar
--> jim

This code includes extra spurious blank lines:

-- `*` may match the empty string, so (as the output below shows) an extra
-- empty match is reported in addition to the real lines.
for line in string.gmatch(s, "[^\r\n]*") do
  if line == "" then
    print("(blank)")
  else
    print(line)
  end
end
--> foo
--> (blank)
--> bar
--> (blank)
--> (blank)
--> jim
--> (blank)
hjpotter92
  • 78,589
  • 36
  • 144
  • 183
Phrogz
  • 296,393
  • 112
  • 651
  • 745

6 Answers6

10

Try this:

--- Iterate over every line of `s`, including blank lines.
-- Lines are separated by "\n" or "\r\n"; the separator is not part of the
-- returned line. A single trailing newline is treated as the terminator of
-- the last line, so it does not produce an extra empty line. An empty string
-- yields exactly one empty line.
-- @param s  string to split
-- @return   an iterator suitable for a generic `for`
function magiclines(s)
        -- Make sure the last line is terminated so the pattern below sees it.
        if s:sub(-1) ~= "\n" then s = s .. "\n" end
        -- `.-` is the shortest match up to the next newline; the optional
        -- "\r" keeps a CRLF line ending out of the captured line.
        return s:gmatch("(.-)\r?\n")
end
lhf
  • 70,581
  • 9
  • 108
  • 149
4

See if this magiclines implementation suits your bill:

--- Build an iterator over the lines of `str`, blank lines included.
-- A line ends at "\n" or "\r\n". Text after the last separator (possibly
-- the empty string) is returned as the final line.
-- @param str  string to split
-- @return     closure that returns the next line, or nil when exhausted
local function magiclines( str )
    -- Index where the next line starts; nil once the input is used up.
    local cursor = 1
    return function()
        if cursor == nil then
            return nil
        end
        local start = cursor
        local sep_first, sep_last = string.find( str, "\r?\n", start )
        if sep_first == nil then
            -- No separator left: the remainder is the last line.
            local rest = str:sub( start )
            cursor = nil
            return rest
        end
        local line = str:sub( start, sep_first - 1 )
        cursor = sep_last + 1
        return line
    end
end

You can test it with the following code:

-- Sample input written as a long string; it ends with a newline.
local text = [[
foo
bar

jim

woof
]]

-- Print every line, labelling the empty ones.
for line in magiclines( text ) do
    if line == "" then
        print( "(blank)" )
    else
        print( line )
    end
end

Output:

foo
bar
(blank)
jim
(blank)
woof
(blank)
4

Here’s a solution utilizing LPEG:

local lpeg      = require "lpeg"
local lpegmatch = lpeg.match
local P, C      = lpeg.P, lpeg.C

--- Call `f` once for every line of `str`, blank lines included.
-- Recognised line endings are CRLF, LFCR, LF and CR. The text after the last
-- line ending (possibly empty) is passed to `f` as the final line.
-- @param str  string to split
-- @param f    action applied to each line
-- @return     whatever lpeg.match returns for the assembled pattern
local iterlines
do
  local newline = P"\r\n" + P"\n\r" + P"\n" + P"\r"
  local content = (1 - newline)^0
  iterlines = function (str, f)
    -- Function capture: hand each matched line to `f`.
    local visit = content / f
    return lpegmatch ((visit * newline)^0 * visit, str)
  end
end

What you get is a function that can be used in place of an iterator. Its first argument is the string you want to iterate, the second is the action for each match:

--- Print each line (the action is `print` itself).
iterlines ("foo\nbar\n\njim\n\r\r\nbaz\rfoo\n\nbuzz\n\n\n\n", print)

--- Number the lines while writing them out.
local n = 0
local function write_numbered (line)
  n = n + 1
  io.write (("[%2d][%s]\n"):format (n, line))
end
iterlines ("foo\nbar\nbaz", write_numbered)
Philipp Gesang
  • 496
  • 1
  • 6
  • 16
  • @dualed That was ambiguous, I removed the sentence. I was referring to the “loop body may accept empty string” error. Would it be possible to rewrite the pattern so that it accepts empty lines without that workaround? – Philipp Gesang Oct 12 '13 at 12:31
  • I've come across that error, but I'm not really sure what you did there as a workaround, you'll have to actually "capture" it though, otherwise I think lpeg will discard it -- see my solution :) – dualed Oct 12 '13 at 12:40
  • @dualed While analyzing your solution it clicked; the workaround is gone now. Thanks! – Philipp Gesang Oct 12 '13 at 12:50
  • I see what you did there. I was looking and looking for the `C()` capture, but of course it must work with the function capture too... – dualed Oct 12 '13 at 13:45
4

Here is another LPeg solution, because it seems I was writing it at the same time as phg. But since grammars are prettier, I'll still give it to you!

local lpeg = require "lpeg"
local C, V, P = lpeg.C, lpeg.V, lpeg.P

-- Grammar that splits text into lines, one capture per line:
--   S  the whole text: any number of terminated lines, then a final line
--   C  a single character that does not begin a line terminator
--   N  a line terminator (CRLF, LFCR, LF or CR)
-- Lines may be empty; each one is still captured.
local g = P{
    "S",
    N = P"\r\n" + P"\n\r" + P"\n" + P"\r",
    C = P(1) - V"N",
    S = (C(V"C"^0) * V"N")^0 * C(V"C"^0),
}

Use it like this:

-- Sample input mixing LF, CR, CRLF and LFCR line endings.
local test = "Foo\n\nBar\rfoo\r\n\n\n\rbar"
-- Collect the captures returned by g:match in a sequence and walk it with
-- ipairs so the lines are printed in their original order (pairs does not
-- guarantee any traversal order).
for _, line in ipairs({ g:match(test) }) do
    print(">", line)
end

Or just print(g:match(test)) of course

dualed
  • 10,262
  • 1
  • 26
  • 29
  • I like that solution ``;-)`` But I think the first rule should read ``S = (C(V("C")^0) * V("N"))^0 * C(V("C")^0),`` in order for it to accept single lines without a newline char. Btw. you don’t really need a grammar for this. – Philipp Gesang Oct 12 '13 at 12:46
  • Yes, I think you are right about the first line. And of course you don't "need" a grammar, but I think it's ... well, more convenient to write and read - at least if you're used to reading formal syntax definitions. In any case a "grammar" in LPeg is only syntactic sugar for writing the same thing with local (or even global) variables. – dualed Oct 12 '13 at 13:35
3

The following pattern should match each line including blank lines with one caveat: the string must contain a terminating CR or LF.

-- The input carries a terminating "\n" so that the last line is matched too.
local s = "foo\nbar\n\njim\n"

-- Capture the (possibly empty) text in front of each CR or LF.
for line in s:gmatch("([^\r\n]*)[\r\n]") do
   if line == "" then
      print("(blank)")
   else
      print(line)
   end
end

--> foo
--> bar
--> (blank)
--> jim

An alternate pattern that does not require a trailing CR or LF will produce a blank line as the last line (since it is acceptable to capture nothing).

-- No terminating newline this time.
local s = "foo\nbar\n\njim"

-- The line terminator is optional here, so an empty match is also accepted.
for line in s:gmatch("([^\r\n]*)[\r\n]?") do
   if line == "" then
      print("(blank)")
   else
      print(line)
   end
end

--> foo
--> bar
--> (blank)
--> jim
--> (blank)
Adam
  • 3,053
  • 2
  • 26
  • 29
0
--  Iterate over all items in `s`, with one item of look-ahead.
--
--  Options (all optional):
--      separator = "pattern string"   default "\n"; passed to string.find
--      plain     = boolean            default nil; string.find's `plain` flag
--      no_item   = anything           default nil; value reported as
--                                     previous_item of the first item and as
--                                     next_item of the last item
--
--  The separator must consume at least one character: a pattern that can
--  match the empty string never advances and the iteration does not end.
--
--  Use it like this:
--
--  for item, state in TextIterator(s, { options }) do
--  -- item                     - contents without separator
--  -- state.item_num           - number of the item, starting from 1
--  -- state.previous_item      - the preceding item
--  -- state.next_item          - the following item
--  -- state.separator          - the separator matched after `item`
--  --                            ("" for the last item)
--  -- state.is_last            - true when `item` is the last item
--  end
--
--  Returns the iterator function and the state table, as expected by a
--  generic `for`. The caller's `options` table is only read, never modified.
local function TextIterator(s, options)
    options = options or {}
    -- Copy the options into locals so the caller's table stays untouched.
    local separator = options.separator or "\n"
    local plain = options.plain
    local no_item = options.no_item

    -- begin/next_begin are 1-based positions in the text; -1 marks "the last
    -- item has been found", -2 marks "iteration finished".
    local state = {
        text = s, begin = 1, next_begin = 1,
        item_num = -1, next_item = no_item, is_last = false,
    }

    local function get_item(st)
        local text = st.text
        -- Shift the look-ahead values into the "current" slots.
        st.begin = st.next_begin
        st.previous_item = st.item
        st.item = st.next_item
        st.separator = st.next_separator
        st.item_num = st.item_num + 1

        if st.begin == -1 then
            -- The current item is the last one; nothing follows it.
            st.next_item = no_item
            st.next_begin = -2
            st.is_last = true
            return st.item, st
        elseif st.begin == -2 then
            return nil
        end

        local b, e = text:find(separator, st.begin, plain)
        if b then
            -- The matched separator is exactly text[b..e], in plain and in
            -- pattern mode alike, so no second match is needed.
            st.next_separator = text:sub(b, e)
            st.next_begin = e + 1
            st.next_item = text:sub(st.begin, b - 1)
        else
            -- No separator left: the rest of the text is the last item.
            st.next_separator = ""
            st.next_begin = -1
            st.next_item = text:sub(st.begin)
        end
        return st.item, st
    end

    -- Prime the look-ahead so the first loop step already knows its next_item.
    get_item(state)
    return get_item, state
end

-- Demo: split a string on every punctuation character and print each item
-- together with the iteration state. `txt` is declared local so the demo
-- does not create a global variable.
local txt = "a,b;c.d/e.f.g"

for item, state in TextIterator(txt, { separator="%p", plain=false, no_item=nil }) do
    print(item, state.item_num, state.separator,
        state.previous_item, state.next_item, state.is_last)
end

>lua -e "io.stdout:setvbuf 'no'" "txtiterator.lua" 
a   1   ,   nil b   false
b   2   ;   a   c   false
c   3   .   b   d   false
d   4   /   c   e   false
e   5   .   d   f   false
f   6   .   e   g   false
g   7       f   nil true
>Exit code: 0