A colleague has also been starting to learn F#, and today gave me a problem he’d been trying to solve in F# in a “functional style”. He wanted a function that took a string input and returned a list of the words, each tupled with its starting index.

I spent the whole of lunch staring at my screen, trying to wrap my head around List.fold, List.foldBack and other functions; but no joy.

While driving home, it occurred to me that I could do away with that nonsense, and just have a recursive function that passes all the values down, top-to-bottom!

Here’s my latest attempt… The results are correct, which is an improvement over my original attempt!

/// Split the sequence `x` at the first element for which the predicate `f`
/// is false. Returns a pair: the leading run of elements satisfying `f`,
/// and everything from the first failing element onwards.
let splitOn f x =
    let prefix = Seq.takeWhile f x
    let remainder = Seq.skipWhile f x
    prefix, remainder
/// True only for the space character ' '; tabs and newlines do not count.
let isSpace c =
    match c with
    | ' ' -> true
    | _ -> false
/// True for every character except the space character ' '.
let notSpace c = not (c = ' ')
/// Build a string from a sequence of characters.
/// The type is fully qualified so the snippet compiles without `open System`.
/// Note: from this point on, this definition shadows F#'s built-in `string`
/// conversion function.
let string (s : seq<char>) = System.String(Seq.toArray s)

/// Get a list of all words in a string tupled with the starting index.
///
/// `input` is any sequence of characters (a string works). Words are maximal
/// runs of non-space characters; only ' ' counts as a separator. The result
/// is a list of (startIndex, word) pairs in order of appearance, where
/// startIndex is the zero-based position of the word's first character.
/// Leading, repeated and trailing spaces never produce empty words.
let getWords input =

    // index   = position in the original input of the head of `input`
    // results = words found so far, most recent first (reversed at the end)
    let rec getWordsRec index results input =
        match input with
        | [] -> results
        | _ ->
            let white, rest = splitOn isSpace input
            let word, rest = splitOn notSpace rest
            if Seq.isEmpty word then
                // Only spaces were left (e.g. trailing whitespace), so there
                // is no further word to record.
                results
            else
                let wordStart = index + Seq.length white
                getWordsRec
                    (wordStart + Seq.length word)
                    ((wordStart, string word) :: results)
                    (Seq.toList rest)

    getWordsRec 0 [] (Seq.toList input) |> List.rev



// Quick check: note the double space before "World", so it starts at index 12.
let expected =
    [ (0, "Hello")
      (6, "test")
      (12, "World") ]

(getWords "Hello test  World" = expected) |> printfn "Result is %A"

Can anyone come up with something more elegant?

Update

I managed to simplify/flatten it a little, though I’m not convinced it’s any more readable!

/// Build a string from a sequence of characters.
/// The type is fully qualified so the snippet compiles without `open System`.
/// Note: from this point on, this definition shadows F#'s built-in `string`
/// conversion function.
let string (s : seq<char>) = System.String(Seq.toArray s)

/// Get a sequence of (startIndex, word) pairs for every space-separated word
/// in `input`, in order of appearance. Only ' ' is treated as a separator.
let getWords input =
    // Scan step: the state is (start index of the current word, previous char).
    // After a space the next character's own index becomes the word start;
    // otherwise the existing word start is carried forward.
    let carryWordStart (wordStart, previous) (index, current) =
        if previous = ' ' then (index, current) else (wordStart, current)

    // Pair every character with its position in the input.
    let indexedChars = Seq.zip (Seq.initInfinite id) input

    // Seeding with a space makes the very first character start a word.
    let labelled = Seq.scan carryWordStart (0, ' ') indexedChars

    labelled
    |> Seq.filter (fun (_, c) -> c <> ' ') // Drop the separators (and the seed)
    |> Seq.groupBy fst // One group per word start index
    |> Seq.map (fun (wordStart, chars) -> (wordStart, chars |> Seq.map snd |> string))