diff --git a/src-test/Tests.hs b/src-test/Tests.hs index 14cdbd8..51f3cac 100644 --- a/src-test/Tests.hs +++ b/src-test/Tests.hs @@ -169,7 +169,10 @@ unitTests = testGroup "Unit-tests" , testCase "singleton" $ [ c | c <- [minBound..maxBound], IUT.singleton c /= IUT.fromText (T.singleton c) ] @?= [] , testCase "splitAtEnd" $ IUT.splitAtEnd 1 "€€" @?= ("€","€") - , testCase "split" $ IUT.split (== 'a') "aabbaca" @?= ["", "", "bb", "c", ""] + , testCase "split#1" $ IUT.split (== 'a') "aabbaca" @?= ["", "", "bb", "c", ""] + , testCase "split#2" $ IUT.split (const False) "aabbaca" @?= ["aabbaca"] + , testCase "split#3" $ IUT.split (const True) "abc" @?= ["","","",""] + , testCase "split#4" $ IUT.split (const True) "" @?= [""] , testCase "literal0" $ IUT.unpack testLit0 @?= [] , testCase "literal1" $ IUT.unpack testLit1 @?= ['€','\0','€','\0'] diff --git a/src/Data/Text/Short.hs b/src/Data/Text/Short.hs index 5f28cbf..3e9b1a0 100644 --- a/src/Data/Text/Short.hs +++ b/src/Data/Text/Short.hs @@ -325,30 +325,6 @@ dropWhile p = snd . span p dropWhileEnd :: (Char -> Bool) -> ShortText -> ShortText dropWhileEnd p = fst . spanEnd p --- | \(\mathcal{O}(n)\) Splits a string into components delimited by separators, --- where the predicate returns True for a separator element. The --- resulting components do not contain the separators. Two adjacent --- separators result in an empty component in the output. eg. --- --- >>> split (=='a') "aabbaca" --- ["","","bb","c",""] --- --- >>> split (=='a') "" --- [""] --- --- prop> intercalate (singleton c) (split (== c) t) = t --- --- __NOTE__: 'split' never returns an empty list to match the semantics of its counterpart from "Data.Text". --- --- @since 0.1.3 -split :: (Char -> Bool) -> ShortText -> [ShortText] -split p st0 = loop st0 - where - loop st = - let (x, rest) = span (not . 
p) st - in case uncons rest of - Nothing -> [st] - Just (_, rest') -> x : loop rest' -- $setup -- >>> :set -XOverloadedStrings diff --git a/src/Data/Text/Short/Internal.hs b/src/Data/Text/Short/Internal.hs index a3d2837..876985e 100644 --- a/src/Data/Text/Short/Internal.hs +++ b/src/Data/Text/Short/Internal.hs @@ -49,6 +49,7 @@ module Data.Text.Short.Internal , span , spanEnd + , split , intersperse , intercalate @@ -352,13 +353,41 @@ findIndex p st = go 0 0 !sz = toB st + +-- | \(\mathcal{O}(n)\) Splits a string into components delimited by separators, +-- where the predicate returns True for a separator element. The +-- resulting components do not contain the separators. Two adjacent +-- separators result in an empty component in the output. For example: +-- +-- >>> split (=='a') "aabbaca" +-- ["","","bb","c",""] +-- +-- >>> split (=='a') "" +-- [""] +-- +-- prop> intercalate (singleton c) (split (== c) t) == t +-- +-- __NOTE__: 'split' never returns an empty list to match the semantics of its counterpart from "Data.Text". 
+-- +-- @since 0.1.3 +split :: (Char -> Bool) -> ShortText -> [ShortText] +split p st0 = go 0 + where + go !ofs0 = case findOfs' p st0 ofs0 of + Just (ofs1,ofs2) -> slice st0 ofs0 (ofs1-ofs0) : go ofs2 + Nothing + | ofs0 == 0 -> st0 : [] + | otherwise -> slice st0 ofs0 (maxOfs-ofs0) : [] + + !maxOfs = toB st0 + -- internal helper {-# INLINE findOfs #-} findOfs :: (Char -> Bool) -> ShortText -> B -> Maybe B findOfs p st = go where go :: B -> Maybe B - go !ofs | ofs >= sz = Nothing + go !ofs | ofs >= sz = Nothing go !ofs | p c = Just ofs | otherwise = go ofs' where @@ -366,6 +395,20 @@ findOfs p st = go !sz = toB st +{-# INLINE findOfs' #-} +findOfs' :: (Char -> Bool) -> ShortText -> B -> Maybe (B,B) +findOfs' p st = go + where + go :: B -> Maybe (B,B) + go !ofs | ofs >= sz = Nothing + go !ofs | p c = Just (ofs,ofs') + | otherwise = go ofs' + where + (c,ofs') = decodeCharAtOfs st ofs + + !sz = toB st + + {-# INLINE findOfsRev #-} findOfsRev :: (Char -> Bool) -> ShortText -> B -> Maybe B findOfsRev p st = go @@ -770,7 +813,7 @@ foreign import ccall unsafe "hs_text_short_index_cp_rev" c_text_short_index_rev -- | \(\mathcal{O}(n)\) Split 'ShortText' into two halves. -- --- @'splitAtOfs n t@ returns a pair of 'ShortText' with the following properties: +-- @'splitAt' n t@ returns a pair of 'ShortText' with the following properties: -- -- prop> length (fst (splitAt n t)) == min (length t) (max 0 n) -- @@ -829,7 +872,7 @@ splitAtEnd i st splitAtOfs :: B -> ShortText -> (ShortText,ShortText) splitAtOfs ofs st | ofs == 0 = (mempty,st) - | ofs > stsz = (st,mempty) + | ofs >= stsz = (st,mempty) | otherwise = (slice st 0 ofs, slice st ofs (stsz-ofs)) where !stsz = toB st