Skip to content

Commit

Permalink
Optimize split (follow-up to #13)
Browse files Browse the repository at this point in the history
  • Loading branch information
hvr committed Jul 20, 2019
1 parent 80e66db commit 0d24078
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 28 deletions.
5 changes: 4 additions & 1 deletion src-test/Tests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,10 @@ unitTests = testGroup "Unit-tests"
, testCase "singleton" $ [ c | c <- [minBound..maxBound], IUT.singleton c /= IUT.fromText (T.singleton c) ] @?= []

, testCase "splitAtEnd" $ IUT.splitAtEnd 1 "€€" @?= ("","")
, testCase "split" $ IUT.split (== 'a') "aabbaca" @?= ["", "", "bb", "c", ""]
, testCase "split#1" $ IUT.split (== 'a') "aabbaca" @?= ["", "", "bb", "c", ""]
, testCase "split#2" $ IUT.split (const False) "aabbaca" @?= ["aabbaca"]
, testCase "split#3" $ IUT.split (const True) "abc" @?= ["","","",""]
, testCase "split#4" $ IUT.split (const True) "" @?= [""]

, testCase "literal0" $ IUT.unpack testLit0 @?= []
, testCase "literal1" $ IUT.unpack testLit1 @?= ['','\0','','\0']
Expand Down
24 changes: 0 additions & 24 deletions src/Data/Text/Short.hs
Original file line number Diff line number Diff line change
Expand Up @@ -325,30 +325,6 @@ dropWhile p = snd . span p
dropWhileEnd :: (Char -> Bool) -> ShortText -> ShortText
-- Implemented on top of 'spanEnd': only the first component of the
-- pair it returns is kept, the second one is discarded.
dropWhileEnd p st = fst (spanEnd p st)

-- | \(\mathcal{O}(n)\) Break a string into the pieces found between
-- separator characters, i.e. characters for which the predicate
-- yields 'True'. The separators themselves are not part of the
-- result, and two neighbouring separators give rise to an empty
-- piece, e.g.
--
-- >>> split (=='a') "aabbaca"
-- ["","","bb","c",""]
--
-- >>> split (=='a') ""
-- [""]
--
-- prop> intercalate (singleton c) (split (== c) t) = t
--
-- __NOTE__: The result is never the empty list; this mirrors the semantics of the corresponding 'split' from "Data.Text".
--
-- @since 0.1.3
split :: (Char -> Bool) -> ShortText -> [ShortText]
split p = go
  where
    go st = case uncons rest of
        -- nothing left after the non-separator prefix: no separator
        -- was found, so the whole remaining input is the last piece
        Nothing         -> [st]
        -- the first character of 'rest' is the separator; drop it and
        -- continue with what follows
        Just (_, rest') -> x : go rest'
      where
        -- 'x' is the leading run of non-separator characters
        (x, rest) = span (not . p) st

-- $setup
-- >>> :set -XOverloadedStrings
Expand Down
49 changes: 46 additions & 3 deletions src/Data/Text/Short/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ module Data.Text.Short.Internal

, span
, spanEnd
, split

, intersperse
, intercalate
Expand Down Expand Up @@ -352,20 +353,62 @@ findIndex p st = go 0 0

!sz = toB st


-- | \(\mathcal{O}(n)\) Cut a string into the components that lie
-- between separators, where a separator is any character for which
-- the predicate returns 'True'. Separators are not included in the
-- resulting components, and two adjacent separators yield an empty
-- component, e.g.
--
-- >>> split (=='a') "aabbaca"
-- ["","","bb","c",""]
--
-- >>> split (=='a') ""
-- [""]
--
-- prop> intercalate (singleton c) (split (== c) t) = t
--
-- __NOTE__: 'split' always returns at least one component, matching the semantics of its counterpart from "Data.Text".
--
-- @since 0.1.3
split :: (Char -> Bool) -> ShortText -> [ShortText]
split p st0 = chunksFrom 0
  where
    -- end offset of the input, as reported by 'toB'
    !endOfs = toB st0

    -- emit the components of @st0@ that start at offset @start@ or later
    chunksFrom !start =
      case findOfs' p st0 start of
        Nothing
          -- no separator in the entire input: hand back the input
          -- itself instead of slicing out a copy
          | start == 0 -> [st0]
          -- final component: everything from @start@ up to the end
          | otherwise  -> [slice st0 start (endOfs - start)]
        -- a separator begins at @sepOfs@ and scanning resumes at
        -- @nextOfs@; the component is the span in front of the separator
        Just (sepOfs, nextOfs) ->
          slice st0 start (sepOfs - start) : chunksFrom nextOfs

-- internal helper
--
-- @findOfs p st ofs@ walks forward through @st@, decoding one
-- character at a time starting at offset @ofs@, and returns the
-- offset of the first decoded character that satisfies @p@.
-- 'Nothing' is returned once the offset reaches @toB st@ without a
-- match.
{-# INLINE findOfs #-}
findOfs :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfs p st = go
  where
    -- upper bound for valid offsets
    !sz = toB st

    go :: B -> Maybe B
    go !ofs
      | ofs >= sz = Nothing
      | p c       = Just ofs
      | otherwise = go ofs'
      where
        -- lazily bound, hence only decoded when the bounds check
        -- above has passed; @ofs'@ is the offset to resume at
        (c,ofs') = decodeCharAtOfs st ofs

-- Like 'findOfs', but a successful result additionally carries the
-- offset that 'decodeCharAtOfs' reported as following the matching
-- character: @Just (matchOfs, nextOfs)@.
{-# INLINE findOfs' #-}
findOfs' :: (Char -> Bool) -> ShortText -> B -> Maybe (B,B)
findOfs' p st = scan
  where
    -- upper bound for valid offsets
    !end = toB st

    scan :: B -> Maybe (B,B)
    scan !cur
      | cur >= end = Nothing
      | p ch       = Just (cur,next)
      | otherwise  = scan next
      where
        -- lazily bound, hence only decoded when @cur@ is in range
        (ch,next) = decodeCharAtOfs st cur


{-# INLINE findOfsRev #-}
findOfsRev :: (Char -> Bool) -> ShortText -> B -> Maybe B
findOfsRev p st = go
Expand Down Expand Up @@ -770,7 +813,7 @@ foreign import ccall unsafe "hs_text_short_index_cp_rev" c_text_short_index_rev

-- | \(\mathcal{O}(n)\) Split 'ShortText' into two halves.
--
-- @'splitAt' n t@ returns a pair of 'ShortText' with the following properties:
--
-- prop> length (fst (splitAt n t)) == min (length t) (max 0 n)
--
Expand Down Expand Up @@ -829,7 +872,7 @@ splitAtEnd i st
splitAtOfs :: B -> ShortText -> (ShortText,ShortText)
-- Split @st@ at offset @ofs@ into a (prefix, suffix) pair.
--
--  * offset 0 yields @(mempty, st)@
--  * an offset at or beyond @toB st@ yields @(st, mempty)@
--  * otherwise both halves are produced via 'slice'
--
-- The two boundary cases return the input unchanged as one half
-- rather than slicing it.
splitAtOfs ofs st
  | ofs == 0    = (mempty,st)
  | ofs >= stsz = (st,mempty)
  | otherwise   = (slice st 0 ofs, slice st ofs (stsz-ofs))
  where
    !stsz = toB st
Expand Down

0 comments on commit 0d24078

Please sign in to comment.