{-# LANGUAGE BangPatterns      #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes        #-}

module System.IO.Streams.Internal.Search
  ( search
  , MatchInfo(..)
  ) where

------------------------------------------------------------------------------
import           Control.Monad               (when)
import           Control.Monad.IO.Class      (liftIO)
import           Control.Monad.ST            (ST)
import           Data.ByteString.Char8       (ByteString)
import qualified Data.ByteString.Char8       as S
import qualified Data.ByteString.Unsafe      as S
import qualified Data.Vector.Unboxed         as V
import qualified Data.Vector.Unboxed.Mutable as MV
import           Prelude                     (Bool (..), Either (..), Enum (..), Eq (..), IO, Int, Monad (..), Num (..), Ord (..), Show, either, id, maybe, not, otherwise, ($), ($!), (&&), (.), (||))
------------------------------------------------------------------------------
import           System.IO.Streams.Internal  (InputStream)
import qualified System.IO.Streams.Internal  as Streams


------------------------------------------------------------------------------
-- | 'MatchInfo' provides match information when performing string search.
data MatchInfo = Match   {-# UNPACK #-} !ByteString
               | NoMatch {-# UNPACK #-} !ByteString
  deriving (Int -> MatchInfo -> ShowS
[MatchInfo] -> ShowS
MatchInfo -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [MatchInfo] -> ShowS
$cshowList :: [MatchInfo] -> ShowS
show :: MatchInfo -> String
$cshow :: MatchInfo -> String
showsPrec :: Int -> MatchInfo -> ShowS
$cshowsPrec :: Int -> MatchInfo -> ShowS
Show, MatchInfo -> MatchInfo -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: MatchInfo -> MatchInfo -> Bool
$c/= :: MatchInfo -> MatchInfo -> Bool
== :: MatchInfo -> MatchInfo -> Bool
$c== :: MatchInfo -> MatchInfo -> Bool
Eq)


------------------------------------------------------------------------------
-- | Does the given needle match the haystack over the given ranges of indices?
matches :: ByteString     -- ^ needle
        -> Int            -- ^ needle start
        -> Int            -- ^ needle end (inclusive)
        -> ByteString     -- ^ haystack
        -> Int            -- ^ haystack start
        -> Int            -- ^ haystack end (inclusive)
        -> Bool
matches :: ByteString -> Int -> Int -> ByteString -> Int -> Int -> Bool
matches !ByteString
needle !Int
nstart !Int
nend' !ByteString
haystack !Int
hstart !Int
hend' =
    Int -> Int -> Bool
go Int
nend' Int
hend'
  where
    go :: Int -> Int -> Bool
go !Int
nend !Int
hend =
        if Int
nend forall a. Ord a => a -> a -> Bool
< Int
nstart Bool -> Bool -> Bool
|| Int
hend forall a. Ord a => a -> a -> Bool
< Int
hstart
          then Bool
True
          else let !nc :: Word8
nc = ByteString -> Int -> Word8
S.unsafeIndex ByteString
needle Int
nend
                   !hc :: Word8
hc = ByteString -> Int -> Word8
S.unsafeIndex ByteString
haystack Int
hend
               in if Word8
nc forall a. Eq a => a -> a -> Bool
/= Word8
hc
                    then Bool
False
                    else Int -> Int -> Bool
go (Int
nendforall a. Num a => a -> a -> a
-Int
1) (Int
hendforall a. Num a => a -> a -> a
-Int
1)
{-# INLINE matches #-}


------------------------------------------------------------------------------
-- | Given a 'ByteString' to look for (the \"needle\") and an 'InputStream',
-- produces a new 'InputStream' which yields data of type 'MatchInfo'.
--
-- Example:
--
-- @
-- ghci> 'System.IO.Streams.fromList' [\"food\", \"oof\", \"oodles\", \"ok\"] >>=
--       'search' \"foo\" >>= 'System.IO.Streams.toList'
-- ['Match' \"foo\",'NoMatch' \"d\",'NoMatch' \"oo\",'Match' \"foo\",'NoMatch' \"dlesok\"]
-- @
--
-- Uses the Boyer-Moore-Horspool algorithm
-- (<http://en.wikipedia.org/wiki/Boyer%E2%80%93Moore%E2%80%93Horspool_algorithm>).
search :: ByteString                   -- ^ \"needle\" to look for
       -> InputStream ByteString       -- ^ input stream to wrap
       -> IO (InputStream MatchInfo)
search :: ByteString -> InputStream ByteString -> IO (InputStream MatchInfo)
search ByteString
needle InputStream ByteString
stream = forall r a. Generator r a -> IO (InputStream r)
Streams.fromGenerator forall a b. (a -> b) -> a -> b
$
                       forall {m :: * -> *}.
MonadIO m =>
Int -> m (Either ByteString ByteString)
lookahead Int
nlen forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either ByteString -> Generator MatchInfo ()
finishAndEOF ByteString -> Generator MatchInfo ()
startSearch

  where
    --------------------------------------------------------------------------
    finishAndEOF :: ByteString -> Generator MatchInfo ()
finishAndEOF ByteString
x = if ByteString -> Bool
S.null ByteString
x
                       then forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! ()
                       else forall r. r -> Generator r ()
Streams.yield forall a b. (a -> b) -> a -> b
$! ByteString -> MatchInfo
NoMatch ByteString
x

    --------------------------------------------------------------------------
    startSearch :: ByteString -> Generator MatchInfo ()
startSearch !ByteString
haystack =
        if ByteString -> Bool
S.null ByteString
haystack
          then forall {m :: * -> *}.
MonadIO m =>
Int -> m (Either ByteString ByteString)
lookahead Int
nlen forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either ByteString -> Generator MatchInfo ()
finishAndEOF ByteString -> Generator MatchInfo ()
startSearch
          else Int -> Generator MatchInfo ()
go Int
0

      where
        ----------------------------------------------------------------------
        !hlen :: Int
hlen = ByteString -> Int
S.length ByteString
haystack

        ----------------------------------------------------------------------
        go :: Int -> Generator MatchInfo ()
go !Int
hidx
          | Int
hend forall a. Ord a => a -> a -> Bool
>= Int
hlen = Int -> Generator MatchInfo ()
crossBound Int
hidx
          | Bool
otherwise = do
              let match :: Bool
match = ByteString -> Int -> Int -> ByteString -> Int -> Int -> Bool
matches ByteString
needle Int
0 Int
lastIdx ByteString
haystack Int
hidx Int
hend
              if Bool
match
                then do
                  let !nomatch :: ByteString
nomatch    = Int -> ByteString -> ByteString
S.take Int
hidx ByteString
haystack
                  let !aftermatch :: ByteString
aftermatch = Int -> ByteString -> ByteString
S.drop (Int
hend forall a. Num a => a -> a -> a
+ Int
1) ByteString
haystack

                  ByteString -> ByteString -> Generator MatchInfo ()
produceMatch ByteString
nomatch ByteString
aftermatch
                else do
                  -- skip ahead
                  let c :: Word8
c = ByteString -> Int -> Word8
S.unsafeIndex ByteString
haystack Int
hend
                  let !skip :: Int
skip = forall a. Unbox a => Vector a -> Int -> a
V.unsafeIndex Vector Int
table forall a b. (a -> b) -> a -> b
$ forall a. Enum a => a -> Int
fromEnum Word8
c
                  Int -> Generator MatchInfo ()
go (Int
hidx forall a. Num a => a -> a -> a
+ Int
skip)

          where
            !hend :: Int
hend = Int
hidx forall a. Num a => a -> a -> a
+ Int
nlen forall a. Num a => a -> a -> a
- Int
1

        ----------------------------------------------------------------------
        mkCoeff :: Int -> (Int, Int)
mkCoeff Int
hidx = let !ll :: Int
ll = Int
hlen forall a. Num a => a -> a -> a
- Int
hidx
                           !nm :: Int
nm = Int
nlen forall a. Num a => a -> a -> a
- Int
ll
                       in (Int
ll, Int
nm)

        ----------------------------------------------------------------------
        crossBound :: Int -> Generator MatchInfo ()
crossBound !Int
hidx0 = do
            let (!Int
leftLen, Int
needMore) = Int -> (Int, Int)
mkCoeff Int
hidx0

            forall {m :: * -> *}.
MonadIO m =>
Int -> m (Either ByteString ByteString)
lookahead Int
needMore forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
              forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\ByteString
s -> ByteString -> Generator MatchInfo ()
finishAndEOF forall a b. (a -> b) -> a -> b
$ ByteString -> ByteString -> ByteString
S.append ByteString
haystack ByteString
s)
                     (Int -> Int -> Int -> ByteString -> Generator MatchInfo ()
runNext Int
hidx0 Int
leftLen Int
needMore)

          where
            runNext :: Int -> Int -> Int -> ByteString -> Generator MatchInfo ()
runNext !Int
hidx !Int
leftLen !Int
needMore !ByteString
nextHaystack = do
                let match1 :: Bool
match1 = ByteString -> Int -> Int -> ByteString -> Int -> Int -> Bool
matches ByteString
needle Int
leftLen Int
lastIdx ByteString
nextHaystack Int
0
                                     (Int
needMoreforall a. Num a => a -> a -> a
-Int
1)
                let match2 :: Bool
match2 = ByteString -> Int -> Int -> ByteString -> Int -> Int -> Bool
matches ByteString
needle Int
0 (Int
leftLenforall a. Num a => a -> a -> a
-Int
1) ByteString
haystack Int
hidx
                                     (Int
hlenforall a. Num a => a -> a -> a
-Int
1)

                if Bool
match1 Bool -> Bool -> Bool
&& Bool
match2
                  then do
                    let !nomatch :: ByteString
nomatch = Int -> ByteString -> ByteString
S.take Int
hidx ByteString
haystack
                    let !aftermatch :: ByteString
aftermatch = Int -> ByteString -> ByteString
S.drop Int
needMore ByteString
nextHaystack

                    ByteString -> ByteString -> Generator MatchInfo ()
produceMatch ByteString
nomatch ByteString
aftermatch

                  else do
                    let c :: Word8
c = ByteString -> Int -> Word8
S.unsafeIndex ByteString
nextHaystack forall a b. (a -> b) -> a -> b
$ Int
needMore forall a. Num a => a -> a -> a
- Int
1
                    let p :: Int
p = forall a. Unbox a => Vector a -> Int -> a
V.unsafeIndex Vector Int
table (forall a. Enum a => a -> Int
fromEnum Word8
c)

                    if Int
p forall a. Ord a => a -> a -> Bool
< Int
leftLen
                      then do
                        let !hidx' :: Int
hidx' = Int
hidx forall a. Num a => a -> a -> a
+ Int
p
                        let (!Int
leftLen', Int
needMore') = Int -> (Int, Int)
mkCoeff Int
hidx'
                        let !nextlen :: Int
nextlen = ByteString -> Int
S.length ByteString
nextHaystack
                        if Int
nextlen forall a. Ord a => a -> a -> Bool
< Int
needMore'
                          then
                            -- this should be impossibly rare
                            forall {m :: * -> *}.
MonadIO m =>
Int -> m (Either ByteString ByteString)
lookahead (Int
needMore' forall a. Num a => a -> a -> a
- Int
nextlen) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
                              forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\ByteString
s -> ByteString -> Generator MatchInfo ()
finishAndEOF forall a b. (a -> b) -> a -> b
$
                                            [ByteString] -> ByteString
S.concat [ ByteString
haystack
                                                     , ByteString
nextHaystack
                                                     , ByteString
s ])
                                     (\ByteString
s -> Int -> Int -> Int -> ByteString -> Generator MatchInfo ()
runNext Int
hidx' Int
leftLen' Int
needMore' forall a b. (a -> b) -> a -> b
$
                                            ByteString -> ByteString -> ByteString
S.append ByteString
nextHaystack ByteString
s)
                          else Int -> Int -> Int -> ByteString -> Generator MatchInfo ()
runNext Int
hidx' Int
leftLen' Int
needMore' ByteString
nextHaystack
                      else do
                          let sidx :: Int
sidx = Int
p forall a. Num a => a -> a -> a
- Int
leftLen
                          let (!ByteString
crumb, ByteString
rest) = Int -> ByteString -> (ByteString, ByteString)
S.splitAt Int
sidx ByteString
nextHaystack
                          forall r. r -> Generator r ()
Streams.yield forall a b. (a -> b) -> a -> b
$! ByteString -> MatchInfo
NoMatch forall a b. (a -> b) -> a -> b
$! ByteString -> ByteString -> ByteString
S.append ByteString
haystack ByteString
crumb
                          ByteString -> Generator MatchInfo ()
startSearch ByteString
rest

    --------------------------------------------------------------------------
    produceMatch :: ByteString -> ByteString -> Generator MatchInfo ()
produceMatch ByteString
nomatch ByteString
aftermatch = do
        forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ ByteString -> Bool
S.null ByteString
nomatch) forall a b. (a -> b) -> a -> b
$ forall r. r -> Generator r ()
Streams.yield forall a b. (a -> b) -> a -> b
$! ByteString -> MatchInfo
NoMatch ByteString
nomatch
        forall r. r -> Generator r ()
Streams.yield forall a b. (a -> b) -> a -> b
$! ByteString -> MatchInfo
Match ByteString
needle
        ByteString -> Generator MatchInfo ()
startSearch ByteString
aftermatch

    --------------------------------------------------------------------------
    !nlen :: Int
nlen    = ByteString -> Int
S.length ByteString
needle
    !lastIdx :: Int
lastIdx = Int
nlen forall a. Num a => a -> a -> a
- Int
1

    --------------------------------------------------------------------------
    !table :: Vector Int
table = forall a. Unbox a => (forall s. ST s (MVector s a)) -> Vector a
V.create forall a b. (a -> b) -> a -> b
$ do
        MVector s Int
t <- forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
MV.replicate Int
256 Int
nlen
        forall s. MVector s Int -> ST s (MVector s Int)
go MVector s Int
t

      where
        go :: forall s . MV.MVector s Int -> ST s (MV.MVector s Int)
        go :: forall s. MVector s Int -> ST s (MVector s Int)
go !MVector s Int
t = Int -> ST s (MVector s Int)
go' Int
0
          where
            go' :: Int -> ST s (MVector s Int)
go' !Int
i | Int
i forall a. Ord a => a -> a -> Bool
>= Int
lastIdx  = forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Int
t
                   | Bool
otherwise     = do
                let c :: Int
c = forall a. Enum a => a -> Int
fromEnum forall a b. (a -> b) -> a -> b
$ ByteString -> Int -> Word8
S.unsafeIndex ByteString
needle Int
i
                forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> Int -> a -> m ()
MV.unsafeWrite MVector s Int
t Int
c (Int
lastIdx forall a. Num a => a -> a -> a
- Int
i)
                Int -> ST s (MVector s Int)
go' forall a b. (a -> b) -> a -> b
$! Int
iforall a. Num a => a -> a -> a
+Int
1

    --------------------------------------------------------------------------
    lookahead :: Int -> m (Either ByteString ByteString)
lookahead Int
n = forall {m :: * -> *}.
MonadIO m =>
([ByteString] -> [ByteString])
-> Int -> m (Either ByteString ByteString)
go forall a. a -> a
id Int
n
      where
        go :: ([ByteString] -> [ByteString])
-> Int -> m (Either ByteString ByteString)
go [ByteString] -> [ByteString]
dlist !Int
k = forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (forall a. InputStream a -> IO (Maybe a)
Streams.read InputStream ByteString
stream) forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe forall {b}. m (Either ByteString b)
eof ByteString -> m (Either ByteString ByteString)
chunk
          where
            eof :: m (Either ByteString b)
eof = forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$! [ByteString] -> ByteString
S.concat forall a b. (a -> b) -> a -> b
$ [ByteString] -> [ByteString]
dlist []

            chunk :: ByteString -> m (Either ByteString ByteString)
chunk ByteString
x = if Int
r forall a. Ord a => a -> a -> Bool
<= Int
0
                        then forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$! forall a b. b -> Either a b
Right forall a b. (a -> b) -> a -> b
$! [ByteString] -> ByteString
S.concat forall a b. (a -> b) -> a -> b
$ [ByteString] -> [ByteString]
d' []
                        else ([ByteString] -> [ByteString])
-> Int -> m (Either ByteString ByteString)
go [ByteString] -> [ByteString]
d' Int
r
              where
                l :: Int
l  = ByteString -> Int
S.length ByteString
x
                r :: Int
r  = Int
k forall a. Num a => a -> a -> a
- Int
l
                d' :: [ByteString] -> [ByteString]
d' = [ByteString] -> [ByteString]
dlist forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString
xforall a. a -> [a] -> [a]
:)