{-# LANGUAGE BangPatterns #-}
module Data.ByteString.Lazy.Search.KarpRabin (
indicesOfAny
) where
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Unsafe (unsafeIndex)
import qualified Data.IntMap as IM
import Data.Array
import Data.Array.Base (unsafeAt)
import Data.Word (Word8)
import Data.Int (Int64)
import Data.Bits
import Data.List (foldl')
{-# INLINE indicesOfAny #-}
indicesOfAny :: [S.ByteString]
-> L.ByteString
-> [(Int64,[Int])]
indicesOfAny :: [ByteString] -> ByteString -> [(Int64, [Int])]
indicesOfAny [ByteString]
pats
| forall (t :: * -> *) a. Foldable t => t a -> Bool
null [ByteString]
nepats = forall a b. a -> b -> a
const []
| Bool
otherwise = [ByteString] -> [ByteString] -> [(Int64, [Int])]
lazyMatcher [ByteString]
nepats forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
L.toChunks
where
nepats :: [ByteString]
nepats = forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Bool
S.null) [ByteString]
pats
{-# INLINE rehash1 #-}
rehash1 :: Int -> Int -> Word8 -> Word8 -> Int
rehash1 :: Int -> Int -> Word8 -> Word8 -> Int
rehash1 Int
out Int
h Word8
o Word8
n =
(Int
h forall a. Bits a => a -> Int -> a
`shiftL` Int
1 forall a. Num a => a -> a -> a
- (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
o forall a. Bits a => a -> Int -> a
`shiftL` Int
out)) forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
n
{-# INLINE rehash2 #-}
rehash2 :: Int -> Int -> Word8 -> Word8 -> Int
rehash2 :: Int -> Int -> Word8 -> Word8 -> Int
rehash2 Int
out Int
h Word8
o Word8
n =
(Int
h forall a. Bits a => a -> Int -> a
`shiftL` Int
2 forall a. Num a => a -> a -> a
- (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
o forall a. Bits a => a -> Int -> a
`shiftL` Int
out)) forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
n
{-# INLINE rehash3 #-}
rehash3 :: Int -> Int -> Word8 -> Word8 -> Int
rehash3 :: Int -> Int -> Word8 -> Word8 -> Int
rehash3 Int
out Int
h Word8
o Word8
n =
(Int
h forall a. Bits a => a -> Int -> a
`shiftL` Int
3 forall a. Num a => a -> a -> a
- (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
o forall a. Bits a => a -> Int -> a
`shiftL` Int
out)) forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
n
{-# INLINE rehash4 #-}
rehash4 :: Int -> Int -> Word8 -> Word8 -> Int
rehash4 :: Int -> Int -> Word8 -> Word8 -> Int
rehash4 Int
out Int
h Word8
o Word8
n =
(Int
h forall a. Bits a => a -> Int -> a
`shiftL` Int
4 forall a. Num a => a -> a -> a
- (forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
o forall a. Bits a => a -> Int -> a
`shiftL` Int
out)) forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
n
lazyMatcher :: [S.ByteString] -> [S.ByteString] -> [(Int64,[Int])]
lazyMatcher :: [ByteString] -> [ByteString] -> [(Int64, [Int])]
lazyMatcher [ByteString]
pats = forall {a}.
Num a =>
Int -> Int -> ByteString -> [ByteString] -> [(a, [Int])]
search Int
0 Int
hLen ByteString
S.empty
where
!hLen :: Int
hLen = forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum (Int
32 forall a. a -> [a] -> [a]
: forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Int
S.length [ByteString]
pats)
!shDi :: Int
shDi = case Int
32 forall a. Integral a => a -> a -> a
`quot` Int
hLen of
Int
q | Int
q forall a. Ord a => a -> a -> Bool
< Int
4 -> Int
q
| Bool
otherwise -> Int
4
!outS :: Int
outS = Int
shDiforall a. Num a => a -> a -> a
*Int
hLen
!patNum :: Int
patNum = forall (t :: * -> *) a. Foldable t => t a -> Int
length [ByteString]
pats
!patArr :: Array Int ByteString
patArr = forall i e. Ix i => (i, i) -> [e] -> Array i e
listArray (Int
0, Int
patNum forall a. Num a => a -> a -> a
- Int
1) [ByteString]
pats
{-# INLINE rehash #-}
rehash :: Int -> Word8 -> Word8 -> Int
rehash :: Int -> Word8 -> Word8 -> Int
rehash = case Int
shDi of
Int
1 -> Int -> Int -> Word8 -> Word8 -> Int
rehash1 Int
hLen
Int
2 -> Int -> Int -> Word8 -> Word8 -> Int
rehash2 Int
outS
Int
3 -> Int -> Int -> Word8 -> Word8 -> Int
rehash3 Int
outS
Int
_ -> Int -> Int -> Word8 -> Word8 -> Int
rehash4 Int
outS
hash :: S.ByteString -> Int
hash :: ByteString -> Int
hash = forall a. (a -> Word8 -> a) -> a -> ByteString -> a
S.foldl' (\Int
h Word8
w -> (Int
h forall a. Bits a => a -> Int -> a
`shiftL` Int
shDi) forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w) Int
0 forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ByteString -> ByteString
S.take Int
hLen
!hashMap :: IntMap [Int]
hashMap =
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (\IntMap [Int]
mp (Int
h,Int
i) -> forall a. (a -> a -> a) -> Int -> a -> IntMap a -> IntMap a
IM.insertWith (forall a b c. (a -> b -> c) -> b -> a -> c
flip forall a. [a] -> [a] -> [a]
(++)) Int
h [Int
i] IntMap [Int]
mp) forall a. IntMap a
IM.empty forall a b. (a -> b) -> a -> b
$
forall a b. [a] -> [b] -> [(a, b)]
zip (forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Int
hash [ByteString]
pats) [Int
0 :: Int .. ]
search :: Int -> Int -> ByteString -> [ByteString] -> [(a, [Int])]
search Int
_ Int
_ ByteString
_ [] = []
search !Int
h !Int
rm !ByteString
prev (!ByteString
str : [ByteString]
rest)
| Int
strLen forall a. Ord a => a -> a -> Bool
< Int
rm =
let !h' :: Int
h' = forall a. (a -> Word8 -> a) -> a -> ByteString -> a
S.foldl' (\Int
o Word8
w -> (Int
o forall a. Bits a => a -> Int -> a
`shiftL` Int
1) forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w) Int
h ByteString
str
!prev' :: ByteString
prev' = ByteString -> ByteString -> ByteString
S.append ByteString
prev ByteString
str
in Int -> Int -> ByteString -> [ByteString] -> [(a, [Int])]
search Int
h' (Int
rm forall a. Num a => a -> a -> a
- Int
strLen) ByteString
prev' [ByteString]
rest
| Bool
otherwise =
let !h' :: Int
h' = forall a. (a -> Word8 -> a) -> a -> ByteString -> a
S.foldl' (\Int
o Word8
w -> (Int
o forall a. Bits a => a -> Int -> a
`shiftL` Int
1) forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
w) Int
h
(Int -> ByteString -> ByteString
S.take Int
rm ByteString
str)
in if ByteString -> Bool
S.null ByteString
prev
then forall {a}.
Num a =>
a -> [ByteString] -> ByteString -> Int -> [(a, [Int])]
noPast a
0 [ByteString]
rest ByteString
str Int
h'
else forall {a}.
Num a =>
a
-> [ByteString]
-> ByteString
-> Int
-> ByteString
-> Int
-> Int
-> [(a, [Int])]
past a
0 [ByteString]
rest ByteString
prev Int
0 ByteString
str Int
rm Int
h'
where
!strLen :: Int
strLen = ByteString -> Int
S.length ByteString
str
noPast :: a -> [ByteString] -> ByteString -> Int -> [(a, [Int])]
noPast !a
prior [ByteString]
rest !ByteString
str Int
hsh = Int -> Int -> [(a, [Int])]
go Int
hsh Int
0
where
!strLen :: Int
strLen = ByteString -> Int
S.length ByteString
str
!maxIdx :: Int
maxIdx = Int
strLen forall a. Num a => a -> a -> a
- Int
hLen
{-# INLINE strAt #-}
strAt :: Int -> Word8
strAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
i
go :: Int -> Int -> [(a, [Int])]
go !Int
h Int
sI =
case forall a. Int -> IntMap a -> Maybe a
IM.lookup Int
h IntMap [Int]
hashMap of
Maybe [Int]
Nothing ->
if Int
sI forall a. Eq a => a -> a -> Bool
== Int
maxIdx
then case [ByteString]
rest of
[] -> []
(ByteString
nxt : [ByteString]
more) ->
let !h' :: Int
h' = Int -> Word8 -> Word8 -> Int
rehash Int
h (Int -> Word8
strAt Int
sI) (ByteString -> Int -> Word8
unsafeIndex ByteString
nxt Int
0)
!prior' :: a
prior' = a
prior forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
strLen
!prev :: ByteString
prev = Int -> ByteString -> ByteString
S.drop (Int
sI forall a. Num a => a -> a -> a
+ Int
1) ByteString
str
in if Int
hLen forall a. Eq a => a -> a -> Bool
== Int
1
then a -> [ByteString] -> ByteString -> Int -> [(a, [Int])]
noPast a
prior' [ByteString]
more ByteString
nxt Int
h'
else a
-> [ByteString]
-> ByteString
-> Int
-> ByteString
-> Int
-> Int
-> [(a, [Int])]
past a
prior' [ByteString]
more ByteString
prev Int
0 ByteString
nxt Int
1 Int
h'
else Int -> Int -> [(a, [Int])]
go (Int -> Word8 -> Word8 -> Int
rehash Int
h (Int -> Word8
strAt Int
sI) (Int -> Word8
strAt (Int
sI forall a. Num a => a -> a -> a
+ Int
hLen))) (Int
sI forall a. Num a => a -> a -> a
+ Int
1)
Just [Int]
ps ->
let !rst :: ByteString
rst = Int -> ByteString -> ByteString
S.drop Int
sI ByteString
str
!rLen :: Int
rLen = Int
strLen forall a. Num a => a -> a -> a
- Int
sI
{-# INLINE hd #-}
hd :: Word8
hd = Int -> Word8
strAt Int
sI
{-# INLINE more #-}
more :: [(a, [Int])]
more =
if Int
sI forall a. Eq a => a -> a -> Bool
== Int
maxIdx
then case [ByteString]
rest of
[] -> []
(ByteString
nxt : [ByteString]
fut) ->
let !h' :: Int
h' = Int -> Word8 -> Word8 -> Int
rehash Int
h Word8
hd (ByteString -> Int -> Word8
unsafeIndex ByteString
nxt Int
0)
!prior' :: a
prior' = a
prior forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
strLen
in if Int
hLen forall a. Eq a => a -> a -> Bool
== Int
1
then a -> [ByteString] -> ByteString -> Int -> [(a, [Int])]
noPast a
prior' [ByteString]
fut ByteString
nxt Int
h'
else a
-> [ByteString]
-> ByteString
-> Int
-> ByteString
-> Int
-> Int
-> [(a, [Int])]
past a
prior' [ByteString]
fut ByteString
rst Int
1 ByteString
nxt Int
1 Int
h'
else Int -> Int -> [(a, [Int])]
go (Int -> Word8 -> Word8 -> Int
rehash Int
h Word8
hd (Int -> Word8
strAt (Int
sI forall a. Num a => a -> a -> a
+ Int
hLen))) (Int
sI forall a. Num a => a -> a -> a
+ Int
1)
okay :: ByteString -> Bool
okay ByteString
bs
| Int
rLen forall a. Ord a => a -> a -> Bool
< ByteString -> Int
S.length ByteString
bs = ByteString -> ByteString -> Bool
S.isPrefixOf ByteString
rst ByteString
bs Bool -> Bool -> Bool
&&
ByteString -> [ByteString] -> Bool
checkFut (Int -> ByteString -> ByteString
S.drop Int
rLen ByteString
bs) [ByteString]
rest
| Bool
otherwise = ByteString -> ByteString -> Bool
S.isPrefixOf ByteString
bs ByteString
rst
in case forall a. (a -> Bool) -> [a] -> [a]
filter (ByteString -> Bool
okay forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Array Int ByteString
patArr forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
`unsafeAt`)) [Int]
ps of
[] -> [(a, [Int])]
more
[Int]
qs -> seq :: forall a b. a -> b -> b
seq (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
qs) forall a b. (a -> b) -> a -> b
$
(a
prior forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
sI,[Int]
qs) forall a. a -> [a] -> [a]
: [(a, [Int])]
more
past :: a
-> [ByteString]
-> ByteString
-> Int
-> ByteString
-> Int
-> Int
-> [(a, [Int])]
past !a
prior [ByteString]
rest !ByteString
prev !Int
pI !ByteString
str !Int
sI !Int
hsh
| Int
strLen forall a. Ord a => a -> a -> Bool
< Int
4040 =
let !prior' :: a
prior' = a
prior forall a. Num a => a -> a -> a
- a
1 forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
sI forall a. Num a => a -> a -> a
- Int
hLen)
!curr :: ByteString
curr = ByteString -> ByteString -> ByteString
S.append (Int -> ByteString -> ByteString
S.drop Int
pI ByteString
prev) ByteString
str
in a -> [ByteString] -> ByteString -> Int -> [(a, [Int])]
noPast a
prior' [ByteString]
rest ByteString
curr Int
hsh
| Bool
otherwise = Int -> Int -> Int -> [(a, [Int])]
go Int
hsh Int
pI Int
sI
where
!strLen :: Int
strLen = ByteString -> Int
S.length ByteString
str
{-# INLINE strAt #-}
strAt :: Int -> Word8
strAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
str Int
i
{-# INLINE prevAt #-}
prevAt :: Int -> Word8
prevAt !Int
i = ByteString -> Int -> Word8
unsafeIndex ByteString
prev Int
i
go :: Int -> Int -> Int -> [(a, [Int])]
go !Int
h !Int
p !Int
s
| Int
s forall a. Eq a => a -> a -> Bool
== Int
hLen = a -> [ByteString] -> ByteString -> Int -> [(a, [Int])]
noPast a
prior [ByteString]
rest ByteString
str Int
h
| Bool
otherwise =
case forall a. Int -> IntMap a -> Maybe a
IM.lookup Int
h IntMap [Int]
hashMap of
Maybe [Int]
Nothing ->
let {-# INLINE h' #-}
h' :: Int
h' = Int -> Word8 -> Word8 -> Int
rehash Int
h (Int -> Word8
prevAt Int
p) (Int -> Word8
strAt Int
s)
in Int -> Int -> Int -> [(a, [Int])]
go Int
h' (Int
p forall a. Num a => a -> a -> a
+ Int
1) (Int
s forall a. Num a => a -> a -> a
+ Int
1)
Just [Int]
ps ->
let !prst :: ByteString
prst = Int -> ByteString -> ByteString
S.drop Int
p ByteString
prev
{-# INLINE more #-}
more :: [(a, [Int])]
more = Int -> Int -> Int -> [(a, [Int])]
go (Int -> Word8 -> Word8 -> Int
rehash Int
h (Int -> Word8
prevAt Int
p) (Int -> Word8
strAt Int
s)) (Int
p forall a. Num a => a -> a -> a
+ Int
1) (Int
s forall a. Num a => a -> a -> a
+ Int
1)
okay :: ByteString -> Bool
okay ByteString
bs = ByteString -> [ByteString] -> Bool
checkFut ByteString
bs (ByteString
prst forall a. a -> [a] -> [a]
: ByteString
str forall a. a -> [a] -> [a]
: [ByteString]
rest)
in case forall a. (a -> Bool) -> [a] -> [a]
filter (ByteString -> Bool
okay forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall (a :: * -> * -> *) e i.
(IArray a e, Ix i) =>
a i e -> Int -> e
unsafeAt Array Int ByteString
patArr)) [Int]
ps of
[] -> [(a, [Int])]
more
[Int]
qs -> seq :: forall a b. a -> b -> b
seq (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Int]
qs) forall a b. (a -> b) -> a -> b
$
(a
prior forall a. Num a => a -> a -> a
+ forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int
s forall a. Num a => a -> a -> a
- Int
hLen), [Int]
qs) forall a. a -> [a] -> [a]
: [(a, [Int])]
more
{-# INLINE checkFut #-}
checkFut :: S.ByteString -> [S.ByteString] -> Bool
checkFut :: ByteString -> [ByteString] -> Bool
checkFut ByteString
_ [] = Bool
False
checkFut !ByteString
bs (!ByteString
h : [ByteString]
t)
| Int
hLen forall a. Ord a => a -> a -> Bool
< ByteString -> Int
S.length ByteString
bs = ByteString -> ByteString -> Bool
S.isPrefixOf ByteString
h ByteString
bs Bool -> Bool -> Bool
&& ByteString -> [ByteString] -> Bool
checkFut (Int -> ByteString -> ByteString
S.drop Int
hLen ByteString
bs) [ByteString]
t
| Bool
otherwise = ByteString -> ByteString -> Bool
S.isPrefixOf ByteString
bs ByteString
h
where
!hLen :: Int
hLen = ByteString -> Int
S.length ByteString
h