# 字符串搜索的 Horspool 算法

Mar 14, 2015

abcbabababab
cbabab

abcbabababab
cbabab

needle 里第一个 b 不行，因为会错过解；最后一个 b 也不行，因为 needle 没有动。所以除了 needle 里最后一个位置之外，倒数第一个配对的字符才是正确的选择。

int jmp_table[UCHAR_MAX+1];
for(int i = 0; i <= UCHAR_MAX; i++)
jmp_table[i] = nlen;
int last = nlen - 1;
for(int i = 0; i < last; i++)
jmp_table[needle[i]] = last - i;

int horspool(char *haystack, char *needle) {
int hlen = strlen(haystack), nlen = strlen(needle);
int jmp_table[UCHAR_MAX+1];
for(int i = 0; i <= UCHAR_MAX; i++)
jmp_table[i] = nlen;
int last = nlen - 1;
for(int i = 0; i < last; i++)
jmp_table[needle[i]] = last - i;
int j = 0;
while(j <= hlen - nlen){
char c = haystack[j + last];
int i = last;
while(i >= 0 && haystack[j + i] == needle[i]) --i;
if(i == -1) return j;
j += jmp_table[c];
}
return -1;
}

import qualified Data.Map as M (findWithDefault, fromList)
import qualified Data.ByteString as BS

horspool :: BS.ByteString -> BS.ByteString -> Maybe Int
horspool s p = f (lenp - 1) (lenp - 1) (lenp - 1)
where lenp = BS.length p
lens = BS.length s
jmptbl = M.fromList \$ zip (BS.unpack p) [lenp - 1, lenp - 2 .. 1]
jmp x = M.findWithDefault lenp x jmptbl
f i j k
| i >= lens = Nothing
| j == -1 = Just (i + 1)
| s `BS.index` i == p `BS.index` j = f (i-1) (j-1) k
| otherwise = let k' = k + jmp (s `BS.index` k)
in f k' (lenp - 1) k'

main :: IO ()
main = print \$ horspool "abcbabababab" "cbabab"