字符串匹配的RK算法+随机

在使用RK算法进行字符串匹配时，q必须是一个充分大的素数。为了减少出现假匹配的概率，先计算小于12mn2的素数集合，再从集合中随机取出一个素数，可使假匹配的概率小于1/n。此外，这个算法总能得到正确的答案。

#include<iostream>
#include<map>
#include<set>
#include<time.h>
using namespace std;
#define N 100 // 待匹配串的最多长度
#define M 20 // 模式串的最多长度
#define MULTIPLIER 0x015A4E35L
#define INCREMENT 1
static unsigned long seed;
/* 生成随机数种子 */
void random_seed(unsigned long d){
if(d==0){
seed = time(0);
}else{
seed = d;
}
}
/* 生成一个low~high范围内的随机数 */
unsigned int random(unsigned long low, unsigned long high){
seed = MULTIPLIER * seed + INCREMENT;
return ((seed>>16)%(high-low)+low);
}
long BASE; // 进制数
set<char> v; // 存放待匹配串和模式串中出现的字符的集合
map<char, int>data; // 存放待匹配串和模式串中出现的字符的哈希表
/* 将串中出现的字符放入集合 */
void inSet(char s[]){
int n = strlen(s);
for(int i = 0; i<n; i++){
v.insert(s[i]);
}
BASE = v.size();
}
/* 计算出现的字符的哈希表 */
void countMap(){
data.clear();
int i = 0;
for(set<char>::iterator iter = v.begin(); iter!=v.end(); iter++){
data[*iter] = i++;
}
}
/* 出现的字符的哈希函数 */
int ch(char s){
return data[s];
}
/*
* 字符串匹配
* 输入：存放待匹配串的数组S[]，待匹配串的长度n，
* 存放模式串的数组P[]，模式串的长度m，素数p
* 输出：与P相匹配的子串在待匹配串中的起始位置loc
*/
void match(char S[], long n, char P[], long m, long &loc, long q){
long b = BASE;
long i, k;
long w = 0, p = 0;
long x = 1;
// 计算b^(m-1) % q
for(i=0; i<m-1; i++){
x = (x*b)%q;
}
// 计算第一个窗口子串的哈希值
for(i=0; i<m; i++){
w = (w*b + ch(S[i]))%q;
}
// 计算模式串的哈希值
for(i=0; i<m; i++){
p = (p*b + ch(P[i]))%q;
}
i = 0;
while((i<=n-m) && (loc == -1)){
if(w==p) { // 如果与模式串相等则仔细检查是否真的相等
for( k=0; k<m; k++){
if(S[i+k]!=P[k])break;
}
if(k>=m) loc = i;
}
// 计算下一个窗口子串的哈希值
w = w - ch(S[i])*x%q;
if(w<0) w += q;
w = (w*b + ch(S[i+m]))%q;
i++;
}
}
/*
* 字符串匹配的随机算法
* 输入：存放待匹配串的数组S[]，待匹配串的长度n，
* 存放模式串的数组P[]，模式串的长度m，
* 小于12*m*n*n的素数集合R[]，素数集合的元素个数a
* 输出：与P相匹配的子串在待匹配串中的起始位置loc
*/
void match_random(char S[], long n, char P[], long m, long &loc, long R[], int a){
long q;
random_seed(0);
q = random(a/2,a); // 从a/2~a范围中取一个素数
q = R[q];
match(S, n, P, m, loc, q);
}
/*
* 找素数算法
* 输入：所找素数的上界size
* 输出：所找素数集合的大小a
*/
long* findPrime(int size, int &a){
long* r = new long[size];
bool* prime = new bool[size];
int i,j;
prime[0] = false;
for(i=1; i<size; i++){
prime[i] = true;
}
i = 1;
while(i<size){
if(prime[i]){
j = (i+1)*2;
while(j<size){
prime[j-1] = false;
j += i+1;
}
}
i++;
}
a = 0;
for(i=1; i<size; i++){
if(prime[i-1]){
r[a++] = i;
}
}
delete prime;
return r;
}
int main(){
char str[N], substr[M];
while(cin>>str){
if(!strcmp(str,"exit"))break; // 待匹配串为exit时退出
cin>>substr;
long loc = -1;
int n = strlen(str);
int m = strlen(substr);
int a;
long* R = findPrime(12*m*n*n, a); // 计算小于12*m*n*n的素数集合
/* 计算待匹配串和模式串中出现的元素集合 */
v.clear();
inSet(str);
inSet(substr);
countMap();
// 随机匹配
match_random(str, n, substr, m, loc, R, a);
if(loc==-1){ // 如果未匹配则尝试用最接近12*m*n*n的素数进行匹配
cout<<"No Found!"<<endl;
match(str, n, substr, m, loc, R[a-1]);
cout<<"Use Prime: "<<R[a-1]<<endl;
if(loc==-1){
cout<<"No Found!"<<endl;
}else{
cout<<"Location In "<<loc+1<<endl;
}
}else{
cout<<"Location In "<<loc+1<<endl;
}
delete R;
system("pause");
cout<<"================================"<<endl;
}
return 0;
}

才知道%对负数不起作用……还需要自己手工判断……

随机的快速排序算法

仅仅在原来的快速排序基础上增加了取随机数为划分点，改进了原有快排使用第一个元素作为划分元素的缺点，即减少了遇到最坏情况的可能。

#include<iostream>
#include<time.h>
using namespace std;
#define N 20 // 最大数组个数
#define MULTIPLIER 0x015A4E35L
#define INCREMENT 1
static unsigned long seed;
/* 生成随机数种子 */
void random_seed(unsigned long d){
if(d==0){
seed = time(0);
}else{
seed = d;
}
}
/* 生成一个low~high范围内的随机数 */
unsigned int random(unsigned long low, unsigned long high){
seed = MULTIPLIER * seed + INCREMENT;
return ((seed>>16)%(high-low)+low);
}
/*
* 按枢点元素划分序列
* 输入：数组A[]，序列的起始位置low，终止位置high
* 输出：按枢点元素划分的序列A[]，枢点元素位置i
*/
template<class Type>
int split(Type A[], int low, int high){
int k, i = low;
Type x= A[low];
for(k = low+1; k<=high; k++) {
if(A[k]<=x){
i ++;
if(i!=k){
swap(A[i], A[k]);
}
}
}
swap(A[low], A[i]);
return i;
}
/*
* 初始化随机数并执行快排
* 输入：数组A[]，序列的起始位置low，终止位置high
* 输出：排序好的序列A[]
*/
template<class Type>
void quicksort_random(Type A[], int low, int high){
random_seed(0);
r_quicksort(A, low, high);
}
/*
* 随机选择枢点的快速排序
* 输入：数组A[]，序列的起始位置low，终止位置high
* 输出：排序好的序列A[]
*/
template<class Type>
void r_quicksort(Type A[], int low, int high){
int k;
if( low < high ){
k = random(low, high);
swap(A[low], A[k]);
k = split(A, low, high);
r_quicksort(A, low, k-1);
r_quicksort(A, k+1, high);
}
}
int main(){
int A[N];
int i, n;
cin>>n;
for(i=0; i<n; i++)cin>>A[i];
quicksort_random(A,0,n-1);
for(i=0; i<n-1; i++)cout<<A[i]<<" ";
cout<<A[n-1]<<endl;
return 0;
}

Liny_@NotePad

沉迷ACG中

字符串匹配的RK算法+随机

随机的快速排序算法

搜索

YOYO

分类

留言

评论

同学们

计数器

统计下