首页 > ACM题库 > HDU-杭电 > HDU 4029-Distinct Sub-matrix-后缀数组-[解题报告]HOJ
2015
04-15

HDU 4029-Distinct Sub-matrix-后缀数组-[解题报告]HOJ

Distinct Sub-matrix

问题描述 :

In this problem, let us consider an N*M matrix of capital letters. By selecting consecutive columns and rows, we can define the sub-matrix as the elements on chosen columns and rows.
Two sub-matrices should be regarded the same if and only if they have the same dimensions and characters (which, of course, are capital letters) on corresponding position. It is your task to find the number of distinct sub-matrices of a given letter matrix.

输入:

The input contains a lot of test cases. The first line of input contains exactly one integer, indicating the number of test cases.
  For each of the test case, the first line contains two integers N and M, denoting the number of rows and columns of the given matrix. (1 <= N, M <= 128)
  The next N lines contain only capital letters, indicating the given matrix.

输出:

For each test case, output a single line in the format "Case #x: y", where x is the case number (starting from 1) and y is the number of distinct sub-matrices of the given matrix.

样例输入:

2
2 2
AB
BA
3 3
ABA
BAA
AAA

样例输出:

Case #1: 7
Case #2: 22

题意 : 给定一个 n * m 的大写字母矩阵 ( n <= 128 , m <= 128 ) , 求其中不同的子矩阵个数。

思路 : 因为 n 和 m 都很小 , 所以我们可以枚举子矩阵的高度 H : 把每一列中高度为 H 的一段用 Hash 压缩成一个整数 , 然后把起点在同一行的这些整数按列的顺序组成一个字符串 , 不同行的字符串之间用互不相同且没有出现过的整数隔开 , 再利用后缀数组求不同子串的个数 , 这时求出来的就是高度为 H 的不同子矩阵的个数。

求解的时候 , 我们要先把枚举得到的 Hash 值离散化 : 后缀数组里基数排序的桶数取决于字符的取值范围 , 直接用 64 位的 Hash 值无法分桶 , 取值范围过大也会超时 (TLE)。

#include <stdio.h>
#include <string.h>
#include <algorithm>
using namespace std;

// Matrix dimensions (rows, columns) of the current test case; read in main.
int n , m ;
// The letter matrix; main stores row i starting at mp[i][1], so indices are 1-based.
char mp[155][155] ;
// NOTE(review): __int64 is a compiler extension (MSVC / MinGW), not standard
// C or C++ -- this file presumably targets such a toolchain; confirm before porting.
typedef __int64 LL ;  
typedef unsigned __int64 uLL ;

// Hash[i][j]: base-133 polynomial hash of column j over rows 1..i (Hash[0][j] = 0).
// Arithmetic is unsigned 64-bit, so it wraps modulo 2^64.
uLL Hash[155][155] ;
// xp[i] = 133^i (same wrapping arithmetic); used to cut a row range out of Hash.
uLL xp[155] ;
// Capacity of every suffix-array related buffer below.
#define maxn 40005

// Scratch buffers for da(): two rank arrays (wa, wb), the first-key array (wv)
// and the counting-sort buckets (wt).
int wa[maxn],wb[maxn],wv[maxn],wt[maxn];    

// Returns 1 when positions a and b carry the same pair of keys
// (r[a], r[a+l]) == (r[b], r[b+l]), otherwise 0.
// The second key is only read when the first keys already match.
int cmp(int *r,int a,int b,int l)
{
    if (r[a] != r[b]) {
        return 0;
    }
    return r[a+l] == r[b+l];
}
  
// Builds the suffix array of r[0..n-1] into sa[0..n-1] by prefix doubling,
// using counting sorts on the global scratch buffers wa/wb/wv/wt.
//   r  : input sequence; every value must lie in [0, m) because values index wt.
//   sa : output; sa[i] is the start position of the i-th smallest suffix.
//   n  : number of elements of r to sort (main passes the text plus one
//        trailing 0 that is smaller than every other element).
//   m  : exclusive upper bound on the values in r; both n and m must fit in maxn.
void da(int *r,int *sa,int n,int m){    
    int i,j,p,*x=wa,*y=wb,*t;    
    // Initial pass: counting sort of the suffixes by their first element;
    // x[] starts out as the raw element values.
    for(i=0;i<m;i++) wt[i]=0;    
    for(i=0;i<n;i++) wt[x[i]=r[i]]++;    
    for(i=1;i<m;i++) wt[i]+=wt[i-1];    
    for(i=n-1;i>=0;i--) sa[--wt[x[i]]]=i;    
    // Each round doubles the compared prefix length j; p counts the distinct
    // ranks produced, becomes the bucket count m of the next round, and the
    // loop stops once all n ranks are distinct.
    for(j=1,p=1;p<n;j*=2,m=p){    
        // y[] = positions ordered by their second key (the rank at i+j):
        // the last j positions have no second half and therefore come first,
        // the rest follow in the order given by the previous sa.
        for(p=0,i=n-j;i<n;i++) y[p++]=i;    
        for(i=0;i<n;i++) if(sa[i]>=j) y[p++]=sa[i]-j;    
        // Counting sort of y[] by first key, filled from the back so that the
        // second-key order is kept among equal first keys.
        for(i=0;i<n;i++) wv[i]=x[y[i]];    
        for(i=0;i<m;i++) wt[i]=0;    
        for(i=0;i<n;i++) wt[wv[i]]++;    
        for(i=1;i<m;i++) wt[i]+=wt[i-1];    
        for(i=n-1;i>=0;i--) sa[--wt[wv[i]]]=y[i];    
        // Swap the rank buffers and assign new ranks: neighbours in sa share
        // a rank only if both halves compared equal under the old ranks.
        for(t=x,x=y,y=t,p=1,x[sa[0]]=0,i=1;i<n;i++)    
            x[sa[i]]=cmp(y,sa[i-1],sa[i],j)?p-1:p++;    
    }    
}    
  
// Rank[p]   : position of the suffix starting at p inside sa.
// height[i] : length of the common prefix of the suffixes sa[i-1] and sa[i].
int Rank[maxn],height[maxn];

// Fills Rank[] and height[] for the text r[0..n-1].
//   r  : the text; r[n] must be a sentinel that differs from every r[0..n-1]
//        (main stores 0 there), which is what stops the comparison loop.
//   sa : suffix array over the n+1 elements r[0..n], as produced by
//        da(r, sa, n+1, ...); sa[0] is the sentinel suffix, so only
//        sa[1..n] are ranked here.
//   n  : text length without the sentinel.
// The match length k is carried from one text position to the next and
// shrinks by at most one per step, so the inner loop does not restart from 0.
void calheight(int *r,int *sa,int n){
    int i , k = 0 ;
    for( i=1 ; i<=n ; i++ ) Rank[sa[i]]=i;
    for(i=0;i<n;i++) {
        if(k)k--;
        // Suffix that immediately precedes suffix i in sorted order.
        int j = sa[Rank[i]-1];
        while(r[i+k]==r[j+k]) k++ ;
        height[Rank[i]] = k ;
    }
}

// Buffer holding the column hashes of the current height for coordinate compression.
uLL x[maxn] ;

// Removes adjacent duplicates from x[0..n-1] in place (x is expected to be
// sorted by the caller) and returns the INDEX of the last kept element,
// i.e. (number of distinct values) - 1. For n <= 1 the result is 0.
int lisanhua( int n , uLL * x ) {
	int last = 0 ;
	for( int cur = 1 ; cur < n ; ++ cur ) {
		if( x[cur] == x[cur-1] ) continue ;
		++ last ;
		x[last] = x[cur] ;
	}
	return last ;
}

// r   : integer string handed to the suffix array (compressed hashes + separators).
// sa  : suffix array of r.
// len : len[p] = number of hash symbols from position p to the end of its row
//       (0 for separators and for the terminating sentinel).
int r[maxn] , sa[maxn] ;
int len[maxn] ;

// Reads the test cases and prints, for each one, the number of distinct
// sub-matrices as "Case #x: y".
//
// For every height h, each column segment of h rows is replaced by its hash,
// the hashes of one starting row form a string, and all rows are concatenated
// with pairwise different separators. Distinct substrings of that string that
// do not cross a separator correspond to distinct sub-matrices of height h.
//
// Input is validated: the program stops quietly on truncated or out-of-range
// input instead of reading uninitialised values or overrunning the buffers.
int main(){
	int cas ;
	int casn = 1 ;
	if( scanf( "%d" , &cas ) != 1 ) return 0 ;
	while( cas -- > 0 ) {
		if( scanf( "%d%d" , &n , &m ) != 2 ) return 0 ;
		// All buffers are sized for the stated limits 1 <= N, M <= 128.
		if( n < 1 || n > 128 || m < 1 || m > 128 ) return 0 ;
		for( int i = 1 ; i <= n ; i ++ ) {
			// mp[i] + 1 leaves 154 bytes: at most 153 characters plus the NUL.
			if( scanf( "%153s" , mp[i] + 1 ) != 1 ) return 0 ;
			// A short row would make the hashing below read past its terminator.
			if( (int)strlen( mp[i] + 1 ) < m ) return 0 ;
		}
		memset( Hash , 0 , sizeof(Hash) ) ;
		xp[0] = 1 ;
		for( int i = 1 ; i <= n ; i ++ ) xp[i] = xp[i-1] * 133 ;
		// Prefix hashes down every column.
		for( int i = 1 ; i <= n ; i ++ ) {
			for( int j = 1 ; j <= m ; j ++ ) {
				Hash[i][j] = Hash[i-1][j] * 133 + mp[i][j] - 'A' ;
			}
		}
		// NOTE(review): __int64 and "%I64d" are Microsoft-runtime specific;
		// kept as-is because the target judge presumably expects them.
		__int64 ans = 0 ;
		// Enumerate the sub-matrix height.
		for( int h = 1 ; h <= n ; h ++ ) {
			// Collect every column-segment hash of this height so the values can
			// be compressed; the counting sort in da() needs a small alphabet.
			int Index = 0 ;
			for( int i = 1 ; i + h - 1 <= n ; i ++ ) {
				for( int j = 1 ; j <= m ; j ++ ) {
					x[Index++] = Hash[i+h-1][j] - Hash[i-1][j] * xp[h] ;
				}
			}
			sort( x , x + Index ) ;
			// After this call Index is the index of the last distinct hash.
			Index = lisanhua( Index , x ) ;
			int k = 0 ;
			memset( len , 0 , sizeof(len) ) ;
			for( int i = 1 ; i + h - 1 <= n; i ++ ) {
				for( int j = 1 ; j <= m; j ++ ) {
					uLL key = Hash[i+h-1][j] - Hash[i-1][j] * xp[h] ;
					int Id = lower_bound( x , x + Index + 1 , key ) - x ;
					len[k] = m - j + 1 ;
					// Symbols are 1-based so that 0 stays free for the sentinel.
					r[k++] = Id + 1 ;
				}
				// Row separator: larger than every symbol and unique per row.
				r[k++] = Index + 5 + i ;
			}
			// Sentinel required by da() / calheight().
			r[k] = 0 ;
			da( r , sa , k + 1 , Index + n + 50 ) ;
			calheight( r , sa , k ) ;
			// Each suffix contributes its in-row prefixes that were not already
			// counted for the previous suffix in sorted order.
			ans += len[sa[1]] ;
			for( int i = 2 ; i <= k ; i ++ ) {
				ans += len[sa[i]] - height[i] ;
			}
		}
		printf( "Case #%d: %I64d\n" , casn ++ , ans ) ;
	}
	return 0 ;
}

版权声明:本文为博主原创文章,未经博主允许不得转载。

参考:http://blog.csdn.net/lishaozhe1024/article/details/37541949


  1. 这道题这里的解法最坏情况似乎应该是指数级的。设 T(n) 为规模为 n 时回溯的时间 , 则
    T(n) = T(n-1) + T(n-2) + …
    T(n-1) = T(n-2) + T(n-3) + …
    两式相减 : T(n) - T(n-1) = T(n-1)
    即 T(n) = 2T(n-1) , 所以 T(n) = O(2^n)。