UVa 526

From Algorithmist
Jump to navigation Jump to search

526 - String Distance and Transform Process[edit]

Summary[edit]

Given three operations (insert, delete, replace) and two strings, calculate the lowest cost of transforming the first string into the second one. Generate the list of operations for such a transformation as well.

Explanation[edit]

A typical edit distance problem, with all three of the possible operations having the same cost.

Gotchas[edit]

  • Be careful with empty lines.
  • Be careful with the position of operations (i.e. the i-th character). It might have changed due to deletion and insertion.

Notes[edit]

  • The list of operations need not to match with the given sample output exactly, but the cost must be minimum and the operation should make sense.

Implementations[edit]

You can put the list of operation into the DP as well. This is easier to program, since no trace-back is not needed. However, it would be slow.

Optimizations[edit]

Do not put the list of operation into DP. Do a separate trace-back to get the sequence of operations. (See the solution below)

Solution[edit]

#include <cstdio>
#include <cstring>

using namespace std;

#define MAXLEN 100 

int m[MAXLEN+1][MAXLEN+1]; // DP sub-problem
char p[MAXLEN+1][MAXLEN+1]; // decision matrix

// s[1...], t[1...] 
int string_compare(char *s, char *t){ 
	int i, j, k; 
	int opt[3]; // cost of the tree option
	int s_len, t_len; 
	s_len = strlen(s); 
	t_len = strlen(t); // length boundary
	m[0][0] = 0; 
	p[0][0] = -1; 
	for(i=1; i<MAXLEN; ++i){ 
		m[0][i] = i; // source is empty, insert i times
		p[0][i] = 1; // mark insert operation
		m[i][0] = i; // target is empty, delete i times
		p[i][0] = 2; // mark delete operation
	}

	for(i=1; i<s_len; i++) {
		for(j=1; j<t_len; j++){ 
			opt[0] = m[i-1][j-1] + (s[i]==t[j] ? 0: 1); // match or substitute 
			opt[1] = m[i][j-1] + 1; // insert 
			opt[2] = m[i-1][j] + 1; // delete

			//m[i][j] = min(opt[0..2]) 
			m[i][j] = opt[0]; 
			p[i][j] = 0; 
			for(k=0; k<3; ++k){
				if(opt[k]<m[i][j]){ 
					m[i][j] = opt[k];
					p[i][j] = k; 
				} 
			}
		} 
	}
	return m[s_len-1][t_len-1]; 
}

// build the sequence of operations
void traceback(char *s, char *t){ 
	char st[MAXLEN + MAXLEN +2]; // sequence 
	int len; // length of the sequence
	int i, j, k; 
	
	i = strlen(s) - 1; 
	j = strlen(t) - 1; 
	k = 0; 
	
	while(true){ 
		if(p[i][j] == -1) 
			break; 
		else if(p[i][j] == 0){
			if(s[i] == t[j]) 
				st[k] = 'M'; 
			else 
				st[k] = 'S'; --i, --j; 
		} 
		else if(p[i][j] == 1){ 
			st[k] = 'I'; --j; 
		} 
		else if(p[i][j] == 2){ 
			st[k] = 'D'; --i; 
		} k++; 
	} 
	
	len = k; 
	i = j = 0; 
	int u, v; 
	u = 1; //the u-th operation
	v = 1; //the v-th operating position
	
	for(k=len-1; k>=0; --k){ 
		if(st[k] == 'M'){ 
			++i;
			++j; 
			++v; 
		} 
		else if(st[k] == 'S'){
			++i; 
			++j; 
			printf("%d Replace %d,%c\n", u, v, t[j]); 
			++v; 
			++u; 
		} 
		else if(st[k] == 'I'){
			++j; 
			printf("%d Insert %d,%c\n", u, v, t[j]); 
			++v; 
			++u; 
		} 
		else if(st[k] == 'D'){
			++i; 
			printf("%d Delete %d\n", u, v); 
			++u; 
		} 
	}

}

int main(){ 
// freopen("in", "r", stdin);
	int ld; 
	char s[MAXLEN+1], t[MAXLEN+1]; 
	s[0] = t[0] = ' '; 
	bool first = true; //output blank line control
	while(gets(&s[1])){
		gets(&t[1]); 
		if(!first) 
			printf("\n"); 
		else 
			first = false; 
		ld = string_compare(s, t);
		printf("%d\n", ld); 
		traceback(s, t); 
	} 
	return 0; 
}

Input[edit]

abcac
bcd
aaa
aabaaaa

boy
apple

yoyo
oyoy
hello_world
hello_old

Output[edit]

3
1 Delete 1
2 Delete 3
3 Replace 3,d

4
1 Insert 1,a
2 Insert 2,a
3 Insert 3,b
4 Insert 4,a

3
1 Insert 1,b
2 Insert 2,o
3 Insert 3,y

5
1 Delete 1
2 Delete 1
3 Delete 1
4 Delete 1
5 Delete 1

2
1 Delete 1
2 Insert 4,y

2
1 Delete 7
2 Delete 8

See Also[edit]