Conflation

The document describes a conflation algorithm to process text documents. The algorithm performs the following steps: 1. Displays the original file contents. 2. Removes punctuation marks from the text. 3. Removes high frequency words from the text. 4. Performs suffix stripping to remove common suffixes from words. 5. Detects equivalent stems remaining after suffix stripping. 6. Generates a representation of the original text after processing. The algorithm opens multiple files to read input text and write output at each processing step. It provides menus to demonstrate each step and writes the results to files.

Uploaded by

Pratik B

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT or PDF, or read online on Scribd

0% found this document useful (0 votes)

245 views6 pages

Conflation

Uploaded by

Pratik B

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT or PDF, or read online on Scribd

You are on page 1/ 6

Aim: Conflation Algorithm

#include<stdio.h>
#include<conio.h>
#include<stdlib.h>
#include<string.h>
/*
 * Pipeline stages. Full prototypes (instead of empty parameter lists) let the
 * compiler check every call against the definitions further down the file.
 */
void orig_file(FILE *fp);
void punct_remove(FILE *fp, FILE *fp1);
void freq_words_remove(FILE *fp1, FILE *fp2, FILE *fp3);
void suffix_strip(FILE *fp3, FILE *fp4, FILE *fp5);
void equi_stem(FILE *fp5, FILE *fp6);
void stem(FILE *fp6, FILE *fp7);

/*
 * Reads one integer from stdin into *value.
 * Returns 1 on success, 0 when the input is not a number (the rest of the
 * offending line is discarded so the next read does not see it again), or
 * EOF when stdin is exhausted.
 */
static int read_int(int *value)
{
    int rc = scanf("%d", value);

    if (rc == 0) {
        int c;

        while ((c = getchar()) != '\n' && c != EOF) {
            /* skip the malformed line */
        }
    }
    return rc;
}

/* Closes every non-NULL stream in files[0..count-1]. */
static void close_files(FILE **files, int count)
{
    int i;

    for (i = 0; i < count; i++) {
        if (files[i] != NULL) {
            fclose(files[i]);
        }
    }
}

/*
 * Menu-driven driver for the conflation demo.
 *
 * Opens the three input files (Filenew.txt, Stops.txt, Suffix_list.txt) and
 * the five intermediate/output files, then repeatedly shows the menu and runs
 * the selected stage until the user declines to continue or picks "Exit".
 *
 * Returns EXIT_SUCCESS on a normal exit and EXIT_FAILURE when any file cannot
 * be opened.
 */
int main(void)
{
    FILE *fp, *fp1, *fp2, *fp3, *fp4, *fp5, *fp6, *fp7;
    FILE *files[8];
    int ans = 0;
    int ch = 0;
    int rc;
    int quit = 0;
    int i;
    int open_failed = 0;

    clrscr();
    files[0] = fp  = fopen("Filenew.txt", "r+");
    files[1] = fp1 = fopen("Wo_punct.txt", "w+");
    files[2] = fp2 = fopen("Stops.txt", "r+");
    files[3] = fp3 = fopen("Wo_freq.txt", "w+");
    files[4] = fp4 = fopen("Suffix_list.txt", "r+");
    files[5] = fp5 = fopen("Wo_suffix.txt", "w+");
    files[6] = fp6 = fopen("Doc_copy.txt", "w+");
    files[7] = fp7 = fopen("Doc_rep.txt", "w+");

    for (i = 0; i < 8; i++) {
        if (files[i] == NULL) {
            open_failed = 1;
        }
    }
    if (open_failed) {
        printf("\nError in opening file!!!\n");
        close_files(files, 8);   /* release whichever streams did open */
        getch();
        return EXIT_FAILURE;
    }

    printf("\t\t --------CONFLATION ALGORTIHM SIMULATION--------\n");
    do {
        ans = 0;
        printf("\nMENU:\n=====\n");
        printf("\n1.Display the Original File contents.");
        printf("\n2.Remove Punctuation Marks.");
        printf("\n3.Remove the High Frequency Words.");
        printf("\n4.Perform Suffix Stripping.");
        printf("\n5.Detect Equivalent Stems.");
        printf("\n6.Document Representation of the original file.");
        printf("\n7.Exit.");
        printf("\nEnter your Choice: ");

        rc = read_int(&ch);
        if (rc == EOF) {
            break;               /* no more input: leave the menu loop */
        }
        if (rc != 1) {
            ch = 0;              /* not a number: report it via the default case */
        }

        switch (ch) {
        case 1:
            printf("\n\t\t\t\tORIGINAL DOCUMENT\n");
            printf("\t\t\t\t-----------------\n");
            orig_file(fp);
            break;
        case 2:
            printf("\n\t\t\tAfter removal of Punctuation Marks\n");
            printf("\t\t\t----------------------------------\n");
            punct_remove(fp, fp1);
            break;
        case 3:
            printf("\n\t\t\tAfter Removal of High Frequency Words\n");
            printf("\t\t\t-------------------------------------\n");
            freq_words_remove(fp1, fp2, fp3);
            break;
        case 4:
            printf("\n\t\t\tAfter Suffix Stripping\n");
            printf("\t\t\t----------------------\n");
            suffix_strip(fp3, fp4, fp5);
            break;
        case 5:
            printf("\n\t\t\tAfter Detecting Equivalent Stems\n");
            printf("\t\t\t--------------------------------\n");
            /* equi_stem takes exactly two streams; stem() writes fp7. */
            equi_stem(fp5, fp6);
            stem(fp6, fp7);
            break;
        case 6:
            printf("\n\t\t\t DOCUMENT REPRESENTATION\n");
            printf("\t\t\t -----------------------\n");
            orig_file(fp7);
            break;
        case 7:
            quit = 1;
            break;
        default:
            printf("\nINVALID INPUT!!!");
            break;
        }
        if (quit) {
            break;
        }

        printf("\n\nDo You Want To Continue?\n1.YES\t\t0.NO\n");
        if (read_int(&ans) != 1) {
            ans = 0;             /* anything unreadable counts as "no" */
        }
    } while (ans == 1);

    close_files(files, 8);
    if (!quit) {
        getch();                 /* "Exit" leaves immediately, as before */
    }
    return EXIT_SUCCESS;
}
/*
 * Prints the whole content of fp to stdout, starting from the beginning of
 * the stream. The stream is left positioned at end-of-file.
 *
 * fp: an open, readable stream; a NULL pointer is ignored.
 */
void orig_file(FILE *fp)
{
    /* Must be int: fgetc returns an unsigned char value or EOF, and a plain
       char cannot tell the byte 0xFF apart from EOF. */
    int ch;

    if (fp == NULL) {
        return;
    }
    rewind(fp);
    while ((ch = fgetc(fp)) != EOF) {
        putchar(ch);
    }
}
/*
 * Copies fp to fp1 from the start of both streams, replacing each of the
 * punctuation marks  . , ! ? : ' " ; - ( ) [ ]  with a single space, then
 * prints the result.
 *
 * fp:  readable source stream.
 * fp1: update stream that receives the punctuation-free text.
 */
void punct_remove(FILE *fp,FILE *fp1)
{
    static const char marks[] = ".,!?:'\";-()[]";
    int ch;   /* int so that EOF is distinguishable from every byte value */

    rewind(fp);
    rewind(fp1);
    /* Test the read itself rather than feof(): feof() only turns true after
       a failed read, which would let the EOF sentinel be written as a byte. */
    while ((ch = getc(fp)) != EOF) {
        /* strchr also matches the terminating NUL, so exclude it explicitly. */
        if (ch != '\0' && strchr(marks, ch) != NULL) {
            fputc(' ', fp1);
        } else {
            fputc(ch, fp1);
        }
    }
    orig_file(fp1);
}
/*
 * Copies every word of fp1 that does not appear in the stop-word list fp2
 * to fp3 (words separated by single spaces), then prints fp3.
 * The comparison against stop words ignores letter case.
 *
 * fp1: stream holding the punctuation-free text; read from its beginning.
 * fp2: stream holding whitespace-separated stop words; the whole list is
 *      scanned for every input word, whatever its length.
 * fp3: update stream that receives the filtered words.
 *
 * Words longer than 63 characters are split at that limit instead of
 * overrunning the buffers.
 */
void freq_words_remove(FILE *fp1,FILE *fp2,FILE *fp3)
{
    char dword[64];
    char stopword[64];
    int is_stop;

    /* Use the caller's stream: re-opening the file here would leak a handle
       on every call and ignore the argument that was passed in. */
    rewind(fp1);
    rewind(fp2);
    rewind(fp3);

    /* Loop on the fscanf result so the final word is processed even when the
       input has no trailing whitespace. */
    while (fscanf(fp1, "%63s", dword) == 1) {
        is_stop = 0;
        rewind(fp2);
        while (fscanf(fp2, "%63s", stopword) == 1) {
            if (stricmp(dword, stopword) == 0) {
                is_stop = 1;
                break;
            }
        }
        if (!is_stop) {
            fprintf(fp3, "%s", dword);
            fputc(' ', fp3);
        }
    }
    orig_file(fp3);
}
/*
 * Reads words from fp3, strips a known suffix from each where the rules
 * below allow it, writes the resulting words to fp5 separated by single
 * spaces, and finally prints fp5.
 *
 * fp3: stream with the words to process; read from its beginning.
 * fp4: stream with whitespace-separated suffixes; only the first eight
 *      entries are consulted and their position selects the rule:
 *        entries 0-5: strip the suffix when the word ends with it;
 *        entry 6:     as above, but only when the first occurrence of the
 *                     suffix is the last character of the word;
 *        entry 7:     as entries 0-5, and additionally, when the text from
 *                     the last occurrence of the suffix's first letter is
 *                     one character longer than the suffix, that tail is
 *                     replaced by 'l'.
 *                     NOTE(review): the intent of this last rule depends on
 *                     the contents of the suffix list - confirm against it.
 * fp5: update stream that receives the stemmed words.
 *
 * Only the first suffix found anywhere inside a word is considered, and only
 * words of at least four characters are stripped. Every input word produces
 * exactly one output word: when nothing is stripped the word is copied
 * unchanged.
 */
void suffix_strip(FILE *fp3,FILE *fp4,FILE *fp5)
{
    char sword[64];
    char suffix[64];
    char newword[64];
    char *comp;
    char *revpos;
    size_t length, suf_length, tail_length, stem_length, k;
    int i, emitted, candidate, strip;

    /* Use the caller's stream: re-opening the file here would leak a handle
       on every call and ignore the argument that was passed in. */
    rewind(fp3);
    rewind(fp4);
    rewind(fp5);

    while (fscanf(fp3, "%63s", sword) == 1) {
        emitted = 0;
        length = strlen(sword);
        rewind(fp4);

        for (i = 0; i < 8 && fscanf(fp4, "%63s", suffix) == 1; i++) {
            comp = strstr(sword, suffix);
            if (comp == NULL) {
                continue;
            }
            suf_length = strlen(suffix);

            /* Entry 6 only applies when the match is the final character. */
            candidate = (i != 6) || (strlen(comp) == 1);
            if (candidate) {
                printf("Suffix '%s' is a substring of '%s'\n\n", comp, sword);
                if (length >= 4) {
                    /* Cannot be NULL: the suffix, and so its first letter,
                       occurs in the word. */
                    revpos = strrchr(sword, suffix[0]);
                    tail_length = strlen(revpos);
                    strip = (tail_length == suf_length
                             && strcmp(revpos, suffix) == 0)
                            || (i == 7 && tail_length == suf_length + 1);
                    if (strip) {
                        /* Keep everything in front of the tail; comparing
                           characters instead of positions would stop at the
                           first matching letter and truncate the stem. */
                        stem_length = (size_t)(revpos - sword);
                        for (k = 0; k < stem_length; k++) {
                            newword[k] = sword[k];
                        }
                        if (tail_length != suf_length) {
                            newword[stem_length++] = 'l';
                        }
                        newword[stem_length] = '\0';
                        fprintf(fp5, "%s", newword);
                        fputc(' ', fp5);
                        emitted = 1;
                    }
                }
            }
            break;   /* only the first matching suffix is considered */
        }

        if (!emitted) {
            /* No rule fired: pass the word through rather than losing it. */
            fprintf(fp5, "%s", sword);
            fputc(' ', fp5);
        }
    }
    orig_file(fp5);
}
/*
 * Copies the words of fp5 to fp6, keeping only the first occurrence of each
 * word (compared without regard to letter case), then prints fp6 under an
 * "After Removing Repeated Words" heading.
 *
 * fp5: stream with the stemmed words; read from its beginning.
 * fp6: update stream that is both searched for words already kept and
 *      appended to. Content already present in fp6 counts as "kept".
 *
 * Words longer than 63 characters are split at that limit instead of
 * overrunning the buffers.
 */
void equi_stem(FILE *fp5,FILE *fp6)
{
    char word1[64];
    char nextword[64];
    int seen;

    rewind(fp5);
    rewind(fp6);

    /* Loop on the fscanf result: an empty input writes nothing and the last
       word is not processed a second time. */
    while (fscanf(fp5, "%63s", word1) == 1) {
        seen = 0;
        rewind(fp6);
        while (fscanf(fp6, "%63s", nextword) == 1) {
            if (stricmp(word1, nextword) == 0) {
                seen = 1;
                break;
            }
        }
        if (!seen) {
            /* An update stream needs a positioning call between reading and
               writing; this also guarantees the word is appended. */
            fseek(fp6, 0L, SEEK_END);
            fprintf(fp6, "%s", word1);
            fputc(' ', fp6);
        }
    }

    printf("\n\t\t\tAfter Removing Repeated Words\n");
    printf("\t\t\t-----------------------------\n");
    orig_file(fp6);
}
/*
 * Writes the document representation: copies the words of fp6 to fp7,
 * replacing every word that contains "happiness" with "happy", then prints
 * fp7 under an "After Removing Equivalent Stems" heading.
 *
 * fp6: stream with the de-duplicated words; read from its beginning.
 * fp7: update stream that receives the representation; written from its
 *      beginning so that running the stage again does not append a second
 *      copy.
 *
 * The single hard-coded replacement is the only equivalence this function
 * knows about.
 */
void stem(FILE *fp6,FILE *fp7)
{
    char word1[64];
    const char *rep_word = "happy";

    printf("\n\n\t\t\tAfter Removing Equivalent Stems\n");
    printf("\t\t\t-------------------------------\n");

    rewind(fp6);
    rewind(fp7);

    /* Loop on the fscanf result so the final word is kept even when the
       input has no trailing whitespace; the width keeps long words inside
       the buffer. */
    while (fscanf(fp6, "%63s", word1) == 1) {
        if (strstr(word1, "happiness") != NULL) {
            fprintf(fp7, "%s", rep_word);
        } else {
            fprintf(fp7, "%s", word1);
        }
        fputc(' ', fp7);
    }
    orig_file(fp7);
}

Pass 1 Assembler
No ratings yet
Pass 1 Assembler
4 pages
VoThanhDat22280010 BTVN
No ratings yet
VoThanhDat22280010 BTVN
36 pages
ISR Code
No ratings yet
ISR Code
27 pages
004
No ratings yet
004
119 pages
Print
No ratings yet
Print
38 pages
SP Lab Ex - 1 To 15 (03.04.2024)
No ratings yet
SP Lab Ex - 1 To 15 (03.04.2024)
67 pages
String Get - String
No ratings yet
String Get - String
17 pages
CPU Part 2
No ratings yet
CPU Part 2
143 pages
C Programming Loop
No ratings yet
C Programming Loop
14 pages
Bloque 3 (Reducido A Temario de Examen)
No ratings yet
Bloque 3 (Reducido A Temario de Examen)
11 pages
Modified COMPD FIN
No ratings yet
Modified COMPD FIN
53 pages
CDSS Lab Programs 1-11
No ratings yet
CDSS Lab Programs 1-11
27 pages
Compd Fin
No ratings yet
Compd Fin
45 pages
Assignment: Ahsanullah University of Science and Technology (AUST)
No ratings yet
Assignment: Ahsanullah University of Science and Technology (AUST)
8 pages
String Problems
No ratings yet
String Problems
13 pages
Lab 2
No ratings yet
Lab 2
7 pages
20BBS0163 Os Theory Da
No ratings yet
20BBS0163 Os Theory Da
6 pages
Strings
No ratings yet
Strings
7 pages
Pass 1 & 2
No ratings yet
Pass 1 & 2
8 pages
C Prog Week 10
No ratings yet
C Prog Week 10
6 pages
Workshop 08
No ratings yet
Workshop 08
10 pages
Raport: Ministerul Educaţiei Al Republicii Moldova Universitatea Tehnică A Moldovei
No ratings yet
Raport: Ministerul Educaţiei Al Republicii Moldova Universitatea Tehnică A Moldovei
10 pages
Day11 Assignment
No ratings yet
Day11 Assignment
7 pages
Wa0005
No ratings yet
Wa0005
11 pages
PSIPL7
No ratings yet
PSIPL7
12 pages
2 Pass Macro Processor
No ratings yet
2 Pass Macro Processor
4 pages
Compiler 5-10
No ratings yet
Compiler 5-10
11 pages
Exp 1 - Updated
No ratings yet
Exp 1 - Updated
4 pages
Ex. No: 7 Generating A Transcript With CGPA and GRADE: Program
No ratings yet
Ex. No: 7 Generating A Transcript With CGPA and GRADE: Program
7 pages
Compiler 12,13,14
No ratings yet
Compiler 12,13,14
10 pages
Telecom Interview Questions Answers Guide PDF
No ratings yet
Telecom Interview Questions Answers Guide PDF
10 pages
Singlepass
No ratings yet
Singlepass
3 pages
MP Record Print....
No ratings yet
MP Record Print....
11 pages
Message
No ratings yet
Message
4 pages
CS6612 - Rejinpaul
No ratings yet
CS6612 - Rejinpaul
64 pages
#Include Int Main : //C Code To Count Number of Words Spaces and White Characters
No ratings yet
#Include Int Main : //C Code To Count Number of Words Spaces and White Characters
5 pages
Praticals of Compiler Design
No ratings yet
Praticals of Compiler Design
13 pages
Conflation
No ratings yet
Conflation
4 pages
File Program
No ratings yet
File Program
2 pages
CN Record
No ratings yet
CN Record
64 pages
Dsa Lab 10,11,12,13
No ratings yet
Dsa Lab 10,11,12,13
20 pages
Dictionar C++
No ratings yet
Dictionar C++
4 pages
22BCE1892 Final
No ratings yet
22BCE1892 Final
13 pages
Replace A Specific Word With The Given Word in The File
No ratings yet
Replace A Specific Word With The Given Word in The File
4 pages
Match
No ratings yet
Match
2 pages
Compiler Design Lab Assignment 1 - 21BDS0214
No ratings yet
Compiler Design Lab Assignment 1 - 21BDS0214
17 pages
Practical 7: AIM: Write A Program To Generate Quadruple Table Form Given String
No ratings yet
Practical 7: AIM: Write A Program To Generate Quadruple Table Form Given String
11 pages
Pass 2
No ratings yet
Pass 2
1 page
Assignment 1:: Write A Program To Implement Simple Code Optimization Technique
No ratings yet
Assignment 1:: Write A Program To Implement Simple Code Optimization Technique
3 pages
CPDS Lab Files
No ratings yet
CPDS Lab Files
3 pages
Spell Checker
No ratings yet
Spell Checker
5 pages
Sodapdf
No ratings yet
Sodapdf
23 pages
RBNMM
No ratings yet
RBNMM
2 pages
17
No ratings yet
17
35 pages
Programs
No ratings yet
Programs
7 pages
Код
No ratings yet
Код
2 pages
Design of A Two Pass Assembler - Pass I: Program
No ratings yet
Design of A Two Pass Assembler - Pass I: Program
6 pages
CCDE Written Learning Matrix
0% (1)
CCDE Written Learning Matrix
36 pages
First Pass of Two Pass Assembler
No ratings yet
First Pass of Two Pass Assembler
4 pages
One Pass
No ratings yet
One Pass
2 pages
Huawei Products
No ratings yet
Huawei Products
251 pages
Kpit Code
No ratings yet
Kpit Code
14 pages
How To Configure A GPON ONT (Distributed Mode)
No ratings yet
How To Configure A GPON ONT (Distributed Mode)
9 pages
5.web Crawler Writeup
No ratings yet
5.web Crawler Writeup
7 pages
Assignment No: 2: Aim: Objective
No ratings yet
Assignment No: 2: Aim: Objective
4 pages
استمارة معلومات مذكرة التخرج
No ratings yet
استمارة معلومات مذكرة التخرج
27 pages
ETG100 User Manual en
No ratings yet
ETG100 User Manual en
18 pages
CJ - FDD - Handover Success Rate (Intra Frequency)
No ratings yet
CJ - FDD - Handover Success Rate (Intra Frequency)
22 pages
S4120 Series Switch Installation Manual
No ratings yet
S4120 Series Switch Installation Manual
135 pages
Real Life Applications of OSI Models
No ratings yet
Real Life Applications of OSI Models
12 pages
OSIReference Model 3
No ratings yet
OSIReference Model 3
25 pages
X435 DS X435 1 en-US Extreme Datasheet
No ratings yet
X435 DS X435 1 en-US Extreme Datasheet
9 pages
3.5.5 Packet Tracer - Investigate The TCP-IP and OSI Models in Action
No ratings yet
3.5.5 Packet Tracer - Investigate The TCP-IP and OSI Models in Action
2 pages
Cloud Networking Scaling Out Data Center Networks
No ratings yet
Cloud Networking Scaling Out Data Center Networks
22 pages
BCS 041
No ratings yet
BCS 041
4 pages
Wodesys WD-4503AC
No ratings yet
Wodesys WD-4503AC
3 pages
CCNA Security v2.0 Practice Skills Assesement Part 1 - Packet Tracer - Implementing Network Security
No ratings yet
CCNA Security v2.0 Practice Skills Assesement Part 1 - Packet Tracer - Implementing Network Security
16 pages
3.1.4 Packet Tracer - Who Hears The Broadcast PDF
0% (1)
3.1.4 Packet Tracer - Who Hears The Broadcast PDF
2 pages
FD EPON OLT 4 PORT FD4004 - Datasheet
No ratings yet
FD EPON OLT 4 PORT FD4004 - Datasheet
4 pages
MikroTik Routers and Wireless
No ratings yet
MikroTik Routers and Wireless
7 pages
USR TCP232 304 Datasheet
No ratings yet
USR TCP232 304 Datasheet
1 page
CCNA 3 Chapter 2 v5.0 Exam Answers 2015 100
No ratings yet
CCNA 3 Chapter 2 v5.0 Exam Answers 2015 100
6 pages
Assignment No.: 5: Aim: Theory
No ratings yet
Assignment No.: 5: Aim: Theory
3 pages
Practice Quiz3 Soln
No ratings yet
Practice Quiz3 Soln
2 pages
ePMP™ Force 400 Series: Quick Look
No ratings yet
ePMP™ Force 400 Series: Quick Look
7 pages
5G Release 17 Network Architecture
No ratings yet
5G Release 17 Network Architecture
2 pages
Question Bank For Univ Exam
No ratings yet
Question Bank For Univ Exam
3 pages
Assignment No: 3: Aim: Objective: Theory:-Inverted Index
No ratings yet
Assignment No: 3: Aim: Objective: Theory:-Inverted Index
2 pages
Trinux Packages
No ratings yet
Trinux Packages
5 pages
Opnet Atm
No ratings yet
Opnet Atm
4 pages
Protocol Description Ipx / SPX (Nwlink)
No ratings yet
Protocol Description Ipx / SPX (Nwlink)
2 pages
3.1.2.7 Packet Tracer - Investigating A VLAN Implementation PDF
No ratings yet
3.1.2.7 Packet Tracer - Investigating A VLAN Implementation PDF
4 pages
BGP TTL Security Hack PDF
No ratings yet
BGP TTL Security Hack PDF
3 pages
Computer Engineering Laboratory Solution Primer
From Everand
Computer Engineering Laboratory Solution Primer
Karan Bhandari
No ratings yet

Conflation

Uploaded by

Conflation

Uploaded by

Aim:-Conflation Algorithm

You might also like