0% found this document useful (0 votes)
24 views27 pages

ISR Code

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
24 views27 pages

ISR Code

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 27

Name: Akshata Vishal Dongaonkar

Roll No.:45020
Div: BE-IT-A
Subject: ISR

1. Conflation Algorithm

import java.io.BufferedReader; import

java.io.FileNotFoundException; import

java.io.FileReader; import

java.io.IOException; import

java.io.InputStreamReader;

import java.util.ArrayList;

import java.util.HashMap;

import java.util.Iterator; import

java.util.Map;

/**
 * Demonstrates a simple conflation pipeline on a text file: stop-word removal,
 * naive suffix stripping, and word-frequency counting. All results are printed
 * to standard output.
 */
public class Conflation {

    /** Stop words found by the most recent {@link #conflation(String)} call, in input order. */
    public static ArrayList<String> stopList = new ArrayList<String>();

    /** Words kept (not stop words) by the most recent {@link #conflation(String)} call, in input order. */
    public static ArrayList<String> removestopList = new ArrayList<String>();

    /** Suffixes tried in order by the suffix-stripping step; the first match wins. */
    public static String suffixes[] = { "able", "ing", "ion", "y", "ment" };

    /** Words dropped before stemming and counting. Matching is case-sensitive. */
    public static String stopwords[] = { "i", "big", "am", "m", "a", "we",
            "are", "it", "of", "this", "and", "is", "to", "at", "in", "was",
            "with", "doing", "It", "not", "our" };

    /**
     * Prompts for a file name on standard input and runs the pipeline on that file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        BufferedReader buff = new BufferedReader(new InputStreamReader(System.in));
        String fname = "";
        System.out.println("Enter a filename:");
        try {
            fname = buff.readLine();
        } catch (IOException e) {
            e.printStackTrace();
        }
        // readLine() returns null at end of input; there is nothing to open in that case.
        if (fname == null || fname.trim().isEmpty()) {
            System.err.println("No filename given.");
            return;
        }
        conflation(fname.trim());
    }

    /**
     * Reads every line of the given file, separates stop words from the remaining
     * words, then prints the remaining words, their suffix-stripped forms and
     * their frequencies.
     *
     * <p>{@link #stopList} and {@link #removestopList} are cleared first, so after
     * the call they describe this file only. If the file cannot be read, the
     * stack trace is printed and no report is produced.
     *
     * @param fname path of the text file to process
     */
    public static void conflation(String fname) {
        // Start from a clean slate so repeated calls do not mix results.
        stopList.clear();
        removestopList.clear();

        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(fname))) {
            String line;
            while ((line = reader.readLine()) != null) {
                classifyWords(line);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return;
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        System.out.println("\n--------------After Removing Stop Words-----------------");
        for (String word : removestopList) {
            System.out.println(word);
        }
        suffixesString(removestopList);
        countFrequency(removestopList);
    }

    /** Splits one line on whitespace and files each word under stopList or removestopList. */
    private static void classifyWords(String line) {
        for (String token : line.trim().split("\\s+")) {
            // A token ending in '.' has its dots removed (sentence-final period).
            String word = token.endsWith(".") ? token.replace(".", "") : token;
            if (word.isEmpty()) {
                continue; // blank line, or a token that consisted only of dots
            }
            if (isStopWord(word)) {
                stopList.add(word);
            } else {
                removestopList.add(word);
            }
        }
    }

    /** Returns true when the word appears verbatim in {@link #stopwords}. */
    private static boolean isStopWord(String word) {
        for (String stopword : stopwords) {
            if (stopword.equals(word)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Prints how many times each word occurs in the given list, one
     * {@code word = count} line per distinct word.
     *
     * @param removestopList2 the words to count
     */
    private static void countFrequency(ArrayList<String> removestopList2) {
        System.out.println("\n\n-------After Counting Frequency-----------");
        final Map<String, Integer> frequencyMap = new HashMap<String, Integer>();
        for (String currentWord : removestopList2) {
            Integer frequency = frequencyMap.get(currentWord);
            // First sighting counts as 1; otherwise bump the stored count.
            frequencyMap.put(currentWord, frequency == null ? 1 : frequency + 1);
        }
        for (Map.Entry<String, Integer> entry : frequencyMap.entrySet()) {
            System.out.println(entry.getKey() + " = " + entry.getValue());
        }
    }

    /**
     * Prints each word followed by the word with its first matching suffix removed.
     * Words without a matching suffix are printed unchanged in both columns.
     *
     * @param removestopList the words to stem
     */
    private static void suffixesString(ArrayList<String> removestopList) {
        System.out.println("\n\n--------After Removing Suffixes------------");
        for (String word : removestopList) {
            System.out.println(word + "\t\t" + stripSuffix(word));
        }
    }

    /** Removes the first suffix from {@link #suffixes} that the word ends with, if any. */
    private static String stripSuffix(String word) {
        for (String suffix : suffixes) {
            // Require a non-empty stem: a word that IS a suffix (e.g. "y") is left alone.
            if (word.length() > suffix.length() && word.endsWith(suffix)) {
                return word.substring(0, word.length() - suffix.length());
            }
        }
        return word;
    }
}

Output:
2. Single Pass Algorithm
import java.io.BufferedReader;

import java.io.IOException; import

java.io.InputStreamReader; import

java.util.ArrayList;

/**
 * Single-pass clustering demo. Documents are rows of an integer document-token
 * matrix read from standard input. Each document joins the existing cluster
 * whose representative it is most similar to, provided that similarity exceeds
 * the threshold; otherwise it starts a new cluster.
 */
public class Singlepass {

    /**
     * Reads the matrix dimensions, the threshold and the matrix cells (one value
     * per line) from standard input, then prints the resulting clusters.
     *
     * @param args unused
     * @throws IOException if standard input cannot be read
     */
    public static void main(String[] args) throws IOException {
        BufferedReader stdInpt = new BufferedReader(new InputStreamReader(System.in));

        // The first number sizes the rows (documents), the second the columns
        // (tokens); the prompts now say so.
        System.out.println("Enter the no of Documents");
        int noOfDocuments = Integer.parseInt(stdInpt.readLine());
        System.out.println("Enter the no of Tokens");
        int noOfTokens = Integer.parseInt(stdInpt.readLine());
        System.out.println("Enter the threshhold");
        float threshhold = Float.parseFloat(stdInpt.readLine());

        if (noOfDocuments <= 0 || noOfTokens < 0) {
            System.out.println("Nothing to cluster: need at least one document and a non-negative token count.");
            return;
        }

        System.out.println("Enter the Document Token Matrix");
        int[][] input = new int[noOfDocuments][noOfTokens];
        for (int i = 0; i < noOfDocuments; ++i) {
            for (int j = 0; j < noOfTokens; ++j) {
                System.out.println("Enter(" + i + "," + j + ")");
                input[i][j] = Integer.parseInt(stdInpt.readLine());
            }
        }
        SinglePassAlgorithm(noOfDocuments, noOfTokens, threshhold, input);
    }

    /**
     * Clusters the documents in one pass and prints one line per cluster:
     * the cluster index, a tab, then the indices of its member documents.
     *
     * @param noOfDocuments number of rows in {@code input}
     * @param noOfTokens    number of columns in {@code input}
     * @param threshhold    a document joins a cluster only if its similarity to the
     *                      cluster representative is strictly greater than this
     * @param input         document-token matrix
     */
    private static void SinglePassAlgorithm(int noOfDocuments, int noOfTokens, float threshhold, int[][] input) {
        if (noOfDocuments == 0) {
            return;
        }
        // cluster[c][0] holds the member count of cluster c; cluster[c][1..count] the member ids.
        int[][] cluster = new int[noOfDocuments][noOfDocuments + 1];
        ArrayList<float[]> clusterRepresentative = new ArrayList<float[]>();

        // The first document always seeds cluster 0.
        cluster[0][0] = 1;
        cluster[0][1] = 0;
        int noOfClusters = 1;
        clusterRepresentative.add(convertintArrToFloatArr(input[0]));

        for (int i = 1; i < noOfDocuments; ++i) {
            float[] document = convertintArrToFloatArr(input[i]);
            float max = 0;
            // clusterId == -1 means "no cluster qualified yet". Using the id rather than
            // a magic similarity value keeps similarities <= -1 eligible.
            int clusterId = -1;
            for (int j = 0; j < noOfClusters; ++j) {
                float similarity = calculateSimilarity(document, clusterRepresentative.get(j));
                if (similarity > threshhold && (clusterId == -1 || similarity > max)) {
                    max = similarity;
                    clusterId = j;
                }
            }

            if (clusterId == -1) {
                cluster[noOfClusters][0] = 1;
                cluster[noOfClusters][1] = i;
                noOfClusters++;
                clusterRepresentative.add(document);
            } else {
                cluster[clusterId][0] += 1;
                int index = cluster[clusterId][0];
                cluster[clusterId][index] = i;
                clusterRepresentative.set(clusterId,
                        calculateClusterRepresentative(cluster[clusterId], input, noOfTokens));
            }
        }

        for (int i = 0; i < noOfClusters; ++i) {
            System.out.print("\n" + i + "\t");
            for (int j = 1; j <= cluster[i][0]; ++j) {
                System.out.print(" " + cluster[i][j]);
            }
        }
    }

    /** Copies an int array into a new float array of the same length. */
    private static float[] convertintArrToFloatArr(int[] input) {
        float[] answer = new float[input.length];
        for (int i = 0; i < input.length; ++i) {
            answer[i] = input[i];
        }
        return answer;
    }

    /**
     * Returns the dot product of the two vectors: the sum of {@code a[i] * b[i]}.
     * Both arrays are read over the length of {@code a}.
     */
    private static float calculateSimilarity(float[] a, float[] b) {
        float answer = 0;
        for (int i = 0; i < a.length; ++i) {
            answer += a[i] * b[i];
        }
        return answer;
    }

    /**
     * Returns the centroid of a cluster: the per-token mean over its member documents.
     *
     * @param cluster    member list in the layout {@code [count, id1, id2, ...]}
     * @param input      document-token matrix
     * @param noOFTokens number of columns in {@code input}
     */
    private static float[] calculateClusterRepresentative(int[] cluster, int[][] input, int noOFTokens) {
        float[] answer = new float[noOFTokens]; // zero-initialised by the JVM
        for (int i = 1; i <= cluster[0]; ++i) {
            for (int j = 0; j < noOFTokens; ++j) {
                answer[j] += input[cluster[i]][j];
            }
        }
        for (int i = 0; i < noOFTokens; ++i) {
            answer[i] /= cluster[0];
        }
        return answer;
    }
}

Output:
3. Inverted File
import java.io.BufferedReader;

import java.io.FileNotFoundException;

import java.io.FileReader; import

java.io.IOException; import

java.io.InputStreamReader; import

java.util.ArrayList; import

java.util.StringTokenizer;

/**
 * Builds and prints a small inverted index: for every term, the numbers of the
 * files in which it occurs.
 *
 * <p>The index is stored in two parallel structures supplied by the caller:
 * {@code invertedData.get(i)} is the i-th term, and {@code docno[i]} is its
 * posting row laid out as {@code [count, file1, file2, ...]}.
 */
public class InvertedFile {

    /**
     * Prints one line per term: the term, then its file numbers, tab-separated.
     *
     * @param invertedData the indexed terms
     * @param docno        posting rows parallel to {@code invertedData}
     */
    public static void displayIndex(ArrayList<String> invertedData, int[][] docno) {
        for (int i = 0; i < invertedData.size(); i++) {
            System.out.print(invertedData.get(i) + "\t");
            for (int j = 1; j <= docno[i][0]; j++) {
                System.out.print(docno[i][j] + "\t");
            }
            System.out.print("\n");
        }
    }

    /**
     * Adds the terms of one file to the index. Lines are split on space, comma
     * and full stop; empty tokens are ignored. Each file number is recorded at
     * most once per term. If the file cannot be read the stack trace is printed
     * and the index keeps whatever was added before the failure.
     *
     * @param fname        path of the file to index
     * @param invertedData term list, extended in place
     * @param docno        posting rows, updated in place
     * @param fileno       number recorded for this file in the posting rows
     */
    public static void indexing(String fname, ArrayList<String> invertedData, int[][] docno, int fileno) {
        // try-with-resources closes the reader even when reading fails.
        try (BufferedReader br = new BufferedReader(new FileReader(fname))) {
            String line;
            while ((line = br.readLine()) != null) {
                for (String token : line.split("[ ,.]")) {
                    // Adjacent separators (", " or ". ") yield empty tokens; they are not terms.
                    if (!token.isEmpty()) {
                        addPosting(token, invertedData, docno, fileno);
                    }
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Records that {@code term} occurs in file {@code fileno}, respecting the capacity of {@code docno}. */
    private static void addPosting(String term, ArrayList<String> invertedData, int[][] docno, int fileno) {
        int indx = invertedData.indexOf(term);
        if (indx == -1) {
            int next = invertedData.size();
            // A new term needs a free row with room for the count and one posting.
            if (next >= docno.length || docno[next].length < 2) {
                System.err.println("Index full, term skipped: " + term);
                return;
            }
            invertedData.add(term);
            docno[next][0] = 1;
            docno[next][1] = fileno;
            return;
        }

        int count = docno[indx][0];
        for (int j = 1; j <= count; j++) {
            if (docno[indx][j] == fileno) {
                return; // this file is already listed for the term
            }
        }
        if (count + 1 >= docno[indx].length) {
            System.err.println("Posting list full for term '" + term + "', file " + fileno + " skipped");
            return;
        }
        docno[indx][count + 1] = fileno;
        docno[indx][0] = count + 1;
    }

    /**
     * Asks for a number of files and their names, indexes them in order
     * (numbered from 1) and prints the resulting index.
     *
     * @param args unused
     * @throws NumberFormatException if the file count is not an integer
     * @throws IOException           if standard input cannot be read
     */
    public static void main(String[] args) throws NumberFormatException, IOException {
        ArrayList<String> invertedData = new ArrayList<String>();
        // Capacity: up to 100 distinct terms, each listed in up to 9 files.
        int docno[][] = new int[100][10];
        BufferedReader br = new BufferedReader(new InputStreamReader(System.in));

        System.out.println("\nENTER TOTAL NO OF FILES:");
        int no = Integer.parseInt(br.readLine());

        // A bounded loop terminates for zero or negative counts as well.
        for (int i = 1; i <= no; i++) {
            System.out.println("\nENTER FILE " + i + " NAME:");
            String fname = br.readLine();
            indexing(fname, invertedData, docno, i);
        }
        displayIndex(invertedData, docno);
    }
}

Output:
4. Precision And Recall

import java.util.HashSet; import

java.util.Set;

/**
 * Computes precision and recall of a retrieved document set against the set of
 * documents known to be relevant.
 */
public class PrecisionRecallCalculator {

    /**
     * Runs the calculation on a fixed sample: five retrieved documents, of which
     * three are relevant.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Sample input: Answer set A, Query q1, and Relevant documents Rq1
        Set<String> answerSetA = new HashSet<>();
        answerSetA.add("Doc1");
        answerSetA.add("Doc2");
        answerSetA.add("Doc3");
        answerSetA.add("Doc4");
        answerSetA.add("Doc5");

        Set<String> relevantDocumentsRq1 = new HashSet<>();
        relevantDocumentsRq1.add("Doc1");
        relevantDocumentsRq1.add("Doc2");
        relevantDocumentsRq1.add("Doc3");

        // Query q1
        String query = "q1";

        // Calculate precision and recall
        double precision = calculatePrecision(answerSetA, relevantDocumentsRq1);
        double recall = calculateRecall(answerSetA, relevantDocumentsRq1);

        // Print the results
        System.out.println("Query: " + query);
        System.out.println("Precision: " + precision);
        System.out.println("Recall: " + recall);
    }

    /**
     * Returns the fraction of retrieved documents that are relevant.
     *
     * @param retrievedDocuments documents returned for the query
     * @param relevantDocuments  documents known to be relevant to the query
     * @return a value in [0, 1]; 0 when nothing was retrieved
     */
    public static double calculatePrecision(Set<String> retrievedDocuments, Set<String> relevantDocuments) {
        if (retrievedDocuments.isEmpty()) {
            return 0.0; // avoids 0/0 = NaN
        }
        return (double) countRelevantRetrieved(retrievedDocuments, relevantDocuments)
                / retrievedDocuments.size();
    }

    /**
     * Returns the fraction of relevant documents that were retrieved.
     *
     * @param retrievedDocuments documents returned for the query
     * @param relevantDocuments  documents known to be relevant to the query
     * @return a value in [0, 1]; 0 when there are no relevant documents
     */
    public static double calculateRecall(Set<String> retrievedDocuments, Set<String> relevantDocuments) {
        if (relevantDocuments.isEmpty()) {
            return 0.0; // avoids 0/0 = NaN
        }
        return (double) countRelevantRetrieved(retrievedDocuments, relevantDocuments)
                / relevantDocuments.size();
    }

    /** Counts the documents present in both sets. */
    private static int countRelevantRetrieved(Set<String> retrievedDocuments, Set<String> relevantDocuments) {
        int relevantRetrieved = 0;
        for (String doc : retrievedDocuments) {
            if (relevantDocuments.contains(doc)) {
                relevantRetrieved++;
            }
        }
        return relevantRetrieved;
    }
}

Output:
5. Harmonic Mean

package com.darshan;

/**
 * Combines precision and recall into single-number scores: the harmonic mean
 * (F1) and an alpha-weighted harmonic mean.
 */
public class MetricsCalculator {

    /**
     * Returns the harmonic mean of precision and recall.
     *
     * @param precision precision of the result set
     * @param recall    recall of the result set
     * @return {@code 2PR / (P + R)}, or 0 when {@code P + R} is 0
     */
    public static double calculateF1(double precision, double recall) {
        if (precision + recall == 0) {
            return 0;
        }
        return 2 * (precision * recall) / (precision + recall);
    }

    /**
     * Returns the alpha-weighted harmonic mean {@code 1 / (alpha/P + (1 - alpha)/R)}.
     * With {@code alpha = 0.5} this equals {@link #calculateF1(double, double)}.
     *
     * <p>NOTE(review): some texts define the E-measure as one minus this quantity;
     * the value returned here is kept as the original formula computes it.
     * Confirm which definition the assignment expects.
     *
     * @param precision precision of the result set
     * @param recall    recall of the result set
     * @param alpha     weight given to precision; {@code 1 - alpha} goes to recall
     * @return the weighted mean; 0 when both inputs are 0 or when a term with
     *         positive weight has value 0
     */
    public static double calculateEMeasure(double precision, double recall, double alpha) {
        if (precision == 0 && recall == 0) {
            return 0;
        }
        double denominator = weightedReciprocal(alpha, precision)
                + weightedReciprocal(1 - alpha, recall);
        return 1 / denominator;
    }

    /** Returns {@code weight / value}, treating a zero weight as contributing nothing (avoids 0/0 = NaN). */
    private static double weightedReciprocal(double weight, double value) {
        return weight == 0 ? 0 : weight / value;
    }

    /**
     * Prints both scores for a fixed example (P = 0.75, R = 0.80, alpha = 0.5).
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Example values
        double precision = 0.75;
        double recall = 0.80;
        double alpha = 0.5;

        // Calculate F-measure
        double f1Score = calculateF1(precision, recall);
        System.out.println("F-measure (F1-score): " + String.format("%.2f", f1Score));

        // Calculate E-measure
        double eMeasure = calculateEMeasure(precision, recall, alpha);
        System.out.println("E-measure: " + String.format("%.2f", eMeasure));
    }
}

Output :
6. Feature Extraction
7. Web Crawler
8. Weather Forecasting

You might also like