#include <stdio.h> #include <stdlib.h> /* Time interval format: yyymmddhhmm yyyymmddhhmm Example Input: -------------- 4 5 6 1 9 7 8 2 10 Process: -------- Data Structure time_node to store the intervals ------------------------------ | timestamp | is_start | index | ------------------------------ Step 1: ------- - stores the timestamps into an array ----------- ----------- ----------- ----------- | 5 | 1 | 0 | | 6 | 0 | 0 | | 1 | 1 | 1 | | 9 | 0 | 1 | ----------- ----------- ----------- ----------- 0 1 2 3 ----------- ----------- ----------- ------------ | 7 | 1 | 2 | | 8 | 0 | 2 | | 2 | 1 | 3 | | 10 | 0 | 3 | ----------- ----------- ----------- ------------ 4 5 6 7 Step 2: ------- - Sort the array wrt the timestamp value ----------- ----------- ----------- ----------- | 1 | 1 | 1 | | 2 | 1 | 3 | | 5 | 1 | 0 | | 6 | 0 | 0 | ----------- ----------- ----------- ----------- 0 1 2 3 ----------- ----------- ----------- ------------ | 7 | 1 | 2 | | 8 | 0 | 2 | | 9 | 0 | 1 | | 10 | 0 | 3 | ----------- ----------- ----------- ------------ 4 5 6 7 Step 3: ------- 3.1 Push the index value to the index_list if it is the start of an interval 3.2 Delete the index value from the index_list if it is the end of an interval 3.2.1 add all the index values from the index_list to the conflict_list of the this interval 3.2.2 add this interval index into the conflict_list of all the index values in the index_list */ /* Index to find parent and the childern */ #define LEFT(a) (2*(a+1) -1) #define RIGHT(a) (2*(a+1)) #define PARENT(a) ((a-1)/2) typedef enum boolean { FALSE = 0, TRUE = 1 } bool; /* To store the calendar appointment intervals */ typedef struct time_node time_node; struct time_node { unsigned long long timestamp; bool is_start; int index; }; /* To store the conflicts */ typedef struct index_node index_node; struct index_node { int index; index_node *next; }; /* Swap function */ #define XORSWAP(a, b) ((a.timestamp)^=(b.timestamp),(b.timestamp)^=(a.timestamp),(a.timestamp)^=(b.timestamp), \ (a.is_start)^=(b.is_start),(b.is_start)^=(a.is_start),(a.is_start)^=(b.is_start), \ (a.index)^=(b.index),(b.index)^=(a.index),(a.index)^=(b.index)) /* Heapify */ void maxheapify (time_node *list, int count, int index) { int left = LEFT (index); int right = RIGHT (index); int largest = index; if (left <= count-1 && list[left].timestamp > list[largest].timestamp) largest = left; if (right <= count-1 && list[right].timestamp > list[largest].timestamp) largest = right; if (largest != index) { XORSWAP (list[index], list[largest]); maxheapify (list, count, largest); } } /* Buids the heap */ void build_max_heap (time_node *list, int count) { for (int i = (count-1)/2; i >= 0; i--) { maxheapify (list, count, i); } } /* Sorts the intervals using heap sort */ void heap_sort (time_node *list, int count) { build_max_heap (list, count); int tmp_count = count-1; for (int i = count-1; i > 0; i--) { XORSWAP (list[0], list[i]); maxheapify (list, i, 0); } } /* Adds the index node into the list */ bool add_index_node (index_node **list, int index) { if (NULL == *list) { *list = (index_node *) calloc (1, sizeof (index_node)); if (NULL == *list) { fprintf (stderr, "Error: memory allocation failure."); return FALSE; } else { (*list)->index = index; (*list)->next = NULL; } } else { index_node *t = *list; while (NULL != t->next) t = t->next; t->next = (index_node *) calloc (1, sizeof (index_node)); t = t->next; if (NULL == t) { fprintf (stderr, "Error: memory allocation failure."); return FALSE; } else { t->index = index; t->next = NULL; } } return TRUE; } /* Deletes the index node from the list */ void delete_index_node (index_node **list, int index) { if (NULL == *list) { return; } else { index_node *t = *list; index_node *lbo = *list; while (t->index != index) { if (t->next->next == NULL) { lbo = t; } t = t->next; } if (t == *list) { *list = (*list)->next; free (t); } else if (NULL == t->next) { lbo->next = NULL; free(t); } else { t->index = t->next->index; index_node *d = t->next; t->next = t->next->next; free (d); } } } int main (int argc, char **argv) { time_node *interval_list = NULL; /* Reads the input file for the intervals */ FILE *fp = NULL; fp = fopen ("intervals.txt", "r"); if (NULL == fp) { fprintf (stderr, "Error: Could not read the file."); return -1; } int count = 0; int readcount = 0; int tmp_count = 0; /* Reads the count */ readcount = fscanf (fp, "%d", &count); if (0 >= readcount) { fprintf (stderr, "Error: Nothing in the file."); return -2; } interval_list = (time_node *) calloc (count*2, sizeof (time_node)); if (NULL == interval_list) { fprintf (stderr, "Error: Memory allocation failed."); return -3; } /* Reads the intervals */ printf ("Input:\n"); for(tmp_count = 0; tmp_count < count; tmp_count++) { if (feof(fp)) { break; } unsigned long long start = 0; unsigned long long end = 0; readcount = fscanf (fp, "%llu %llu", &start, &end); printf ("%5d) : %llu <-> %llu\n", tmp_count+1, start, end); interval_list[tmp_count*2].timestamp = start; interval_list[tmp_count*2].is_start = TRUE; interval_list[tmp_count*2].index = tmp_count; interval_list[tmp_count*2+1].timestamp = end; interval_list[tmp_count*2+1].is_start = FALSE; interval_list[tmp_count*2+1].index = tmp_count; } printf ("\n"); fclose(fp); /* Sort the timestamp values */ heap_sort (interval_list, count*2); index_node *index_list = NULL; index_node **conflict_list = NULL; conflict_list = (index_node **) calloc (count, sizeof (index_node*)); if (NULL == conflict_list) { fprintf (stderr, "Error: Memory allocation failed."); return -3; } /* main loop to find the conflicts */ for (int i = 0; i < count*2; i++) { if (interval_list[i].is_start == TRUE) { if (FALSE == add_index_node (&index_list, interval_list[i].index)) { fprintf (stderr, "Error: Memory allocation failed."); return -3; } } else { delete_index_node (&index_list, interval_list[i].index); index_node *t = index_list; while (t) { if (FALSE == add_index_node (&(conflict_list[interval_list[i].index]), t->index) || FALSE == add_index_node (&(conflict_list[t->index]), interval_list[i].index)) { fprintf (stderr, "Error: Memory allocation failed."); return -3; } t = t->next; } } } /* Print the result */ printf ("Conflicts:\n"); for (int i = 0; i < count; i++) { printf ("%5d : ", i+1); index_node *t = conflict_list[i]; while (t) { printf ("%d ", t->index+1); t = t->next; } printf ("\n"); } return 0; }We can use self-balanced binary search tree to keep the index_list to get a better time complexity.
Tuesday, June 3, 2014
Find the conflicting appointments
Labels:
appointments
,
binary
,
calendar
,
conflict
,
data structures
,
dynamic
,
heap
,
overlap
,
self-balanced
,
sort
,
timestamp
Thursday, January 16, 2014
In a given 2D binary array, find the area of the biggest rectangle which is filled with 1
Given array
1 1 0 1 0 1
1 1 0 1 1 1
0 1 1 1 1 1
0 1 1 1 1 1
0 0 1 1 1 0
0 0 0 0 0 0
Consider each block in the array is an unit and he columns in the array as hanging towers.
Modify the array in a way that, on each row, the number denotes the height of each column.
1 1 0 1 0 1
2 2 0 2 1 2
0 3 1 3 2 3
0 4 2 4 3 4
0 0 3 5 4 0
0 0 0 0 0 0
Basic Rules to find the biggest rectangle:
1. For any tower, the boundaries on each side will be decided by its smaller towers. It means
that all the towers on each side till we find a smaller tower will be included for the calculation.
2. Area for the subset equals to (the height of the smaller tower) X (no. of towers in the subset)
Calculate the area of each subset for each level in the array from top to bottom.
Level[0] [set 0] Level[0] [set 1] Level[0] [set 2]
1 1 0 1 0 1 1 1 0 1 0 1 1 1 0 1 0 1
--- - -
2 towers with 1 unit each 1 tower with 1 unit 1 tower with 1 unit
Maximum Area=2x1=2 Maximum Area=1x1=1 Maximum Area=1x1=1
Level[0] Maximum Area = 2
-------------------------------------------------------------------------------
Level[1] [set 0] Level[1] [set 1]
2 2 0 2 1 2 2 2 0 2 1 2
--- -----
2 towers with 2 units each 1 tower with 1 unit
Maximum Area=2x2=4 2 towers with 2 units each
Maximum Area=3x1=3
For [set 1], even though we have 2 towers with 2 units each in this set, we cannot consider them
as a single unit because of the smaller unit's presence in between. So, each tower will be consider
as a different subset. But, for the tower with 1 unit has bigger towers around it. So, all the units
will be taken (but only one unit from each bigger towers) for the area calculation.
Level[0] Maximum Area = 4
-------------------------------------------------------------------------------
and so on...
int findBigBox (int row, int column, int *matrix)
{
int max_area = 0;
int *score = (int *) calloc (row+1, sizeof(int));
if (NULL != score)
{
for (int i=0; i<row; i++)
{
for (int j=0; j<column; j++)
{
/* Process only if we find 1 */
if (*(matrix+(i*column)+j) != 0)
{
int count = 0;
int start = j;
/* Iterate through all the consecutive ones. */
for (;*(matrix+(i*column)+j) != 0 && j<column; j++)
{
/* If it is not the first row, add this element
with the previous row element at the same column */
if (i != 0)
{
*(matrix+(i*column)+j) += *(matrix+((i-1)*column)+j);
}
*(score+*(matrix+(i*column)+j)) = 1;
}
getScore (row, column, start, j-1, matrix+(i*column), score);
for (int s=1; s<=row; s++)
{
if (*(score+s))
{
int area = s * *(score+s);
if (max_area < area)
max_area = area;
}
}
/* Reset the score array to use for the next subset */
memset (score, 0, (row+1)*sizeof(int));
}
}
}
}
free (score);
return max_area;
}
Here the process of finding the number of towers on each subset for each height is call SCORE.
This SCORE process can be represented as a tree, which has the following rules,
1. Node structure
-> Height
-> Score
-> Next
-> Left
-> Right
2. Same numbers on the same set which are non-contiguous will be added as the [Next] node.
3. Same numbers on the same set which are contiguous will increment the [Score] in the first node.
4. Taller tower on its left in the set will be added to the [Left] node (child).
5. Taller tower on its right in the set will be added to the [Right] node (child).
6. [Score] = [Left]->[score] + [Right]->[score] + no of the same [Height] in the set.
Example:
For the below given set,
3 3 2 3 2 3 1 3 1 3 2 3
1. --------- 2. --------- 3. ---------
| H=3|S=1 | | H=3|S=2 | | H=2|S=3 |
--------- --------- ---------
/L
---------
| H=3|S=2 |
---------
In the third step, we move the number 2 as the root and 3 as its left as 3 falls on 2's left in the set and
3 is bigger than 2.
4. ---------
| H=2|S=4 |
---------
/L \R
--------- ---------
| H=3|S=2 | | H=3|S=1 |
--------- ---------
We move the number 3 as its right as 3 falls on 2's right in the set
5. --------- N ---------
| H=2|S=5 | ---- | H=2|S=0 |
--------- ---------
/L \R
--------- ---------
| H=3|S=2 | | H=3|S=1 |
--------- ---------
We move the number 2 to the [Next] of the existing 2 as they are on the same set still. (Which means it is the ROOT node)
6. --------- N ---------
| H=2|S=6 | ---- | H=2|S=0 |
--------- ---------
/L \R \R
--------- --------- ---------
| H=3|S=2 | | H=3|S=1 | | H=3|S=1 |
--------- --------- ---------
This 3 falls on the right side of the second 2, and increase the count on the first 2
7. ---------
| H=1|S=7 |
---------
/L
--------- N ---------
| H=2|S=6 | ---- | H=2|S=0 |
--------- ---------
/L \R \R
--------- --------- ---------
| H=3|S=2 | | H=3|S=1 | | H=3|S=1 |
--------- --------- ---------
and so on...
Finally, traverse the tree to find the Maximum SCORE for each height.
void getScore (int row, int column, int start, int end, int *matrix_row, int *score)
{
/* Intervals to be processed to calculate the score for the number */
int *currentQ = (int *) calloc (column, sizeof(int));
/* Intervals to be updated for the next number */
int *nextQ = (int *) calloc (column, sizeof(int));
/* Interval counts */
int currentCount = 1;
int nextCount = 0;
/* Start with the whole given set */
currentQ[0] = start;
currentQ[1] = end;
for (int i=1; i<=row; i++)
{
if (*(score+i))
{
int max = 0;
for (int c=0; c<currentCount; c++)
{
int tmp_score = *(currentQ+(c*2)+1) - *(currentQ+(c*2)) + 1;
/* Update the temporary max */
if (max < tmp_score)
{
max = tmp_score;
}
/* Update the intervals for the next bigger number */
for (int k=*(currentQ+(c*2)); k<=*(currentQ+(c*2)+1); k++)
{
/* Find the start of an interval */
while (*(matrix_row+k) == i && k<=*(currentQ+(c*2)+1))
{
k++;
}
/* End of the current set */
if (k > *(currentQ+(c*2)+1))
break;
*(nextQ+(nextCount*2)) = k;
/* Find the end of that interval */
while (*(matrix_row+k) != i && k<=*(currentQ+(c*2)+1)) k++;
if (k > *(currentQ+(c*2)+1)) // End of the current set
{
*(nextQ+(nextCount*2)+1) = k-1;
}
else
{
*(nextQ+(nextCount*2)+1) = k-1;
}
nextCount++;
}
}
int *tmp = currentQ;
currentQ = nextQ;
nextQ = tmp;
currentCount = nextCount;
nextCount = 0;
/* Update the score */
*(score+i) = max;
}
}
}
Saturday, January 11, 2014
Number of ways decoding a message
A message containing letters from A-Z is being encoded to numbers using the following mapping:
‘A’ => 1
‘B’ => 2
…
‘Z’ => 26
Given an encoded message containing digits, determine the total number of ways to decode it.
For example, given encoded message “12″, it could be decoded as “AB” (1 2) or “L” (12). The number of ways decoding “12″ is 2.
Solution:
Function call: {decode (input, result, 0);}
[input] => Input String
[output] => Temporary buffer to form the decoded string
[index] => Index to decoded character
‘A’ => 1
‘B’ => 2
…
‘Z’ => 26
Given an encoded message containing digits, determine the total number of ways to decode it.
For example, given encoded message “12″, it could be decoded as “AB” (1 2) or “L” (12). The number of ways decoding “12″ is 2.
Solution:
- Find all the subsets where we have 1 and 2 in the sequence continuously and keep the counts of total no of elements in each subset.
- If the next element is 0 and count is more than 1, subtract the count by 1
- Else increase the count by 1
- find the fibonacci number for each count [0]=1 [1]=1 [2]=2 [3]=3 ...
- Multiply all the fibonacci numbers gives the result
#include <stdio.h> #include <stdlib.h> #include <strings.h> int main (int argc, char **argv) { int length = strlen(argv[1]); char *result = (char *) calloc (length + 1, sizeof (char)); int noofways = 1; int *fibonacci = (int *) calloc (length, sizeof(int)); *fibonacci = 1; *(fibonacci+1) = 2; for (int i=2; i<length; i++) { *(fibonacci+i) = *(fibonacci+i-1) + *(fibonacci+i-2); } for (int i=1; i<=26; i++) { printf ("[%c:%d] ", 'A'+i-1, i); } printf ("\n"); char *input = argv[1]; while (*input != '\0') { if (*input >= '1' && *input <='2') { int count = 0; for (;*input >= '1' && *input <='2'; input++, count++) ; if (*input != '\0') { if ((*(input-1) == '1' && *input >= '1' && *input <= '9') || (*(input-1) == '2' && *input >= '1' && *input <= '6')) { count++; } else if ((*(input-1) == '1' && *input == '0') || (*(input-1) == '2' && *input == '0')) { count--; if (count == 0) count = 1; } } else if (*input == '\0') { noofways = noofways * *(fibonacci+count-1); break; } noofways = noofways * *(fibonacci+count-1); } else if (*input == '0') { noofways = 0; break; } input++; } printf ("No of ways: %d\n", noofways); return 0; }Code to get all the decoded strings
Function call: {decode (input, result, 0);}
[input] => Input String
[output] => Temporary buffer to form the decoded string
[index] => Index to decoded character
typedef enum boolean { FALSE=0, TRUE=1}bool; bool decode (char *input, char *output, int index) { static int count = 0; if (*input == '\0') { *(output+index) = '\0'; count++; printf ("%-4d%s\n", count, output); return TRUE; } else if (*input == '0') { return FALSE; } else if ((*input == '1' && *(input+1) == '0') || (*input == '2' && *(input+1) == '0')) { *(output+index) = 'A' + ((*input - '0') * 10) - 1; return decode (input+2, output, index+1); } else { *(output+index) = 'A' + (*input - '1'); decode (input+1, output, index+1); if ((*input == '1' && *(input+1) >= '1' && *(input+1) <= '9') || (*input == '2' && *(input+1) >= '1' && *(input+1) <= '6')) { *(output+index) = 'A' + (*input - '0') * 10 + *(input+1) - '0' - 1; decode (input+2, output, index+1); } } return TRUE; }
Friday, January 10, 2014
Largest Sum Subarray
You are given a binary array with N elements: d[0], d[1], ... d[N - 1].
d[i] can only be 0 or 1
You can perform AT MOST one move on the array: choose any two integers [L, R], and flip all the elements between (and including) the L-th and R-th bits. L and R represent the left-most and right-most index of the bits marking the boundaries of the segment which you have decided to flip.
What is the maximum number of '1'-bits (indicated by S) which you can obtain in the final bit-string?
d[i] can only be 0 or 1
You can perform AT MOST one move on the array: choose any two integers [L, R], and flip all the elements between (and including) the L-th and R-th bits. L and R represent the left-most and right-most index of the bits marking the boundaries of the segment which you have decided to flip.
What is the maximum number of '1'-bits (indicated by S) which you can obtain in the final bit-string?
import copy def max_subarray(array): # Start & End indices of the max subarray start = end = 0 # Temporary Variable to hold the new Start t_start = 0 # Holds the latest max sum # starts with array[0] because it addresses the # case where all the elements in the array are # negative numbers. max_ending_here = max_so_far = array[0] # Holds the max subarray # Starts with the first element array[0] maxsubarray = t_maxsubarray = [array[0]] for index, x in enumerate(array[1:]): # max(x, max_ending_here + x) => max_ending_here if x > max_ending_here + x: # This is where the set starts t_start = index + 1 t_maxsubarray = [] max_ending_here = x else: max_ending_here = max_ending_here + x # max(max_so_far, max_ending_here) => max_so_far if max_so_far < max_ending_here: # The set ends here t_maxsubarray.append(x) # This is the latest max subarray maxsubarray = copy.deepcopy(t_maxsubarray) # Update the new Start index start = t_start # Update the new End index end = index + 1 max_so_far = max_ending_here else: t_maxsubarray.append(x) print maxsubarray print start, end print max_so_farReference: [WIKI] Maximum Subarray Problem
Saturday, January 4, 2014
Spell Check
I am using self balanced binary search tree to create the dictionary because the hash values for the dictionary wordsEn.txt vary from few thousands to few thousands of millions. If I want to map this big range to small one, there may be more collision.
My hash function gives me same hash value for the strings even if the character are jumbled. For example, "add" and "dad" will have the same hash value. I collect the list of words which have the same has value as the given string and find how close these words to the given string. I sort these suggestions in the order where the closest possibility comes first.
To get more words, i repeat this with adding a character each time from 'a' to 'z' with the given string and one more time with removing the character from all the possible indices.
Finally print the list.
Filename: dictionary.hpp
My hash function gives me same hash value for the strings even if the character are jumbled. For example, "add" and "dad" will have the same hash value. I collect the list of words which have the same has value as the given string and find how close these words to the given string. I sort these suggestions in the order where the closest possibility comes first.
To get more words, i repeat this with adding a character each time from 'a' to 'z' with the given string and one more time with removing the character from all the possible indices.
Finally print the list.
Filename: dictionary.hpp
#ifndef __MY_DICTIONARY__ #define __MY_DICTIONARY__ #include <iostream> #include <string> using namespace std; typedef struct word t_word; class wordlist { private: string dictionaryfile; t_word *dictionary; public: wordlist (string dictfile) { this->dictionaryfile = dictfile; this->dictionary = NULL; } wordlist() { } void setWordFile (string wordfile) { this->dictionaryfile = wordfile; } string getWordFile () { return this->dictionaryfile; } ~wordlist () { } bool generateDictionary (); unsigned int calcHash (string); t_word *create (string); void rotateLeft (t_word*); void rotateRight (t_word*); void balanceFactors (t_word*, t_word*); void balanceLeftRight (t_word*, t_word*); void balanceRightLeft (t_word*, t_word*); void balance (t_word*, t_word*); void insert (string); int getScore (string, string); int findMatch (string, vector<string>&); }; #endifFilename: dictionary.cpp
#include <iostream> #include <vector> #include <string> #include <fstream> #include <stdlib.h> using namespace std; #include "dictionary.hpp" // Node to store the word and the corresponding hash values struct word{ string word; unsigned int hash; // Pointers to create the self-balanced binary search tree t_word *parent; t_word *left; t_word *right; char balanceFactor; // Pointer to store the words with the same hash value t_word *next; }; // Generates the dictionary (self-balanced binary search tree) // with the list of words from the given dictionary file. bool wordlist::generateDictionary () { ifstream ifs (this->dictionaryfile.c_str()); string str; try { while (getline (ifs, str)) { // Remove the new line str.resize (str.length()-1); // Add the word into the dictionary insert (str); } ifs.close(); } catch (exception& e) { cerr << "[Error]: " << e.what() << endl; if (ifs.is_open()) { ifs.close(); } return false; } return true; } // Calculates the hash value for the given word // Position of the characters does not have importance here. unsigned int wordlist::calcHash (string word) { unsigned int hash = 0; int val[26] = {0}; const char *exp = word.c_str(); while (('a' <= *exp && 'z' >= *exp) || '\'' == *exp) { hash = hash + (( (int(*exp) & 0x01) * 0xcf * 3+ (int(*exp) & 0x02) * 0xbf * 37+ (int(*exp) & 0x04) * 0xaf * 79+ (int(*exp) & 0x08) * 0x9f * 131+ (int(*exp) & 0x10) * 0x8f * 181+ (int(*exp) & 0x20) * 0x7f * 239+ (int(*exp) & 0x40) * 0x6f * 293+ (int(*exp) & 0x80) * 0x5f * 359) * (int (*exp) - int ('a') + 1) * 421 ) + (int (*exp) - int ('a') + 1) * 17; exp++; } hash += word.length(); return hash; } // Create a new node with the given string and its has value t_word* wordlist::create (string str) { unsigned int hash = calcHash (str); t_word *t = new t_word; if (NULL == t) { cerr << "[Error] memory allocation failed." << endl; exit (1); } // cout << hash << " [" << str << "]" << endl; t->word = str; t->hash = hash; t->parent = NULL; t->left = NULL; t->right = NULL; t->next = NULL; t->balanceFactor = '='; return t; } void wordlist::rotateLeft (t_word *n) { t_word *t = n->right; n->right = t->left; if (NULL != t->left) { t->left->parent = n; } if (NULL != n->parent) { if (n->parent->left == n) { n->parent->left = t; } else { n->parent->right = t; } } else { this->dictionary = t; t->parent = NULL; } t->left = n; t->parent = n->parent; n->parent = t; } void wordlist::rotateRight (t_word *n) { t_word *t = n->left; if (NULL == n->left && NULL == n->right) return; n->left = t->right; if (NULL != t->right) { t->right->parent = n; } if (NULL != n->parent) { if (n->parent->left == n) { n->parent->left = t; } else { n->parent->right = t; } } else { this->dictionary = t; t->parent = NULL; } t->right = n; t->parent = n->parent; n->parent = t; } void wordlist::balanceFactors (t_word *ancestor, t_word *newLeaf) { t_word *t = newLeaf->parent; while (t != NULL && t != ancestor) { if (newLeaf->hash <= t->hash) t->balanceFactor = 'L'; else t->balanceFactor = 'R'; t = t->parent; } } void wordlist::balanceLeftRight (t_word *ancestor, t_word *newLeaf) { if (this->dictionary == ancestor) { ancestor->balanceFactor = '='; } else { t_word *t = ancestor; if (newLeaf->hash <= ancestor->parent->hash) { ancestor->balanceFactor = 'R'; t = ancestor->parent->left; } else { ancestor->balanceFactor = '='; ancestor->parent->left->balanceFactor = 'L'; } balanceFactors (t, newLeaf); } } void wordlist::balanceRightLeft (t_word *ancestor, t_word *newLeaf) { if (this->dictionary == ancestor) { ancestor->balanceFactor = '='; } else { t_word *t = ancestor; if (newLeaf->hash > ancestor->parent->hash) { ancestor->balanceFactor = 'L'; t = ancestor->parent->right; } else { ancestor->balanceFactor = '='; ancestor->parent->left->balanceFactor = 'R'; } balanceFactors (t, newLeaf); } } void wordlist::balance (t_word *ancestor, t_word *newLeaf) { // Balanced tree if (NULL == ancestor) { if (newLeaf->hash <= this->dictionary->hash) { this->dictionary->balanceFactor = 'L'; } else { this->dictionary->balanceFactor = 'R'; } balanceFactors (this->dictionary, newLeaf); } // Added to the other side of the balanced factor else if ((ancestor->balanceFactor == 'L' && newLeaf->hash > ancestor->hash) || (ancestor->balanceFactor == 'R' && newLeaf->hash <= ancestor->hash)) { ancestor->balanceFactor = '='; balanceFactors (ancestor, newLeaf); } // Added to the same side of the ancestor's child tree else if ((ancestor->balanceFactor == 'R' && newLeaf->hash > ancestor->right->hash) || (ancestor->balanceFactor == 'L' && newLeaf->hash <= ancestor->left->hash)) { char bf = ancestor->balanceFactor; ancestor->balanceFactor = '='; switch (bf) { case 'R': rotateLeft(ancestor); break; case 'L': rotateRight(ancestor); break; } balanceFactors (ancestor->parent, newLeaf); } // Added to the right of ancestor's left child else if (ancestor->balanceFactor == 'L' && newLeaf->hash > ancestor->left->hash) { rotateLeft(ancestor->left); rotateRight(ancestor); balanceLeftRight (ancestor, newLeaf); } // Added to the left of ancestor's right child else if (ancestor->balanceFactor == 'R' && newLeaf->hash <= ancestor->right->hash) { rotateRight(ancestor->right); rotateLeft(ancestor); balanceRightLeft (ancestor, newLeaf); } else { cerr << "[Error]: Not a valid operation." << endl; exit (1); } } void wordlist::insert (string str) { t_word *t = create (str); t_word *ancestor = NULL; if (NULL == this->dictionary) { this->dictionary = t; } else { t_word *r = this->dictionary; while (NULL != r) { if (r->balanceFactor != '=') ancestor = r; if (t->hash < r->hash) { if (NULL != r->left) { r =r->left; } else { break; } } else if (t->hash > r->hash) { if (NULL != r->right) { r =r->right; } else { break; } } else { while (NULL != r->next) r = r->next; r->next = t; return; } } t->parent = r; if (t->hash <= r->hash) { r->left = t; } else { r->right = t; } // Balance the tree using AVL balance (ancestor, t); } } int wordlist::getScore (string s1, string s2) { int i = 0; int j = 0; int len = s2.length() * s1.length(); int *a_scores = new int [len]; int score = 0; /* cout << " "; for (i=0; i<s1.length(); i++) { cout << s1[i] << " "; } cout << endl; */ for (j=0; j<s2.length(); j++) { // cout << s2[j] << " "; for (i=0; i<s1.length(); i++) { if (s1[i] == s2[j]) { if (i == 0 || j == 0) { *(a_scores+(j*s1.length())+i) = 1; } else { int x = i-1; int y = j-1; int t_score = 0; for (;x>=0 && y>=0; x--,y--) { if (*(a_scores+(y*s1.length())+x) != 0) { t_score = *(a_scores+(y*s1.length())+x); break; } } for (x=i-1; x>=0; x--) { if (*(a_scores+(j*s1.length())+x) != 0) { if (t_score < *(a_scores+(j*s1.length())+x)) { t_score = *(a_scores+(j*s1.length())+x) - 1; } break; } } for (y=j-1; y>=0; y--) { if (*(a_scores+(y*s1.length())+i) != 0) { if (t_score < *(a_scores+(y*s1.length())+i)) { t_score = *(a_scores+(y*s1.length())+i) - 1; } break; } } *(a_scores+(j*s1.length())+i) = t_score + 1; } if (score < *(a_scores+(j*s1.length())+i)) { score = *(a_scores+(j*s1.length())+i); } } else { *(a_scores+(j*s1.length())+i) = 0; } // cout << *(a_scores+(j*s1.length())+i) << " "; } // cout << endl; } score = s1.length()-score; delete[] a_scores; return score; } int wordlist::findMatch (string str, vector<string>& result) { unsigned int hash = calcHash(str); t_word *t = this->dictionary; unsigned int count = 0; unsigned int level = 0; while (NULL != t) { level ++; if (hash < t->hash) { t = t->left; } else if (hash > t->hash) { t = t->right; } else { while (NULL != t) { result.resize(count+1); result.at(count) = t->word; t = t->next; count++; } return level; } } return level; }Filename: spellCheck.cpp
#include <iostream> #include <fstream> #include <vector> #include <iomanip> #include <sstream> #include <string> #include <algorithm> using namespace std; #include "dictionary.hpp" int main () { wordlist mydict ("wordsEn.txt"); // My Dictionary mydict.generateDictionary (); string str; // My wordlist which are to be checked ifstream ifs ("wordlist.txt"); while (getline (ifs, str)) { str.resize (str.length()-1); vector <string> matches; int count = mydict.findMatch (str, matches); if (0 < matches.size()) { for (long index=0; index < matches.size(); index++) { int score = mydict.getScore (str, matches.at(index)); ostringstream ss; ss << setw(2) << setfill('0') << score; matches.at(index) = ss.str() + matches.at(index); } sort (matches.begin(), matches.end()); string tmp = matches.at(0); // Exact match. Goto the next word in the list if ('0' == tmp[0] && '0' == tmp[1]) { cout << "Spelling is CORRECT for (" << str << ")" <<endl; continue; } } // Add each character from 'a' to 'z' one by one and check if we // get more suggesstions. string alpha = "abcdefghijklmnopqrstuvwxyz"; vector<string> tmp_matches; for (int i=0; i<alpha.length(); i++) { mydict.findMatch (str+alpha[i], tmp_matches); for (long index=0; index < tmp_matches.size(); index++) { int score = mydict.getScore (str, tmp_matches.at(index)); ostringstream ss; ss << setw(2) << setfill('0') << score + 1; matches.push_back(ss.str() + tmp_matches.at(index)); } tmp_matches.clear(); } // Check the suggesstions for the strings by removing one character // in different index and try all the posible index values. for (int i=0; i<str.length(); i++) { string tmp_str = str; tmp_str.erase (i, 1); mydict.findMatch (tmp_str, tmp_matches); for (long index=0; index < tmp_matches.size(); index++) { int score = mydict.getScore (tmp_str, tmp_matches.at(index)); ostringstream ss; ss << setw(2) << setfill('0') << score + 1; matches.push_back(ss.str() + tmp_matches.at(index)); } tmp_matches.clear(); } if (0 < matches.size()) { // Sort all the suggesstions with thr rank sort (matches.begin(), matches.end()); // Remove the rank element from the strings for (long index=0; index < matches.size(); index++) { matches.at(index).erase(0,2); } // Removes duplicate consecutive elements // matches.erase( unique( matches.begin(), matches.end() ), matches.end() ); // Remove all the duplicates vector<string> uniqueMatches; uniqueMatches.push_back(matches.at(0)); for (long index=1; index < matches.size(); index++) { if (uniqueMatches.end() == find (uniqueMatches.begin(), uniqueMatches.end(), matches.at(index))) { uniqueMatches.push_back(matches.at(index)); } } // Final result cout << "Matcing for (" << str << "): { "; for (long index=0; index < uniqueMatches.size(); index++) { cout <<uniqueMatches.at(index) << " "; } cout << "}" << endl; } else { cout << count << " Matche not found for (" << str << ")" << endl; } } return 0; }You can find the wordlist.txt from here
Subscribe to:
Posts
(
Atom
)