C++ "Programming For Big Data [ DvcScheduleV2.cpp and StaticArray.h and/or DynamicArray.h ] Assignment 5's runtime was too slow --...

80.2K

Verified Solution

Question

Programming

C++ "Programming For Big Data [DvcScheduleV2.cpp and StaticArray.h and/or DynamicArray.h]
Assignment 5's runtime was too slow -- a couple of minutes or so. It's because of the duplicate-checking, with over 4 billion compares.

Rewrite the duplicate-checking logic from Assignment 5, using a technique from "Techniques For Big Data, Reading" to do fewer compares, and come up with the exact same results as Assignment 5.

You may use your StaticArray.h from Assignment 3 and/or your DynamicArray.h from Assignment 4, but you may not use any STL containers. Submit the H file(s) you use in your solution, even if there are no changes since your previous work. Your project will be compiled for grading using the default stack memory size of 1MB.

Progress Bar

Since this version is supposed to be fast, there is no longer a need for a progress bar. Include one if you wish, or you may leave it out -- your choice. But if you do have a progress bar, do remember to "flush"...

You should get the same result as V1 and should run an order of magnitude faster."

For some reason I'm getting different results from the first version of the program and the program I've written for this assignment.

Code for the prior assignment :

#define _CRT_SECURE_NO_WARNINGS

#include <fstream>
#include <iostream>
#include <string>

using namespace std;

#include <cstring>
#include "DynamicArray.h"

// Tally for one subject code: `code` is the part of the course name before
// the '-', and `count` is how many unique sections were seen for it.
struct Class
{
  std::string code;
  int count = 0; // defaulted so a fresh element never holds an indeterminate value
};

int main()
{

DynamicArray sub;
DynamicArray sem;
DynamicArray sec;

int totalSubjects = 0;
int dup = 0;
int total = 0;
int counter = 0;
bool duplicate;
bool stored;

//for parsing inputfile
char* token;
char buf[1000];
const char* const tab = \"\t\";

//open input file
ifstream fin;
fin.open(\"dvc-schedule.txt\");
if (!fin.good())
cout << \"I/O error. File can't be found!\n\";

//read the input file
while (fin.good())
{
//progress bar
if(counter % 1000 == 0)
cout << '.'; cout.flush();
duplicate = false;
stored = false;
string line;
getline(fin, line);
total++; //total lines processed
strcpy(buf, line.c_str());
if (buf[0] == 0) continue; // skip blank lines
//parse the line
const string term(token = strtok(buf, tab));
const string section(token = strtok(0, tab));
const string course((token = strtok(0, tab)) ? token : \"\");
const string instructor((token = strtok(0, tab)) ? token :\"\");
const string whenWhere((token = strtok(0, tab)) ? token :\"\");
if (course.find('-') == string::npos) continue;
const string code(course.begin(), course.begin() +course.find('-'));

//check for duplicates
for(int i = 0; i < counter; i++)
{
if(sem[i] == term && sec[i] == section)
{
dup++;
duplicate = true;
break;
}
}

if(duplicate == true)
continue;

sem[counter] = term;
sec[counter] = section;
counter++;

for(int i = 0; i < totalSubjects; i++)
{
if (sub[i].code == code)
{
sub[i].count++;
stored = true;
break;
}
}

if(stored == true)
continue;

Class y;
y.code = code;
y.count = 1;
sub[totalSubjects] = y;
totalSubjects++;
}
fin.close();
cout << endl;

//sort
for (int i = 0; i < totalSubjects; i++)
for (int j = i + 1; j < totalSubjects; j++)
if (sub[j].code < sub[i].code)
swap(sub[j], sub[i]);

//output
for(int i = 0; i < totalSubjects; i++)
{
cout << sub[i].code << \", \" << sub[i].count<< \" section\" << endl;
}
cout << \"Total duplication: \" << dup <cout << \"Total counts: \" << total << endl;
cout << \"Total subjects: \" << totalSubjects <}

Code for this assignment:

#define _CRT_SECURE_NO_WARNINGS

#include <fstream>
#include <iostream>
#include <string>

using namespace std;

#include <cstring>
#include "DynamicArray.h"

// Bucket of section numbers already seen for a single term. Splitting the
// duplicate check by term means a record is only compared against sections
// of its own term instead of against every record read so far.
struct SectionsForTerm
{
  std::string term;                             // term this bucket belongs to
  int numberOfSectionsSeen = 0;                 // entries used in seenSectionNumbers
  DynamicArray<std::string> seenSectionNumbers; // sections recorded for this term
};

// Tally for one subject code: `code` is the part of the course name before
// the '-', and `count` is how many unique sections were seen for it.
struct Class
{
  std::string code;
  int count = 0; // defaulted so a fresh element never holds an indeterminate value
};

int main()
{

int numberOfTermsSeen = 0;
DynamicArray alreadySeen;
DynamicArray sub;

int totalSubjects = 0;
int dup = 0;
int total = 0;
int counter = 0;
bool match;
bool duplicate;
bool stored;

//for parsing inputfile
char* token;
char buf[1000];
const char* const tab = \"\t\";

//open input file
ifstream fin;
fin.open(\"dvc-schedule.txt\");
if (!fin.good())
cout << \"I/O error. File can't be found!\n\";

//read the input file
while (fin.good())
{
//check for false
match = false;
duplicate = false;
stored = false;

//read lines
string line;
getline(fin, line);
total++; //total lines processed
strcpy(buf, line.c_str());
if (buf[0] == 0) continue; // skip blank lines

//parse the line
const string term(token = strtok(buf, tab));
const string section(token = strtok(0, tab));
const string course((token = strtok(0, tab)) ? token : \"\");
const string instructor((token = strtok(0, tab)) ? token :\"\");
const string whenWhere((token = strtok(0, tab)) ? token :\"\");
if (course.find('-') == string::npos) continue;
const string code(course.begin(), course.begin() +course.find('-'));

//check for duplicates
int i;
for(int i = 0; i < numberOfTermsSeen; i++)
{
if(alreadySeen[i].term == term)
{
match = true;
break;
}
}

if(match == true)
{
for(int j = 0; j < alreadySeen[i].numberOfSectionsSeen;j++)
if (alreadySeen[i].seenSectionNumbers[j]== section)
{
duplicate = true;
dup++;
break;
}

if (duplicate == true)
continue;

else
{
alreadySeen[i].seenSectionNumbers[alreadySeen[i].numberOfSectionsSeen]=section;
alreadySeen[i].numberOfSectionsSeen++;
}
}

else
{
alreadySeen[numberOfTermsSeen].term = term;
alreadySeen[i].numberOfSectionsSeen = 1;
numberOfTermsSeen++;
}

// check for same section
for(i = 0; i < totalSubjects; i++)
{
if(sub[i].code == code)
{
stored = true;
break;
}
}

if(stored == true)
{
sub[i].count++;
}

else
{
sub[totalSubjects].code = code;
sub[totalSubjects].count = 1;
totalSubjects++;
}

counter++;
}
fin.close();
cout << endl;

//sort
for (int i = 0; i < totalSubjects; i++)
for (int j = i + 1; j < totalSubjects; j++)
if (sub[j].code < sub[i].code)
swap(sub[j], sub[i]);

//output
for(int i = 0; i < totalSubjects; i++)
{
cout << sub[i].code << \", \" << sub[i].count<< \" section\" << endl;
}
cout << \"Total duplication: \" << dup <cout << \"Total counts: \" << total << endl;
cout << \"Total subjects: \" << totalSubjects <}

*When I compare the results of both programs, the subjects and total are the same, but the total duplication number is significantly off. Also, the number of sections per course is off. I'm not really sure how or why this happens, or how to fix it.

Answer & Explanation Solved by verified expert
3.9 Ratings (775 Votes)
Before answering your question, I believe you are comfortable with dry running. I am saying this because, in all my industrial experience, the biggest mistake developers make is that they jump directly into the logic part    See Answer
Get Answers to Unlimited Questions

Join us to gain access to millions of questions and expert answers. Enjoy exclusive benefits tailored just for you!

Membership Benefits:
  • Unlimited Question Access with detailed Answers
  • Zin AI - 3 Million Words
  • 10 Dall-E 3 Images
  • 20 Plot Generations
  • Conversation with Dialogue Memory
  • No Ads, Ever!
  • Access to Our Best AI Platform: Flex AI - Your personal assistant for all your inquiries!
Become a Member

Other questions asked by students