So, a few years ago I wrote a program in Python that writes the English names of a sequential list of numbers to a file (one, two, three, etc.). I have been working on getting a C++ version working off and on for the last month (personal project), and I think I have it running pretty well. One problem: it's five times slower than the Python version. I've tried switching string concatenation methods ( <<
vs operator+
vs operator+=
vs .append()
), using fprinf()
instead of ofstream
, pre-allocating the string size (.reserve()
), and a lot of other things I can't remember, but I seemed to have run into a wall. Then I noticed that the C++ writing speed seems to max out around 70MB/s, whereas the Python version writes at around 350MB/s. The drive I am using is a 5400rpm disk (CrystalDiskMark gives a sequential write speed of 60-90 MB/s), so the C++ write speeds are believable, but the Python?
TL;DR: Python seems to be writing five times faster than possible, (near to the read speeds!) of the disk.
I've included the programs below, in case I am missing something obvious (plausible). "Benchmarking" involved running each program for the numbers 1-1,000,000, resulting in a file of 50,824KB. ~50s for C++, ~8.5s for Python.
Python:
##Code in Python version 2.7.5 for generating a file with the English names of a set range of numbers.
while 1:
s=input("Starting_Value:")
f=input("Ending_Value:")
filename=raw_input("Desired Name of File:")
##dictionary
one=["","one","two","three","four","five","six","seven","eight","nine","error_one"]
one2=["","-one","-two","-three","-four","-five","-six","-seven","-eight","-nine","error_one2"]
teen=["ten","eleven","twelve","thirteen","fourteen","fifteen","sixteen","seventeen","eighteen","nineteen","error_teen"]
ten=["","twenty","thirty","fourty","fifty","sixty","seventy","eighty","ninety","error_ten"]
##Functions
def translate(n): ##handles '' to 999
work=[]
if n>=100:
work.append(one[int(n/100)]+" hundred ")
n=n-(100*int(n/100))
if n>=10:
if n>=20:
work.append(ten[int(n/10)-1]+one2[n-(10*int(n/10))]+" ")
else:
work.append(teen[n%10]+" ")
elif n>=1:
work.append(one[n]+ " ")
elif n>=10:
if n>=20:
work.append(ten[int(n/10)-1]+one2[n-(10*int(n/10))]+" ")
else:
work.append(teen[n%10]+" ")
elif n>=1:
work.append(str(one[n])+" ")
end1=', '.join(work)
end1=end1.replace(", ","")
return end1
def english(m): ##handles billions, hundred millions, millions, hundred thousands, thousands
work2=[]
if m>=1000000000:
work2.append(str(translate(int(m/1000000000)))+"billion ")
elif m>=1000000:
work2.append(str(translate(int(m/1000000)))+"million "+str(translate(int(m-(1000000*int(m/1000000)))/1000))+"thousand "+str(translate(m-(1000*int(m/1000)))))
if ((int(m/1000)%1000)==0):
end3=str(', '.join(work2))
end4=end3.replace("thousand ", "")
work2[:]=[]
work2=[str(end4)]
else:
end3=str()
elif m>=1000:
work2.append(str(translate(int(m/1000)))+"thousand "+str(translate(m%1000)))
elif m>=1:
work2.append(translate(m))
end2=str(', '.join(work2))
end2=end2[:-1]
return end2
##Main Program - Iterator
file = open(str(filename), "a")
for i in range(f-s+1):
file.write(str(english(s))+", ")
s = s + 1
file.close()
a = raw_input("Close window to EXIT, or press ENTER to generate another file")
C++
//Generates a file of sequential numbers in English
//libraries
#include <iostream> //for output to terminal
#include <fstream> //for output to file
#include <string> //for handling strings
using namespace std; //yes
ofstream fout; //for convenience with 'cout'
//function prototypes
string thousands(int n); //translates 1 to 999
string millions(int m); //translates the hundred thousands, millions,
hundred millions
int lint(int j, int k); //outputs the digits of a number greater than the kth place i.e. lint(123456, 1000) = 123
//variables
int shi = 0; //starting value
int kut = 1; //ending value
int i = 0; //iterator
string fname = ""; //filename
string buffern = ""; //buffer for thousands
string bufferm = ""; //buffer for millions
string bufferf = ""; //first digit buffer
//dictionary
char one[10][7] = { ""," one"," two"," three"," four"," five"," six"," seven"," eight"," nine" };
char one2[10][7] = { "","-one","-two","-three","-four","-five","-six","-seven","-eight","-nine" };
char teen[10][11] = { " ten"," eleven"," twelve"," thirteen"," fourteen"," fifteen"," sixteen"," seventeen"," eighteen"," nineteen" };
char ten[10][9] = { "",""," twenty"," thirty"," fourty"," fifty"," sixty"," seventy"," eighty"," ninety" };
//main function
int main()
{
while (1)
{
//get user input
cout << " Starting Number: ";
cin >> shi;
cout << " Ending Number: ";
cin >> kut;
while (fout.is_open() != 1)
{
cout << " Desired Filename: ";
cin >> fname;
fname.append(".txt");
fout.open(fname);
if (fout.is_open() != 1)
cout << "\n Invalid file name. Please try again.\n";
}
//translate and write to file
if (shi == 0) { //handles starting at zero
fout << "zero,";
shi = 1;
}
else //handles spacing for next word
{
bufferf = millions(shi);
bufferf.erase(0, 1);
bufferf += ",";
fout << bufferf;
shi++;
}
for (i = shi; i < (kut); ++i) //Main Iterator
{
fout << millions(i) << ",";
}
fout << millions(kut) << "."; //handles last word
fout.close();
//display confirmation and prompt to exit/continue
cout << "\n Complete\n";
cin.get();
cin.ignore();
cout << endl;
}
}
//function definitions
string thousands(int n) //writes '' to 999
{
buffern = "";
buffern.reserve(30);
if (n >= 100) { //write hundreds place
buffern += one[lint(n, 100)];
buffern += " hundred";
n = n % 100;
if (n >= 10) { //write tens place
if (n >= 20) {
buffern += ten[lint(n, 10)];
buffern += one2[n % 10];
}
else { //deal with 'teens'
buffern += teen[n % 10];
}
}
else if (n >= 1) { //write ones place
buffern += one[n % 10];
}
}
else if (n >= 10) { //write tens place
if (n >= 20) {
buffern += ten[lint(n, 10)];
buffern += one2[n % 10];
}
else { //deal with 'teens'
buffern += teen[n % 10];
}
}
else if (n >= 1) { //write ones place
buffern += one[n];
}
return buffern;
}
string millions(int m)
{
bufferm = "";
bufferm.reserve(100);
if (m >= 1000000)
{
if (int(m / 1000) % 1000 == 0) { //if xxx,000,xxx
bufferm += thousands(lint(m, 1000000));
bufferm += " million";
bufferm += thousands(m % 1000);
}
else {
bufferm += thousands(lint(m, 1000000)); //millions
bufferm += " million";
bufferm += thousands(lint(m, 1000) % 1000); //hundred thousands
bufferm += " thousand";
bufferm += thousands(m % 1000); //thousands
}
}
else if (m >= 1000) {
bufferm += thousands(lint(m, 1000)); //hundred thousands
bufferm += " thousand";
bufferm += thousands(m % 1000); //thousands
}
else if (m >= 1) {
bufferm += thousands(m); //thousands
}
return bufferm;
}
int lint(int j, int k)
{
return ((j - (j % k)) / k);
}
I would appreciate any insights as to why the programs are running at different speeds, how the Python is writing so fast, or suggestions on speeding up the c++ code.
Edit: @VTT was right, not all of the C++ code was there. Added.