I have been using MATLAB on a daily basis for a few years, but recently I decided to switch to Python because of its additional capabilities (e.g. Machine learning libraries, etc.).
I started by translating a simple MATLAB code into Python using primarily NumPy, and run it through Spyder environment. The results are striking: Python is more than 100 slower! Below I attached both codes. For the given CSV file (gmr) Python executed the code in 25 secs, while MATLAB in several ms. More importantly, I want to repeat the same calculations for a large number of CSV files (e.g. 200). It is quite striking that MATLAB needs around 50 secs for 200 CSV files, while Python would need more than 1 hour.
Could you please suggest some modifications? What am I doing wrong here?
I have tried several alternatives ways to formulate the code, but unfortunately none of them worked.
MATLAB:
clear variables; close all ; clc ;
tic;
damp=0.05;
T=[0.01:0.01:3]';
Spectra = cell(num_records,2);
name='rec_1_out_scaled.csv';
gmr=load(name);
dt = gmr(3,1) - gmr(2,1);
cd (main_Dir)
Sa=zeros(length(T),1);
% Newmark's Direct Integration Method
for j=1:length(T)
Sa(j)=newmark_dim(gmr(:,2),T(j),damp,dt);
end
Spectra(i,:) = {name,Sa};
toc;
function Sa = newmark_dim(gacc,T,zeta,dt)
% newmark coefficients
beta = 1/4;
gamma = 1/2;
% natural circular frequency of SDOF system
wn = 2*pi/T;
% initialization
npun = length(gacc);
y = zeros(npun,1);
yp = zeros(npun,1);
ypp = zeros(npun,1);
ypp(1) = -gacc(1)-2*wn*zeta*yp0-wn^2*y0;
% Integration coefficients
keff = wn^2 + 1/(beta*dt^2) + gamma*2*wn*zeta/(beta*dt);
a1 = 1/(beta*dt^2)+gamma*2*wn*zeta/(beta*dt);
a2 = 1/(beta*dt)+2*wn*zeta*(gamma/beta-1);
a3 = (1/(2*beta)-1)+2*wn*zeta*dt*(gamma/(2*beta)-1);
for i=1:npun-1
y(i+1) = (-gacc(i+1)+a1*y(i)+a2*yp(i)+a3*ypp(i))/keff;
ypp(i+1) = (y(i+1)-y(i)-dt*yp(i)-dt^2*ypp(i)/2)/(beta*dt^2) + ypp(i);
yp(i+1) = yp(i)+dt*ypp(i)+dt*gamma*(ypp(i+1)-ypp(i));
end
Sd = max(abs(y));
Sa = Sd*wn^2;
return
PYTHON:
import time
import numpy as np
import math
import pandas as pd
start = time.time()
# Initialize
name = "rec_1_out_scaled.csv"
T = np.arange(0.01,3.01,0.01)
zeta = 0.05 # 5% damping
extension = 'csv'
num_records = 200
Spectra = np.zeros((num_records,T.size,2)) # Multidimentional array
"""
Newmark's Direct Integration Method
"""
def Newmark_DIM(gmr,T,zeta,dt):
# newmark coefficients for constant acceleration
beta = 1/4
gamma = 1/2
# natural circular frequency of the SDOF system
wn = 2*math.pi/T
# initialization
npun = len(gmr)
y = np.zeros(npun)
yp = np.zeros(npun)
ypp = np.zeros(npun)
ypp[0] = - gmr[0] - 2 * wn * zeta* yp[0] - wn**2 * y[0]
# Integration coefficients
keff = wn**2 + 1/(beta * dt**2) + gamma * 2 * wn * zeta /(beta * dt)
a1 = 1/(beta * dt**2) + gamma * 2 * wn * zeta/(beta * dt)
a2 = 1/(beta * dt) + 2 * wn * zeta * (gamma/beta - 1)
a3 = (1/(2 * beta) - 1) + 2 * wn * zeta * dt * (gamma/(2 * beta) - 1)
# Solves ODE
for i in range(npun-1):
y[i+1] = (-gmr[i+1] + a1 * y[i] + a2 * yp[i] + a3 * ypp[i])/keff
ypp[i+1] = (y[i+1] -y[i] - dt * yp[i] - dt**2 * ypp[i]/2) / (beta * dt**2) + ypp[i]
yp[i+1] = yp[i] + dt * ypp[i] + dt * gamma * (ypp[i+1] - ypp[i])
Sd = np.amax(np.abs(y))
Sa = Sd * wn**2
return (Sa)
# Calculates Spectrum
gmr = pd.read_csv(name, header=None)
gmr = gmr.values # makes it numpy array
dt = gmr[2,0] - gmr[1,0]
Sa = np.zeros(len(T))
for j in range(len(T)):
Sa[j] = Newmark_DIM(gmr[:,1],T[j],zeta,dt)
Spectra[0][:,0] = T
Spectra[0][:,1] = Sa
print("Finished")
end = time.time()
print(end - start)