I want to calculate mean square displacement of N particles for which I have the trajectory of particles' positions over time. The code I've written has 3 for loops that makes it extremely slow. Could you please help me how I can replace for loops with some sort of vectorized capabilities of numpy or pandas?
Here is my code:
ntime = 10 # number of times represented in data
atom_count = 3 # number of particles
norigin = 5 # number of origins is half number of time steps
nmin = 2 # minimum number of intervals to contribute to diffusivity
nmax = norigin # maximum number of intervals to contribute to diffusivity
dt = 1.0 # timestep
# creating sample trajectory of particles
traj = pd.DataFrame(np.random.rand(ntime*atom_count,3), columns=['x', 'y', 'z'])
traj['frame_id'] = np.repeat(np.arange(ntime)+1, atom_count)
traj['particle_id'] = np.tile(np.arange(atom_count)+1, ntime)
traj = traj[['frame_id', 'particle_id', 'x', 'y', 'z']]
print(traj.head(6))
ndata = traj.shape[0] # number of rows of data
# store mean square displacements in msd
time_vec= np.arange(dt, norigin*dt+1, dt)
msd_xyz = np.zeros((norigin,3))
# loop over all particles
for i in range(atom_count):
# loop over all time origins
for j in range(norigin):
jstart = j*atom_count + i
# loop over all time windows
for k in range(nmin, nmax):
kend = jstart + k*atom_count
msd_xyz[k, :] += (traj.ix[kend, ['x', 'y', 'z']].values -
traj.ix[jstart, ['x', 'y', 'z']].values)**2
msd_xyz = msd_xyz / (atom_count * norigin)
msd = np.mean(msd_xyz, axis=1) # total MSD averaged over x, y, z directions
print()
print("MSD (averaged over all particles and time origins):")
print(msd)