0

I need to determine if the probability density of my data follows power law or exponential law and plot a linear fit on a loglog scale or a semilog scale respectively. My code works very well if my data follows a power law, but gives me a very bad fit for the exponential law. I am not sure what am I doing wrong. I am using KS test to determine which fit is closer to my data. Here's my MWE:

    import numpy as np
    import matplotlib.pyplot as plt
    import scipy.stats as stats
    from scipy.stats import ks_2samp
    from scipy.stats import powerlaw, expon
    from scipy.stats import kstest
    from scipy.optimize import curve_fit
       
    #defining power law function
    def power_law(x, a, b):
        return a * np.power(x, b)
    
    # Define the exponential function
    def exponential_law(x, a, b):
        return a * np.exp(b * x)

    data = np.random.random(25)
    
    num_bins = 100 # Specify the number of bins
    
    counts, bin_edges = np.histogram(data, bins=num_bins)  # Compute PDF
    pdf = counts / sum(counts)
    
    # Compute x-values as bin centers
    x = (bin_edges[:-1] + bin_edges[1:]) / 2
    
    # Fit data to power law
    params_power, _ = curve_fit(power_law, x, pdf) 
    a_power, b_power = params_power
    data_fit_power = power_law(x, a_power, b_power)
    
    # Fit data to exponential law
    params_exponential, _ = curve_fit(exponential_law, x, pdf)
    a_exponential, b_exponential = params_exponential
    data_fit_exponential = exponential_law(x, a_exponential, b_exponential)
    
    
    # Perform the KS test to compare distributions
    D_powerlaw, p_powerlaw = stats.kstest(pdf, data_fit_power)
    D_explaw, p_explaw = stats.kstest(pdf, data_fit_exponential)
    
    if p_powerlaw > p_explaw:
        print(f'Data follows power law. The slope in loglog scale: {b_power}')
        # Plot the data and the fitted curves
        plt.figure(figsize=(12, 6))
        plt.loglog(x, pdf, 'r.', label='Data', markersize=10)
        plt.loglog(x, data_fit_power, 'k-', label='Power Law Fit')
        plt.legend()
    
    else:       
        print(f'Data follows exponential law. The decay rate = {b_exponential}')
        # Plot the data and the fitted curves
        plt.figure(figsize=(12, 6))
        plt.semilogy(x, pdf, 'r.', label='Data', markersize=10)
        plt.semilogy(x, data_fit_exponential, 'k-', label='Exponential Fit')
        plt.legend()
    
    plt.xlabel('Data')
    plt.ylabel('Probability Density')
    plt.title('Data Fit')
    plt.show()
mikuszefski
  • 3,943
  • 1
  • 25
  • 38
S C
  • 284
  • 2
  • 9

0 Answers0