-1

I have data without any infinite values, shown below:

# Numeric sample used in the ECDF examples below. All values are
# non-negative and a large share of them are exactly zero, so zero is both
# the minimum and the most frequent value.
data<-c(6.87, 0.11, 0, 0.03, 0.08, 0, 0.01, 0, 0, 0.13, 0.17, 0, 0.53, 
0.01, 2.69, 0, 0, 0, 0, 0, 0, 5.44, 4.71, 3.57, 0.38, 0.31, 0.45, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.02, 0.09, 0.43, 0, 0.23, 2.31, 0, 
0.01, 0.96, 1.28, 0.07, 0, 0, 0, 0, 0.07, 0, 0, 0.03, 5.99, 0.08, 
6.23, 2.95, 0.04, 0, 0.98, 0.03, 17.2, 0, 0, 0.25, 1.99, 0, 0, 
0, 0, 0, 0, 0, 0, 0.01, 0, 0, 0, 1.94, 1.06, 0.05, 0, 0, 0, 0, 
0, 0.02, 0, 0.02, 0.05, 0.66, 0.57, 0.54, 0, 0.11, 0.75, 0.65, 
0, 7.35, 0.22, 0, 1.97, 0, 0, 0, 0, 0, 0, 0, 3.78, 0.28, 0.06, 
0.09, 0.03, 0.01, 0.14, 0.02, 0, 1.82, 0.04, 0, 0.01, 0, 0, 0, 
9.52, 10.38, 29.09, 0.1, 0.4, 0, 0.97, 0.57, 0.33, 0.16, 40.19, 
1.06, 5.02, 0.01, 0.79, 4.78, 0.44, 15.29, 7.26, 0.05, 0.01, 
6.47, 37.66, 0, 0.4, 0.15, 1.42, 0, 0.07, 0.01, 0, 0, 0.01, 7.43, 
0, 0.03, 0.02, 0.15, 0.09, 0.44, 1.45, 0.03, 0, 0.01, 0.01, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0.09, 0, 0.01, 0, 0, 0, 0, 0, 0, 0.04, 
1.52, 0.01, 0, 0.27, 0.01, 1.11, 2.41, 0, 0, 0.15, 0, 0, 1.28, 
0.02, 0.16, 0.17, 1.74, 3.75, 0.05, 0.18, 0.07, 0.39, 0, 0, 0, 
0.45, 0.78, 0, 0, 0, 0.01, 0, 0, 0, 0, 0, 0.05, 0, 0, 0, 0, 0.09, 
0.17, 0, 0.27, 0.04, 0.02, 0.88, 1.23, 0.01, 0.01, 0.13, 0.6, 
0.02, 0.4, 0, 0, 0.09, 0, 0, 0, 0, 0, 0, 0, 0.12, 0, 0.02, 0.02, 
0, 0.01, 0, 0, 0, 0, 0.05, 0, 0, 0, 2.71, 0.49, 0, 0, 0, 0.84, 
0.04, 0, 0, 0.02, 0.06, 0, 0, 0.02, 0.15, 0, 0.48, 5.34, 5.23, 
0.22, 2.37, 1.55, 3.29, 0, 0, 0.08, 0.03, 0.05, 0.06, 7.62, 0.84, 
0.48, 0.04, 0, 0, 0, 0, 0.6, 0, 0.02, 0.08, 0, 0, 0, 0, 0, 0, 
0, 0, 0.11, 0.1, 0, 0.01, 0, 0.24, 0, 2.4, 1.57, 0.12, 0.03, 
0, 0, 0, 0, 0.08, 0.56, 0, 0.09, 0.01, 0.09, 0.01, 0, 0, 0, 0, 
0, 0, 0, 0.01, 0, 0, 0, 0, 0, 0.03, 0, 0, 0, 0.38, 0.01, 0.03, 
0.14, 0.03, 0, 0, 0, 0, 0, 0, 0, 0, 0.08, 0, 6.1, 0, 0, 0, 0.31, 
0.01, 0.03, 0.2, 0.03, 0.02, 0.08, 0.12, 0, 5.79, 0.91, 0.95, 
0, 0.39, 1.3, 0, 0, 0, 0, 0.01, 0, 0.64, 0.02, 0, 1.14, 0.24, 
0, 0, 0.04, 0, 0, 0, 1.28, 0, 0.01, 0.02, 2.18, 0.05, 0, 0, 0, 
0, 0, 0.01, 0.02, 0.19, 3.9, 0.01, 0.03, 0, 5.93, 0.88, 0, 0, 
0, 0, 0.07, 0, 4.06, 0.41, 0.04, 0, 0, 0.08, 2.21, 0.03, 0, 8.07, 
0, 0, 0.66, 1.73, 0.17, 0.02, 1.37, 4.68, 0.26, 0.09, 0.2, 0.03, 
0, 0, 0, 0, 2.74, 0.1, 0.42, 0.65, 0.53, 0, 0, 0, 0, 0, 0.01, 
0.16, 0.17, 0, 0, 0.01, 0, 0, 0, 0.6, 0, 0, 0.09, 0.01, 0, 0, 
0.69, 0, 0, 0, 0, 0, 0, 0, 0, 0.01, 0.13, 0.54, 0, 0.16, 1.57, 
0.12, 0, 0, 0, 0, 0.15, 0.18, 0, 0.86, 0.01, 0.01, 0, 0, 0, 0.01, 
0, 0, 0, 0, 0, 0, 0, 0.03, 0.37, 0.18, 0, 0, 0, 0.08, 5.25, 1.1, 
0.68, 0.14, 0.01, 0, 0, 0, 0, 0, 0, 0.02, 0, 0.04, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1.95, 0.28, 0, 0, 0, 0, 0, 5.78, 
3.05, 0.39, 1.65, 0, 0.03, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0.25, 0.78, 0, 0, 0, 0.85, 0, 0, 0, 0, 0, 0, 0, 0.34, 0.45, 0, 
0, 0.02, 0.02, 0, 0, 0, 0.01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.7, 
0.69, 0, 0, 0, 0, 0, 0.02, 2.01, 0.05, 0, 0.77, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0.01, 0.01, 0.02, 0.01, 0.79, 0.01, 0, 0.97, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0.69, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0.47, 0.02, 0, 0, 0, 0.13, 0.01, 0, 0, 0, 0, 0.33, 
0.01, 0, 0, 0, 0, 0, 0, 0, 0.03, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0.01, 0, 0, 0.02, 0, 0.01, 12.31, 7.54, 0, 0, 
0, 0.05, 0, 0, 0, 0, 0, 0, 0.01, 0, 0, 0, 0, 0, 0.01, 1.02, 0, 
0, 0.54, 0.03, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.02, 0.01, 0.08, 0.03, 
0, 0, 0, 0, 0, 0.03, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0.11, 0.25, 0.03, 0, 0, 0.09, 0, 0, 0.01, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.15, 1.41, 0, 0, 0, 0, 0)

When I try to plot the CDF, the curve does not start from zero; it starts at 0.57. I read that infinite and zero values can cause this problem. To overcome this, I replaced the zeros with a very small value.

# Swap every exact zero for a tiny positive constant, then draw the ECDF
# with the x-axis limited to the observed range of the data.
data <- replace(data, data == 0, 1e-6)
plot(ecdf(data), xlim = range(data))

enter image description here

But I am still getting the same result. Why is that happening?

  • 1
    The answer to your actual question "Why is that happening?" is because that is the correct and expected behaviour. The minimum value in your data (zero) has a relative frequency of 0.57. – dww Jun 03 '21 at 19:11
  • 1
    It is happening because the most frequent value in your data is also its minimum value. Run `length(data[data==0]) / length(data)`. It gives 0.5794045, which means that the probability of drawing 0 at random from your data is 0.5794045. You may add a symbolic negative value to your data set to create a low starting frequency for plotting purposes. – maydin Jun 03 '21 at 19:12
  • 1
    Voting to close as a typo, since the question seems to be motivated by a misunderstanding about what the ecdf function should do. – dww Jun 03 '21 at 19:14

2 Answers

2

I'm not sure it makes much sense to show the empirical CDF for values outside the observed data. All values lower than the minimum of the data will have an ECDF of zero. You could try something like:

# Build the empirical CDF as a function, then trace it from slightly below
# the smallest observation up to the largest one.
f <- ecdf(data)
curve(f, from = min(data) - 0.01, to = max(data))
d.b
  • 32,245
  • 6
  • 36
  • 77
2

As noted in the first two comments on the question, there are many zeros in your data, representing about 58% of the total data points.

# Proportion of observations that are exactly zero: `data == 0` is a logical
# vector, and its mean is the fraction of TRUE entries.
mean(data == 0)
#[1] 0.5794045

If you plot the ECDF and a horizontal line at that ordinate, you will see that the ECDF starts there.

# Share of observations equal to zero; used as the height of the
# horizontal reference line.
zero_share <- mean(data == 0)

# Draw the ECDF, then overlay the reference line at that height.
plot(ecdf(data))
abline(h = zero_share)
Rui Barradas
  • 70,273
  • 8
  • 34
  • 66