I have data on the prevalence of a given infection, reported for each country in 6 different age groups. I am trying to find a suitable distribution to model the prevalence using the fitdistrplus package.
A histogram of the prevalence variable suggests that it is not unimodal, and the log- and logit-transformed values look skewed. I am unable to fit a suitable distribution to the data, either because of the starting values or because of something I am unaware of. Could someone please advise?
Here's my data:
# Observed prevalence as a proportion: 120 values, laid out six per row here.
# Four entries are exactly 0 (positions 84, 114, 115 and 120).
prev <- c(
  0.4,    0.4,    0.3333, 0.2273, 0.2273, 0.1818,
  0.0733, 0.0807, 0.2,    0.2,    0.2,    0.1053,
  0.2083, 0.1585, 0.1017, 0.1017, 0.1017, 0.1059,
  0.3902, 0.3981, 0.4103, 0.4706, 0.4706, 0.4655,
  0.037,  0.0432, 0.0488, 0.1538, 0.1667, 0.0556,
  0.1277, 0.101,  0.0641, 0.034,  0.0267, 0.0463,
  0.0152, 0.0277, 0.0268, 0.0211, 0.0185, 0.019,
  0.1818, 0.2384, 0.1442, 0.1481, 0.1111, 0.1333,
  0.5018, 0.2983, 0.2649, 0.2649, 0.2649, 0.2593,
  0.3442, 0.2774, 0.1269, 0.1269, 0.1269, 0.1272,
  0.1708, 0.136,  0.048,  0.048,  0.048,  0.0478,
  0.4261, 0.303,  0.1891, 0.1891, 0.1891, 0.1891,
  0.12,   0.0779, 0.0306, 0.0476, 0.1,    0.0862,
  0.1733, 0.1386, 0.0947, 0.0822, 0.0392, 0,
  0.453,  0.4287, 0.3898, 0.3756, 0.3953, 0.3776,
  0.3818, 0.278,  0.184,  0.1529, 0.1077, 0.0769,
  0.2398, 0.1421, 0.1353, 0.1269, 0.1158, 0.1228,
  0.1,    0.1233, 0.1162, 0.1078, 0.1238, 0.0532,
  0.2636, 0.1948, 0.0767, 0.0821, 0.0661, 0,
  0,      0.0625, 0.0635, 0.0576, 0.0455, 0
)

# The log and logit transforms are derived from `prev` instead of being pasted
# as literals: the pasted versions had empty slots (", ,") where prev == 0 and
# an unclosed parenthesis, so they did not parse. Both transforms are -Inf at
# prev == 0; those entries are stored as NA so the vectors stay aligned with
# `prev` (length 120).
prev_log <- ifelse(prev > 0, log(prev), NA_real_)
# qlogis(p) is log(p / (1 - p)); it is also infinite at p == 1, hence the
# upper guard.
prev_logit <- ifelse(prev > 0 & prev < 1, qlogis(prev), NA_real_)
# Group index for each observation: the pattern 1..6 repeated 20 times
# (120 values, stored as doubles).
group <- rep(c(1, 2, 3, 4, 5, 6), times = 20)
# Identifier for each observation: 20 distinct ids, each repeated for six
# consecutive entries (120 values, stored as doubles).
id <- rep(
  c(
    950, 979, 982, 1008, 1151, 1166, 1199, 1244, 1267, 1277,
    1286, 1292, 1306, 1323, 1367, 1399, 1438, 1447, 1488, 1521
  ),
  each = 6
)
If this doesn't work, could I consider working with splines? Or perhaps a GAM, since I have study-level data with the prevalence for 6 groups within each study?
# Exploratory plots and maximum-likelihood fits with fitdistrplus.
library(fitdistrplus)

# Assemble the data frame the calls below rely on. It is built from the `id`,
# `group` and `prev` vectors so that every column referenced here exists
# (the original mixed `mydat$p_exact` and `mydat$prev` without defining `mydat`).
mydat <- data.frame(id = id, group = group, prev = prev)

# Some prevalences are exactly 0. log() and qlogis() are -Inf there, and 0 lies
# on the boundary of the support of the Weibull, gamma, beta and log-normal
# distributions, so the likelihood (and fitdist's default starting values) can
# be non-finite. That is a typical cause of "error code 100". The transformed
# plots and the fits therefore use strictly positive values only.
# NOTE(review): dropping the zeros is a modelling choice; confirm it suits the
# analysis (a model that allows exact zeros may be preferable).
prev_pos <- mydat$prev[mydat$prev > 0]

# Histogram/density and empirical CDF on the raw, log and logit scales.
plotdist(mydat$prev, histo = TRUE, demp = TRUE, breaks = 40)
plotdist(log(prev_pos), histo = TRUE, demp = TRUE, breaks = 40)
plotdist(qlogis(prev_pos), histo = TRUE, demp = TRUE, breaks = 40)

# Candidate distributions, fitted by maximum likelihood (fitdist's default).
fit_w <- fitdist(prev_pos, "weibull")
fit_g <- fitdist(prev_pos, "gamma")
fit_b <- fitdist(prev_pos, "beta")
fit_ln <- fitdist(prev_pos, "lnorm")
When I tried some starting values, or added the option lower = c(0, 0) to fitdist, I got the error "the function mle failed to estimate the parameters, with the error code 100".