3

My most recent assignment has been to parse PMML back into R models. (I've searched extensively, and there is no library that does this conversion for you.) I'm trying to convert PMML containing a Multinomial Logistic Regression back into an R model, but I don't know how to convert any of the coefficients held in the PMML document to the coefficients held by the R model.

The PMML is below:

<?xml version="1.0"?>
<PMML version="4.2" xmlns="http://www.dmg.org/PMML-4_2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.dmg.org/PMML-4_2 http://www.dmg.org/v4-2/pmml-4-2.xsd">
 <Header copyright="Copyright (c) 2014 hlin117" description="Generalized Linear Regression Model">
  <Extension name="user" value="hlin117" extender="Rattle/PMML"/>
  <Application name="Rattle/PMML" version="1.4"/>
  <Timestamp>2014-06-23 13:04:17</Timestamp>
 </Header>
 <DataDictionary numberOfFields="13">
  <DataField name="audit.train$TARGET_Adjusted" optype="continuous" dataType="double"/>
  <DataField name="ID" optype="continuous" dataType="double"/>
  <DataField name="Age" optype="continuous" dataType="double"/>
  <DataField name="Employment" optype="categorical" dataType="string">
   <Value value="Consultant"/>
   <Value value="Private"/>
   <Value value="PSFederal"/>
   <Value value="PSLocal"/>
   <Value value="PSState"/>
   <Value value="SelfEmp"/>
   <Value value="Volunteer"/>
  </DataField>
  <DataField name="Education" optype="categorical" dataType="string">
   <Value value="Associate"/>
   <Value value="Bachelor"/>
   <Value value="College"/>
   <Value value="Doctorate"/>
   <Value value="HSgrad"/>
   <Value value="Master"/>
   <Value value="Preschool"/>
   <Value value="Professional"/>
   <Value value="Vocational"/>
   <Value value="Yr10"/>
   <Value value="Yr11"/>
   <Value value="Yr12"/>
   <Value value="Yr1t4"/>
   <Value value="Yr5t6"/>
   <Value value="Yr7t8"/>
   <Value value="Yr9"/>
  </DataField>
  <DataField name="Marital" optype="categorical" dataType="string">
   <Value value="Absent"/>
   <Value value="Divorced"/>
   <Value value="Married"/>
   <Value value="Married-spouse-absent"/>
   <Value value="Unmarried"/>
   <Value value="Widowed"/>
  </DataField>
  <DataField name="Occupation" optype="categorical" dataType="string">
   <Value value="Cleaner"/>
   <Value value="Clerical"/>
   <Value value="Executive"/>
   <Value value="Farming"/>
   <Value value="Home"/>
   <Value value="Machinist"/>
   <Value value="Military"/>
   <Value value="Professional"/>
   <Value value="Protective"/>
   <Value value="Repair"/>
   <Value value="Sales"/>
   <Value value="Service"/>
   <Value value="Support"/>
   <Value value="Transport"/>
  </DataField>
  <DataField name="Income" optype="continuous" dataType="double"/>
  <DataField name="Gender" optype="categorical" dataType="string">
   <Value value="Female"/>
   <Value value="Male"/>
  </DataField>
  <DataField name="Deductions" optype="continuous" dataType="double"/>
  <DataField name="Hours" optype="continuous" dataType="double"/>
  <DataField name="IGNORE_Accounts" optype="categorical" dataType="string">
   <Value value="Canada"/>
   <Value value="China"/>
   <Value value="Columbia"/>
   <Value value="Cuba"/>
   <Value value="Ecuador"/>
   <Value value="England"/>
   <Value value="Fiji"/>
   <Value value="Germany"/>
   <Value value="Greece"/>
   <Value value="Guatemala"/>
   <Value value="Hong"/>
   <Value value="Hungary"/>
   <Value value="India"/>
   <Value value="Indonesia"/>
   <Value value="Iran"/>
   <Value value="Ireland"/>
   <Value value="Italy"/>
   <Value value="Jamaica"/>
   <Value value="Japan"/>
   <Value value="Malaysia"/>
   <Value value="Mexico"/>
   <Value value="NewZealand"/>
   <Value value="Nicaragua"/>
   <Value value="Philippines"/>
   <Value value="Poland"/>
   <Value value="Portugal"/>
   <Value value="Scotland"/>
   <Value value="Singapore"/>
   <Value value="Taiwan"/>
   <Value value="UnitedStates"/>
   <Value value="Vietnam"/>
   <Value value="Yugoslavia"/>
  </DataField>
  <DataField name="RISK_Adjustment" optype="continuous" dataType="double"/>
 </DataDictionary>
 <GeneralRegressionModel modelName="General_Regression_Model" modelType="generalizedLinear" functionName="regression" algorithmName="glm" distribution="binomial" linkFunction="logit">
  <MiningSchema>
   <MiningField name="audit.train$TARGET_Adjusted" usageType="predicted"/>
   <MiningField name="ID" usageType="active"/>
   <MiningField name="Age" usageType="active"/>
   <MiningField name="Employment" usageType="active"/>
   <MiningField name="Education" usageType="active"/>
   <MiningField name="Marital" usageType="active"/>
   <MiningField name="Occupation" usageType="active"/>
   <MiningField name="Income" usageType="active"/>
   <MiningField name="Gender" usageType="active"/>
   <MiningField name="Deductions" usageType="active"/>
   <MiningField name="Hours" usageType="active"/>
   <MiningField name="IGNORE_Accounts" usageType="active"/>
   <MiningField name="RISK_Adjustment" usageType="active"/>
  </MiningSchema>
  <Output>
   <OutputField name="Predicted_audit.train$TARGET_Adjusted" feature="predictedValue"/>
  </Output>
  <ParameterList>
   <Parameter name="p0" label="(Intercept)"/>
   <Parameter name="p1" label="ID"/>
   <Parameter name="p2" label="Age"/>
   <Parameter name="p3" label="EmploymentPrivate"/>
   <Parameter name="p4" label="EmploymentPSFederal"/>
   <Parameter name="p5" label="EmploymentPSLocal"/>
   <Parameter name="p6" label="EmploymentPSState"/>
   <Parameter name="p7" label="EmploymentSelfEmp"/>
   <Parameter name="p8" label="EmploymentVolunteer"/>
   <Parameter name="p9" label="EducationBachelor"/>
   <Parameter name="p10" label="EducationCollege"/>
   <Parameter name="p11" label="EducationDoctorate"/>
   <Parameter name="p12" label="EducationHSgrad"/>
   <Parameter name="p13" label="EducationMaster"/>
   <Parameter name="p14" label="EducationPreschool"/>
   <Parameter name="p15" label="EducationProfessional"/>
   <Parameter name="p16" label="EducationVocational"/>
   <Parameter name="p17" label="EducationYr10"/>
   <Parameter name="p18" label="EducationYr11"/>
   <Parameter name="p19" label="EducationYr12"/>
   <Parameter name="p20" label="EducationYr1t4"/>
   <Parameter name="p21" label="EducationYr5t6"/>
   <Parameter name="p22" label="EducationYr7t8"/>
   <Parameter name="p23" label="EducationYr9"/>
   <Parameter name="p24" label="MaritalDivorced"/>
   <Parameter name="p25" label="MaritalMarried"/>
   <Parameter name="p26" label="MaritalMarried-spouse-absent"/>
   <Parameter name="p27" label="MaritalUnmarried"/>
   <Parameter name="p28" label="MaritalWidowed"/>
   <Parameter name="p29" label="OccupationClerical"/>
   <Parameter name="p30" label="OccupationExecutive"/>
   <Parameter name="p31" label="OccupationFarming"/>
   <Parameter name="p32" label="OccupationHome"/>
   <Parameter name="p33" label="OccupationMachinist"/>
   <Parameter name="p34" label="OccupationMilitary"/>
   <Parameter name="p35" label="OccupationProfessional"/>
   <Parameter name="p36" label="OccupationProtective"/>
   <Parameter name="p37" label="OccupationRepair"/>
   <Parameter name="p38" label="OccupationSales"/>
   <Parameter name="p39" label="OccupationService"/>
   <Parameter name="p40" label="OccupationSupport"/>
   <Parameter name="p41" label="OccupationTransport"/>
   <Parameter name="p42" label="Income"/>
   <Parameter name="p43" label="GenderMale"/>
   <Parameter name="p44" label="Deductions"/>
   <Parameter name="p45" label="Hours"/>
   <Parameter name="p46" label="IGNORE_AccountsChina"/>
   <Parameter name="p47" label="IGNORE_AccountsColumbia"/>
   <Parameter name="p48" label="IGNORE_AccountsCuba"/>
   <Parameter name="p49" label="IGNORE_AccountsEcuador"/>
   <Parameter name="p50" label="IGNORE_AccountsEngland"/>
   <Parameter name="p51" label="IGNORE_AccountsFiji"/>
   <Parameter name="p52" label="IGNORE_AccountsGermany"/>
   <Parameter name="p53" label="IGNORE_AccountsGreece"/>
   <Parameter name="p54" label="IGNORE_AccountsGuatemala"/>
   <Parameter name="p55" label="IGNORE_AccountsHong"/>
   <Parameter name="p56" label="IGNORE_AccountsHungary"/>
   <Parameter name="p57" label="IGNORE_AccountsIndia"/>
   <Parameter name="p58" label="IGNORE_AccountsIndonesia"/>
   <Parameter name="p59" label="IGNORE_AccountsIran"/>
   <Parameter name="p60" label="IGNORE_AccountsIreland"/>
   <Parameter name="p61" label="IGNORE_AccountsItaly"/>
   <Parameter name="p62" label="IGNORE_AccountsJamaica"/>
   <Parameter name="p63" label="IGNORE_AccountsJapan"/>
   <Parameter name="p64" label="IGNORE_AccountsMalaysia"/>
   <Parameter name="p65" label="IGNORE_AccountsMexico"/>
   <Parameter name="p66" label="IGNORE_AccountsNewZealand"/>
   <Parameter name="p67" label="IGNORE_AccountsNicaragua"/>
   <Parameter name="p68" label="IGNORE_AccountsPhilippines"/>
   <Parameter name="p69" label="IGNORE_AccountsPoland"/>
   <Parameter name="p70" label="IGNORE_AccountsPortugal"/>
   <Parameter name="p71" label="IGNORE_AccountsScotland"/>
   <Parameter name="p72" label="IGNORE_AccountsSingapore"/>
   <Parameter name="p73" label="IGNORE_AccountsTaiwan"/>
   <Parameter name="p74" label="IGNORE_AccountsUnitedStates"/>
   <Parameter name="p75" label="IGNORE_AccountsVietnam"/>
   <Parameter name="p76" label="IGNORE_AccountsYugoslavia"/>
   <Parameter name="p77" label="RISK_Adjustment"/>
  </ParameterList>
  <FactorList>
   <Predictor name="Employment"/>
   <Predictor name="Education"/>
   <Predictor name="Marital"/>
   <Predictor name="Occupation"/>
   <Predictor name="Gender"/>
   <Predictor name="IGNORE_Accounts"/>
  </FactorList>
  <CovariateList>
   <Predictor name="ID"/>
   <Predictor name="Age"/>
   <Predictor name="Income"/>
   <Predictor name="Deductions"/>
   <Predictor name="Hours"/>
   <Predictor name="RISK_Adjustment"/>
  </CovariateList>
  <PPMatrix>
   <PPCell value="1" predictorName="ID" parameterName="p1"/>
   <PPCell value="1" predictorName="Age" parameterName="p2"/>
   <PPCell value="Private" predictorName="Employment" parameterName="p3"/>
   <PPCell value="PSFederal" predictorName="Employment" parameterName="p4"/>
   <PPCell value="PSLocal" predictorName="Employment" parameterName="p5"/>
   <PPCell value="PSState" predictorName="Employment" parameterName="p6"/>
   <PPCell value="SelfEmp" predictorName="Employment" parameterName="p7"/>
   <PPCell value="Volunteer" predictorName="Employment" parameterName="p8"/>
   <PPCell value="Bachelor" predictorName="Education" parameterName="p9"/>
   <PPCell value="College" predictorName="Education" parameterName="p10"/>
   <PPCell value="Doctorate" predictorName="Education" parameterName="p11"/>
   <PPCell value="HSgrad" predictorName="Education" parameterName="p12"/>
   <PPCell value="Master" predictorName="Education" parameterName="p13"/>
   <PPCell value="Preschool" predictorName="Education" parameterName="p14"/>
   <PPCell value="Professional" predictorName="Education" parameterName="p15"/>
   <PPCell value="Vocational" predictorName="Education" parameterName="p16"/>
   <PPCell value="Yr10" predictorName="Education" parameterName="p17"/>
   <PPCell value="Yr11" predictorName="Education" parameterName="p18"/>
   <PPCell value="Yr12" predictorName="Education" parameterName="p19"/>
   <PPCell value="Yr1t4" predictorName="Education" parameterName="p20"/>
   <PPCell value="Yr5t6" predictorName="Education" parameterName="p21"/>
   <PPCell value="Yr7t8" predictorName="Education" parameterName="p22"/>
   <PPCell value="Yr9" predictorName="Education" parameterName="p23"/>
   <PPCell value="Divorced" predictorName="Marital" parameterName="p24"/>
   <PPCell value="Married" predictorName="Marital" parameterName="p25"/>
   <PPCell value="Married-spouse-absent" predictorName="Marital" parameterName="p26"/>
   <PPCell value="Unmarried" predictorName="Marital" parameterName="p27"/>
   <PPCell value="Widowed" predictorName="Marital" parameterName="p28"/>
   <PPCell value="Clerical" predictorName="Occupation" parameterName="p29"/>
   <PPCell value="Executive" predictorName="Occupation" parameterName="p30"/>
   <PPCell value="Farming" predictorName="Occupation" parameterName="p31"/>
   <PPCell value="Home" predictorName="Occupation" parameterName="p32"/>
   <PPCell value="Machinist" predictorName="Occupation" parameterName="p33"/>
   <PPCell value="Military" predictorName="Occupation" parameterName="p34"/>
   <PPCell value="Professional" predictorName="Occupation" parameterName="p35"/>
   <PPCell value="Protective" predictorName="Occupation" parameterName="p36"/>
   <PPCell value="Repair" predictorName="Occupation" parameterName="p37"/>
   <PPCell value="Sales" predictorName="Occupation" parameterName="p38"/>
   <PPCell value="Service" predictorName="Occupation" parameterName="p39"/>
   <PPCell value="Support" predictorName="Occupation" parameterName="p40"/>
   <PPCell value="Transport" predictorName="Occupation" parameterName="p41"/>
   <PPCell value="1" predictorName="Income" parameterName="p42"/>
   <PPCell value="Male" predictorName="Gender" parameterName="p43"/>
   <PPCell value="1" predictorName="Deductions" parameterName="p44"/>
   <PPCell value="1" predictorName="Hours" parameterName="p45"/>
   <PPCell value="China" predictorName="IGNORE_Accounts" parameterName="p46"/>
   <PPCell value="Columbia" predictorName="IGNORE_Accounts" parameterName="p47"/>
   <PPCell value="Cuba" predictorName="IGNORE_Accounts" parameterName="p48"/>
   <PPCell value="Ecuador" predictorName="IGNORE_Accounts" parameterName="p49"/>
   <PPCell value="England" predictorName="IGNORE_Accounts" parameterName="p50"/>
   <PPCell value="Fiji" predictorName="IGNORE_Accounts" parameterName="p51"/>
   <PPCell value="Germany" predictorName="IGNORE_Accounts" parameterName="p52"/>
   <PPCell value="Greece" predictorName="IGNORE_Accounts" parameterName="p53"/>
   <PPCell value="Guatemala" predictorName="IGNORE_Accounts" parameterName="p54"/>
   <PPCell value="Hong" predictorName="IGNORE_Accounts" parameterName="p55"/>
   <PPCell value="Hungary" predictorName="IGNORE_Accounts" parameterName="p56"/>
   <PPCell value="India" predictorName="IGNORE_Accounts" parameterName="p57"/>
   <PPCell value="Indonesia" predictorName="IGNORE_Accounts" parameterName="p58"/>
   <PPCell value="Iran" predictorName="IGNORE_Accounts" parameterName="p59"/>
   <PPCell value="Ireland" predictorName="IGNORE_Accounts" parameterName="p60"/>
   <PPCell value="Italy" predictorName="IGNORE_Accounts" parameterName="p61"/>
   <PPCell value="Jamaica" predictorName="IGNORE_Accounts" parameterName="p62"/>
   <PPCell value="Japan" predictorName="IGNORE_Accounts" parameterName="p63"/>
   <PPCell value="Malaysia" predictorName="IGNORE_Accounts" parameterName="p64"/>
   <PPCell value="Mexico" predictorName="IGNORE_Accounts" parameterName="p65"/>
   <PPCell value="NewZealand" predictorName="IGNORE_Accounts" parameterName="p66"/>
   <PPCell value="Nicaragua" predictorName="IGNORE_Accounts" parameterName="p67"/>
   <PPCell value="Philippines" predictorName="IGNORE_Accounts" parameterName="p68"/>
   <PPCell value="Poland" predictorName="IGNORE_Accounts" parameterName="p69"/>
   <PPCell value="Portugal" predictorName="IGNORE_Accounts" parameterName="p70"/>
   <PPCell value="Scotland" predictorName="IGNORE_Accounts" parameterName="p71"/>
   <PPCell value="Singapore" predictorName="IGNORE_Accounts" parameterName="p72"/>
   <PPCell value="Taiwan" predictorName="IGNORE_Accounts" parameterName="p73"/>
   <PPCell value="UnitedStates" predictorName="IGNORE_Accounts" parameterName="p74"/>
   <PPCell value="Vietnam" predictorName="IGNORE_Accounts" parameterName="p75"/>
   <PPCell value="Yugoslavia" predictorName="IGNORE_Accounts" parameterName="p76"/>
   <PPCell value="1" predictorName="RISK_Adjustment" parameterName="p77"/>
  </PPMatrix>
  <ParamMatrix>
   <PCell parameterName="p0" df="1" beta="-12.0199804097759"/>
   <PCell parameterName="p1" df="1" beta="3.62329433275629e-08"/>
   <PCell parameterName="p2" df="1" beta="0.0380676635766761"/>
   <PCell parameterName="p3" df="1" beta="0.756901134378277"/>
   <PCell parameterName="p4" df="1" beta="0.375762595900717"/>
   <PCell parameterName="p5" df="1" beta="0.50309824514625"/>
   <PCell parameterName="p6" df="1" beta="0.470897191210805"/>
   <PCell parameterName="p7" df="1" beta="-2.10284542055317"/>
   <PCell parameterName="p8" df="1" beta="-15.5455611068614"/>
   <PCell parameterName="p9" df="1" beta="0.0997435072074993"/>
   <PCell parameterName="p10" df="1" beta="-1.22905386951777"/>
   <PCell parameterName="p11" df="1" beta="-6.76667195830752"/>
   <PCell parameterName="p12" df="1" beta="-1.01297363710822"/>
   <PCell parameterName="p13" df="1" beta="-0.340407862763258"/>
   <PCell parameterName="p14" df="1" beta="-15.8841924243017"/>
   <PCell parameterName="p15" df="1" beta="3.18173392385448"/>
   <PCell parameterName="p16" df="1" beta="-0.569821531302005"/>
   <PCell parameterName="p17" df="1" beta="-3.3033217141108"/>
   <PCell parameterName="p18" df="1" beta="-0.430994461878221"/>
   <PCell parameterName="p19" df="1" beta="-17.0972305473487"/>
   <PCell parameterName="p20" df="1" beta="-15.929168040244"/>
   <PCell parameterName="p21" df="1" beta="-17.7483980280451"/>
   <PCell parameterName="p22" df="1" beta="-16.1514804898207"/>
   <PCell parameterName="p23" df="1" beta="-10.3889654044557"/>
   <PCell parameterName="p24" df="1" beta="-0.690592385956069"/>
   <PCell parameterName="p25" df="1" beta="2.53630505787246"/>
   <PCell parameterName="p26" df="1" beta="1.41541804527502"/>
   <PCell parameterName="p27" df="1" beta="1.49491086815453"/>
   <PCell parameterName="p28" df="1" beta="0.174099244312997"/>
   <PCell parameterName="p29" df="1" beta="1.01865424623088"/>
   <PCell parameterName="p30" df="1" beta="1.73213477081248"/>
   <PCell parameterName="p31" df="1" beta="-1.80877402327631"/>
   <PCell parameterName="p32" df="1" beta="-12.4454410582178"/>
   <PCell parameterName="p33" df="1" beta="-0.417346874910574"/>
   <PCell parameterName="p34" df="1" beta="-12.475145396564"/>
   <PCell parameterName="p35" df="1" beta="1.45214141089004"/>
   <PCell parameterName="p36" df="1" beta="1.64050123149924"/>
   <PCell parameterName="p37" df="1" beta="0.134775653612853"/>
   <PCell parameterName="p38" df="1" beta="0.948585540443075"/>
   <PCell parameterName="p39" df="1" beta="0.144171863863442"/>
   <PCell parameterName="p40" df="1" beta="0.789971116324262"/>
   <PCell parameterName="p41" df="1" beta="0.842781801750256"/>
   <PCell parameterName="p42" df="1" beta="-9.63129083571953e-07"/>
   <PCell parameterName="p43" df="1" beta="-0.52313575926474"/>
   <PCell parameterName="p44" df="1" beta="0.00125611277933667"/>
   <PCell parameterName="p45" df="1" beta="0.0109489183058056"/>
   <PCell parameterName="p46" df="1" beta="-2.86790934232277"/>
   <PCell parameterName="p47" df="1" beta="-10.4586048958891"/>
   <PCell parameterName="p48" df="1" beta="-11.8078344468555"/>
   <PCell parameterName="p49" df="1" beta="-8.15369086351991"/>
   <PCell parameterName="p50" df="1" beta="-15.1509749621394"/>
   <PCell parameterName="p51" df="1" beta="-12.6588234930477"/>
   <PCell parameterName="p52" df="1" beta="7.44342418994783"/>
   <PCell parameterName="p53" df="1" beta="-8.80415604321149"/>
   <PCell parameterName="p54" df="1" beta="-0.909551298634999"/>
   <PCell parameterName="p55" df="1" beta="3.21333791872318"/>
   <PCell parameterName="p56" df="1" beta="-9.7080063371067"/>
   <PCell parameterName="p57" df="1" beta="-9.94640566996892"/>
   <PCell parameterName="p58" df="1" beta="-7.34469543656762"/>
   <PCell parameterName="p59" df="1" beta="-10.1375079207868"/>
   <PCell parameterName="p60" df="1" beta="4.03786237290128"/>
   <PCell parameterName="p61" df="1" beta="-9.95289672035589"/>
   <PCell parameterName="p62" df="1" beta="-11.2800534550324"/>
   <PCell parameterName="p63" df="1" beta="-8.5259456003378"/>
   <PCell parameterName="p64" df="1" beta="-11.1183864482514"/>
   <PCell parameterName="p65" df="1" beta="-3.17790587178398"/>
   <PCell parameterName="p66" df="1" beta="7.62183148791729"/>
   <PCell parameterName="p67" df="1" beta="-9.29840834254978"/>
   <PCell parameterName="p68" df="1" beta="5.87739404847556"/>
   <PCell parameterName="p69" df="1" beta="-11.0988711939497"/>
   <PCell parameterName="p70" df="1" beta="-5.78171399043641"/>
   <PCell parameterName="p71" df="1" beta="-11.009822161619"/>
   <PCell parameterName="p72" df="1" beta="-7.98831399897464"/>
   <PCell parameterName="p73" df="1" beta="-14.2857685874083"/>
   <PCell parameterName="p74" df="1" beta="4.89065048867447"/>
   <PCell parameterName="p75" df="1" beta="-2.21686920486685"/>
   <PCell parameterName="p76" df="1" beta="-10.0494769160447"/>
   <PCell parameterName="p77" df="1" beta="0.0044395180546043"/>
  </ParamMatrix>
 </GeneralRegressionModel>
</PMML>

The coefficients held by the R model are below:

Coefficients:
                               Estimate Std. Error z value Pr(>|z|)
(Intercept)                  -5.779e+00  1.108e+04  -0.001 0.999584
ID                            3.922e-08  6.187e-08   0.634 0.526164
Age                           2.705e-02  1.388e-02   1.949 0.051314 .
EmploymentPrivate             1.087e+00  6.774e-01   1.605 0.108550
EmploymentPSFederal           1.155e+00  1.050e+00   1.101 0.271105
EmploymentPSLocal             1.262e+00  8.811e-01   1.432 0.152036
EmploymentPSState             8.151e-01  1.011e+00   0.806 0.420221
EmploymentSelfEmp             2.217e-01  9.859e-01   0.225 0.822066
EmploymentVolunteer          -1.667e+01  1.075e+04  -0.002 0.998764
EducationBachelor             4.297e-01  7.768e-01   0.553 0.580154
EducationCollege             -1.234e+00  8.393e-01  -1.470 0.141592
EducationDoctorate            1.604e+00  1.697e+00   0.945 0.344690
EducationHSgrad              -5.332e-01  7.613e-01  -0.700 0.483661
EducationMaster              -3.705e-01  1.117e+00  -0.332 0.740081
EducationPreschool           -1.306e+01  3.588e+03  -0.004 0.997096
EducationProfessional         1.600e+00  1.251e+00   1.279 0.200733
EducationVocational          -3.887e-01  1.023e+00  -0.380 0.703998
EducationYr10                -2.121e+00  1.897e+00  -1.118 0.263626
EducationYr11                -3.222e-01  1.294e+00  -0.249 0.803322
EducationYr12                -4.786e+00  1.235e+01  -0.388 0.698298
EducationYr1t4               -1.588e+01  4.174e+03  -0.004 0.996965
EducationYr5t6               -1.779e+01  2.356e+03  -0.008 0.993976
EducationYr7t8               -1.659e+01  1.951e+03  -0.009 0.993214
EducationYr9                 -1.672e+01  2.680e+03  -0.006 0.995022
MaritalDivorced              -6.700e-01  8.277e-01  -0.809 0.418238
MaritalMarried                2.269e+00  5.238e-01   4.332 1.48e-05 ***
MaritalMarried-spouse-absent  1.299e+00  1.385e+00   0.938 0.348362
MaritalUnmarried              1.570e+00  9.025e-01   1.740 0.081926 .
MaritalWidowed                7.018e-01  1.209e+00   0.581 0.561438
OccupationClerical            1.060e+00  1.224e+00   0.866 0.386731
OccupationExecutive           1.851e+00  1.138e+00   1.627 0.103649
OccupationFarming             1.189e-01  1.530e+00   0.078 0.938065
OccupationHome               -1.296e+01  6.601e+03  -0.002 0.998434
OccupationMachinist           2.869e-01  1.299e+00   0.221 0.825190
OccupationMilitary           -1.318e+01  1.075e+04  -0.001 0.999022
OccupationProfessional        1.589e+00  1.187e+00   1.339 0.180656
OccupationProtective          1.099e+00  1.622e+00   0.678 0.497935
OccupationRepair              1.641e-01  1.204e+00   0.136 0.891597
OccupationSales               7.170e-01  1.205e+00   0.595 0.551929
OccupationService            -5.600e-02  1.348e+00  -0.042 0.966858
OccupationSupport             8.431e-01  1.348e+00   0.626 0.531515
OccupationTransport           3.488e-01  1.242e+00   0.281 0.778911
Income                        1.442e-06  3.112e-06   0.463 0.643050
GenderMale                    1.510e-01  5.361e-01   0.282 0.778254
Deductions                    1.476e-03  4.109e-04   3.593 0.000327 ***
Hours                         2.116e-02  1.433e-02   1.476 0.139922
IGNORE_AccountsChina         -2.048e+01  1.867e+04  -0.001 0.999125
IGNORE_AccountsColumbia      -2.085e+01  1.294e+04  -0.002 0.998715
IGNORE_AccountsCuba          -1.942e+01  1.544e+04  -0.001 0.998997
IGNORE_AccountsEcuador       -1.701e+01  1.544e+04  -0.001 0.999121
IGNORE_AccountsEngland       -1.418e+01  1.109e+04  -0.001 0.998980
IGNORE_AccountsGermany       -4.952e-02  1.108e+04   0.000 0.999996
IGNORE_AccountsGreece        -1.645e+01  1.544e+04  -0.001 0.999150
IGNORE_AccountsGuatemala     -2.767e+00  1.459e+04   0.000 0.999849
IGNORE_AccountsHong          -3.325e+00  1.557e+04   0.000 0.999830
IGNORE_AccountsIndia         -1.506e+01  1.110e+04  -0.001 0.998918
IGNORE_AccountsIndonesia     -1.692e+01  1.225e+04  -0.001 0.998897
IGNORE_AccountsIreland       -3.329e+00  1.108e+04   0.000 0.999760
IGNORE_AccountsItaly         -1.663e+01  1.304e+04  -0.001 0.998982
IGNORE_AccountsJamaica       -2.174e+01  2.163e+04  -0.001 0.999198
IGNORE_AccountsJapan         -1.577e+01  1.544e+04  -0.001 0.999185
IGNORE_AccountsMalaysia      -1.903e+01  1.206e+04  -0.002 0.998741
IGNORE_AccountsMexico        -9.440e+00  1.108e+04  -0.001 0.999320
IGNORE_AccountsNewZealand     1.773e-01  1.562e+04   0.000 0.999991
IGNORE_AccountsNicaragua     -1.786e+01  1.200e+04  -0.001 0.998812
IGNORE_AccountsPhilippines   -9.526e-01  1.108e+04   0.000 0.999931
IGNORE_AccountsPoland        -1.878e+01  1.544e+04  -0.001 0.999030
IGNORE_AccountsPortugal      -1.432e+00  1.557e+04   0.000 0.999927
IGNORE_AccountsSingapore     -1.778e+01  1.225e+04  -0.001 0.998842
IGNORE_AccountsTaiwan        -1.922e+01  1.259e+04  -0.002 0.998782
IGNORE_AccountsUnitedStates  -2.519e+00  1.108e+04   0.000 0.999819
IGNORE_AccountsVietnam       -1.984e+01  1.250e+04  -0.002 0.998734
IGNORE_AccountsYugoslavia    -1.774e+01  1.544e+04  -0.001 0.999083
RISK_Adjustment               3.802e-03  6.819e-04   5.575 2.47e-08 ***

(The script in R that generated this GLM model and the corresponding PMML is below:

# Fit a binomial GLM on a random training subset of the Rattle "audit" data
# and export the fitted model to PMML.
library(pmml)
# Fix the RNG seed: sample() below draws a random training subset, so without
# a seed every run fits a different model and coefficients printed from one
# run will not match a PMML file exported from another run.
set.seed(42)
auditDF <- read.csv("http://rattle.togaware.com/audit.csv")
auditDF <- na.omit(auditDF)          # drop rows with any missing value
target <- auditDF$TARGET_Adjusted
N <- length(target); M <- N - 500    # hold out 500 rows for testing
i.train <- sample(N, M)
audit.train <- auditDF[i.train,]
audit.test <- auditDF[-i.train,]
# NOTE(review): writing the response as audit.train$TARGET_Adjusted makes the
# exported PMML field name carry the "audit.train$" prefix; a plain
# TARGET_Adjusted ~ . would avoid that.
glm.model <- glm(audit.train$TARGET_Adjusted ~ ., data = audit.train, family = "binomial")
# Pass the training frame that actually exists (the original referenced an
# undefined object, trainDF).
glm.pmml <- pmml(glm.model, name = "glm model", data = audit.train)
xmlFile <- file.path(getwd(), "audit-glm.xml")
saveXML(glm.pmml, xmlFile)

Source: http://blog.revolutionanalytics.com/2011/03/predicting-r-models-with-pmml.html)

hlin117
  • 20,764
  • 31
  • 72
  • 93

1 Answer

5

I suppose that depends on exactly what you want to do with the model once you have it back in R. At one point I helped someone create a pseudo-glm object that knew the coefficients for the variables and could be used with predict(). Many other functions required the full dataset to be present.

That may be of interest to you. The function is called makeglm.R. You will want to copy and paste just that function into your R session. But it will be necessary to transform your data first. Here are some helper functions to do just that.

# Build a zero-row data.frame stub from the PMML DataDictionary.
#
# Each <DataField> becomes one column: "categorical" fields become empty
# factors whose levels are the field's <Value value="..."/> entries, and
# "continuous" fields become empty numeric vectors.
#
# Args:
#   xml: parsed PMML document (as returned by XML::xmlParse).
#   ns:  namespace specification handed to the XPath functions; the XPath
#        expressions use the prefix "d" for the PMML namespace.
# Returns:
#   A data.frame with zero rows and one column per supported DataField.
getdata <- function(xml, ns=attr(xml,"ns")) {
    fields <- getNodeSet(xml, "//d:DataField", namespaces = ns)
    cols <- lapply(fields, function(field) {
        # default = "" avoids a zero-length `if` condition when optype is absent
        optype <- xmlGetAttr(field, "optype", default = "")
        if (optype == "categorical") {
            # The child <Value> elements live in the same (default) namespace
            # as <DataField>, so they need the prefix too; an unprefixed
            # "Value" matches nothing and would yield a factor with no levels.
            lv <- xpathSApply(field, "d:Value/@value", namespaces = ns)
            factor(character(0), levels = as.character(unlist(lv, use.names = FALSE)))
        } else if (optype == "continuous") {
            numeric(0)
        } else {
            # Unsupported optypes were previously dropped silently; keep
            # dropping them, but tell the caller which field is missing.
            warning("DataField '", xmlGetAttr(field, "name", default = "?"),
                    "' has unsupported optype '", optype, "' and was skipped")
            NULL
        }
    })
    names(cols) <- vapply(fields, function(field) xmlGetAttr(field, "name", default = ""),
                          character(1))
    as.data.frame(cols)
}

# Reconstruct the model formula from the PMML MiningSchema.
#
# The response is every MiningField marked as predicted (or "target", the
# PMML 4.2 spelling); the right-hand side is every active MiningField,
# including fields that omit usageType, whose PMML default is "active".
#
# Args:
#   xml: parsed PMML document (as returned by XML::xmlParse).
#   ns:  namespace specification handed to the XPath functions; the XPath
#        expressions use the prefix "d" for the PMML namespace.
# Returns:
#   A formula of the form  response ~ field1 + field2 + ...
# Raises:
#   An error if no predicted/target field is found, which usually means the
#   namespace mapping is wrong and the XPath matched nothing.
getformula <- function(xml, ns=attr(xml,"ns")) {
    fieldnames <- function(path) {
        as.character(unlist(xpathSApply(xml, path, namespaces = ns), use.names = FALSE))
    }
    resp <- fieldnames(
        "//d:MiningField[@usageType=\"predicted\" or @usageType=\"target\"]/@name")
    covar <- fieldnames(
        "//d:MiningField[not(@usageType) or @usageType=\"active\"]/@name")
    if (length(resp) == 0L) {
        # Without this guard the pasted string is " ~ " and as.formula()
        # fails with an uninformative parse error.
        stop("no predicted MiningField found; check that 'ns' maps the PMML ",
             "default namespace to the prefix 'd'")
    }
    if (length(covar) == 0L) {
        covar <- "1"    # intercept-only model
    }
    fmc <- paste(paste(resp, collapse=" + "), "~", paste(covar, collapse=" + "))
    as.formula(fmc)
}

# Collect the coefficient estimates from a PMML GeneralRegressionModel.
#
# Betas are read from <PCell> and attached to predictors through the
# <PPCell> rows of the PPMatrix, which map a parameter name to a predictor
# and (for factors) a level.
#
# Args:
#   xml: parsed PMML document (as returned by XML::xmlParse).
#   ns:  namespace specification handed to the XPath functions; the XPath
#        expressions use the prefix "d" for the PMML namespace.
# Returns:
#   A list with, in order: the intercept as an unnamed leading element (only
#   if a Parameter labelled "(Intercept)" exists); one element per
#   FactorList predictor, a numeric vector named by factor level; and one
#   element per CovariateList predictor, an unnamed numeric vector.
getestimates <- function(xml, ns=attr(xml,"ns")) {
    # Attribute values for an XPath as a plain character vector
    # (character(0) when nothing matches).
    attrs <- function(path) {
        as.character(unlist(xpathSApply(xml, path, namespaces = ns), use.names = FALSE))
    }
    betas <- setNames(as.numeric(attrs("//d:PCell/@beta")),
        attrs("//d:PCell/@parameterName"))
    numericparam <- attrs("//d:CovariateList/d:Predictor/@name")
    factorparam <- attrs("//d:FactorList/d:Predictor/@name")
    # One row per PPCell, built in a single call rather than rbind-ing one
    # data.frame per cell; this also yields an empty frame (not NULL) when
    # the model has no PPCells.
    values <- data.frame(
        p = attrs("//d:PPCell/@parameterName"),
        val = attrs("//d:PPCell/@value"),
        pred = attrs("//d:PPCell/@predictorName"),
        stringsAsFactors = FALSE)
    lf<-Map(function(x) {
        vv <- values[values$pred==x, ]
        setNames(betas[vv$p], vv$val)
    }, factorparam)
    ln<-Map(function(x) {
        vv <- values[values$pred==x, ]
        unname(betas[vv$p])
    }, numericparam)
    estimates<-c(lf,ln)
    intercept<-getNodeSet(xml,"//d:Parameter[@label=\"(Intercept)\"]", namespaces = ns)
    if(length(intercept)) {
        # The intercept goes first and stays unnamed.
        estimates<-c(unname(betas[xmlGetAttr(intercept[[1]],"name")]), estimates)
    }
    estimates
}

I'm not at all familiar with the PMML format, but I put these together based on your sample document. I tried extracting all the correct information needed to build a formula, data.frame stub, and parameter estimates from the data in order to use the makeglm() function. Once you've loaded that function and these helper functions, you can run

library(XML)
mypmml <- xmlParse("pmml.xml")
# The helpers read their namespace spec from this attribute and use "d" as
# the XPath prefix.
attr(mypmml, "ns") <- "d"

# Pull the three ingredients out of the PMML document.
stub_data <- getdata(mypmml)
model_formula <- getformula(mypmml)
coef_args <- getestimates(mypmml)
# Fixed arguments first, then the extracted estimates appended after them.
glm_args <- c(list(model_formula, family="binomial", data=stub_data), coef_args)
do.call(makeglm, glm_args)

to actually run the function. This will return a glm object that you can use with predict(). I did have to change one thing in your sample data. For some reason you had the table name as part of the formula in the glm model

glm(audit.train$TARGET_Adjusted ~ .,  data = audit.train, ...)

rather than

glm(TARGET_Adjusted ~ .,  data = audit.train, ...)

which can cause problems. So I just took out the "audit.train$" from the xml file before I read it in. It may be possible to add more error checking, but I wasn't even sure if this is ultimately what you were after.

MrFlick
  • 195,160
  • 17
  • 277
  • 295
  • Okay, it's possible that we might be running different versions of R; the scripts above aren't working for me. I'm running R 3.1.0, and my XML package is 3.98-1.1. I run: > library(XML) > mypmml <- xmlParse("audit-glm.xml") > dd <- getdata(mypmml) But when I print dd, it says: data frame with 0 columns and 0 rows I'm assuming this isn't what your output is. When I run getformula(), I actually get a parse error within the function. I'll triple check to make sure that the script was copy-pasted correctly. – hlin117 Jun 24 '14 at 15:44
  • (Apologies for not adding in the error message before. While I was trying to post my previous comment, I hit instead of + . I also apologize for the formatting of my above comment. I didn't know that multi-lined comments were not permitted. But the gist is, getdata() and getformula() both don't seem to be parsing my XML correctly.) – hlin117 Jun 24 '14 at 15:52
  • @hlin117 Sorry about that. I forgot I took out the namespaces for my testing because they mess with the xpath expressions. I've updated the code to be able to deal with the default namespace. Go ahead and try again. – MrFlick Jun 24 '14 at 17:39
  • So I notice that you're not changing any of the input numbers from the PMML. Is that intentional? The numbers from the PMML's ParamMatrix are different than the numbers stored in R's glm model. – hlin117 Jun 24 '14 at 17:50
  • @hlin117 Maybe i'm confused as to what you mean by converting the PMML model back into an R model. What exactly do you want the output to be? I interpreted that as pretending you never ran the model in R, and trying to create a glm object from the xml file alone. Again, you never really said what you plan to do with it once you get it into R. – MrFlick Jun 24 '14 at 17:54
  • I created the model in R and converted it to PMML for testing purposes; our actual assignment requires us to generate an R model from the PMML alone. (Your interpretation of my objective is correct.) What I'm currently looking, though, is a math conversion between the numbers stored in the PMML's ParamMatrix and the numbers stored in the R model. For instance, the value of Intercept in the PMML is -12.0199804097759, while the value of Intercept in the R model is -5.779e+00. Do you know of the conversion? – hlin117 Jun 24 '14 at 19:44
  • @hlin117 Oh. You expect the parameter values to be exactly the same between the two. I assumed you were running them on different data. I can't really help you there because I have no idea what PMML is doing when it runs the regression. Perhaps it's centering/scaling the data first. – MrFlick Jun 24 '14 at 20:13
  • Thank you for your help though. I really appreciate it. I'll fiddle around with it a bit more. – hlin117 Jun 24 '14 at 20:41
  • I'm reading this conversation and my eyes glaze over. Can you please reduce the answer and add specifics. What's the final answer to the question ***"Can you convert MLR coefficients stored in PMML back to an R model?"*** a) Yes b) No c) Yes, with restrictions (which?) – smci Aug 22 '14 at 04:30
  • @smci This answer does what the question asks with the given sample input. The discussion in the comments is not relevant to the original problem. I am not familiar with the PMML specification so I cannot say if it would work for all such input files. It also depends greatly on what you plan to do with that object in R. If your situation is different than this one, I encourage you to start your own question. – MrFlick Aug 22 '14 at 04:35
  • You did a great job with the code (thanks), but it's impossible to figure out what the limitations of using PMML for coefficients are, as per user's question. This being SO, if we attempted to ask that again more explicitly, it would get closed as 'subjective' or some nonsense. – smci Aug 22 '14 at 04:44
  • @smci I can't answer "what are the limitations" because that's too broad. I have no idea what you might try to do with them and the only way I'll know for sure if they work is to test. If there is a specific task you wish to perform with the coefficients, then ask about that directly. – MrFlick Aug 22 '14 at 04:57