I have an XML file in S3 that contains the schema for my table:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<DATA CHARSET="UTF8" DELIMITER="\t">
<COLUMNS>
<COLUMN DATA_PRECISION="10" DATA_SCALE="0" DATA_TYPE="NUMBER" ID="APPLICATION_ID" />
<COLUMN DATA_LENGTH="40" DATA_TYPE="VARCHAR2" ID="DESCRIPTIVE_FLEXFIELD_NAME"/>
<COLUMN DATA_LENGTH="30" DATA_TYPE="VARCHAR2" ID="LANGUAGE"/>
<COLUMN DATA_LENGTH="60" DATA_TYPE="VARCHAR2" ID="TITLE"/>
<COLUMN DATA_TYPE="DATE" DATE_MASK="YYYY/MM/DD.HH24:MI:SS" ID="LAST_UPDATE_DATE"/>
<COLUMN DATA_PRECISION="15" DATA_SCALE="0" DATA_TYPE="NUMBER" ID="LAST_UPDATED_BY" />
<COLUMN DATA_TYPE="DATE" DATE_MASK="YYYY/MM/DD.HH24:MI:SS" ID="CREATION_DATE"/>
<COLUMN DATA_PRECISION="15" DATA_SCALE="0" DATA_TYPE="NUMBER" ID="CREATED_BY" />
<COLUMN DATA_PRECISION="10" DATA_SCALE="0" DATA_TYPE="NUMBER" ID="LAST_UPDATE_LOGIN" />
</COLUMNS>
</DATA>
I need to extract the ID and DATA_TYPE attribute values from every COLUMN element.
I read the file using sc.textFile("s3://XXX/schemas/XXXX.xml"),
but I have not been able to process its contents.
Can anyone help me with this?