1

I'm struggling to read an XML file:

<?xml version="1.0" encoding="utf-8"?>
        <tmx version="1.4">
          <header creationtool="B&amp;R Automation Studio" creationtoolversion="4.2" datatype="unknown" segtype="sentence" adminlang="en" srclang="en" o-tmf="TMX">
            <note>Change the namespace to define where this text module should be located within the logical structure of your texts</note>
            <prop type="x-BR-TS:Namespace">ZagVision</prop>
          </header>
          <body>
            <tu tuid="BcrWenglor.Init">
              <tuv xml:lang="en">
                <seg>Not initialized. </seg>
              </tuv>
              <tuv xml:lang="de">
                <seg>Nicht initialisiert. </seg>
              </tuv>
            </tu>
            <tu tuid="BcrUsbHid.WarnScanner">
              <tuv xml:lang="fr">
                <seg>BcrUsbHid : Avertissement général Scanner, scanner non reconnu ou interface défectueuse. </seg>
              </tuv>
              <tuv xml:lang="en">
                <seg>BcrUsbHid: General warning Scanner, scanner not recognized or interface faulty. </seg>
              </tuv>
              <tuv xml:lang="de">
                <seg>BcrUsbHid: Allgemeine Warnung Scanner, Scanner wurde nicht erkannt oder Schnittstelle fehlerhaft. </seg>
              </tuv>
            </tu>
      </body>
    </tmx>

I tried to access a single node via XPath using "SelectSingleNode", as described here: XPath Syntax

$Path = "C:\Temp\ZagVision.tmx"

# Build one object per <tu> element of every *.tmx file found under $Path,
# with the fr/en/de segment texts as separate properties.
$result = Get-ChildItem -Path $Path -Filter '*.tmx'  -Recurse | 
    ForEach-Object {
        # Inside ForEach-Object, $_ is the current file item.
        [xml]$xml = Get-Content -Path $_.FullName
        foreach ($node in $xml.tmx.body.tu) {
            [PSCustomObject]@{
              'FileName'          = $_.BaseName
              # <tu> -> <body> -> <tmx>, then the text of <header>/<prop>
              'NameSpace'         = $node.ParentNode.ParentNode.header.prop.'#text'
              'LastChangeFile'    = $_.LastWriteTime
              'TextId'            = $node.tuid
              
              # This lookup comes back empty: the attribute in the file is the
              # namespace-qualified xml:lang, so the unprefixed @lang matches nothing.
              'fr'         = $node.SelectSingleNode("//tuv[@lang='fr']") 
              # Positional lookups: only correct when the <tu> has exactly three
              # <tuv> children in fr/en/de order. For a <tu> with only en and de,
              # ChildNodes[1] is the German entry.
              'en'         = $node.ChildNodes[1].seg
              'de'         = $node.ChildNodes[2].seg
          
            }                
        }
 }
 $result

Output:

FileName       : ZagVision
NameSpace      : ZagVision
LastChangeFile : 16.07.2020 08:41:38
TextId         : BcrUsbHid.WarnScanner
fr             : 
en             : BcrUsbHid: General warning Scanner, scanner not recognized or 
                 interface faulty. 
de             : BcrUsbHid: Allgemeine Warnung Scanner, Scanner wurde nicht 
                 erkannt oder Schnittstelle fehlerhaft. 

As I just started with PowerShell, it is probably something very basic I'm doing wrong here... Thanks in advance.

Txitxarro
  • 25
  • 5
  • I can't tell you the "right" way to do it necessarily but if you replace it with `'fr' = $($node.tuv | where {$_.lang -eq "fr"}).seg` you should get the data you are looking for. – Jeramy Aug 05 '20 at 19:55

2 Answers

0

You need to register the xml namespace in order to use the xml:lang attribute with SelectSingleNode:


# Same pipeline as in the question, plus an XmlNamespaceManager so that the
# XPath expression can address the namespace-qualified xml:lang attribute.
$result = Get-ChildItem -Path $Path -Filter '*.tmx'  -Recurse | 
    ForEach-Object {
        [xml]$xml = Get-Content -Path $_.FullName
        
        # Map the "xml" prefix to its namespace URI for use in XPath queries.
        $ns = New-Object System.Xml.XmlNamespaceManager($xml.NameTable) # added this line
        $ns.AddNamespace("xml", "http://www.w3.org/XML/1998/namespace") # added this line
        
        foreach ($node in $xml.tmx.body.tu) {
            [PSCustomObject]@{
              'FileName'          = $_.BaseName
              'NameSpace'         = $node.ParentNode.ParentNode.header.prop.'#text'
              'LastChangeFile'    = $_.LastWriteTime
              'TextId'            = $node.tuid
              
              # NOTE: a path starting with // is evaluated from the document root,
              # not from $node, so this yields the first French <tuv> of the whole
              # file for every <tu>. A relative path ("tuv[...]") restricts the
              # search to the children of the current <tu>.
              'fr'         = $node.SelectSingleNode("//tuv[@xml:lang='fr']", $ns).seg # modified this line
              # Positional lookups: only correct when the <tu> has three <tuv>
              # children in fr/en/de order.
              'en'         = $node.ChildNodes[1].seg
              'de'         = $node.ChildNodes[2].seg
          
            }                
        }
 }
 $result

I figured this out from the error thrown by trying to do this: "//tuv[@xml:lang='fr']"

Uuuuuumm
  • 608
  • 5
  • 21
0

@Uuuuuumm Yes, thank you! Registering the namespace and changing the path expression from

$node.SelectSingleNode("//tuv[@xml:lang='fr']", $ns).seg 

to

$node.SelectSingleNode("tuv[@xml:lang='fr']", $ns).seg 

did it.

Here is the working code:

#$Path = "C:\Temp\ZagVision.tmx"

# Collect one row per <tu> element from every *.tmx file found under $Path.
# Each row carries the file name, the text-module namespace from the header,
# the file's last write time, the text id, and the fr/en/de segment texts.
$result = Get-ChildItem -Path $Path -Filter '*.tmx' -Recurse |
    ForEach-Object {
        $file = $_

        # Load() lets the XML parser pick the encoding from the BOM / XML
        # declaration. [xml](Get-Content ...) decodes the text with Get-Content's
        # default encoding first, which can garble non-ASCII characters (such as
        # the French and German segments) in Windows PowerShell.
        $xml = New-Object System.Xml.XmlDocument
        $xml.Load($file.FullName)

        # xml:lang is namespace-qualified, so the "xml" prefix must be registered
        # before it can be used in an XPath expression.
        $ns = New-Object System.Xml.XmlNamespaceManager($xml.NameTable)
        $ns.AddNamespace("xml", "http://www.w3.org/XML/1998/namespace")

        # The header belongs to the file, not to a single <tu>: read it once.
        $textNamespace = $xml.tmx.header.prop.'#text'

        foreach ($node in $xml.tmx.body.tu) {
            [PSCustomObject]@{
              'FileName'          = $file.BaseName
              'NameSpace'         = $textNamespace
              'LastChangeFile'    = $file.LastWriteTime
              'TextId'            = $node.tuid

              # Relative paths: search only the <tuv> children of this <tu>.
              # A missing language yields an empty value.
              'fr'         = $node.SelectSingleNode("tuv[@xml:lang='fr']", $ns).seg
              'en'         = $node.SelectSingleNode("tuv[@xml:lang='en']", $ns).seg
              'de'         = $node.SelectSingleNode("tuv[@xml:lang='de']", $ns).seg
            }
        }
    }
#$result
$result | Export-Csv -NoTypeInformation "C:\Temp\Messages.csv" -Encoding Default  -Delimiter ';' #-Append
Invoke-Item "C:\Temp\Messages.csv"
Txitxarro
  • 25
  • 5