Skip to: Site menu | Main content

Groovy 

      Download | Documentation | Developers | Community

An agile dynamic language for the Java Platform

Reading XML with Groovy and DOM4J Add comment to Wiki View in Wiki Edit Wiki page Printable Version

This example assumes the following class is already on your CLASSPATH:

XmlExamples.groovy
class XmlExamples {
  static def CAR_RECORDS = '''
    <records>
      <car name='HSV Maloo' make='Holden' year='2006'>
        <country>Australia</country>
        <record type='speed'>Production Pickup Truck with speed of 271kph</record>
      </car>
      <car name='P50' make='Peel' year='1962'>
        <country>Isle of Man</country>
        <record type='size'>Smallest Street-Legal Car at 99cm wide and 59 kg in weight</record>
      </car>
      <car name='Royale' make='Bugatti' year='1931'>
        <country>France</country>
        <record type='price'>Most Valuable Car at $15 million</record>
      </car>
    </records>
  '''
}

Here is an example of using DOM4J with Groovy to process an existing XML file:

// require(groupId:'dom4j', artifactId:'dom4j', version:'1.6.1')
import org.dom4j.io.SAXReader

def reader   = new StringReader(XmlExamples.CAR_RECORDS)
def records  = new SAXReader().read(reader).rootElement
def messages = []

records.elementIterator().each{ car ->
    def make = car.attributeValue('make')
    def country = car.elementText('country')
    def type = car.element('record').attributeValue('type')
    messages << make + ' of ' + country + ' has a ' + type + ' record'
}

assert messages == [
    'Holden of Australia has a speed record',
    'Peel of Isle of Man has a size record',
    'Bugatti of France has a price record'
]

DOM4J also supports a streaming mode which lets you manually prune parts of the DOM tree during processing to facilitate processing large documents. Here is an example which uses DOM4J in that mode:

// require(groupId:'dom4j', artifactId:'dom4j', version:'1.6.1')
import org.dom4j.io.SAXReader
import org.dom4j.*

class PruningCarHandler implements ElementHandler {
    def messages = []
    public void onStart(ElementPath path) { }
    public void onEnd(ElementPath path) {
        def car = path.current
        def make = car.attributeValue('make')
        def country = car.elementText('country')
        def type = car.element('record').attributeValue('type')
        messages << make + ' of ' + country + ' has a ' + type + ' record'
        car.detach() // prune the tree
    }
}

def xml     = new StringReader(XmlExamples.CAR_RECORDS)
def reader  = new SAXReader()
def handler = new PruningCarHandler()

reader.addHandler('/records/car', handler)
reader.read(xml)

assert handler.messages == [
    'Holden of Australia has a speed record',
    'Peel of Isle of Man has a size record',
    'Bugatti of France has a price record'
]

In the above example, we actually did the processing as part of the ElementHandler. Instead, we could have used a hybrid approach which just pruned away parts of the tree we weren't interested in and then performed tree-walking/navigation style coding after that.