Skip to: Site menu | Main content

Groovy 

      Download | Documentation | Developers | Community

An agile dynamic language for the Java Platform

Parsing Groovy Doc Online Add comment to Wiki View in Wiki Edit Wiki page Printable Version

Contributing to online documentation website http://gotapi.com, we have to generate a xml document describing groovy objects and methods hierarchy and links to javadoc.

This is done with the following groovy script and the excellent HTML parser NekoHTML (see http://people.apache.org/~andyc/neko/doc/index.html )

package free.cafekiwi.gotapi;
import groovy.xml.MarkupBuilder
/**
* @author Marc DEXET
**/
class XmlGotApiGenerator {


   	static void main(args) {
   	  def writer = new FileWriter( new File('gotapi.xml'))
   	  def xml = new MarkupBuilder(writer)
   	  def pages = []

	  def apidocRoot = 'http://groovy.codehaus.org/api'
	  def parser = new JavaDocParser();

	  def methodNameFilter  = {
			anchor ->
	  		def name = anchor.'@name'
	  		name != null && name.indexOf('(') >=0
	  }


	  def packageFilter 	= {
			  anchor ->
			  def url = anchor.'@href'
			  url != null  && url.endsWith( "/package-frame.html")
	  }

	  def classFilter =  {
			  anchor ->
			  def url = anchor.'@href'
			  url != null && ! url.contains ('/')
	  }

	  def parseAndGet = {
			  url, filter ->
			  parser.parse( url ).depthFirst().A.findAll( filter )
	  }

	 parseAndGet("$apidocRoot/overview-frame.html", packageFilter).each {

		  def packBaseURL = it.'@href' - "/package-frame.html"
		  def packPage = new Page( 	title: packBaseURL.replace('/', '.') ,
				  					type: 'package',
				  					url: "${apidocRoot}/${it.'@href'}"
				  					);

		  pages << packPage
		  println "PackPAGE ${packPage}"

		  parseAndGet( packPage.url, classFilter).each {

			  def url = it.'@href'
			  def fullUrl = "${apidocRoot}/${packBaseURL}/${url}"
			  def fqn = url.replace('/', '.')- ".html"
			  def parts = fqn.tokenize('.')

			  def classPage = new Page( title: fqn ,
					  					type: 'class',
					  					url: fullUrl
					  					);

			  packPage.children << classPage

			  println "Class PAGE: ${classPage}"

			  parseAndGet( classPage.url, methodNameFilter ).each {
				  def methodeName = it.'@name'
				  def methodPage =  new Page( 	title: methodeName ,
						  						type: 'method',
						  						url: classPage.url+'#'+methodeName);
				  classPage.children << methodPage
				  println "Method PAGE: ${methodPage}"
			  }
		  }
	  }
	xml.pages() {
		pages.each{
			packIt ->
			xml.page(title: packIt.title , type: packIt.type, url: packIt.url) {
				packIt.children.each {
					classIt ->
					xml.page(title: classIt.title , type: classIt.type, url: classIt.url) {
						classIt.children.each {
							methodIt ->
							xml.page(title: methodIt.title , type: methodIt.type, url: methodIt.url)
						}
					}
				}
			}
		}
	}
	println 'END'
   	}

}

class Page {
	def title
	def type
	def url
	def children = []
	public String toString() {
		"Titre: ${title} Type: ${type} URL:${url} \\n\\t ${children.collect{it.toString()+'\\n\\t' }}"
	}
}

class JavaDocParser {
	def parser
	/* Constructor */
	JavaDocParser() {
	  def nekoparser = new org.cyberneko.html.parsers.SAXParser()
	  nekoparser.setFeature('http://xml.org/sax/features/namespaces', false)
	  parser = new XmlParser(nekoparser);
	}

	def parse(url) {
		return parser.parse(url)
	}
}