/* This is FOAFlicious It generates FOAF files from del.icio.us inboxes Copyright (c) 2005, Philip wilson All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name FOAFlicious nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

import java.net.URL
import java.io.File
import java.io.FileOutputStream
import java.io.FileInputStream
import java.io.FileWriter
import java.io.FileReader
import java.io.BufferedReader
import org.apache.commons.httpclient.HttpClient
import org.apache.commons.httpclient.UsernamePasswordCredentials
import org.apache.commons.httpclient.methods.GetMethod
import javax.servlet.*
import org.w3c.tidy.*

// Set up global variables and data types
System.getProperties().setProperty("httpclient.useragent", "foaflicious 0.2 - delicious@philwilson.org")

// directory (inside the web application) where cached inboxes, user details and FOAF files are kept
base = session.getServletContext().getRealPath("/") + "foaflicious-data"+System.getProperty("file.separator")
server = "http://del.icio.us"
// the del.icio.us user whose inbox is turned into FOAF; taken straight from the "user" request parameter
username = request.getParameter("user")
// filled by parseOutline() with one Item per person found in the inbox
inboxitems = []

// Details collected for one person found in the inbox.
class Item {
    def username
    def name
    def url
    def foafUrl

    String toString() {
        return "[${username}, ${name}, ${url}, ${foafUrl}]"
    }
}

client = new HttpClient()
client.setTimeout(5000) // 5 secs for any page. a bit short, but hey

// JTidy is used to turn whatever HTML we download into XHTML that XmlParser can read
tidy = new Tidy()
tidy.setQuiet(true)
tidy.setShowWarnings(false)
tidy.setXHTML(true)

// a generic method for retrieving the contents of a URL. note no 304/Etag/last-modified support!
// Returns the response body as a stream on success. On any failure the error is printed and an
// empty string is returned instead (callers hand the result straight to tidy.parse()).
def callURL(url) {
    // kept local so the request object does not leak into the script binding
    def get = null
    try {
        get = new GetMethod(url)
        client.executeMethod(get)
        return get.getResponseBodyAsStream()
    } catch (Exception e) {
        println "Error retrieving <${url}>: ${e}"
        // nobody will read (and thereby release) the response, so hand the connection back ourselves
        if (get != null) {
            get.releaseConnection()
        }
        return ""
    }
}

// a specific method for getting a del.icio.us page.
// it only prepends the del.icio.us server address and hands the request to callURL(),
// so failures are reported (with the full URL) and signalled exactly as callURL() does
def callDelicious(url) {
    return callURL(server + url)
}

// closes a stream/reader/writer if there is one; a failure to close is not worth aborting for
def closeQuietly(stream) {
    if (stream != null) {
        try {
            stream.close()
        } catch (Exception ignored) {
            // nothing useful can be done about a failed close
        }
    }
}

// download a del.icio.us user's homepage to ${username}.home and get the href from a <link> element
// which points to a FOAF file.
// Returns the href of the last <link> in the page head whose title contains "FOAF" or "foaf",
// or an empty string when there is none or the page could not be retrieved/parsed.
// The temporary .home file is always removed again.
def getFoafUrl(item) {
    def foafUrl = new String()
    def userwebpage = new File(base+"${item.username}.home")
    def tidied = null
    def html = null
    try {
        def page = callURL(item.url)
        tidied = new FileOutputStream(userwebpage)
        tidy.parse(page, tidied)
        // must be closed before the file is read back
        tidied.close()
        tidied = null

        html = new FileInputStream(userwebpage)
        def myhtml = new XmlParser(false, false).parse(html)
        for (link in myhtml.head.link) {
            def title = link['@title']
            if (title && (title.contains("FOAF") || title.contains("foaf"))) {
                //println "got foaf file ${link['@href']}"
                foafUrl = link['@href']
            }
        }
    } catch (Exception e) {
        println e // swallow for the time being
    } finally {
        closeQuietly(tidied)
        closeQuietly(html)
        userwebpage.delete()
    }
    return foafUrl
}

// returns the href of an inbox entry that points at a del.icio.us user, or null for anything else
// (entries carrying a class attribute, entries without a link, and links such as tag subscriptions
// that have more path after the user name)
def inboxUserHref(div) {
    if (div['@class'] != null) {
        return null
    }
    def hrefs = div.a['@href']
    if (hrefs == null || hrefs.size() == 0 || hrefs[0] == null) {
        return null
    }
    def href = hrefs[0]
    // a user link is "<server>/<username>", so its last '/' is the one directly after the server name
    if (href.lastIndexOf('/') != server.length()) {
        return null
    }
    return href
}

// retrieve the user's del.icio.us page, using `file` as scratch space for the tidied XHTML,
// and copy their name and homepage URL into `item`. The scratch file is removed afterwards,
// also when something goes wrong, so that half-written HTML is never mistaken for cached details.
def scrapeUserDetails(item, file) {
    def tidied = null
    def html = null
    try {
        def page = callDelicious("/"+item.username)
        tidied = new FileOutputStream(file)
        tidy.parse(page, tidied) // make it XHTML
        tidied.close()
        tidied = null

        html = new FileInputStream(file)
        def myhtml = new XmlParser(false, false).parse(html)
        // extract the bits of it we need (name and homepage URL)
        def firstLink = myhtml.body.div.a[0]
        if (firstLink != null && firstLink['@title']==null) {
            item.url = myhtml.body.div.a['@href'][0]
            item.name = firstLink.value()[0]
        }
    } finally {
        closeQuietly(tidied)
        closeQuietly(html)
        file.delete()
    }
    // then, if they have a homepage, see if it links to a FOAF file (lookup currently disabled)
    if (item.url!=null) {
        //item.foafUrl = getFoafUrl(item)
    }
}

// write the user's details to a properties file, which should be much quicker to read next time
def storeUserDetails(item, file) {
    def userprops = new Properties()
    userprops.setProperty("username", item.username)
    if (item.name) userprops.setProperty("name", item.name)
    if (item.url) userprops.setProperty("url", item.url)
    if (item.foafUrl) userprops.setProperty("foafUrl", item.foafUrl)
    def fos = new FileOutputStream(file)
    try {
        userprops.store(fos, "Details for user http://del.icio.us/${item.username}")
        fos.flush()
    } finally {
        fos.close()
    }
}

// fill `item` from a properties file written earlier by storeUserDetails()
def loadUserDetails(item, file) {
    def userprops = new Properties()
    def fis = new FileInputStream(file)
    try {
        userprops.load(fis)
    } finally {
        fis.close()
    }
    item.name = userprops.getProperty("name")
    item.url = userprops.getProperty("url")
    item.foafUrl = userprops.getProperty("foafUrl")
}

// strip the user's inbox and get an array of user objects
// then retrieve that user's homepage on delicious and retrieve their webpage and name and FOAF file URL
// parsedXml: the <div> nodes of the tidied inbox page. Every person found is appended to inboxitems.
def parseOutline(parsedXml) {
    // navigate down the tree extracting people the user is subscribed to, not tags
    for (div in parsedXml) {
        def href = inboxUserHref(div)
        if (href != null) {
            // get their name: everything after "<server>/"
            def item = new Item(username:href.substring(server.length() + 1))
            // check if we have already downloaded this user's details
            def file = new File(base+"${item.username}.properties")
            if (!file.exists()) {
                // if not then we have to do all the laborious retrieval and parsing
                scrapeUserDetails(item, file)
                storeUserDetails(item, file)
            } else {
                // otherwise we can just load the properties into the object! hurrah!
                loadUserDetails(item, file)
            }
            inboxitems.add(item)
        }
    }
}

// the RDF for the main user
// NOTE(review): the string literals in userDetails() and knows() look as if their RDF/XML markup
// has been lost; they are kept exactly as found - confirm against the original source.
def userDetails() {
    def user = new StringBuffer()
    user << "${username} ${username} "
    return user
}

// the RDF version of the people in their del.icio.us inbox
def knows() {
    def known = new StringBuffer()
    for (person in inboxitems) {
        known << " "
        if (person.name) {
            known << "${person.name}\n"
        }
        if (person.url) {
            known << "\n"
        }
        if (person.foafUrl && person.foafUrl.length()>0) {
            known << "\n"
        }
        known << " "
    }
    return known
}

// print the given file line by line
def printfile(file) {
    // read the file we were handed rather than whatever the global foaffile happens to be
    def bf = new BufferedReader(new FileReader(file), 8192)
    try {
        def line = null
        while ((line = bf.readLine()) != null) {
            println line
        }
    } finally {
        // clean up (closing the buffered reader also closes the file reader underneath)
        bf.close()
    }
}

// actually start the program!
// if we've done it before, just output our cached result from last time
// todo: could do with a force flag so we don't always return the cached results

// The user name comes straight from the request and ends up in file names below, so refuse
// anything that is missing or could point outside the data directory (path separators etc.).
if (username == null || !username.matches("[A-Za-z0-9_.-]+")) {
    println "Missing or invalid 'user' parameter"
} else {
    foaffile = new File(base+"${username}.foaf")
    if (foaffile.exists()) {
        printfile(foaffile)
        // println session.getServletContext().getRealPath("/")
        //System.exit(0)
    } else {
        // if we've retrieved this person's inbox before, don't retrieve it again, just get on with the parsing
        userinbox = new File(base+username+".inbox")
        if (!userinbox.exists()) {
            def inboxpage = callDelicious("/inbox/${username}")
            def inboxout = new FileOutputStream(userinbox)
            def fetched = false
            try {
                tidy.parse(inboxpage, inboxout)
                fetched = true
            } finally {
                inboxout.close()
                // never leave a half-written inbox behind: it would be taken for a cached copy next time
                if (!fetched) {
                    userinbox.delete()
                }
            }
        }

        // Get the page and parse it into a GPath structure
        def inboxin = new FileInputStream(userinbox)
        try {
            myhtml = new XmlParser(false, false).parse(inboxin)
        } finally {
            inboxin.close()
        }

        // take their inbox and turn it into an array of users
        // go off and do the grunt work
        parseOutline(myhtml.body.div.div)

        template = " ${userDetails()} ${knows()} Del.icio.us "

        foafwriter = new FileWriter(foaffile)
        try {
            foafwriter.write(template)
            foafwriter.flush()
        } finally {
            foafwriter.close()
        }
        printfile(foaffile)
    }
}