/*
This is FOAFlicious
It generates FOAF files from del.icio.us inboxes
Copyright (c) 2005, Philip wilson
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name FOAFlicious nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
import java.net.URL
import java.io.File
import java.io.FileOutputStream
import java.io.FileInputStream
import java.io.FileWriter
import java.io.FileReader
import java.io.BufferedReader
import org.apache.commons.httpclient.HttpClient
import org.apache.commons.httpclient.UsernamePasswordCredentials
import org.apache.commons.httpclient.methods.GetMethod
import javax.servlet.*
import org.w3c.tidy.*
// Set up global variables and data types.
// Everything here is assigned without `def`; the methods further down read these
// values by name (client, tidy, base, server, username, inboxitems).

// Sets the httpclient.useragent system property before the HttpClient is created.
// Presumably Commons HttpClient uses it as the User-Agent header - confirm.
System.getProperties().setProperty("httpclient.useragent", "foaflicious 0.1 - delicious@philwilson.org")
// Path prefix (ending in the platform file separator) used when building the cache
// file names (.home, .properties, .inbox, .foaf).
// NOTE(review): `session` is not defined in this file - presumably supplied by the
// servlet/Groovlet container; confirm.
base = session.getServletContext().getRealPath("/") + "foaflicious-data"+System.getProperty("file.separator")
// Prefix that callDelicious puts in front of the relative paths it is given.
server = "http://del.icio.us"
// The del.icio.us account whose inbox is processed and whose FOAF file is produced. Hard-coded.
username = "pip"
// Filled by parseOutline with one Item per person found in the inbox; read by knows().
inboxitems = []
// Details held for one person: del.icio.us username, name, homepage URL and FOAF file URL.
class Item { username; name; url; foafUrl; String toString() { return "[${username}, ${name}, ${url}, ${foafUrl}]" } }
// One HTTP client shared by callURL and callDelicious.
client = new HttpClient()
client.setTimeout(5000) // 5 secs for any page. a bit short, but hey
// Shared JTidy instance configured as quiet, no warnings, XHTML mode. Pages are run
// through it before being parsed with XmlParser.
tidy = new Tidy()
tidy.setQuiet(true)
tidy.setShowWarnings(false)
tidy.setXHTML(true)
// Retrieve the contents of an absolute URL using the shared HttpClient.
// Note: no 304/ETag/Last-Modified support, and the HTTP status code is not inspected.
//
// url     - absolute address to GET
// returns - always an InputStream over the response body. The body is buffered in
//           memory so the connection can be released before returning. When the
//           request fails (or there is no body) the stream is empty, so callers
//           can hand the result straight to tidy.parse() either way.
def callURL(url)
{
    def get = null
    try
    {
        get = new GetMethod(url)
        client.executeMethod(get)
        def body = get.getResponseBody()
        // getResponseBody() may yield null when there is no body to read
        return new java.io.ByteArrayInputStream(body != null ? body : "".getBytes())
    }
    catch (Exception e)
    {
        println "Error retrieving <${url}>: ${e}"
        // an empty stream rather than a String, so the return type never changes
        return new java.io.ByteArrayInputStream("".getBytes())
    }
    finally
    {
        // hand the connection back to the client whether or not the request worked
        if (get != null)
        {
            get.releaseConnection()
        }
    }
}
// Retrieve a del.icio.us page.
//
// url     - path relative to `server`, e.g. "/inbox/pip"
// returns - whatever callURL returns for the absolute address (server + url)
//
// Retrieval and error handling are delegated to callURL so they live in one place.
// callURL reports failures using the address it was given, i.e. the absolute URL.
def callDelicious(url)
{
    return callURL(server + url)
}
// Find the FOAF file advertised on a person's homepage.
//
// Downloads item.url, tidies it into the temporary file ${base}${item.username}.home,
// parses that file and looks in <head> for <link> elements whose title contains
// "FOAF" or "foaf". The temporary file is always removed afterwards.
//
// item    - an Item with username and url set
// returns - the href of the last matching <link> (null if that link has no href),
//           or "" when there is no match or anything goes wrong; failures are
//           printed and otherwise ignored
def getFoafUrl(item)
{
    def foafUrl = ""
    def userwebpage = new File(base + "${item.username}.home")
    def pageOut = null
    def pageIn = null
    try
    {
        def page = callURL(item.url)
        pageOut = new FileOutputStream(userwebpage)
        tidy.parse(page, pageOut)
        // finish writing before the file is read back
        pageOut.close()

        pageIn = new FileInputStream(userwebpage)
        def html = new XmlParser(false, false).parse(pageIn)
        html.head.link.each() {
            def title = it['@title']
            if (title && (title.contains("FOAF") || title.contains("foaf")))
            {
                foafUrl = it['@href']
            }
        }
    }
    catch (Exception e)
    {
        println e
        // best effort: a homepage we cannot read simply yields no FOAF URL
    }
    finally
    {
        // java.io.File has nothing to close; it is the streams that must be closed,
        // and the temporary file has to go even when parsing failed
        if (pageOut != null)
        {
            try
            {
                pageOut.close()
            }
            catch (Exception ignored)
            {
                // nothing useful can be done about a failed close
            }
        }
        if (pageIn != null)
        {
            try
            {
                pageIn.close()
            }
            catch (Exception ignored)
            {
                // nothing useful can be done about a failed close
            }
        }
        userwebpage.delete()
    }
    return foafUrl
}
// Turn the parsed inbox outline into Item objects and append them to inboxitems.
//
// For every person the inbox owner is subscribed to, the person's details are read
// from the cache file ${base}<username>.properties when it exists. Otherwise their
// del.icio.us page is fetched to obtain name and homepage URL, the homepage is
// checked for a FOAF link (getFoafUrl), and the result is written to the cache file.
//
// parsedXml - the nodes to scan (the script passes html.body.div.div of the inbox page)
// returns   - nothing of interest; the result is the side effect on inboxitems
def parseOutline(parsedXml)
{
    parsedXml.each() {
        def div = it
        // People, not tags: no class attribute, and an href of the form
        // http://del.icio.us/<username>, whose last '/' is at index 18 (the username
        // therefore starts at index 19). Presumably tag links have a deeper path.
        if (div['@class'] == null && div.a['@href'] != null && div.a["@href"][0].lastIndexOf('/')==18)
        {
            def item = new Item(username:div.a['@href'][0].substring(19))
            // cache of this person's details
            def file = new File(base+"${item.username}.properties")
            def userprops = new Properties()
            if (!file.exists())
            {
                // Not cached: fetch the del.icio.us page and tidy it into XHTML. The
                // cache file's path doubles as scratch space for the tidied page.
                def page = callDelicious("/"+item.username)
                def pageOut = new FileOutputStream(file)
                try
                {
                    tidy.parse(page, pageOut)
                }
                finally
                {
                    pageOut.close()
                }

                def html = null
                def pageIn = new FileInputStream(file)
                try
                {
                    html = new XmlParser(false, false).parse(pageIn)
                }
                finally
                {
                    pageIn.close()
                }

                // extract the bits we need (name and homepage URL)
                if (html.body.div.a[0]['@title']==null)
                {
                    item.url = html.body.div.a['@href'][0]
                    item.name = html.body.div.a[0].value()[0]
                }
                // then, if they have a homepage, see if it links to a FOAF file
                if (item.url!=null)
                {
                    item.foafUrl = getFoafUrl(item)
                }

                // Replace the scratch page with the real properties. This must be the
                // same path that is checked with file.exists() above, otherwise the
                // cache is never found again on the next run.
                file.delete()
                def fos = new FileOutputStream(file)
                try
                {
                    userprops.setProperty("username", item.username)
                    if (item.name)
                        userprops.setProperty("name", item.name)
                    if (item.url)
                        userprops.setProperty("url", item.url)
                    if (item.foafUrl)
                        userprops.setProperty("foafUrl", item.foafUrl)
                    userprops.store(fos, "Details for user http://del.icio.us/${item.username}")
                    fos.flush()
                }
                finally
                {
                    fos.close()
                }
            }
            else
            {
                // cached: just load the properties into the object
                def fis = new FileInputStream(file)
                try
                {
                    userprops.load(fis)
                }
                finally
                {
                    fis.close()
                }
                item.name = userprops.getProperty("name")
                item.url = userprops.getProperty("url")
                item.foafUrl = userprops.getProperty("foafUrl")
            }
            inboxitems.add(item)
        }
    }
}
// Build the RDF fragment describing the main user.
// returns - a StringBuffer holding the global username written twice, back to back
// NOTE(review): the literal contains only the two interpolations; any surrounding
// markup appears to be missing - confirm against the intended FOAF output.
def userDetails() {
    def details = new StringBuffer()
    details << "${username}${username}"
    return details
}
// Build the RDF fragment for the people collected in inboxitems.
// For each item, in order: an opening literal, the name followed by a newline when a
// name is set, one newline when a url is set, one newline when foafUrl is non-empty,
// and a closing literal.
// returns - a StringBuffer with the concatenated fragments
// NOTE(review): the url and foafUrl branches emit only a newline and never use the
// value they test - the markup that referenced them appears to be missing; confirm.
def knows() {
    def rdf = new StringBuffer()
    inboxitems.each {
        rdf << "
"
        if (it.name) {
            rdf << "${it.name}\n"
        }
        if (it.url) {
            rdf << "\n"
        }
        if (it.foafUrl && it.foafUrl.length()>0) {
            rdf << "\n"
        }
        rdf << "
"
    }
    return rdf
}
// Print a text file line by line using println.
//
// file - the file to print (a File, or anything FileReader accepts). The argument
//        itself is read; the method does not depend on any global variable.
// The reader is closed even if reading or printing fails.
def printfile(file)
{
    // closing the BufferedReader also closes the FileReader it wraps
    def reader = new BufferedReader(new FileReader(file), 8192)
    try
    {
        def line = null
        while ((line = reader.readLine()) != null)
        {
            println line
        }
    }
    finally
    {
        reader.close()
    }
}
// Entry point of the script.
// If a FOAF file was generated for this user before, the cached copy is printed.
// Otherwise the user's inbox is fetched (unless a tidied copy is already on disk),
// parsed into inboxitems, rendered into the FOAF file, and that file is printed.
// todo: could do with a force flag so we don't always return the cached results
//
// foaffile is deliberately assigned without `def` so it stays visible to the methods
// of this script.
foaffile = new File(base+"${username}.foaf")
if (foaffile.exists())
{
    printfile(foaffile)
}
else
{
    // if we've retrieved this person's inbox before, don't retrieve it again, just get on with the parsing
    userinbox = new File(base+username+".inbox")
    if (!userinbox.exists())
    {
        inboxpage = callDelicious("/inbox/${username}")
        inboxout = new FileOutputStream(userinbox)
        try
        {
            tidy.parse(inboxpage, inboxout)
        }
        finally
        {
            // close so the tidied page is complete on disk before it is read back
            inboxout.close()
        }
    }
    // Get the page and parse it into a GPath structure
    inboxin = new FileInputStream(userinbox)
    try
    {
        html = new XmlParser(false, false).parse(inboxin)
    }
    finally
    {
        inboxin.close()
    }
    // take their inbox and turn it into an array of users
    parseOutline(html.body.div.div)
    template = "
${userDetails()}
${knows()}
Del.icio.us
"
    foafwriter = new FileWriter(foaffile)
    try
    {
        foafwriter.write(template)
        foafwriter.flush()
    }
    finally
    {
        foafwriter.close()
    }
    printfile(foaffile)
}