/* A quick-and-dirty helper that searches the <link> tags
* of a webpage for its RSS feed URL.
* If the page advertises a feed,
* the feed link is extracted from the matching
* portion of the page source using a regex.
*/
/**
 * Looks up the RSS feed URL advertised by a web page.
 *
 * The page is read line by line. Only lines containing the text "RSS"
 * (case-sensitive) are inspected; within those lines every
 * <link rel="alternate" ...> tag is examined and the value of its href
 * attribute is extracted. When several tags qualify, the last one found wins.
 *
 * @param urlStr address of the page to inspect, as a string
 * @return the trimmed href value of the feed link, or null when the page
 *         contains no matching tag
 * @throws MalformedURLException if urlStr is not a valid URL
 */
def getRSSFeedURLFor(String urlStr){
    def url = urlStr.toURL()
    def lookForStart = '''<link rel="alternate"'''
    // Matches href="..." or href='...' and captures only the attribute value,
    // so attributes that follow href (title, type, ...) cannot be picked up.
    def hrefPattern = ~/href\s*=\s*(?:"([^"]*)"|'([^']*)')/
    def feedUrl = null

    url.eachLine { line ->
        if (!line.contains("RSS")) {
            return // inside the closure this just skips to the next line
        }
        int tagStart = line.indexOf(lookForStart)
        while (tagStart >= 0) {
            // The tag ends at the first '>' after its start; if the tag is
            // not closed on this line, fall back to the rest of the line.
            int tagEnd = line.indexOf('>', tagStart)
            String tag = tagEnd < 0 ? line.substring(tagStart) : line.substring(tagStart, tagEnd + 1)

            def matcher = hrefPattern.matcher(tag)
            if (matcher.find()) {
                String value = matcher.group(1) != null ? matcher.group(1) : matcher.group(2)
                value = value.trim()
                if (value) {
                    feedUrl = value
                }
            }
            tagStart = line.indexOf(lookForStart, tagStart + lookForStart.length())
        }
    }
    return feedUrl
}
// Example usage: resolve the feed address of a sample blog and print it.
String urlStr = "http://www.ashes-phoenix.blogspot.com"
// Converting the result to a URL also validates that it is well-formed.
URL rssURL = getRSSFeedURLFor(urlStr).toURL()
println(rssURL)
/* PS: On most standard websites, if an RSS feed exists for the page, it is
 * advertised in a <link> tag in the head of the page source that looks like:
 *   <link rel="alternate" type="application/rss+xml" href="...">
 * (Atom feeds use type="application/atom+xml".)
 * The logic lies in extracting the value of the href attribute of this tag,
 * which is the RSS feed URL of the page we are trying to retrieve.
 */