import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
/**
 * Downloads a page and collects the targets of its {@code <a href=...>} tags.
 *
 * <p>This is a plain text scan, not an HTML parser: only anchors written
 * exactly as {@code <a href=} (lower case, {@code href} as the first
 * attribute) are recognised, and the extracted values are returned as written
 * in the page (relative links are not resolved).
 */
public class GrabURLs {
    /** Text that introduces the document's base URL. */
    private static final String BASE_MARKER = "<base href=";
    /** Text that introduces a link target. */
    private static final String ANCHOR_MARKER = "<a href=";

    /** Page to scan; stays {@code null} when the constructor was given a malformed URL. */
    private URL url;

    /**
     * Demo entry point: prints every link found on a fixed page, one per line.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        GrabURLs gu = new GrabURLs("http://in.yahoo.com");
        ArrayList<String> found = gu.getLinks();
        for (String line : found) {
            System.out.println(line);
        }
    }

    /**
     * Creates a grabber for the given address.
     *
     * <p>A malformed address is reported on standard error and does not throw;
     * {@link #getLinks()} then returns an empty list.
     *
     * @param urls textual URL of the page to scan
     */
    public GrabURLs(String urls) {
        try {
            url = new URL(urls);
        } catch (MalformedURLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Fetches the page and returns the value of every {@code <a href=...>}
     * attribute in document order.
     *
     * <p>If the page contains a {@code <base href=...>} tag its value is
     * printed to standard output first. Values may be double-quoted,
     * single-quoted or unquoted. Scanning stops at the first value whose
     * closing quote is missing.
     *
     * @return the link targets found; empty (never {@code null}) when the URL
     *         was malformed, the page could not be read, or it has no links
     */
    public ArrayList<String> getLinks() {
        ArrayList<String> links = new ArrayList<String>();
        if (url == null) {
            // The constructor already reported the malformed URL.
            return links;
        }

        String html = readPage();

        // Only look for the base URL when the tag is actually present;
        // indexOf() returns -1 otherwise and the offsets would be meaningless.
        int baseAt = html.indexOf(BASE_MARKER);
        if (baseAt >= 0) {
            int[] bounds = valueBounds(html, baseAt + BASE_MARKER.length());
            if (bounds != null) {
                System.out.println(html.substring(bounds[0], bounds[1]));
            }
        }

        int anchorAt = html.indexOf(ANCHOR_MARKER);
        while (anchorAt >= 0) {
            int[] bounds = valueBounds(html, anchorAt + ANCHOR_MARKER.length());
            if (bounds == null) {
                // Unterminated value (truncated or broken markup): nothing
                // reliable can be extracted past this point.
                break;
            }
            links.add(html.substring(bounds[0], bounds[1]));
            // Resume right after this value so the next anchor is never
            // skipped, whichever quote style was used.
            anchorAt = html.indexOf(ANCHOR_MARKER, bounds[1]);
        }
        return links;
    }

    /**
     * Reads the whole page into one string, with the lines joined without
     * separators. An I/O failure is reported on standard error and whatever
     * was read up to that point is returned.
     */
    private String readPage() {
        StringBuilder page = new StringBuilder();
        BufferedReader in = null;
        try {
            in = new BufferedReader(new InputStreamReader(url.openStream()));
            String line;
            while ((line = in.readLine()) != null) {
                page.append(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        return page.toString();
    }

    /**
     * Locates the attribute value that begins at {@code from}, i.e. directly
     * after the {@code =} sign.
     *
     * @param html text being scanned
     * @param from index of the first character after {@code =}
     * @return {@code {start, end}} (end exclusive) of the value without its
     *         quotes, or {@code null} when there is no value or a quoted
     *         value is never closed
     */
    private static int[] valueBounds(String html, int from) {
        if (from >= html.length()) {
            return null;
        }
        char first = html.charAt(from);
        if (first == '"' || first == '\'') {
            // Quoted value: it ends at the next occurrence of the same quote.
            int close = html.indexOf(first, from + 1);
            return close < 0 ? null : new int[] {from + 1, close};
        }
        // Unquoted value: it runs until whitespace or the end of the tag.
        int end = from;
        while (end < html.length()
                && html.charAt(end) != '>'
                && !Character.isWhitespace(html.charAt(end))) {
            end++;
        }
        return new int[] {from, end};
    }
}
It's not perfect, but you get the idea.
Regards,
Chris