I created this program, which is supposed to grab all <img> links in a single HTML file, but I can't get it to work.
/* * To change this template, choose Tools | Templates * and open the template in the editor. */ package network; import java.io.FileReader; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.List; import javax.swing.text.MutableAttributeSet; import javax.swing.text.html.HTML.Attribute; import javax.swing.text.html.HTML.Tag; import javax.swing.text.html.HTMLEditorKit.ParserCallback; import javax.swing.text.html.parser.ParserDelegator; /** * * @author ztron */ public class LinkGrabber { private LinkGrabber(){} public static List<String> extractLinks(Reader reader) throws IOException{ final ArrayList<String> list = new ArrayList<String>(); ParserDelegator parserDelegator = new ParserDelegator(); ParserCallback parsercallback = new ParserCallback(){ public void handleText(final char[] data, final int pos){} public void handleStartTag(Tag tag, MutableAttributeSet attribute, int pos){ if(tag == Tag.IMG){ String address = (String)attribute.getAttribute(Attribute.ALT); list.add(address); } } public void handleEndTag(Tag t, final int pos){} public void handleSimpleTag(Tag t, MutableAttributeSet a, final int pos){} public void handleComment(final char[] data, final int pos){} public void handleError(final java.lang.String errMsg, final int pos){} //public void handleError(final java.lang.String errMsg, final int pos){} }; parserDelegator.parse(reader, parsercallback, true); return list; } public final static void main(String args[]) throws Exception{ FileReader reader = new FileReader("java.html"); List<String> links = LinkGrabber.extractLinks(reader); for(String link : links){ System.out.println(link); } } }