Hi, I have this code for scraping data from a web site. I have downloaded JTidy and added the JTidy library to the package,
but I can't seem to fix the import for:
"Tidy tidy=new Tidy();" in method "getDocumentFromHTMLInputStream()" // "Tidy" has a red line under it
(that is, inside the method getDocumentFromHTMLInputStream()).
I have the library added, so what am I doing wrong? I always have a problem like this when I add a library, but I can't figure out what I'm doing wrong. Any help would be great, thanks.
the code is:
package dompaser;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.Vector;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Tidy;
/**
 * Parses an HTML input stream into a W3C DOM {@link Document} using JTidy.
 *
 * <p>The parser does not own the stream it is given: the caller that opened
 * the stream is responsible for closing it.
 */
class DOMParser {
    /** HTML source handed to JTidy by {@link #getDocumentFromHTMLInputStream()}. */
    private final InputStream is;

    /**
     * Downloads the page, parses it, and prints the root element's name and value.
     *
     * @param args unused
     * @throws IOException if the URL is malformed or the page cannot be opened or closed
     */
    public static void main(String[] args) throws IOException {
        // A web address is not a file path: FileInputStream("http://...") always
        // fails with FileNotFoundException, so the page is opened through URL.
        InputStream in = new URL("http://sockslist.net").openStream();
        try {
            DOMParser parser = new DOMParser(in);
            Document d = parser.getDocumentFromHTMLInputStream();
            // This is the root of the HTML document; in most cases it is the html element.
            Node root = d.getDocumentElement();
            // We can check that for ourselves.
            System.out.println(root.getNodeName());
            // For an element node getNodeValue() is null, so this prints "aanull".
            System.out.println("aa" + root.getNodeValue());
        } finally {
            in.close();
        }
    }

    /**
     * Creates a parser that reads HTML from the given stream.
     *
     * @param is stream positioned at the start of the HTML content
     */
    public DOMParser(InputStream is) {
        this.is = is;
    }

    /**
     * Parses the HTML read from the input stream passed to the constructor.
     *
     * @return the DOM document JTidy built from the stream
     */
    public Document getDocumentFromHTMLInputStream() {
        Tidy tidy = new Tidy(); // parsing object from the JTidy library
        // The JTidy method is parseDOM (capital "DOM"). The second argument is
        // the output stream for the tidied markup; null means none is written.
        return tidy.parseDOM(is, null);
    }
}
//interface IValueExtractor {
////assuming outputs are vectors of strings
//Vector<String> getRequiredValue(Document doc)throws XPathExpressionException;
//}
/**
 * Extracts price strings from a DOM document by evaluating an XPath
 * expression and reading the {@code <strong>} children of the first match.
 */
class ZillowValueExtractor {
    /**
     * Placeholder expression used by {@link #getRequestedValues(Document)}.
     * It is not valid XPath and must be replaced with the real location
     * copied from the site before the one-argument method can succeed.
     */
    private static final String DEFAULT_XPATH = "xpath location rcapfromsite";

    /**
     * Extracts values using the built-in (placeholder) XPath expression.
     *
     * @param doc document to search
     * @return the extracted values; never {@code null}
     * @throws XPathExpressionException if the expression cannot be compiled or evaluated
     */
    public Vector<String> getRequestedValues(Document doc) throws XPathExpressionException {
        return getRequestedValues(doc, DEFAULT_XPATH);
    }

    /**
     * Evaluates {@code xpathExpression} against {@code doc}, prints the node
     * value of every match, then collects the text found directly inside each
     * {@code <strong>} child of the first matched node.
     *
     * @param doc document to search
     * @param xpathExpression XPath expression selecting the node(s) that contain the price
     * @return the extracted values in document order; empty if nothing matched
     * @throws XPathExpressionException if the expression cannot be compiled or evaluated
     */
    public Vector<String> getRequestedValues(Document doc, String xpathExpression)
            throws XPathExpressionException {
        XPath xpath = XPathFactory.newInstance().newXPath();
        XPathExpression expr = xpath.compile(xpathExpression);
        NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);

        for (int i = 0; i < nodes.getLength(); i++) {
            System.out.println(nodes.item(i).getNodeValue());
        }

        Vector<String> prices = new Vector<String>();
        if (nodes.getLength() == 0) {
            // item(0) would be null here; an empty match yields an empty result.
            return prices;
        }

        // Manual search below the first match only.
        NodeList childNodes = nodes.item(0).getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            Node child = childNodes.item(i);
            // The name is compared case-sensitively against "strong".
            if (!"strong".equals(child.getNodeName())) {
                continue;
            }
            // If this child is a <strong> tag, the price is its first child.
            Node priceNode = child.getFirstChild();
            if (priceNode == null || priceNode.getNodeValue() == null) {
                continue; // empty <strong>, or its first child carries no value
            }
            String price = priceNode.getNodeValue();
            System.out.println(price);
            prices.add(price);
        }
        return prices;
    }
}