使用 jsoup 解析 HTML,获取页面中各种类型的元素,并使用 jsoup 选择器对元素进行检索。
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import
java.io.File;
import java.io.IOException;
import java.util.Iterator;
/**
 * Loads an HTML document with jsoup, either over HTTP(S) or from a local file,
 * and prints summaries of several kinds of page elements located with jsoup
 * CSS selectors.
 *
 * <p>All {@code getPage...} methods write their findings to standard output.
 */
public class GetElementsByJSoup {
    /** The URL or local file path that was passed to the constructor. */
    public String url;

    /** The document parsed from {@link #url} when the instance was constructed. */
    public Document doc;

    /**
     * Stores the location and immediately loads the document from it.
     *
     * @param urlOrFilePath an {@code http://} / {@code https://} URL, or the path
     *                      of a local HTML file
     * @throws IOException          if the page cannot be fetched or the file cannot be read
     * @throws NullPointerException if {@code urlOrFilePath} is {@code null}
     */
    public GetElementsByJSoup(String urlOrFilePath) throws IOException {
        if (urlOrFilePath == null) {
            throw new NullPointerException("urlOrFilePath must not be null");
        }
        this.url = urlOrFilePath;
        this.doc = GetDocumentObject();
    }

    /**
     * Parses the document found at {@link #url}.
     *
     * <p>Locations starting with {@code http://} or {@code https://} are fetched
     * over the network; anything else is treated as a local file and parsed as
     * UTF-8.
     *
     * @return the parsed document
     * @throws IOException if fetching or reading fails; the failure is propagated
     *                     rather than swallowed so callers never receive a
     *                     {@code null} document
     */
    public Document GetDocumentObject() throws IOException {
        if (isHttpUrl(url)) {
            return Jsoup.connect(url).get();
        }
        return Jsoup.parse(new File(url), "UTF-8");
    }

    /**
     * Prints and returns the {@code input} elements whose {@code type} attribute
     * matches the regular expression {@code text|password}.
     *
     * @return the matched elements
     */
    public Elements getPageTextElements() {
        Elements eles = doc.select("input[type~=text|password]");
        printAttrAndText(eles, "name", "strName");
        return eles;
    }

    /** Prints the {@code name} attribute and text of every {@code select} element. */
    public void getPageListElements() {
        printAttrAndText(doc.select("select"), "name", "strName");
    }

    /**
     * Prints the {@code value} attribute and text of every {@code input} element
     * whose {@code class} attribute equals {@code button}.
     */
    public void getPageButtonElements() {
        // NOTE(review): this selects on the class attribute, not on type=button/submit;
        // presumably tailored to a specific page - confirm against the target markup.
        printAttrAndText(doc.select("input[class=button]"), "value", "value");
    }

    /**
     * Prints the {@code name} attribute and text of every {@code input} element
     * whose {@code class} attribute equals {@code checkbox}.
     */
    public void getPageCheckBoxElements() {
        // NOTE(review): selects on the class attribute, not on type=checkbox - confirm.
        printAttrAndText(doc.select("input[class=checkbox]"), "name", "strName");
    }

    /** Prints the text of every {@code a} element that has an {@code href} and non-empty text. */
    public void getPageLinkElements() {
        Elements eles = doc.select("a[href]");
        System.out.println("size = " + eles.size());
        for (int i = 0; i < eles.size(); i++) {
            String linkText = eles.get(i).text();
            // Compare by content: "!=" on strings only compares references.
            if (!linkText.isEmpty()) {
                System.out.println("strName = " + linkText);
            }
        }
    }

    /** Prints the non-empty {@code alt} attribute of every {@code img} element. */
    public void getPageImageElements() {
        Elements eles = doc.select("img");
        System.out.println("size = " + eles.size());
        for (int i = 0; i < eles.size(); i++) {
            String altText = eles.get(i).attr("alt");
            if (!altText.isEmpty()) {
                System.out.println("strName = " + altText);
            }
        }
    }

    /**
     * Prints the non-empty {@code id} attribute of every {@code table} element,
     * together with the table's index among all matched tables.
     */
    public void getPageTableElements() {
        Elements eles = doc.select("table");
        System.out.println("size = " + eles.size());
        for (int i = 0; i < eles.size(); i++) {
            String tableId = eles.get(i).attr("id");
            if (!tableId.isEmpty()) {
                System.out.println("table id = " + tableId + ", i = " + i);
            }
        }
    }

    /** Returns whether the location starts with an http or https scheme (case-insensitive). */
    private static boolean isHttpUrl(String location) {
        return location.regionMatches(true, 0, "http://", 0, 7)
                || location.regionMatches(true, 0, "https://", 0, 8);
    }

    /** Prints the element count, then one line per element with the given attribute and its text. */
    private static void printAttrAndText(Elements eles, String attrName, String label) {
        System.out.println("size = " + eles.size());
        for (int i = 0; i < eles.size(); i++) {
            String attrValue = eles.get(i).attr(attrName);
            System.out.println(label + " = " + attrValue + ", text=" + eles.get(i).text());
        }
    }
}