当前所在位置:珠峰网资料 >> 计算机 >> 计算机等级考试 >> 正文
java读取网站内容的两种方法
发布时间:2010/10/26 10:28:35 来源:城市学习网 编辑:ziteng
  1. HttpClient
  利用Apache的HttpClient客户端包获取某个地址的内容:
  1import java.io.UnsupportedEncodingException;
  2import java.util.HashSet;
  3import java.util.Iterator;
  4import java.util.Set;
  5import java.util.regex.Matcher;
  6import java.util.regex.Pattern;
  7
  8import org.apache.commons.httpclient.HttpClient;
  9import org.apache.commons.httpclient.NameValuePair;
  10import org.apache.commons.httpclient.methods.PostMethod;
  11
  12public class catchMain {
  13
  14    /** *//**
  15     * @param args
  16     */
  17    public static void main(String[] args) {
  18
  19
  20        String url = "http://search.foodqs.com/companysearch.asp";
  21        String keyword="食";
  22        String response=createhttpClient(url,keyword);
  23    }
  24
  25public static String createhttpClient(String url,String param){
  26        HttpClient client = new HttpClient();
  27        String response=null;
  28        String keyword=null;
  29        PostMethod postMethod = new PostMethod(url);
  30        try {
  31            if(param!=null)
  32           keyword = new String(param.getBytes("gb2312"),"ISO-8859-1");
  33        } catch (UnsupportedEncodingException e1) {
  34            // TODO Auto-generated catch block
  35            e1.printStackTrace();
  36        }
  37
  38        NameValuePair[] data = { new NameValuePair("keyword", keyword) };
  39        // 将表单的值放入postMethod中
  40        postMethod.setRequestBody(data);
  41
  42        try {
  43            int statusCode = client.executeMethod(postMethod);
  44            response = new String(postMethod.getResponseBodyAsString()
  45                    .getBytes("ISO-8859-1"), "GBK");
  46        } catch (Exception e) {
  47
  48            e.printStackTrace();
  49        }
  50        return response;
  51
  52    }
  53
  2. Java自带的HttpURLConnection
  1public static String getPageContent(String strUrl, String strPostRequest,
  2            int maxLength) {
  3        //读取结果网页
  4        StringBuffer buffer = new StringBuffer();
  5        System.setProperty("sun.net.client.defaultConnectTimeout", "5000");
  6        System.setProperty("sun.net.client.defaultReadTimeout", "5000");
  7        try {
  8            URL newUrl = new URL(strUrl);
  9            HttpURLConnection hConnect = (HttpURLConnection) newUrl
  10                    .openConnection();
  11            //POST方式的额外数据
  12            if (strPostRequest.length() > 0) {
  13                hConnect.setDoOutput(true);
  14                OutputStreamWriter out = new OutputStreamWriter(hConnect
  15                        .getOutputStream());
  16                out.write(strPostRequest);
  17                out.flush();
  18                out.close();
  19            }
  20            //读取内容
  21            BufferedReader rd = new BufferedReader(new InputStreamReader(
  22                    hConnect.getInputStream()));
  23            int ch;
  24            for (int length = 0; (ch = rd.read()) > -1
  25                    && (maxLength <= 0 || length < maxLength); length++)
  26                buffer.append((char) ch);
  27            rd.close();
  28            hConnect.disconnect();
  29            return buffer.toString().trim();
  30        } catch (Exception e) {
  31            // return "错误:读取网页失败!";
  32            return null;
  33        }
  34    }
广告合作:400-664-0084 全国热线:400-664-0084
Copyright 2010 - 2017 www.my8848.com 珠峰网 粤ICP备15066211号
珠峰网 版权所有 All Rights Reserved