`
foxjj123
  • 浏览: 5853 次
  • 性别: Icon_minigender_1
  • 来自: 杭州
最近访客 更多访客>>
社区版块
存档分类
最新评论

读取和讯网十大流通股东数据

阅读更多
import java.io.*;
import java.net.*;
import java.util.regex.*;

/**
 * Fetches a web page and extracts two kinds of text from its HTML: the content
 * of every {@code <strong>} element (treated as header items) and the content
 * of every {@code <span class="font10">} element (treated as data values).
 * The extracted text is echoed to standard output and written to a text file.
 */
public class HexunData {
	/** Output file used by {@link #extractor(String)}. */
	private static final String DEFAULT_OUTPUT_PATH = "E:\\HexunData_股东人数.txt";

	/** Padding written to the file after every extracted value. */
	private static final String FILE_VALUE_PADDING = "          ";

	/** Line terminator written to the file. */
	private static final String FILE_LINE_BREAK = "\r\n";

	/** Number of data values placed on one output row. */
	private static final int VALUES_PER_ROW = 6;

	/** Matches the text between {@code <strong>} and {@code </strong>}. */
	private static final Pattern ITEM_PATTERN = Pattern
			.compile("(?<=<strong>).*?(?=(</strong>))");

	/** Matches the text between {@code <span class="font10">} and {@code </span>}. */
	private static final Pattern DATA_PATTERN = Pattern
			.compile("(?<=<span class=\"font10\">).*?(?=(</span>))");

	/**
	 * Downloads the resource at {@code urlString} and returns its lines joined
	 * by single spaces (each line, including the last, is followed by a space).
	 *
	 * <p>This is best-effort: a malformed URL or an I/O failure is reported on
	 * the console and whatever was read so far (possibly nothing) is returned.
	 *
	 * @param urlString the URL to read
	 * @return the text read from the URL, never {@code null}
	 */
	private static String getHexunIn(String urlString) {
		StringBuilder hexun = new StringBuilder();
		BufferedReader reader = null;
		try {
			URL url = new URL(urlString);
			URLConnection uc = url.openConnection();
			// NOTE(review): the stream is decoded with the platform default
			// charset; confirm it matches the encoding the page is served in.
			reader = new BufferedReader(new InputStreamReader(uc
					.getInputStream()));
			String line;
			while ((line = reader.readLine()) != null) {
				hexun.append(line).append(' ');
			}
		} catch (MalformedURLException e) {
			System.out.print("invalid url:" + urlString);
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Close here so the stream is released even when reading fails.
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
		return hexun.toString();
	}

	/**
	 * Extracts the page at {@code urlString} into the default output file.
	 *
	 * @param urlString the URL of the page to read
	 * @throws IOException if the output file cannot be opened or closed
	 */
	public void extractor(String urlString) throws IOException {
		extractor(urlString, DEFAULT_OUTPUT_PATH);
	}

	/**
	 * Extracts the page at {@code urlString} into the file at {@code outputPath}.
	 *
	 * <p>The file receives the header items on the first line, then the data
	 * values in rows of {@value #VALUES_PER_ROW}; every value is followed by
	 * fixed padding. The same content is printed to standard output in
	 * fixed-width columns. Failures while writing the extracted values are
	 * printed to standard error rather than propagated.
	 *
	 * @param urlString  the URL of the page to read
	 * @param outputPath the file to create or overwrite
	 * @throws IOException if the output file cannot be opened or closed
	 */
	public void extractor(String urlString, String outputPath)
			throws IOException {
		FileOutputStream fos = new FileOutputStream(outputPath);
		try {
			OutputStreamWriter ows = new OutputStreamWriter(fos);
			try {
				String html = getHexunIn(urlString);
				writeItems(html, ows);
				writeData(html, ows);
			} catch (IOException e) {
				e.printStackTrace();
			} finally {
				ows.close();
			}
		} finally {
			// Guarantees the file handle is released even if closing the
			// writer fails; closing an already-closed stream is harmless.
			fos.close();
		}
	}

	/** Writes every {@code <strong>} item found in {@code html} to the file and console. */
	private static void writeItems(String html, Writer out) throws IOException {
		Matcher matcher = ITEM_PATTERN.matcher(html);
		while (matcher.find()) {
			String item = matcher.group();
			out.write(item + FILE_VALUE_PADDING);
			System.out.printf("%-40s", item);
		}
	}

	/** Writes every {@code font10} span value found in {@code html}, in rows, to the file and console. */
	private static void writeData(String html, Writer out) throws IOException {
		Matcher matcher = DATA_PATTERN.matcher(html);
		int count = 0;
		while (matcher.find()) {
			if (count == 0) {
				// Start the data on a fresh line after the header items, in
				// the file as well as on the console.
				System.out.println();
				out.write(FILE_LINE_BREAK);
			}
			count++;
			String value = matcher.group();
			out.write(value + FILE_VALUE_PADDING);
			System.out.printf("%-20s", value);
			if (count % VALUES_PER_ROW == 0) {
				System.out.println();
				out.write(FILE_LINE_BREAK);
			}
		}
	}

	/**
	 * Runs the extraction against a fixed page on stockdata.stock.hexun.com.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		HexunData test = new HexunData();
		try {
			test
					.extractor("http://stockdata.stock.hexun.com/2009_cgjzd_601398.shtml");
		} catch (IOException e) {
			e.printStackTrace();
		}

	}
}
分享到:
评论
1 楼 teaofnit 2009-11-29  
不能光贴个代码,没个说明啊。

相关推荐

Global site tag (gtag.js) - Google Analytics