Yhdjsndbxjx

← разместить еще код

Gbdbdnxbxjdbx

Расшаренный код:

package ru.sanek;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

import com.google.gson.Gson;

/**
 * Downloads a paginated HTML listing through an HTTP proxy, extracts the rows of its
 * {@code table.persons.notaries} table and prints them to standard output as JSON.
 *
 * <p>All members are static; the class keeps no state between calls.
 */
public class HtmlParser {
	/** Number of rows requested per page through the {@code max} query parameter. */
	private static final int PAGE_SIZE = 100;

	/**
	 * Fetches the body of {@code urlString} through the given HTTP proxy.
	 *
	 * <p>The body is decoded with the charset announced in the {@code Content-Type} response
	 * header (UTF-8 when none is announced) and line breaks are normalised to {@code '\n'}.
	 *
	 * @param urlString absolute URL of the page to download
	 * @param ipProxy host name or IP address of the HTTP proxy
	 * @param portProxy TCP port of the HTTP proxy
	 * @return the response body as text
	 * @throws IOException if the URL is malformed, the connection fails or the body cannot be read
	 */
	public static String GetPage(String urlString,String ipProxy, int portProxy) throws IOException {
		URL url = new URL(urlString);
		Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ipProxy, portProxy));
		HttpURLConnection uc = (HttpURLConnection) url.openConnection(proxy);
		try {
			uc.connect();
			Charset charset = charsetOf(uc.getContentType());
			StringBuilder body = new StringBuilder();
			// try-with-resources closes the stream even when reading fails half-way.
			try (BufferedReader in = new BufferedReader(new InputStreamReader(uc.getInputStream(), charset))) {
				String line;
				while ((line = in.readLine()) != null) {
					// Keep a separator so markup split across lines does not fuse together.
					body.append(line).append('\n');
				}
			}
			return body.toString();
		} catch (IOException e) {
			// Release the socket on failure; after a successful read it stays eligible for reuse.
			uc.disconnect();
			throw e;
		}
	}

	/**
	 * Extracts the charset from a {@code Content-Type} header value.
	 *
	 * @param contentType raw header value, may be {@code null}
	 * @return the declared charset, or UTF-8 when it is absent, malformed or unsupported
	 */
	private static Charset charsetOf(String contentType) {
		final String key = "charset=";
		if (contentType != null) {
			for (String part : contentType.split(";")) {
				String param = part.trim();
				if (param.regionMatches(true, 0, key, 0, key.length())) {
					String name = param.substring(key.length()).replace("\"", "").trim();
					try {
						return Charset.forName(name);
					} catch (IllegalArgumentException ignored) {
						// Malformed or unsupported charset name: fall back to UTF-8 below.
					}
				}
			}
		}
		return StandardCharsets.UTF_8;
	}

	/**
	 * Reads the number shown in the first {@code div.pagination > label} under {@code #content}.
	 *
	 * <p>Every non-digit character of the label text is dropped before parsing.
	 *
	 * @param doc parsed listing page
	 * @return the integer formed by the digits of the label text
	 * @throws IllegalStateException if {@code #content} or the pagination label is missing
	 * @throws NumberFormatException if the label has no digits or the value does not fit an {@code int}
	 */
	public static int GetCount(Document doc) {
		Element content = doc.getElementById("content");
		if (content == null) {
			throw new IllegalStateException("Element #content not found in the page");
		}
		Element label = content.select("div.pagination > label").first();
		if (label == null) {
			throw new IllegalStateException("No 'div.pagination > label' element found under #content");
		}
		return Integer.parseInt(label.text().replaceAll("[^0-9]", ""));
	}

	/**
	 * Converts the first {@code table.persons.notaries} under {@code #content} into a list of rows.
	 *
	 * <p>Each {@code tbody > tr} becomes one map. Every cell matching {@code td:not(.empty)} is
	 * stored under the text of the header cell(s) sharing its sibling index; cells with no
	 * matching header are stored under the empty string.
	 *
	 * @param doc parsed listing page
	 * @return one map per table row, in document order
	 * @throws IllegalStateException if {@code #content} or the table is missing
	 */
	public static ArrayList<HashMap<String, String>> GetTableData(Document doc) {
		Element content = doc.getElementById("content");
		if (content == null) {
			throw new IllegalStateException("Element #content not found in the page");
		}
		Element tableEl = content.select("table.persons.notaries").first();
		if (tableEl == null) {
			throw new IllegalStateException("No 'table.persons.notaries' element found under #content");
		}

		// Index the header texts once instead of re-running a selector for every cell.
		// Headers sharing a sibling index are joined with a space, matching Elements.text().
		HashMap<Integer, String> headers = new HashMap<>();
		for (Element th : tableEl.select("thead > tr > th")) {
			Integer index = th.elementSiblingIndex();
			String previous = headers.get(index);
			headers.put(index, previous == null ? th.text() : previous + " " + th.text());
		}

		ArrayList<HashMap<String, String>> table = new ArrayList<>();
		for (Element row : tableEl.select("tbody > tr")) {
			HashMap<String, String> cells = new HashMap<>();
			for (Element col : row.select("td:not(.empty)")) {
				Integer index = col.elementSiblingIndex();
				String header = headers.get(index);
				cells.put(header == null ? "" : header, col.text());
			}
			table.add(cells);
		}
		return table;
	}

	/**
	 * Downloads every page of the listing at {@code url} and prints all table rows as one JSON array.
	 *
	 * <p>The first request (without query parameters) supplies the total row count; the rows are
	 * then fetched in pages of {@value #PAGE_SIZE} using {@code ?max=...&offset=...}. The row count
	 * and the resulting JSON are written to standard output.
	 *
	 * @param url base URL of the listing, without query parameters
	 * @param proxyIp host name or IP address of the HTTP proxy
	 * @param proxyPort TCP port of the HTTP proxy
	 * @return the plain text of the first page (the JSON is printed, not returned)
	 * @throws IOException if any page cannot be downloaded
	 * @throws IllegalStateException if a page lacks the expected elements
	 */
	public static String Parse(String url, String proxyIp, int proxyPort) throws IOException {
		Document doc = Jsoup.parse(GetPage(url, proxyIp, proxyPort));
		int count = GetCount(doc);
		System.out.println(count);

		// Ceiling division: an exact multiple of PAGE_SIZE must not request a page past the end.
		int pages = (count + PAGE_SIZE - 1) / PAGE_SIZE;
		ArrayList<HashMap<String, String>> data = new ArrayList<>();
		for (int page = 0; page < pages; page++) {
			String pageUrl = url + "?max=" + PAGE_SIZE + "&offset=" + page * PAGE_SIZE;
			data.addAll(GetTableData(Jsoup.parse(GetPage(pageUrl, proxyIp, proxyPort))));
		}

		System.out.println(new Gson().toJson(data));
		// NOTE(review): callers receive the first page's text, not the JSON; kept for compatibility.
		return doc.text();
	}

	/**
	 * Starts ten {@code MultiThread} workers.
	 *
	 * <p>For a direct single-threaded run, call for example:
	 * {@code System.out.println(Parse("http://lawyers.minjust.ru/Lawyers", "176.111.73.57", 8081));}
	 *
	 * @param args ignored
	 * @throws IOException declared for compatibility; nothing here performs I/O directly
	 */
	public static void main(String[] args) throws IOException {
		// MultiThread is declared outside this file; presumably each worker runs the scrape - confirm.
		for (int i = 0; i < 10; i++) {
			MultiThread thread = new MultiThread();
			thread.start();
		}
	}
}