第一个问题:可以用Apache的HttpClient库来请求网页的HTML内容,然后通过HTML解析库比如Jsoup来获取Html中的元素。 第二个问题:可以用无头浏览器(Headless Browser),它可以模拟浏览器执行Js,来获取并生成html。 如果给一个url地址,可以通过以下代码获取该地址的html,并对html页面的元素进行操作: String url = "https://mobile.yangkeduo.com/goods.html?goods_id=484209795384"; try { // 1.创建URL对象和连接对象 URL urlObj = new URL(url); HttpURLConnection connection = (HttpURLConnection) urlObj.openConnection(); // 2.设置请求方法,获取响应代码和内容 connection.setRequestMethod("GET"); int responseCode = connection.getResponseCode(); if (responseCode == HttpURLConnection.HTTP_OK) { InputStream inputStream = connection.getInputStream(); BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream)); String line; StringBuilder htmlContent = new StringBuilder(); while ((line = reader.readLine()) != null) { htmlContent.append(line); } reader.close(); inputStream.close(); //3.使用 Jsoup 解析 HTML Document document = Jsoup.parse(htmlContent.toString()); //4.操作html中的a元素 Elements links = document.select("a"); for (Element link : links) { String linkText = link.text(); String linkUrl = link.attr("href"); } } else { System.out.println("HTTP request failed with response code: " + responseCode); } connection.disconnect(); } catch (IOException e) { e.printStackTrace(); }