import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
/**
 * Downloads every PDF linked from a fixed web page into the current working directory.
 *
 * <p>The page is loaded with an {@link HtmlUnitDriver}. Each anchor whose {@code href}
 * contains {@code ".pdf"} is fetched once and written to {@code <name>.pdf}, where
 * {@code <name>} is computed by {@code filename(String)}.
 */
public class pdffiledownloadfromwebsite {

    /** Page whose anchors are scanned for PDF links. */
    private static final String PAGE_URL = "http://www.banglakitab.com/kitab.htm";

    /** Size of the chunk used when copying a download to disk. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Scans the page for PDF links and downloads each distinct one.
     *
     * @param args unused
     * @throws MalformedURLException if a collected link is not a valid URL
     * @throws IOException if a download or a file write fails
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        // URLs already handled, so a link repeated on the page is downloaded only once.
        List<String> uniqurl = new ArrayList<>();
        WebDriver driver = new HtmlUnitDriver();
        try {
            driver.get(PAGE_URL);
            List<WebElement> anchors = driver.findElements(By.tagName("a"));
            for (WebElement anchor : anchors) {
                String href = anchor.getAttribute("href");
                if (href == null) {
                    // Anchors without an href (e.g. named anchors) have nothing to download.
                    continue;
                }
                String pdfurl = href.trim();
                if (!pdfurl.contains(".pdf") || uniqurl.contains(pdfurl)) {
                    continue;
                }
                uniqurl.add(pdfurl);
                String name = filename(pdfurl);
                System.out.println(name + " ---> " + pdfurl);
                download(pdfurl, name + ".pdf");
            }
        } finally {
            // Release the driver even when a download fails part-way through.
            driver.quit();
        }
    }

    /**
     * Copies the content behind {@code pdfurl} into the file {@code target}.
     *
     * @param pdfurl absolute URL to fetch
     * @param target path of the file to create or overwrite
     * @throws IOException if the URL cannot be opened or the file cannot be written
     */
    private static void download(String pdfurl, String target) throws IOException {
        URL url = new URL(pdfurl);
        // try-with-resources closes both streams on success and on failure.
        try (InputStream in = new BufferedInputStream(url.openStream());
                OutputStream out = new BufferedOutputStream(new FileOutputStream(target))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            for (int read; (read = in.read(buffer)) != -1;) {
                out.write(buffer, 0, read);
            }
        }
    }

    /**
     * Derives the local file name (without extension) from a download URL.
     *
     * <p>The name is the part before the first {@code '.'} of the fifth {@code '/'}-separated
     * piece of the URL, i.e. the file in {@code http://host/dir/file.pdf}. URLs with fewer
     * pieces fall back to their last piece instead of failing.
     *
     * @param url download URL
     * @return the derived name; may be empty
     */
    private static String filename(String url) {
        String[] segments = url.split("/");
        if (segments.length == 0) {
            return "";
        }
        String segment = segments[Math.min(4, segments.length - 1)];
        int dot = segment.indexOf('.');
        return dot < 0 ? segment : segment.substring(0, dot);
    }
}
import java.io.BufferedOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
/**
 * Downloads every PDF linked from a fixed web page into the current working directory.
 *
 * <p>The page is loaded with an {@link HtmlUnitDriver}. Each anchor whose {@code href}
 * contains {@code ".pdf"} is fetched once and written to {@code <name>.pdf}, where
 * {@code <name>} is computed by {@code filename(String)}.
 */
public class pdffiledownloadfromwebsite {

    /** Page whose anchors are scanned for PDF links. */
    private static final String PAGE_URL = "http://www.banglakitab.com/kitab.htm";

    /** Size of the chunk used when copying a download to disk. */
    private static final int BUFFER_SIZE = 8192;

    /**
     * Scans the page for PDF links and downloads each distinct one.
     *
     * @param args unused
     * @throws MalformedURLException if a collected link is not a valid URL
     * @throws IOException if a download or a file write fails
     */
    public static void main(String[] args) throws MalformedURLException, IOException {
        // URLs already handled, so a link repeated on the page is downloaded only once.
        List<String> uniqurl = new ArrayList<>();
        WebDriver driver = new HtmlUnitDriver();
        try {
            driver.get(PAGE_URL);
            List<WebElement> anchors = driver.findElements(By.tagName("a"));
            for (WebElement anchor : anchors) {
                String href = anchor.getAttribute("href");
                if (href == null) {
                    // Anchors without an href (e.g. named anchors) have nothing to download.
                    continue;
                }
                String pdfurl = href.trim();
                if (!pdfurl.contains(".pdf") || uniqurl.contains(pdfurl)) {
                    continue;
                }
                uniqurl.add(pdfurl);
                String name = filename(pdfurl);
                System.out.println(name + " ---> " + pdfurl);
                download(pdfurl, name + ".pdf");
            }
        } finally {
            // Release the driver even when a download fails part-way through.
            driver.quit();
        }
    }

    /**
     * Copies the content behind {@code pdfurl} into the file {@code target}.
     *
     * @param pdfurl absolute URL to fetch
     * @param target path of the file to create or overwrite
     * @throws IOException if the URL cannot be opened or the file cannot be written
     */
    private static void download(String pdfurl, String target) throws IOException {
        URL url = new URL(pdfurl);
        // try-with-resources closes both streams on success and on failure.
        try (InputStream in = new BufferedInputStream(url.openStream());
                OutputStream out = new BufferedOutputStream(new FileOutputStream(target))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            for (int read; (read = in.read(buffer)) != -1;) {
                out.write(buffer, 0, read);
            }
        }
    }

    /**
     * Derives the local file name (without extension) from a download URL.
     *
     * <p>The name is the part before the first {@code '.'} of the fifth {@code '/'}-separated
     * piece of the URL, i.e. the file in {@code http://host/dir/file.pdf}. URLs with fewer
     * pieces fall back to their last piece instead of failing.
     *
     * @param url download URL
     * @return the derived name; may be empty
     */
    private static String filename(String url) {
        String[] segments = url.split("/");
        if (segments.length == 0) {
            return "";
        }
        String segment = segments[Math.min(4, segments.length - 1)];
        int dot = segment.indexOf('.');
        return dot < 0 ? segment : segment.substring(0, dot);
    }
}
No comments:
Post a Comment