Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions .classpath
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JRE-1.1">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/bin
/target
37 changes: 37 additions & 0 deletions .project
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>WebCrawler</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.wst.common.project.facet.core.builder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.wst.validation.validationbuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
<nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
<nature>org.sonar.ide.eclipse.core.sonarNature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.wst.common.project.facet.core.nature</nature>
</natures>
</projectDescription>
12 changes: 12 additions & 0 deletions .settings/org.eclipse.jdt.core.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.7
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.7
4 changes: 4 additions & 0 deletions .settings/org.eclipse.m2e.core.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
7 changes: 7 additions & 0 deletions .settings/org.eclipse.wst.common.component
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?><project-modules id="moduleCoreId" project-version="1.5.0">
<wb-module deploy-name="WebCrawler">
<wb-resource deploy-path="/" source-path="/src/main/resources" tag="defaultRootSource"/>
<wb-resource deploy-path="/" source-path="/src"/>
<property name="java-output-path" value="/WebCrawler/target/classes"/>
</wb-module>
</project-modules>
5 changes: 5 additions & 0 deletions .settings/org.eclipse.wst.common.project.facet.core.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<faceted-project>
<installed facet="java" version="1.7"/>
<installed facet="jst.ejb" version="2.1"/>
</faceted-project>
2 changes: 2 additions & 0 deletions .settings/org.eclipse.wst.validation.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
disabled=06target
eclipse.preferences.version=1
5 changes: 5 additions & 0 deletions .settings/org.sonar.ide.eclipse.core.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
eclipse.preferences.version=1
extraProperties=
projectKey=WebCrawler\:WebCrawler
serverUrl=http\://localhost\:9000
version=2
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
JavaWebCrawler
==============

This is an implementation of <a href ="http://www.programcreek.com/2012/12/how-to-make-a-web-crawler-using-java/" >How to make a Web crawler using Java</a> forked from <a href="https://github.com/ryanlr/JavaWebCrawler">Ryan</a>.

How to start the crawler.

1. Run the scripts from scripts/DbScripts.sql
2. Run the code in eclipse
28 changes: 20 additions & 8 deletions WebCrawler/.classpath
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" path="src"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
<classpathentry kind="lib" path="lib/jsoup-1.6.2.jar"/>
<classpathentry kind="lib" path="lib/mysql-connector-java-5.1.20-bin.jar"/>
<classpathentry kind="output" path="bin"/>
</classpath>
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JRE-1.1">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
1 change: 1 addition & 0 deletions WebCrawler/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
/bin
/target
20 changes: 20 additions & 0 deletions WebCrawler/.project
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,33 @@
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.wst.common.project.facet.core.builder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.wst.validation.validationbuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
<nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
<nature>org.sonar.ide.eclipse.core.sonarNature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.wst.common.project.facet.core.nature</nature>
</natures>
</projectDescription>
1 change: 1 addition & 0 deletions WebCrawler/.settings/org.eclipse.jdt.core.prefs
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.7
4 changes: 4 additions & 0 deletions WebCrawler/.settings/org.eclipse.m2e.core.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
7 changes: 7 additions & 0 deletions WebCrawler/.settings/org.eclipse.wst.common.component
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?><project-modules id="moduleCoreId" project-version="1.5.0">
<wb-module deploy-name="WebCrawler">
<wb-resource deploy-path="/" source-path="/src/main/resources" tag="defaultRootSource"/>
<wb-resource deploy-path="/" source-path="/src"/>
<property name="java-output-path" value="/WebCrawler/target/classes"/>
</wb-module>
</project-modules>
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
<faceted-project>
<installed facet="java" version="1.7"/>
<installed facet="jst.ejb" version="2.1"/>
</faceted-project>
2 changes: 2 additions & 0 deletions WebCrawler/.settings/org.eclipse.wst.validation.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
disabled=06target
eclipse.preferences.version=1
5 changes: 5 additions & 0 deletions WebCrawler/.settings/org.sonar.ide.eclipse.core.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
eclipse.preferences.version=1
extraProperties=
projectKey=WebCrawler\:WebCrawler
serverUrl=http\://localhost\:9000
version=2
Binary file removed WebCrawler/lib/jsoup-1.6.2.jar
Binary file not shown.
Binary file removed WebCrawler/lib/mysql-connector-java-5.1.20-bin.jar
Binary file not shown.
32 changes: 32 additions & 0 deletions WebCrawler/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>WebCrawler</groupId>
<artifactId>WebCrawler</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>WebCrawler</name>
<build>
<sourceDirectory>src</sourceDirectory>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.3</source>
<target>1.1</target>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.7.3</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.32</version>
</dependency>
</dependencies>
</project>
7 changes: 7 additions & 0 deletions WebCrawler/scripts/DbScripts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
create database Crawler;
use Crawler;
CREATE TABLE IF NOT EXISTS `Record` (
`RecordID` INT(11) NOT NULL AUTO_INCREMENT,
`URL` text NOT NULL,
PRIMARY KEY (`RecordID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ;
2 changes: 1 addition & 1 deletion WebCrawler/src/DB.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public DB() {
try {
Class.forName("com.mysql.jdbc.Driver");
String url = "jdbc:mysql://localhost:3306/Crawler";
conn = DriverManager.getConnection(url, "root", "admin213");
conn = DriverManager.getConnection(url, "root", "root");
System.out.println("conn built");
} catch (SQLException e) {
e.printStackTrace();
Expand Down
32 changes: 32 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>WebCrawler</groupId>
<artifactId>WebCrawler</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>WebCrawler</name>
<build>
<sourceDirectory>src</sourceDirectory>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>1.3</source>
<target>1.1</target>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.7.3</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.32</version>
</dependency>
</dependencies>
</project>
7 changes: 7 additions & 0 deletions scripts/DbScripts.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
create database Crawler;
use Crawler;
CREATE TABLE IF NOT EXISTS `Record` (
`RecordID` INT(11) NOT NULL AUTO_INCREMENT,
`URL` text NOT NULL,
PRIMARY KEY (`RecordID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 AUTO_INCREMENT=1 ;
40 changes: 40 additions & 0 deletions src/DB.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;

public class DB {

public Connection conn = null;

public DB() {
try {
Class.forName("com.mysql.jdbc.Driver");
String url = "jdbc:mysql://localhost:3306/Crawler";
conn = DriverManager.getConnection(url, "root", "root");
System.out.println("conn built");
} catch (SQLException e) {
e.printStackTrace();
} catch (ClassNotFoundException e) {
e.printStackTrace();
}
}

public ResultSet runSql(String sql) throws SQLException {
Statement sta = conn.createStatement();
return sta.executeQuery(sql);
}

public boolean runSql2(String sql) throws SQLException {
Statement sta = conn.createStatement();
return sta.execute(sql);
}

@Override
protected void finalize() throws Throwable {
if (conn != null || !conn.isClosed()) {
conn.close();
}
}
}
Loading