Paging in Simple HTML DOM XPATH
Enrique Gervaso
21/09/2016 11:51:32
Kan iemand mij helpen om in onderstaand script paging mogelijk te maken?
Ik run de pagina m.b.v. Simple HTML DOM en start op de pagina:
http://www.imdb.com/search/title?year=2016,2016&title_type=feature&sort=moviemeter,asc
De XPATH om naar de volgende pagina te gaan is:
//*[@id="main"]/div/div/div[4]/div/a
Ik run de pagina m.b.v. Simple HTML DOM en start op de pagina:
http://www.imdb.com/search/title?year=2016,2016&title_type=feature&sort=moviemeter,asc
De XPATH om naar de volgende pagina te gaan is:
//*[@id="main"]/div/div/div[4]/div/a
Code (php)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
<?php
/*
 * Downloads the first result page of IMDb's feature-film search for the
 * current year (sorted by "moviemeter"), extracts the title links via XPath
 * and inserts every title that is not yet present in the `video` table.
 *
 * Expects $connection (a mysqli link) to be set up by the surrounding script.
 * Only the first result page is processed; following the "next page" link is
 * not implemented here.
 */
$current_year = date("Y");

// $row is not defined anywhere in this snippet. Guard the lookup so a missing
// outer $row no longer raises a notice; the value stays available to callers.
$movie_title = isset($row["title"]) ? $row["title"] : null;

// Start xpath scraping
$search_url = 'http://www.imdb.com/search/title?year='.$current_year.','.$current_year.'&title_type=feature&sort=moviemeter,asc';
$source = file_get_contents($search_url);

// file_get_contents() returns false on failure; there is nothing to parse then.
if ($source !== false && trim($source) !== '') {
    // Collect libxml parse errors internally instead of silencing everything
    // with "@": real-world HTML is rarely valid, but other errors should surface.
    $previous_libxml_setting = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($source);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml_setting);

    $xpath = new DOMXPath($dom);
    $rows = $xpath->query("//*[@id=\"main\"]/div/div/div[3]/div/div[3]/h3/a");

    // Prepared statements: the title is sent as a bound parameter and never
    // becomes part of the SQL text. Both are prepared once and reused per title.
    $select_stmt = mysqli_prepare($connection, 'SELECT 1 FROM video WHERE title = ? LIMIT 1');
    $insert_stmt = mysqli_prepare($connection, 'INSERT INTO video (`title`) VALUES (?)');

    if ($rows === false || $select_stmt === false || $insert_stmt === false) {
        trigger_error('IMDb import skipped: XPath query or SQL statement preparation failed.', E_USER_WARNING);
    } else {
        // A dedicated loop variable is used so the DOM node does not overwrite
        // a $row belonging to the surrounding script.
        foreach ($rows as $index => $title_link) {
            $title_results = $title_link->textContent;

            // Same whitelist as before, so cleaned titles keep matching the
            // rows that are already stored in the table.
            $title_results_clean = preg_replace('/[^a-zA-Z0-9_ %\[\]\.\(\)%&-]/s', '', $title_results);

            // preg_replace() yields null on error; an empty title is useless.
            if ($title_results_clean === null || $title_results_clean === '') {
                continue;
            }

            // Check if title exists
            mysqli_stmt_bind_param($select_stmt, 's', $title_results_clean);
            if (!mysqli_stmt_execute($select_stmt)) {
                // Unknown state: do not risk inserting a duplicate.
                continue;
            }
            mysqli_stmt_store_result($select_stmt);
            $already_stored = mysqli_stmt_num_rows($select_stmt) > 0;
            mysqli_stmt_free_result($select_stmt);

            if (!$already_stored) {
                // Insert the new item
                mysqli_stmt_bind_param($insert_stmt, 's', $title_results_clean);
                mysqli_stmt_execute($insert_stmt);
            }
        }
    }

    if ($select_stmt !== false) {
        mysqli_stmt_close($select_stmt);
    }
    if ($insert_stmt !== false) {
        mysqli_stmt_close($insert_stmt);
    }
}
?>
/*
 * Downloads the first result page of IMDb's feature-film search for the
 * current year (sorted by "moviemeter"), extracts the title links via XPath
 * and inserts every title that is not yet present in the `video` table.
 *
 * Expects $connection (a mysqli link) to be set up by the surrounding script.
 * Only the first result page is processed; following the "next page" link is
 * not implemented here.
 */
$current_year = date("Y");

// $row is not defined anywhere in this snippet. Guard the lookup so a missing
// outer $row no longer raises a notice; the value stays available to callers.
$movie_title = isset($row["title"]) ? $row["title"] : null;

// Start xpath scraping
$search_url = 'http://www.imdb.com/search/title?year='.$current_year.','.$current_year.'&title_type=feature&sort=moviemeter,asc';
$source = file_get_contents($search_url);

// file_get_contents() returns false on failure; there is nothing to parse then.
if ($source !== false && trim($source) !== '') {
    // Collect libxml parse errors internally instead of silencing everything
    // with "@": real-world HTML is rarely valid, but other errors should surface.
    $previous_libxml_setting = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($source);
    libxml_clear_errors();
    libxml_use_internal_errors($previous_libxml_setting);

    $xpath = new DOMXPath($dom);
    $rows = $xpath->query("//*[@id=\"main\"]/div/div/div[3]/div/div[3]/h3/a");

    // Prepared statements: the title is sent as a bound parameter and never
    // becomes part of the SQL text. Both are prepared once and reused per title.
    $select_stmt = mysqli_prepare($connection, 'SELECT 1 FROM video WHERE title = ? LIMIT 1');
    $insert_stmt = mysqli_prepare($connection, 'INSERT INTO video (`title`) VALUES (?)');

    if ($rows === false || $select_stmt === false || $insert_stmt === false) {
        trigger_error('IMDb import skipped: XPath query or SQL statement preparation failed.', E_USER_WARNING);
    } else {
        // A dedicated loop variable is used so the DOM node does not overwrite
        // a $row belonging to the surrounding script.
        foreach ($rows as $index => $title_link) {
            $title_results = $title_link->textContent;

            // Same whitelist as before, so cleaned titles keep matching the
            // rows that are already stored in the table.
            $title_results_clean = preg_replace('/[^a-zA-Z0-9_ %\[\]\.\(\)%&-]/s', '', $title_results);

            // preg_replace() yields null on error; an empty title is useless.
            if ($title_results_clean === null || $title_results_clean === '') {
                continue;
            }

            // Check if title exists
            mysqli_stmt_bind_param($select_stmt, 's', $title_results_clean);
            if (!mysqli_stmt_execute($select_stmt)) {
                // Unknown state: do not risk inserting a duplicate.
                continue;
            }
            mysqli_stmt_store_result($select_stmt);
            $already_stored = mysqli_stmt_num_rows($select_stmt) > 0;
            mysqli_stmt_free_result($select_stmt);

            if (!$already_stored) {
                // Insert the new item
                mysqli_stmt_bind_param($insert_stmt, 's', $title_results_clean);
                mysqli_stmt_execute($insert_stmt);
            }
        }
    }

    if ($select_stmt !== false) {
        mysqli_stmt_close($select_stmt);
    }
    if ($insert_stmt !== false) {
        mysqli_stmt_close($insert_stmt);
    }
}
?>
Er zijn nog geen reacties op dit bericht.