This repository has been archived by the owner on Jul 8, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlazada.py
48 lines (40 loc) · 1.76 KB
/
lazada.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Web Scraping
from selenium import webdriver
from selenium.common.exceptions import *
# Data manipulation
import pandas as pd
def LazadaSearch(search_item):
    """Search Lazada Singapore for *search_item* and return the hits as a DataFrame.

    A headless Chrome session opens the Lazada home page, submits the query
    through the search bar, and reads the item names, links and prices from the
    result page. The browser is always closed before the function returns.

    Args:
        search_item: Text typed into the Lazada search bar.

    Returns:
        pandas.DataFrame with the columns ``ItemName``, ``Price ($)`` (float),
        ``Link`` and ``Platform`` (always ``'Lazada'``). Names, prices and
        links are paired by position, and the frame is truncated to the
        shortest of the three scraped lists.

    Raises:
        ValueError: If a scraped price cannot be converted to ``float`` after
            the currency symbol and thousands separators are removed.
        Selenium exceptions from the driver (for example when the search bar
            cannot be located) propagate to the caller.
    """
    # Function-scope import keeps this block self-contained; the generic
    # find_element(By, value) form is available in both Selenium 3 and 4,
    # unlike the removed find_element_by_* helpers.
    from selenium.webdriver.common.by import By

    # Initializing configuration values
    webdriver_path = "chromedriver.exe"
    Lazada_url = 'https://www.lazada.sg/'  # Your lazada website URL (Singapore one is used here)

    # Select custom Chrome options
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')  # Run script with browser in background
    options.add_argument('start-maximized')
    options.add_argument('disable-infobars')
    options.add_argument('--disable-extensions')
    options.add_argument('--log-level=OFF')

    # Open the Chrome browser.
    # NOTE(review): passing the driver path positionally is the Selenium 3 call
    # form; newer Selenium releases expect a Service object instead - confirm
    # against the installed version.
    browser = webdriver.Chrome(webdriver_path, options=options)
    try:
        browser.get(Lazada_url)

        # Search Bar
        search_bar = browser.find_element(By.ID, 'q')
        search_bar.send_keys(search_item)
        search_bar.submit()

        item_items = browser.find_elements(By.CLASS_NAME, 'c16H9d')
        item_prices = browser.find_elements(By.CLASS_NAME, 'c13VH6')

        # Read everything out of the page while the session is still alive;
        # WebElements cannot be queried once the browser has been closed.
        items_list = []
        links_list = []
        for item in item_items:
            items_list.append(item.text)
            links_list.append(item.find_element(By.XPATH, 'a').get_attribute("href"))
        prices_list = [price.text for price in item_prices]
    finally:
        # Always release the Chrome/chromedriver processes, even on failure.
        browser.quit()

    # Convert the three lists into a dataframe
    lazadaPD = pd.DataFrame(
        list(zip(items_list, prices_list, links_list)),
        columns=['ItemName', 'Price ($)', 'Link'],
    )
    # regex=False makes '$' a literal character; as a regular expression it is
    # the end-of-string anchor and would leave the symbol in place.
    # You might need to change "$" into whatever currency symbol is used based
    # on which Lazada you access.
    lazadaPD['Price ($)'] = (
        lazadaPD['Price ($)']
        .str.replace('$', '', regex=False)
        .str.replace(',', '', regex=False)  # drop thousands separators
        .astype(float)
    )
    lazadaPD['Platform'] = 'Lazada'
    return lazadaPD