-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwebForm.py
152 lines (140 loc) · 5.71 KB
/
webForm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
import sys
import webbrowser
import requests as req
from bs4 import BeautifulSoup as bs
from requests_html import HTMLSession
from urllib.parse import urljoin, urlparse
from pprint import pprint
# English : How to Extract and Submit Web Forms from a URL using Python
# Türkçe : Python kullanarak bir URL'den Web Formlarını Çıkarma ve Gönderme
# Install (kurulum): pip install requests_html
#                    pip install requests
#                    pip install bs4
class Form:
    """Extract the HTML forms found at a URL and submit the first one.

    Instance attributes (set in ``__init__``):
        session: ``HTMLSession`` used for every HTTP request.
        url: address of the page whose forms are inspected.
    """

    def __init__(self, url):
        """Remember ``url`` and create the HTTP session used by later calls."""
        self.session = HTMLSession()
        self.url = url

    def is_valid(self) -> bool:
        """Return True when ``self.url`` has both a scheme and a network location."""
        parsed = urlparse(self.url)
        # parsed.scheme -> "https", parsed.netloc -> "example.com"
        return bool(parsed.netloc) and bool(parsed.scheme)

    def get_all_forms(self):
        """Fetch ``self.url`` with a GET request and return every ``<form>`` tag."""
        response = self.session.get(self.url)
        # ``response.html.html`` is the markup exactly as fetched. For
        # JavaScript-driven pages the original author suggests calling
        # ``response.html.render()`` first; that is not done here.
        soup = bs(response.html.html, "html.parser")
        return soup.find_all("form")

    def get_form_details(self, form) -> dict:
        """Describe a form tag as a plain dictionary.

        Args:
            form: a parsed form element exposing ``.attrs`` and
                ``.find_all`` (as returned by ``get_all_forms``).

        Returns:
            A dict with three keys:
            ``"action"`` - the form's target URL as written in the page, or
            None when the attribute is absent;
            ``"method"`` - the lower-cased HTTP method, ``"get"`` by default;
            ``"inputs"`` - a list of ``{"type", "name", "value"}`` dicts, one
            per ``<input>`` tag (type defaults to ``"text"``, value to ``""``,
            name to None).
        """
        # The action is kept verbatim: URL paths and query strings are
        # case-sensitive, so lower-casing it could point at a different resource.
        action = form.attrs.get("action")
        method = form.attrs.get("method", "get").lower()
        inputs = [
            {
                "type": input_tag.attrs.get("type", "text"),
                "name": input_tag.attrs.get("name"),
                "value": input_tag.attrs.get("value", ""),
            }
            for input_tag in form.find_all("input")
        ]
        return {"action": action, "method": method, "inputs": inputs}

    @staticmethod
    def _build_form_data(inputs, value) -> dict:
        """Build the name -> value payload for a form submission.

        Hidden inputs keep the value found in the page, submit buttons are
        left out, inputs without a ``name`` are skipped (there is no key to
        send them under), and every other input receives ``value``.
        """
        data = {}
        for field in inputs:
            name = field["name"]
            if name is None:
                continue
            # HTML attribute values for ``type`` are case-insensitive.
            kind = field["type"].lower()
            if kind == "hidden":
                data[name] = field["value"]
            elif kind != "submit":
                data[name] = value
        return data

    def submit(self, value) -> None:
        """Submit the first form on the page, filling visible fields with ``value``.

        The response is handed to ``copy_site``, which saves it locally and
        opens it in the browser.

        Raises:
            IndexError: the page contains no ``<form>`` tag.
            ValueError: the form's method is neither ``get`` nor ``post``.
        """
        forms = self.get_all_forms()
        if not forms:
            raise IndexError(f"no <form> tag found at {self.url}")
        details = self.get_form_details(forms[0])
        payload = self._build_form_data(details["inputs"], value)
        # urljoin returns the base URL unchanged when the action is None/empty.
        target = urljoin(self.url, details["action"])
        method = details["method"]
        if method == "post":
            response = self.session.post(target, data=payload)
        elif method == "get":
            response = self.session.get(target, params=payload)
        else:
            # The session also offers put() and delete(), but they are not
            # wired up here; fail clearly instead of hitting an unbound name.
            raise ValueError(f"unsupported form method: {method!r}")
        self.copy_site(response)

    def copy_site(self, res) -> None:
        """Save a response as ``page.html`` and open it in the web browser.

        Relative URLs in ``<link href>``, ``<script src>``, ``<img src>`` and
        ``<a href>`` are rewritten to absolute ones (resolved against
        ``self.url``) so the saved copy can still load its resources.
        """
        soup = bs(res.content, "html.parser")
        targets = (("link", "href"), ("script", "src"), ("img", "src"), ("a", "href"))
        for tag_name, attr in targets:
            for tag in soup.find_all(tag_name):
                reference = tag.attrs.get(attr)
                if reference is None:
                    continue
                try:
                    tag.attrs[attr] = urljoin(self.url, reference)
                except ValueError:
                    # Best effort: leave a malformed reference as it is.
                    continue
        with open("page.html", "w", encoding="utf-8") as page:
            page.write(str(soup))
        webbrowser.open("page.html")

    def printALL(self) -> None:
        """Pretty-print the details of every form found on the page."""
        for i, form in enumerate(self.get_all_forms(), start=1):
            print("="*50, f"form #{i}", "="*50)
            pprint(self.get_form_details(form))
if __name__ == "__main__":
    # Expected command line: <url> [value]
    argc = len(sys.argv)
    if argc < 2:
        # No URL was given.
        print("You did not enter a URL.")
        print("Example: https://example.com")
        sys.exit()

    # ``url`` and ``form`` stay module-level names, as in the original script;
    # other code in this file may refer to them.
    url = sys.argv[1]
    form = Form(url)

    if not form.is_valid():
        # Ask for a valid address.
        print("Please enter a valid URL")
        print("Example: https://example.com")
    else:
        form.printALL()
        # Submit the first form only when exactly one extra argument is given.
        if argc == 3:
            form.submit(sys.argv[2])