Skip to content

Commit

Permalink
Merge pull request zaproxy#2779 from thc202/spider-submit-fields
Browse files Browse the repository at this point in the history
Use all form submit fields when spidering
  • Loading branch information
psiinon authored Aug 25, 2016
2 parents fc44eaf + bfc5a40 commit 89bc7f1
Show file tree
Hide file tree
Showing 21 changed files with 1,080 additions and 83 deletions.
242 changes: 174 additions & 68 deletions src/org/zaproxy/zap/spider/parser/SpiderHtmlFormParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
Expand Down Expand Up @@ -56,15 +57,19 @@ public class SpiderHtmlFormParser extends SpiderParser {
private static final String DEFAULT_PASS_VALUE = DEFAULT_TEXT_VALUE;

/** The spider parameters. */
SpiderParam param;
private final SpiderParam param;

/**
 * Creates a parser for HTML forms backed by the given spider parameters.
 *
 * @param param the parameters for the spider, must not be {@code null}
 * @throws IllegalArgumentException if {@code param} is null.
 */
public SpiderHtmlFormParser(SpiderParam param) {
    // The superclass no-arg constructor is invoked implicitly.
    if (param == null) {
        throw new IllegalArgumentException("Parameter param must not be null.");
    }

    this.param = param;
}

Expand All @@ -82,12 +87,7 @@ public boolean parseResource(HttpMessage message, Source source, int depth) {
}

// Get the context (base url)
String baseURL;
if (message == null) {
baseURL = "";
} else {
baseURL = message.getRequestHeader().getURI().toString();
}
String baseURL = message.getRequestHeader().getURI().toString();

// Try to see if there's any BASE tag that could change the base URL
Element base = source.getFirstElement(HTMLElementName.BASE);
Expand Down Expand Up @@ -121,37 +121,39 @@ public boolean parseResource(HttpMessage message, Source source, int depth) {
continue;
}

// Prepare data set
List<HtmlParameter> formDataSet = prepareFormDataSet(form.getFormFields());
FormData formData = prepareFormDataSet(form.getFormFields());

// Process the case of a POST method
if (method != null && method.trim().equalsIgnoreCase(METHOD_POST)) {
String query = "";
// Build the absolute canonical URL
String fullURL = URLCanonicalizer.getCanonicalURL(action, baseURL);
if (fullURL == null) {
return false;
}
log.debug("Canonical URL constructed using '" + action + "': " + fullURL);

/*
* Ignore encoding, as we will not POST files anyway, so using
* "application/x-www-form-urlencoded" is adequate
*/
// String encoding = form.getAttributeValue("enctype");
// if (encoding != null && encoding.equals("multipart/form-data"))
query = buildEncodedUrlQuery(formDataSet);
log.debug("Submiting form with POST method and message body with form parameters (normal encoding): "
+ query);

// Build the absolute canonical URL
String fullURL = URLCanonicalizer.getCanonicalURL(action, baseURL);
if (fullURL == null) {
return false;
String baseRequestBody = buildEncodedUrlQuery(formData.getFields());
if (formData.getSubmitFields().isEmpty()) {
notifyPostResourceFound(message, depth, fullURL, baseRequestBody);
continue;
}

log.debug("Canonical URL constructed using '" + action + "': " + fullURL);
notifyListenersPostResourceFound(message, depth + 1, fullURL, query);
for (HtmlParameter submitField : formData.getSubmitFields()) {
notifyPostResourceFound(
message,
depth,
fullURL,
appendEncodedUrlQueryParameter(baseRequestBody, submitField));
}

} // Process anything else as a GET method
else {
String query = buildEncodedUrlQuery(formDataSet);
log.debug("Submiting form with GET method and query with form parameters: " + query);

// Clear the fragment, if any, as it does not have any relevance for the server
if (action.contains("#")) {
int fs = action.lastIndexOf("#");
Expand All @@ -161,12 +163,12 @@ public boolean parseResource(HttpMessage message, Source source, int depth) {
// Process the final URL
if (action.contains("?")) {
if (action.endsWith("?")) {
processURL(message, depth, action + query, baseURL);
processGetForm(message, depth, action, baseURL, formData);
} else {
processURL(message, depth, action + "&" + query, baseURL);
processGetForm(message, depth, action + "&", baseURL, formData);
}
} else {
processURL(message, depth, action + "?" + query, baseURL);
processGetForm(message, depth, action + "?", baseURL, formData);
}
}

Expand All @@ -175,6 +177,32 @@ public boolean parseResource(HttpMessage message, Source source, int depth) {
return false;
}

/**
 * Turns the data of a GET form into one or more URLs and processes each of them.
 * <p>
 * When the form has no submit fields, a single URL containing the normal fields is processed. Otherwise one URL is
 * processed per submit field, each containing all the normal fields followed by that submit field.
 * <p>
 * The query is concatenated directly to {@code action}, so {@code action} must already end with the required
 * separator ({@code ?} or {@code &}).
 *
 * @param message the source message
 * @param depth the current depth
 * @param action the action, used as prefix of the generated query
 * @param baseURL the base URL
 * @param formData the GET form data
 * @see #processURL(HttpMessage, int, String, String)
 */
private void processGetForm(HttpMessage message, int depth, String action, String baseURL, FormData formData) {
    String fieldsQuery = buildEncodedUrlQuery(formData.getFields());
    List<HtmlParameter> submitFields = formData.getSubmitFields();

    // No submit controls: the normal fields alone make up the only request.
    if (submitFields.isEmpty()) {
        log.debug("Submiting form with GET method and query with form parameters: " + fieldsQuery);
        processURL(message, depth, action + fieldsQuery, baseURL);
        return;
    }

    // One request per submit control, each carrying all the normal fields.
    for (HtmlParameter submit : submitFields) {
        String fullQuery = appendEncodedUrlQueryParameter(fieldsQuery, submit);
        log.debug("Submiting form with GET method and query with form parameters: " + fullQuery);
        processURL(message, depth, action + fullQuery, baseURL);
    }
}

/**
* Prepares the form data set. A form data set is a sequence of control-name/current-value pairs
* constructed from successful controls, which will be sent with a GET/POST request for a form.
Expand All @@ -186,8 +214,9 @@ public boolean parseResource(HttpMessage message, Source source, int depth) {
* @param form the form
* @return the list
*/
private List<HtmlParameter> prepareFormDataSet(FormFields form) {
private FormData prepareFormDataSet(FormFields form) {
List<HtmlParameter> formDataSet = new LinkedList<>();
List<HtmlParameter> submitFields = new ArrayList<>();

// Process each form field
Iterator<FormField> it = form.iterator();
Expand All @@ -197,55 +226,86 @@ private List<HtmlParameter> prepareFormDataSet(FormFields form) {
log.debug("New form field: " + field.getDebugInfo());
}

// Get its value(s)
List<String> values = field.getValues();
if (log.isDebugEnabled()) {
log.debug("Existing values: " + values);
List<HtmlParameter> currentList = formDataSet;
if (field.getFormControl().getFormControlType().isSubmit()) {
currentList = submitFields;
}
for (String value : getValues(field)) {
currentList.add(new HtmlParameter(Type.form, field.getName(), value));
}
}

// If there are no values at all or only an empty value
if (values.isEmpty() || (values.size() == 1 && values.get(0).isEmpty())) {
String finalValue = DEFAULT_EMPTY_VALUE;
return new FormData(formDataSet, submitFields);
}

// Check if we can use predefined values
Collection<String> predefValues = field.getPredefinedValues();
if (!predefValues.isEmpty()) {
// Try first elements
Iterator<String> iterator = predefValues.iterator();
finalValue = iterator.next();
/**
* Gets the values for the given {@code field}.
* <p>
* If the field is of submit type it returns its predefined values.
*
* @param field the field
* @return a list with the values
* @see #getDefaultTextValue(FormField)
*/
private static List<String> getValues(FormField field) {
if (field.getFormControl().getFormControlType().isSubmit()) {
return new ArrayList<>(field.getPredefinedValues());
}

// If there are more values, don't use the first, as it usually is a "No select"
// item
if (iterator.hasNext()) {
finalValue = iterator.next();
}
} else {
/*
* In all cases, according to Jericho documentation, the only left option is for
* it to be a TEXT field, without any predefined value. We check if it has only
* one userValueCount, and, if so, fill it with a default value.
*/
if (field.getUserValueCount() > 0) {
finalValue = getDefaultTextValue(field);
}
}
// Get its value(s)
List<String> values = field.getValues();
if (log.isDebugEnabled()) {
log.debug("Existing values: " + values);
}

// Save the finalValue in the FormDataSet
log.debug("No existing value for field " + field.getName() + ". Generated: " + finalValue);
HtmlParameter p = new HtmlParameter(Type.form, field.getName(), finalValue);
formDataSet.add(p);
}
// If there are preselected values for the fields, use them
else {
for (String v : values) {
// Save the finalValue in the FormDataSet
HtmlParameter p = new HtmlParameter(Type.form, field.getName(), v);
formDataSet.add(p);
// If there are no values at all or only an empty value
if (values.isEmpty() || (values.size() == 1 && values.get(0).isEmpty())) {
String finalValue = DEFAULT_EMPTY_VALUE;

// Check if we can use predefined values
Collection<String> predefValues = field.getPredefinedValues();
if (!predefValues.isEmpty()) {
// Try first elements
Iterator<String> iterator = predefValues.iterator();
finalValue = iterator.next();

// If there are more values, don't use the first, as it usually is a "No select"
// item
if (iterator.hasNext()) {
finalValue = iterator.next();
}
} else {
/*
* In all cases, according to Jericho documentation, the only left option is for
* it to be a TEXT field, without any predefined value. We check if it has only
* one userValueCount, and, if so, fill it with a default value.
*/
if (field.getUserValueCount() > 0) {
finalValue = getDefaultTextValue(field);
}
}

log.debug("No existing value for field " + field.getName() + ". Generated: " + finalValue);

values = new ArrayList<>(1);
values.add(finalValue);
}

return formDataSet;
return values;
}

/**
 * Logs the POST request body and notifies the listeners that a new POST resource was found.
 * <p>
 * The listeners are notified with the given {@code depth} incremented by one.
 *
 * @param message the source message
 * @param depth the current depth
 * @param url the URL of the resource
 * @param requestBody the request body
 * @see #notifyListenersPostResourceFound(HttpMessage, int, String, String)
 */
private void notifyPostResourceFound(HttpMessage message, int depth, String url, String requestBody) {
    log.debug(
            "Submiting form with POST method and message body with form parameters (normal encoding): "
                    + requestBody);

    int resourceDepth = depth + 1;
    notifyListenersPostResourceFound(message, resourceDepth, url, requestBody);
}

/**
Expand All @@ -268,7 +328,7 @@ private List<HtmlParameter> prepareFormDataSet(FormFields form) {
* @param field the field
* @return the default text value
*/
private String getDefaultTextValue(FormField field) {
private static String getDefaultTextValue(FormField field) {
FormControl fc = field.getFormControl();
if (fc.getFormControlType() == FormControlType.TEXT) {
// If the control type was reduced to a TEXT type by the Jericho library, check the
Expand Down Expand Up @@ -370,9 +430,55 @@ private String buildEncodedUrlQuery(List<HtmlParameter> formDataSet) {
return request.toString();
}

/**
 * Appends the given {@code parameter}, URL encoded, to the given {@code query}.
 * <p>
 * The parameter is appended as {@code name=value}, preceded by {@code &} if the query is not empty.
 * <p>
 * If the parameter cannot be encoded a warning is logged and the query is returned unchanged, so the result never
 * contains a dangling separator or a name without its value.
 *
 * @param query the query, already encoded
 * @param parameter the parameter to append
 * @return the query with the parameter appended, or the unchanged {@code query} if the encoding failed
 */
private static String appendEncodedUrlQueryParameter(String query, HtmlParameter parameter) {
    String encodedName;
    String encodedValue;
    try {
        // Encode both parts before touching the query, so a failure cannot leave it half-built.
        encodedName = URLEncoder.encode(parameter.getName(), ENCODING_TYPE);
        encodedValue = URLEncoder.encode(parameter.getValue(), ENCODING_TYPE);
    } catch (UnsupportedEncodingException e) {
        log.warn("Error while encoding query for form.", e);
        return query;
    }

    StringBuilder strBuilder = new StringBuilder(query);
    if (strBuilder.length() != 0) {
        strBuilder.append('&');
    }
    strBuilder.append(encodedName).append('=').append(encodedValue);
    return strBuilder.toString();
}

@Override
public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyConsumed) {
    // Fallback parser: never handle a message that another parser has already consumed.
    if (wasAlreadyConsumed) {
        return false;
    }
    // Otherwise handle it only if the response is HTML.
    return message.getResponseHeader().isHtml();
}

/**
 * The fields (and their values) of an HTML form.
 * <p>
 * Normal fields and submit fields are kept in separate lists. The lists are stored and returned as given, no copies
 * are made.
 */
private static class FormData {

    /** The normal (non-submit) fields of the form. */
    private final List<HtmlParameter> fields;

    /** The submit fields of the form. */
    private final List<HtmlParameter> submitFields;

    /**
     * Creates the form data with the given normal and submit fields.
     *
     * @param normalFields the normal fields of the form
     * @param submitControls the submit fields of the form
     */
    public FormData(List<HtmlParameter> normalFields, List<HtmlParameter> submitControls) {
        this.fields = normalFields;
        this.submitFields = submitControls;
    }

    /**
     * Gets the normal fields of the form.
     *
     * @return the normal fields
     */
    public List<HtmlParameter> getFields() {
        return this.fields;
    }

    /**
     * Gets the submit fields of the form.
     *
     * @return the submit fields
     */
    public List<HtmlParameter> getSubmitFields() {
        return this.submitFields;
    }
}
}
Loading

0 comments on commit 89bc7f1

Please sign in to comment.