Skip to content

Commit

Permalink
Use all form submit fields when spidering
Browse files Browse the repository at this point in the history
Change class SpiderHtmlFormParser to use all submit fields when
spidering, using each submit field in turn with remaining form fields.
Add some tests to assert the expected behaviour of SpiderHtmlFormParser.
Other minor changes were done to "normalise" the behaviour/expectations
of class SpiderHtmlFormParser:
 - Throw an exception when creating an instance with a null SpiderParam,
 since it is required to check whether HTML POST forms should be parsed;
 - Change the instance variable "param" to private and final;
 - Always expect a message when parsing (letting a NullPointerException
 be thrown if there is none).

Fix zaproxy#2748 - ZAP Spidering HTML Forms with multiple submit buttons
  • Loading branch information
thc202 committed Aug 24, 2016
1 parent 0b2a005 commit bfc5a40
Show file tree
Hide file tree
Showing 21 changed files with 1,080 additions and 83 deletions.
242 changes: 174 additions & 68 deletions src/org/zaproxy/zap/spider/parser/SpiderHtmlFormParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
Expand Down Expand Up @@ -56,15 +57,19 @@ public class SpiderHtmlFormParser extends SpiderParser {
private static final String DEFAULT_PASS_VALUE = DEFAULT_TEXT_VALUE;

/** The spider parameters. */
SpiderParam param;
private final SpiderParam param;

/**
 * Creates a parser of HTML forms backed by the given spider parameters.
 *
 * @param param the parameters for the spider, must not be {@code null}
 * @throws IllegalArgumentException if {@code param} is {@code null}.
 */
public SpiderHtmlFormParser(SpiderParam param) {
    // The superclass no-arg constructor is invoked implicitly.
    if (null == param) {
        throw new IllegalArgumentException("Parameter param must not be null.");
    }
    this.param = param;
}

Expand All @@ -82,12 +87,7 @@ public boolean parseResource(HttpMessage message, Source source, int depth) {
}

// Get the context (base url)
String baseURL;
if (message == null) {
baseURL = "";
} else {
baseURL = message.getRequestHeader().getURI().toString();
}
String baseURL = message.getRequestHeader().getURI().toString();

// Try to see if there's any BASE tag that could change the base URL
Element base = source.getFirstElement(HTMLElementName.BASE);
Expand Down Expand Up @@ -121,37 +121,39 @@ public boolean parseResource(HttpMessage message, Source source, int depth) {
continue;
}

// Prepare data set
List<HtmlParameter> formDataSet = prepareFormDataSet(form.getFormFields());
FormData formData = prepareFormDataSet(form.getFormFields());

// Process the case of a POST method
if (method != null && method.trim().equalsIgnoreCase(METHOD_POST)) {
String query = "";
// Build the absolute canonical URL
String fullURL = URLCanonicalizer.getCanonicalURL(action, baseURL);
if (fullURL == null) {
return false;
}
log.debug("Canonical URL constructed using '" + action + "': " + fullURL);

/*
* Ignore encoding, as we will not POST files anyway, so using
* "application/x-www-form-urlencoded" is adequate
*/
// String encoding = form.getAttributeValue("enctype");
// if (encoding != null && encoding.equals("multipart/form-data"))
query = buildEncodedUrlQuery(formDataSet);
log.debug("Submiting form with POST method and message body with form parameters (normal encoding): "
+ query);

// Build the absolute canonical URL
String fullURL = URLCanonicalizer.getCanonicalURL(action, baseURL);
if (fullURL == null) {
return false;
String baseRequestBody = buildEncodedUrlQuery(formData.getFields());
if (formData.getSubmitFields().isEmpty()) {
notifyPostResourceFound(message, depth, fullURL, baseRequestBody);
continue;
}

log.debug("Canonical URL constructed using '" + action + "': " + fullURL);
notifyListenersPostResourceFound(message, depth + 1, fullURL, query);
for (HtmlParameter submitField : formData.getSubmitFields()) {
notifyPostResourceFound(
message,
depth,
fullURL,
appendEncodedUrlQueryParameter(baseRequestBody, submitField));
}

} // Process anything else as a GET method
else {
String query = buildEncodedUrlQuery(formDataSet);
log.debug("Submiting form with GET method and query with form parameters: " + query);

// Clear the fragment, if any, as it does not have any relevance for the server
if (action.contains("#")) {
int fs = action.lastIndexOf("#");
Expand All @@ -161,12 +163,12 @@ public boolean parseResource(HttpMessage message, Source source, int depth) {
// Process the final URL
if (action.contains("?")) {
if (action.endsWith("?")) {
processURL(message, depth, action + query, baseURL);
processGetForm(message, depth, action, baseURL, formData);
} else {
processURL(message, depth, action + "&" + query, baseURL);
processGetForm(message, depth, action + "&", baseURL, formData);
}
} else {
processURL(message, depth, action + "?" + query, baseURL);
processGetForm(message, depth, action + "?", baseURL, formData);
}
}

Expand All @@ -175,6 +177,32 @@ public boolean parseResource(HttpMessage message, Source source, int depth) {
return false;
}

/**
 * Turns the given GET form data into one or more URLs and processes each of them.
 * <p>
 * If the form has no submit fields, a single URL is processed whose query holds only the normal fields. Otherwise
 * one URL is processed per submit field, each with the normal fields followed by that submit field.
 *
 * @param message the source message
 * @param depth the current depth
 * @param action the action, to which the query is appended as is
 * @param baseURL the base URL
 * @param formData the GET form data
 * @see #processURL(HttpMessage, int, String, String)
 */
private void processGetForm(HttpMessage message, int depth, String action, String baseURL, FormData formData) {
    String fieldsQuery = buildEncodedUrlQuery(formData.getFields());
    List<HtmlParameter> submits = formData.getSubmitFields();

    // No submit fields: the normal fields alone make up the query.
    if (submits.isEmpty()) {
        log.debug("Submiting form with GET method and query with form parameters: " + fieldsQuery);
        processURL(message, depth, action + fieldsQuery, baseURL);
        return;
    }

    // One URL per submit field, each on top of the same normal fields.
    for (HtmlParameter submit : submits) {
        String fullQuery = appendEncodedUrlQueryParameter(fieldsQuery, submit);
        log.debug("Submiting form with GET method and query with form parameters: " + fullQuery);
        processURL(message, depth, action + fullQuery, baseURL);
    }
}

/**
* Prepares the form data set. A form data set is a sequence of control-name/current-value pairs
* constructed from successful controls, which will be sent with a GET/POST request for a form.
Expand All @@ -186,8 +214,9 @@ public boolean parseResource(HttpMessage message, Source source, int depth) {
* @param form the form
* @return the form data, with the normal fields and the submit fields kept apart
*/
private List<HtmlParameter> prepareFormDataSet(FormFields form) {
private FormData prepareFormDataSet(FormFields form) {
List<HtmlParameter> formDataSet = new LinkedList<>();
List<HtmlParameter> submitFields = new ArrayList<>();

// Process each form field
Iterator<FormField> it = form.iterator();
Expand All @@ -197,55 +226,86 @@ private List<HtmlParameter> prepareFormDataSet(FormFields form) {
log.debug("New form field: " + field.getDebugInfo());
}

// Get its value(s)
List<String> values = field.getValues();
if (log.isDebugEnabled()) {
log.debug("Existing values: " + values);
List<HtmlParameter> currentList = formDataSet;
if (field.getFormControl().getFormControlType().isSubmit()) {
currentList = submitFields;
}
for (String value : getValues(field)) {
currentList.add(new HtmlParameter(Type.form, field.getName(), value));
}
}

// If there are no values at all or only an empty value
if (values.isEmpty() || (values.size() == 1 && values.get(0).isEmpty())) {
String finalValue = DEFAULT_EMPTY_VALUE;
return new FormData(formDataSet, submitFields);
}

// Check if we can use predefined values
Collection<String> predefValues = field.getPredefinedValues();
if (!predefValues.isEmpty()) {
// Try first elements
Iterator<String> iterator = predefValues.iterator();
finalValue = iterator.next();
/**
* Gets the values for the given {@code field}.
* <p>
* If the field is of submit type it returns its predefined values.
*
* @param field the field
* @return a list with the values
* @see #getDefaultTextValue(FormField)
*/
private static List<String> getValues(FormField field) {
if (field.getFormControl().getFormControlType().isSubmit()) {
return new ArrayList<>(field.getPredefinedValues());
}

// If there are more values, don't use the first, as it usually is a "No select"
// item
if (iterator.hasNext()) {
finalValue = iterator.next();
}
} else {
/*
* In all cases, according to Jericho documentation, the only left option is for
* it to be a TEXT field, without any predefined value. We check if it has only
* one userValueCount, and, if so, fill it with a default value.
*/
if (field.getUserValueCount() > 0) {
finalValue = getDefaultTextValue(field);
}
}
// Get its value(s)
List<String> values = field.getValues();
if (log.isDebugEnabled()) {
log.debug("Existing values: " + values);
}

// Save the finalValue in the FormDataSet
log.debug("No existing value for field " + field.getName() + ". Generated: " + finalValue);
HtmlParameter p = new HtmlParameter(Type.form, field.getName(), finalValue);
formDataSet.add(p);
}
// If there are preselected values for the fields, use them
else {
for (String v : values) {
// Save the finalValue in the FormDataSet
HtmlParameter p = new HtmlParameter(Type.form, field.getName(), v);
formDataSet.add(p);
// If there are no values at all or only an empty value
if (values.isEmpty() || (values.size() == 1 && values.get(0).isEmpty())) {
String finalValue = DEFAULT_EMPTY_VALUE;

// Check if we can use predefined values
Collection<String> predefValues = field.getPredefinedValues();
if (!predefValues.isEmpty()) {
// Try first elements
Iterator<String> iterator = predefValues.iterator();
finalValue = iterator.next();

// If there are more values, don't use the first, as it usually is a "No select"
// item
if (iterator.hasNext()) {
finalValue = iterator.next();
}
} else {
/*
* In all cases, according to Jericho documentation, the only left option is for
* it to be a TEXT field, without any predefined value. We check if it has only
* one userValueCount, and, if so, fill it with a default value.
*/
if (field.getUserValueCount() > 0) {
finalValue = getDefaultTextValue(field);
}
}

log.debug("No existing value for field " + field.getName() + ". Generated: " + finalValue);

values = new ArrayList<>(1);
values.add(finalValue);
}

return formDataSet;
return values;
}

/**
 * Logs the request body at debug level and then notifies the listeners that a new POST resource was found,
 * reporting it one level deeper than the given depth.
 *
 * @param message the source message
 * @param depth the current depth
 * @param url the URL of the resource
 * @param requestBody the request body
 * @see #notifyListenersPostResourceFound(HttpMessage, int, String, String)
 */
private void notifyPostResourceFound(HttpMessage message, int depth, String url, String requestBody) {
    String logMessage = "Submiting form with POST method and message body with form parameters (normal encoding): " + requestBody;
    log.debug(logMessage);

    int resourceDepth = depth + 1;
    notifyListenersPostResourceFound(message, resourceDepth, url, requestBody);
}

/**
Expand All @@ -268,7 +328,7 @@ private List<HtmlParameter> prepareFormDataSet(FormFields form) {
* @param field the field
* @return the default text value
*/
private String getDefaultTextValue(FormField field) {
private static String getDefaultTextValue(FormField field) {
FormControl fc = field.getFormControl();
if (fc.getFormControlType() == FormControlType.TEXT) {
// If the control type was reduced to a TEXT type by the Jericho library, check the
Expand Down Expand Up @@ -370,9 +430,55 @@ private String buildEncodedUrlQuery(List<HtmlParameter> formDataSet) {
return request.toString();
}

/**
 * Appends the given {@code parameter}, URL encoded, to the given {@code query}.
 * <p>
 * The parameter is appended as {@code name=value}, preceded by {@code &} when the query is not empty.
 * <p>
 * If the name or the value cannot be encoded, a warning is logged and the query is returned unchanged, so that a
 * failure never leaves a dangling {@code &} or a partial {@code name=} fragment at the end of the query.
 *
 * @param query the query
 * @param parameter the parameter to append
 * @return the query with the parameter appended, or the unchanged query if the parameter could not be encoded
 */
private static String appendEncodedUrlQueryParameter(String query, HtmlParameter parameter) {
    String encodedName;
    String encodedValue;
    try {
        // Encode both parts before touching the query, so an encoding failure cannot leave it half-written.
        encodedName = URLEncoder.encode(parameter.getName(), ENCODING_TYPE);
        encodedValue = URLEncoder.encode(parameter.getValue(), ENCODING_TYPE);
    } catch (UnsupportedEncodingException e) {
        log.warn("Error while encoding query for form.", e);
        return query;
    }

    StringBuilder strBuilder = new StringBuilder(query);
    if (strBuilder.length() != 0) {
        strBuilder.append('&');
    }
    strBuilder.append(encodedName).append('=').append(encodedValue);
    return strBuilder.toString();
}

@Override
public boolean canParseResource(HttpMessage message, String path, boolean wasAlreadyConsumed) {
    // Fallback parser: a message already consumed by another parser is never handled here.
    if (wasAlreadyConsumed) {
        return false;
    }
    // Otherwise handle the message only if its response is HTML.
    return message.getResponseHeader().isHtml();
}

/**
 * Holder of the fields (and their values) of an HTML form.
 * <p>
 * The submit fields are kept in a list of their own, apart from the normal fields. Both lists are stored and
 * returned as given, without being copied.
 */
private static class FormData {

    /** The normal (non-submit) fields of the form. */
    private final List<HtmlParameter> fields;

    /** The submit fields of the form. */
    private final List<HtmlParameter> submitFields;

    /**
     * Creates the form data from the given lists.
     *
     * @param fields the normal fields of the form
     * @param submitFields the submit fields of the form
     */
    public FormData(List<HtmlParameter> fields, List<HtmlParameter> submitFields) {
        this.submitFields = submitFields;
        this.fields = fields;
    }

    /**
     * Gets the submit fields of the form.
     *
     * @return the submit fields
     */
    public List<HtmlParameter> getSubmitFields() {
        return submitFields;
    }

    /**
     * Gets the normal fields of the form.
     *
     * @return the normal fields
     */
    public List<HtmlParameter> getFields() {
        return fields;
    }
}
}
Loading

0 comments on commit bfc5a40

Please sign in to comment.