The content type "multipart/form-data" should be used for submitting forms that contain files, non-ASCII data, and binary data.
I won't delve deep into the details of multipart forms, but if you would like a good explanation, read the official W3C document. One thing it doesn't mention is that the boundary can be between 1 and 70 characters long, consisting of alphanumeric characters and the punctuation you see in the list. Spaces are allowed except at the end.
Code Snippet
- import itertools
- import mimetools
- import mimetypes
- from cStringIO import StringIO
- import urllib
- import urllib2
class MultiPartForm(object):
    """Accumulate the fields and files of a multipart/form-data request body.

    Typical use: create an instance, call add_field() / add_file() once per
    part, send get_content_type() as the Content-type header and str(form)
    as the request body.

    NOTE: every part is joined into a single string, so field values and
    file bodies must all be of string types that can be joined together.
    """

    def __init__(self):
        # (name, value) pairs, kept in insertion order.
        self.form_fields = []
        # (fieldname, filename, mimetype, body) tuples, kept in insertion order.
        self.files = []
        # Generate unique boundary string with the format:
        # hostipaddr.uid.pid.timestamp.random
        self.boundary = mimetools.choose_boundary()

    def get_content_type(self):
        """Return the Content-type header value, including the boundary."""
        return 'multipart/form-data; boundary=%s' % self.boundary

    def add_field(self, name, value):
        """Add a simple (non-file) field to the form data."""
        self.form_fields.append((name, value))

    def add_file(self, fieldname, filename, fileHandle, mimetype=None):
        """Add a file to be uploaded.

        The whole content of fileHandle is read immediately; the caller
        remains responsible for closing the handle.  When mimetype is None
        it is guessed from filename, falling back to
        'application/octet-stream'.
        """
        body = fileHandle.read()
        if mimetype is None:
            mimetype = (mimetypes.guess_type(filename)[0]
                        or 'application/octet-stream')
        self.files.append((fieldname, filename, mimetype, body))

    def __str__(self):
        """Return a string representing the form data, including attached files.

        All simple fields come first, followed by all files, each group in
        the order it was added.  Lines are separated by CR+LF and the body
        ends with the closing boundary marker followed by a final CR+LF.
        """
        part_boundary = '--' + self.boundary

        # Each part is a list of "lines": boundary, headers, blank line, data.
        parts = []
        for name, value in self.form_fields:
            parts.append([
                part_boundary,
                'Content-Disposition: form-data; name="%s"' % name,
                '',
                value,
            ])
        for field_name, filename, content_type, body in self.files:
            parts.append([
                part_boundary,
                # The disposition type must be "form-data" for every part of
                # a multipart/form-data body, file uploads included; "file"
                # is not valid here and strict servers ignore such parts.
                'Content-Disposition: form-data; name="%s"; filename="%s"'
                % (field_name, filename),
                'Content-Type: %s' % content_type,
                '',
                body,
            ])

        # Flatten the parts, add the closing boundary marker, and finish
        # with an empty entry so the joined result ends in CR+LF.
        lines = list(itertools.chain.from_iterable(parts))
        lines.append(part_boundary + '--')
        lines.append('')
        return '\r\n'.join(lines)
if __name__ == '__main__':
    # Demo: post a highlighting request with one dummy file attached.
    form = MultiPartForm()

    # Either specify a source, file or a url...
    # The system will take the first one respectively if all three have
    # content.  This example uses a dummy file called test.sql, with a
    # string of content.

    # No source specified.
    form.add_field('source', '')

    # Dummy in-memory file.  To upload a real file instead:
    #     fileHandle = open('test.sql', 'r')
    #     form.add_file('file', 'test.sql', fileHandle)
    #     fileHandle.close()
    form.add_file('file', 'test.sql', fileHandle=StringIO('FILE CONTENTS'))

    # No URL specified.
    form.add_field('url', '')

    # Remaining required parameters, added in this exact order.
    for field_name, field_value in (
        ('language', 'php'),
        ('line_numbers', '2'),
        ('word_wrap', 'on'),
        ('tab_width', '8'),
        ('highlight_keywords', 'on'),
        ('default_color', '0000bb'),
        ('keyword_colors[1]', 'DEFAULT'),
        ('keyword_colors[2]', 'DEFAULT'),
        ('keyword_colors[3]', 'DEFAULT'),
        ('keyword_colors[4]', 'DEFAULT'),
        ('comments_color', 'DEFAULT'),
        ('escaped_chars_color', 'DEFAULT'),
        ('brackets_color', 'DEFAULT'),
        ('strings_color', 'DEFAULT'),
        ('numbers_color', 'DEFAULT'),
        ('methods_color', 'DEFAULT'),
    ):
        form.add_field(field_name, field_value)

    # Build the request, presenting a browser-like User-agent.
    request = urllib2.Request('http://qbnz.com/highlighter/php_highlighter.php')
    request.add_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11')

    # The content type (with boundary) and length are set explicitly.
    payload = str(form)
    request.add_header('Content-type', form.get_content_type())
    request.add_header('Content-length', len(payload))
    request.add_data(payload)

    # Show what is being sent; useful to match up against the required
    # request.  Single-argument parenthesised prints emit the same output
    # as the plain print statement.
    print('')
    print('OUTGOING DATA:')
    print(request.get_data())

    # Show the server's response.
    print('')
    print('SERVER RESPONSE:')
    print(urllib2.urlopen(request).read())
End of Code Snippet
How is this supposed to be used?
It can't be copy-pasted in Chrome, and there is no download link.
This script can cause issues when uploading binary files. I wrote a blog post about how I was able to get multipart binary file uploads working in python:
http://blog.thesparktree.com/post/114053773684/the-unfortunately-long-story-dealing-with