Friday, 9 March 2012

Python - POST HTTP multipart form (Using standard urllib libraries)

The Python script in this example utilises standard Python libraries to POST a form with content type multipart/form-data over HTTP (or quite simply, a multipart form).

The content type "multipart/form-data" should be used for submitting forms that contain files, non-ASCII data, and binary data.

I won't delve deep into the details of multipart forms, but if you would like a good explanation, read the official W3C document. One thing it doesn't mention is that the boundary can be between 1 and 70 characters long, consisting of alphanumeric characters and the punctuation characters ' ( ) + _ , - . / : = ? (as defined in RFC 2046). Spaces are allowed except at the end.

Code Snippet
  1.     import itertools
  2.     import mimetools
  3.     import mimetypes
  4.     from cStringIO import StringIO
  5.     import urllib
  6.     import urllib2
  7.      
  class MultiPartForm(object):
      """Accumulate the data to be used when posting a multipart/form-data form.

      Simple fields are collected with add_field() and files with add_file().
      str(form) renders the complete request body, and get_content_type()
      returns the matching Content-Type header value (including the boundary).
      """

      def __init__(self):
          # Imported here so the class does not depend on a file-level import.
          import uuid

          self.form_fields = []  # (name, value) pairs, in insertion order
          self.files = []        # (fieldname, filename, mimetype, body) tuples

          # Random 32-character hex token.  Unlike mimetools.choose_boundary()
          # it does not embed the local IP address, uid and pid in a header
          # that is sent to the remote server, and it stays well inside the
          # 1-70 character boundary limit.
          self.boundary = uuid.uuid4().hex

      def get_content_type(self):
          """Return the Content-Type header value for this form."""
          return 'multipart/form-data; boundary=%s' % self.boundary

      def add_field(self, name, value):
          """Add a simple field to the form data.

          name  -- the form field name.
          value -- the field value; non-string values (e.g. numbers) are
                   rendered as text so the body can always be joined.
          """
          # '%s' % (value,) leaves existing strings untouched and converts
          # anything else to its text form; the 1-tuple keeps tuple values safe.
          self.form_fields.append((name, '%s' % (value,)))

      def add_file(self, fieldname, filename, fileHandle, mimetype=None):
          """Add a file to be uploaded.

          fieldname  -- the form field name the file is submitted under.
          filename   -- the file name reported to the server.
          fileHandle -- an object with a read() method; it is read in full
                        immediately and is not closed by this method.
          mimetype   -- the part's Content-Type; when None it is guessed from
                        filename, falling back to 'application/octet-stream'.
          """
          body = fileHandle.read()
          if mimetype is None:
              mimetype = mimetypes.guess_type(filename)[0] or 'application/octet-stream'
          self.files.append((fieldname, filename, mimetype, body))

      def __str__(self):
          """Return a string representing the form data, including attached files.

          All simple fields are emitted first, followed by all files, each
          part introduced by '--' + boundary.  Lines are separated by CR+LF
          and the body ends with the closing '--boundary--' marker and a
          trailing CR+LF.

          NOTE(review): the body is assembled as one string, so file content
          must be joinable with the header strings -- presumably the cause of
          the binary-upload problems readers have reported; confirm.
          """
          part_boundary = '--' + self.boundary

          # Build a list of lists, each holding the "lines" of one part.
          parts = []

          # Add the form fields
          for name, value in self.form_fields:
              parts.append([
                  part_boundary,
                  'Content-Disposition: form-data; name="%s"' % name,
                  '',
                  value,
              ])

          # Add the files to upload.  Every part of a multipart/form-data
          # body, files included, uses the "form-data" disposition type.
          for field_name, filename, content_type, body in self.files:
              parts.append([
                  part_boundary,
                  'Content-Disposition: form-data; name="%s"; filename="%s"'
                  % (field_name, filename),
                  'Content-Type: %s' % content_type,
                  '',
                  body,
              ])

          # Flatten the list and add the closing boundary marker,
          # then return CR+LF separated data.
          flattened = list(itertools.chain.from_iterable(parts))
          flattened.append(part_boundary + '--')
          flattened.append('')
          return '\r\n'.join(flattened)
  72.      
  73.      
  74.      
  75.      
  76.      
  if __name__ == '__main__':
      # Example: post a dummy file plus the highlighter's option fields,
      # then print both the outgoing request body and the server's reply.

      # Create the form with simple fields
      form = MultiPartForm()

      # Either specify a source, a file or a url...
      # The system will take the first one respectively if all three have content.
      # This example uses a dummy file called test.sql, with a string of content.

      # No source specified
      form.add_field('source', '')

      # REAL FILE EXAMPLE
      # fileHandle = open('test.sql', 'r')
      # form.add_file('file', 'test.sql', fileHandle)
      # fileHandle.close()

      # Use dummy file
      form.add_file('file', 'test.sql', fileHandle=StringIO('FILE CONTENTS'))

      # No URL specified
      form.add_field('url', '')

      # Add remaining required parameters (sent in this order)
      remaining_fields = (
          ('language', 'php'),
          ('line_numbers', '2'),
          ('word_wrap', 'on'),
          ('tab_width', '8'),
          ('highlight_keywords', 'on'),
          ('default_color', '0000bb'),
          ('keyword_colors[1]', 'DEFAULT'),
          ('keyword_colors[2]', 'DEFAULT'),
          ('keyword_colors[3]', 'DEFAULT'),
          ('keyword_colors[4]', 'DEFAULT'),
          ('comments_color', 'DEFAULT'),
          ('escaped_chars_color', 'DEFAULT'),
          ('brackets_color', 'DEFAULT'),
          ('strings_color', 'DEFAULT'),
          ('numbers_color', 'DEFAULT'),
          ('methods_color', 'DEFAULT'),
      )
      for field_name, field_value in remaining_fields:
          form.add_field(field_name, field_value)

      # Build the request
      request = urllib2.Request('http://qbnz.com/highlighter/php_highlighter.php')

      # Present ourselves as a desktop browser (this string is Firefox 2.0.0.11)
      request.add_header('User-agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11')

      # We are required to specify the content type and length...
      body = str(form)
      request.add_header('Content-type', form.get_content_type())
      # Header values are text, so send the length as a string.
      request.add_header('Content-length', str(len(body)))
      request.add_data(body)

      # Take a look at what we are sending... useful to match up against the required request
      print('')
      print('OUTGOING DATA:')
      print(request.get_data())

      # View the server's response, making sure the connection is closed
      # even if reading the reply fails.
      print('')
      print('SERVER RESPONSE:')
      response = urllib2.urlopen(request)
      try:
          print(response.read())
      finally:
          response.close()
End of Code Snippet

2 comments:

Anonymous said...

How is this supposed to be used?

It can't be copy pasted in Chrome and there is no download link.

Jason said...

This script can cause issues when uploading binary files. I wrote a blog post about how I was able to get multipart binary file uploads working in python:

http://blog.thesparktree.com/post/114053773684/the-unfortunately-long-story-dealing-with