#!/usr/bin/env python
#
#
#-------------------------------------------------------------------------------

import urllib
import httplib

#-------------------------------------------------------------------------------

from logger import Logger

#-------------------------------------------------------------------------------

class Scraper(object):
   """Send HTTP(S) requests over a single connection and log every exchange.

   Each call to request() is tagged with a running index (starting at 0)
   that is passed to the logger and stamped onto the request object.

   Attributes:
      idx:        index that will be assigned to the next request.
      connection: httplib.HTTPConnection or httplib.HTTPSConnection to the site.
      logger:     Logger instance built from the log_dir given to __init__.
   """
   idx        = None
   connection = None
   logger     = None

   def __init__(self, site, log_dir=None, protocol='http'):
      """Create the connection object for `site` and the logger.

      Args:
         site:     host (optionally host:port) handed to httplib.
         log_dir:  passed unchanged to Logger.
         protocol: 'https' (any letter case) selects HTTPSConnection;
                   every other value selects HTTPConnection.
      """
      print("[http::scraper]  log_dir %s" % log_dir)

      if self._is_https(protocol):
         self.connection = httplib.HTTPSConnection(site)
      else:
         self.connection = httplib.HTTPConnection(site)

      self.idx           = 0
      self.logger        = Logger(log_dir)

   #----------------------------------------------------------------------------

   @staticmethod
   def _is_https(protocol):
      """Return True when `protocol` names HTTPS, ignoring letter case."""
      try:
         return protocol.lower() == 'https'
      except AttributeError:
         # Non-string values (e.g. None) keep the historical fallback to HTTP.
         return False

   #----------------------------------------------------------------------------

   def get_idx(self):
      """Return the index that the next request will receive."""
      return self.idx

   #----------------------------------------------------------------------------

   def close(self):
      """Close the underlying connection, if one has been created."""
      if self.connection is not None:
         self.connection.close()

   #----------------------------------------------------------------------------

   def request(self, r, debug=None):
      """Send request `r`, record the response on it, and log the exchange.

      Reads r.Method, r.URL, r.Post_data, r.Request_params and
      r.Request_headers. When r.Post_data is truthy it is url-encoded into
      r.Request_params and a Content-Length header is set from its length.

      On return r.Response_headers holds the httplib response object,
      r.Response_body the fully read body, and r.idx the index of this request.

      Args:
         r:     request object carrying the attributes listed above.
         debug: truthy prints the method and URL; a value greater than 2
                also prints the encoded parameters and the response body.

      Returns:
         The same object `r`, updated in place.
      """
      if debug:
         print('>>>> %s %s <<<<' % (r.Method, r.URL))

      verbose = debug and (debug > 2)

      if r.Post_data:
         r.Request_params = urllib.urlencode(r.Post_data)

         if verbose:
            print(r.Request_params)

         r.Request_headers['Content-Length'] = len(r.Request_params)

      self.connection.request(r.Method, r.URL, r.Request_params, r.Request_headers)

      resp = self.connection.getresponse()

      self.logger.log_request_header(self.idx, r.Request_headers)
      self.logger.log_response_header(self.idx, resp)

      # The response object itself is stored, not an extracted header mapping.
      r.Response_headers = resp

      r.Response_body = resp.read()

      self.logger.log_response_body(self.idx, r.Response_body)

      if verbose:
         print(r.Response_body)

      r.idx     = self.idx

      self.idx += 1

      return r

#-------------------------------------------------------------------------------

