i connecting xml page via proxy using curl. reason there no connection page. parsing isn't issue not include code.
from bs4 import beautifulsoup import time #added curl import subprocess #added curl import os #added curl file_name = raw_input("type name of new file create: ") g = open(file_name+".txt",'w') g.write("---xml parse---\n") curlurl= 'f:\downloads\curl-7.31.0-rtmp-ssh2-ssl-sspi-zlib-idn-static-bin-w32\curl.exe' open("list.txt") f: #file information read , used in link line in f: g.write("\npage id: "+line.rstrip('\n')+"\n") link = "https://somewebsite.com/+line.rstrip('\n')" args = (curlurl+ ' -l ' +link+ ' -o c:\\temp.txt --proxy-ntlm -x http://myproxy:80 -k -u:') #using proxy print args sp = subprocess.popen(args) #run curl sp.wait() #wait finish before proceeding xml_string = open('c:/temp.txt', 'r').read() #read in temporary file time.sleep(3) os.remove('c:/temp.txt') # clean soup = beautifulsoup(xml_string) result = soup.find('bibliographic-data') if result not none: status = result['status'] g.write("\napplication status: "+status+"\n") g.write("most recent event information: \n") #...i go on parse document
i getting error:
curl:(56) received http code 407 proxy after connect
any idea why being denied access?
Comments
Post a Comment