```python
import requests

# Proxy configuration
username = "YOUR_USERNAME"
password = "YOUR_PASSWORD"
endpoint = "YOUR_ENDPOINT"

proxy_url = f"http://{username}:{password}@{endpoint}"

proxies = {
    "http": proxy_url,
    "https": proxy_url
}

# Make a request
url = "http://google.com"
response = requests.get(url, proxies=proxies)

print(f"Status Code: {response.status_code}")
print(f"Content: {response.text}")
```
| Feature | Residential Proxies | Data Center Proxies |
|---|---|---|
| Source | Real residential devices (computers, smartphones) | Data centers (servers) |
| IP Authenticity | High (appears as legitimate users) | Lower (appears as servers) |
| Anonymity | High | Medium |
| Risk of IP Bans | Low | Higher |
| Speed | Typically slower | Generally faster |
| Cost | Higher | Lower |
| IP Rotation | Available (rotating proxies) | Available |
| Stability | Potentially less stable (rotating IPs) | Generally more stable |
| Best Used For | Accessing protected/geo-restricted content, ad verification | Large-scale scraping, tasks needing high speed |
| Availability | Based on ISP partnerships | More widely available |

| Plan Name | Data Volume (GB) | Cost per GB (Standard) | Cost per GB (with Membership) | Description |
|---|---|---|---|---|
| PAYG | 1 GB | $4.00 | $1.00 | Simple pay-as-you-go plan, no subscription needed |
| Startup | Up to 200 GB | $4.00 | $2.00 | Ideal for smaller projects, includes 20% discount |
| Emerging | Up to 1000 GB | $4.00 | $1.50 | Mid-sized plan with 30% discount for scale |
| Scale | Up to 2000 GB | $4.00 | $1.00 | Large plan with a 40% discount for high-volume use |
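To make the pricing concrete, here is a small sketch of ours (not an official Geonode calculator) that estimates the cost of a given data volume using only the per-GB figures from the table above:

```python
# Per-GB membership rates from the pricing table above;
# the standard rate is $4.00/GB across all plans.
MEMBER_RATES = {
    "PAYG": 1.00,
    "Startup": 2.00,
    "Emerging": 1.50,
    "Scale": 1.00,
}

def estimate_cost(plan: str, gigabytes: float, member: bool = True) -> float:
    """Estimate the cost of a data volume under one of the plans."""
    rate = MEMBER_RATES[plan] if member else 4.00
    return round(gigabytes * rate, 2)

# 150 GB on the Startup plan: $300.00 with membership, $600.00 without
print(estimate_cost("Startup", 150))
print(estimate_cost("Startup", 150, member=False))
```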
Use the `requests` library to send HTTP requests via Geonode's proxies. This setup lets you control the IP address for each request, facilitating web scraping and bypassing location restrictions.
Here's a basic proxy setup using the `requests` library:
```python
import requests

# Define the proxy server with IP and port
proxies = {
    'http': 'http://YOUR_ENDPOINT',
    'https': 'https://YOUR_ENDPOINT'
}

# Send a GET request using the proxy
response = requests.get('https://www.example.com', proxies=proxies)

# Check response status and print the content
print(response.status_code)
print(response.text)
```
- The `proxies` dictionary specifies the proxy server for both HTTP and HTTPS requests.
- Pass `proxies` as a parameter in the `requests.get()` method.

Using Geonode proxies with the `requests` library is a robust way to ensure anonymity and bypass geo-restrictions. With options for automatic IP rotation and customizable session controls, Geonode proxies are ideal for complex web scraping projects.
By leveraging these features, you can optimize your scraping tasks and maintain high success rates across different websites.
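To see that rotation in action, here is a short sketch of ours (it assumes the rotating endpoint used later in this guide, `premium-residential.geonode.com:9001`, assigns a fresh IP per request) that prints the exit IP for several consecutive calls:

```python
import requests

# Assumed credentials and rotating endpoint -- replace with your own
username = "YOUR_USERNAME"
password = "YOUR_PASSWORD"
endpoint = "premium-residential.geonode.com:9001"

proxy_url = f"http://{username}:{password}@{endpoint}"
proxies = {"http": proxy_url, "https": proxy_url}

# With a rotating endpoint, each request should report a different origin IP
for i in range(3):
    ip = requests.get("https://httpbin.org/ip", proxies=proxies).json()["origin"]
    print(f"Request {i + 1}: {ip}")
```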
To authenticate, you will need your `username`, `password`, `ip`, and `endpoint`. These credentials can be found in the Access Parameters tab of the proxy product.
You can get your USERNAME and PASSWORD from the "Credentials" section on the dashboard.
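Rather than hardcoding credentials in scripts, one option (our suggestion, not a Geonode requirement) is to read them from environment variables; the variable names below are hypothetical:

```python
import os

# Hypothetical environment variable names -- adjust to your own setup
username = os.environ["GEONODE_USERNAME"]
password = os.environ["GEONODE_PASSWORD"]
endpoint = os.environ.get("GEONODE_ENDPOINT", "premium-residential.geonode.com:9001")

proxy_url = f"http://{username}:{password}@{endpoint}"
proxies = {"http": proxy_url, "https": proxy_url}
```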
For example, if you are using Python with the requests library, you can configure the proxies as follows:
```python
import requests
from bs4 import BeautifulSoup

# Proxy configuration
username = "YOUR_USERNAME"
password = "YOUR_PASSWORD"
endpoint = "premium-residential.geonode.com:9001"

proxy_url = f"http://{username}:{password}@{endpoint}"

proxies = {
    "http": proxy_url,
    "https": proxy_url
}

# Make a request
url = "http://google.com"
response = requests.get(url, proxies=proxies)

s = BeautifulSoup(response.content, 'html.parser')
print(response.status_code)
print(s.text)

# From here you can process the data as needed
```
The script above:

- Imports the `requests` and `BeautifulSoup` libraries.
- Defines the `username`, `password`, and `endpoint` variables (with placeholders for credentials).
- Sends the request through the proxy using the `requests` library.
Ensure you have the `requests` library installed, then configure it to route your HTTP requests through the Geonode proxy:

```python
import requests
from bs4 import BeautifulSoup

# Geonode Proxy configuration
username = "YOUR_USERNAME"  # Replace with your Geonode username
password = "YOUR_PASSWORD"  # Replace with your Geonode password
endpoint = "premium-residential.geonode.com:9001"

proxy_url = f"http://{username}:{password}@{endpoint}"

proxies = {
    "http": proxy_url,
    "https": proxy_url
}

# Target URL
url = "http://books.toscrape.com/"
response = requests.get(url, proxies=proxies)

# Parse and extract data using BeautifulSoup
soup = BeautifulSoup(response.content, 'html.parser')
links = soup.find_all('a')
for link in links:
    print(link.get('href'))
```
Here's how to use the `requests` library for a geotargeted request:
```python
import requests
from bs4 import BeautifulSoup

# Geonode Proxy configuration
username = "YOUR_USERNAME"  # Replace with your Geonode username
password = "YOUR_PASSWORD"  # Replace with your Geonode password
endpoint = "premium-residential.geonode.com:9001"  # Use assigned country-specific port if applicable

proxy_url = f"http://{username}:{password}@{endpoint}"

proxies = {
    "http": proxy_url,
    "https": proxy_url
}

response_ip = requests.get("https://httpbin.org/ip", proxies=proxies)
ip = response_ip.json()['origin']

# Get detailed info
response_info = requests.get(f"http://ipinfo.io/{ip}", proxies=proxies)
info = response_info.json()

print(f"IP: {info['ip']}")
print(f"Country: {info['country']}")
```
Sticky sessions keep the same IP across requests; to use them, route your traffic through the sticky ENDPOINT:

```python
import requests

# Geonode Proxy configuration
username = "YOUR_USERNAME"  # Replace with your Geonode username
password = "YOUR_PASSWORD"  # Replace with your Geonode password
endpoint = "premium-residential.geonode.com:10000"  # Sticky endpoint

proxy_url = f"http://{username}:{password}@{endpoint}"

proxies = {
    "http": proxy_url,
    "https": proxy_url
}

# Make request to check IP
response_ip = requests.get("https://httpbin.org/ip", proxies=proxies)
ip = response_ip.json()['origin']

# Get detailed info
response_info = requests.get(f"http://ipinfo.io/{ip}", proxies=proxies)
info = response_info.json()

print(f"IP: {info['ip']}")
print(f"Country: {info['country']}")
```
- The call to `httpbin.org/ip` reports the exit IP; repeat it to confirm the IP is stable across calls (see the sketch after this list).
- The `ipinfo.io` lookup verifies geolocation accuracy.
- In the next example, `RESIDENTIAL-PREMIUM-sticky.txt` contains only the host and port, so the credentials are added directly in the code.
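As a quick sanity check (a sketch of ours, not part of the original walkthrough), you can call `httpbin.org/ip` several times over the sticky endpoint and confirm the origin IP does not change:

```python
import requests

# Assumed credentials and the sticky endpoint from the example above
username = "YOUR_USERNAME"
password = "YOUR_PASSWORD"
endpoint = "premium-residential.geonode.com:10000"  # Sticky endpoint

proxy_url = f"http://{username}:{password}@{endpoint}"
proxies = {"http": proxy_url, "https": proxy_url}

# Collect the exit IP for a few consecutive requests
ips = [
    requests.get("https://httpbin.org/ip", proxies=proxies).json()["origin"]
    for _ in range(3)
]

print(ips)
print("Sticky session held" if len(set(ips)) == 1 else "IP changed between calls")
```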
Here's the code for the file-based approach:
```python
import pprint
import random
import requests

# Geonode Proxy configuration (replace with your own credentials)
username = "YOUR_USERNAME"  # Replace with your Geonode username
password = "YOUR_PASSWORD"  # Replace with your Geonode password

def get_proxies_from_file(file_path):
    """Reads proxies from the specified file and returns a list of proxies."""
    with open(file_path, 'r') as file:
        proxies = [line.strip() for line in file.readlines()]
    return proxies

def make_request_with_proxy(proxy):
    """Makes a request to check IP and fetches geolocation information."""
    # Construct the proxy URL with authentication
    proxy_url = f'http://{username}:{password}@{proxy}'
    proxies = {
        'http': proxy_url,
        'https': proxy_url
    }

    # Get IP address from httpbin
    ip_response = requests.get("https://httpbin.org/ip", proxies=proxies)
    ip_data = ip_response.json()
    print(f"IP Address: {ip_data['origin']}")

    # Get detailed info from IP geolocation service
    geo_response = requests.get("http://ipinfo.io", proxies=proxies)
    pprint.pprint(geo_response.json())

def main():
    proxy_file = 'RESIDENTIAL-PREMIUM-sticky.txt'  # Path to your .txt file with Geonode proxies
    proxies = get_proxies_from_file(proxy_file)

    if not proxies:
        print("No proxies found in the file.")
        return

    selected_proxy = random.choice(proxies)
    print(f"Using proxy: {selected_proxy}")
    make_request_with_proxy(selected_proxy)

if __name__ == "__main__":
    main()
```
- Each line of `RESIDENTIAL-PREMIUM-sticky.txt` should contain one proxy in the format:

```
host:port
```

- Replace `username` and `password` in the code with your Geonode account details for authentication.

If a request fails, the table below lists common Geonode proxy error codes, their causes, and solutions:

| Error Code | Meaning | Cause | Solution |
|---|---|---|---|
| 407 Proxy Authentication Required | Authentication is required. | Incorrect or missing proxy credentials. | Verify that your authentication settings (username and password or IP whitelisting) are correct. Update credentials if necessary to match your Geonode account. |
| 461 Sticky Port Session Limit Reached | You’ve reached the sticky port session limit for your plan. | Exceeding the session limit for your subscription package. | Upgrade your plan to increase the session limit if more capacity is needed. |
| 462 Sticky Port Session Unsupported | Sticky port sessions are not supported. | Attempting to use sticky sessions where only rotating ports are allowed. | Switch to rotating ports if sticky ports are unavailable with your plan. |
| 466 Limit Reached | The request limit for your subscription has been reached. | Exceeding the allotted number of requests. | Consider upgrading to a higher package to extend request limits. |
| 561 Proxy Unreachable | Unable to connect to the proxy server. | Network or server configuration issues. | Retry the request after a short wait. Ensure your proxy configuration is correct. |
| 468 No Available Proxy | No proxy is currently available. | Limited proxy availability or network congestion. | Wait and retry the request. Contact Geonode support if the issue persists. |
| 464 Host Not Allowed | The target host or domain is restricted. | Trying to access a host outside the allowed range. | Confirm that your target host aligns with your Geonode permissions. |
| 470 Account Blocked | Your account has been blocked. | Potential misuse or security reasons. | Contact Geonode support to resolve the issue if you believe it’s in error. |
| 403 Forbidden | The request is invalid or access is restricted. | Issues with request configuration or access restrictions. | Review your request configurations to ensure they’re valid. |
| 465 City/State Not Found | The specified location couldn’t be resolved. | Invalid or unsupported location configuration. | Confirm and adjust the location details in your request. |
| 463 City/State Unsupported | The location configurations in the request are unsupported. | Location not allowed in the current package or configuration. | Verify if the desired location is permitted with your package. |
| 401 Unauthorized | Unauthorized request. | Potential account or configuration issues. | Contact Geonode support for assistance. |
| 411 Account Blocked | The account is restricted. | Potential policy violation or administrative lock. | Reach out to customer support to discuss reactivation options. |
| 471 Inactive Port | The requested port is inactive. | Attempting to use an inactive port. | Switch to an active port. |
| 429 Too Many Requests with Incorrect Authentication | Rate limit exceeded due to multiple failed authentication attempts. | Incorrect credentials or rapid authentication failures. | Use correct credentials and space out requests to avoid temporary blocks. |
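Building on the table above, here is a small retry helper of ours (a sketch using only standard `requests` behavior, not a Geonode API) that retries transient errors such as 561 and 468 after a short wait, and fails fast on credential or account problems where retrying won't help:

```python
import time
import requests

RETRYABLE = {561, 468, 429}   # Transient: retry after a short wait
FATAL = {407, 401, 470, 411}  # Credential/account issues: retrying won't help

def get_with_retries(url, proxies, max_attempts=3, backoff=2.0):
    """Fetch a URL through the proxy, retrying only transient proxy errors."""
    for attempt in range(1, max_attempts + 1):
        response = requests.get(url, proxies=proxies, timeout=30)
        if response.status_code in FATAL:
            raise RuntimeError(f"Proxy configuration error: {response.status_code}")
        if response.status_code in RETRYABLE and attempt < max_attempts:
            time.sleep(backoff * attempt)  # Wait before retrying, per the table's advice
            continue
        return response
```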
Here's a complete scraping example using `requests`:
```python
import requests
from bs4 import BeautifulSoup

# Geonode Proxy configuration
username = "YOUR_USERNAME"  # Replace with your Geonode username
password = "YOUR_PASSWORD"  # Replace with your Geonode password
endpoint = "premium-residential.geonode.com:10000"  # Geonode endpoint for premium residential proxies

# Define the proxy URL
proxy_url = f"http://{username}:{password}@{endpoint}"

# Set up proxies
proxies = {
    "http": proxy_url,
    "https": proxy_url
}

# Define the URL to scrape
url = "http://books.toscrape.com/"

# Send a GET request through the Geonode proxy
response = requests.get(url, proxies=proxies)

# Parse the HTML content with BeautifulSoup
soup = BeautifulSoup(response.content, "html.parser")

# Find all the links on the web page
links = soup.find_all("a")

# Print each link found
for link in links:
    print(link.get("href"))
```
This script fetches the page through the Geonode proxy and extracts every link with `BeautifulSoup`.

To use Geonode proxies with Scrapy, first create a new project:

```bash
scrapy startproject <project_name>
```

Then define a spider that sets the proxy via the request's `meta`:

```python
import scrapy

# Geonode Proxy configuration
username = "YOUR_USERNAME"
password = "YOUR_PASSWORD"

class GeonodeScrapyExampleSpider(scrapy.Spider):
    name = "GeonodeScrapyExample"

    def start_requests(self):
        request = scrapy.Request(url="http://example.com", callback=self.parse)
        request.meta['proxy'] = f"http://{username}:{password}@premium-residential.geonode.com:10000"
        yield request

    def parse(self, response):
        print(response.body)
```
Run the spider with:

```bash
scrapy runspider <Pythonfilename.py>
```

To use Geonode proxies with Playwright, install the library:

```bash
npm install playwright
```

Then pass the proxy settings in the browser launch options:

```javascript
const playwright = require('playwright');

const options = {
  proxy: {
    server: 'http://premium-residential.geonode.com:10000',
    username: 'YOUR_USERNAME',
    password: 'YOUR_PASSWORD'
  }
};

(async () => {
  const browser = await playwright.chromium.launch(options);
  const page = await browser.newPage();
  await page.goto('http://example.com');
  const content = await page.content();
  console.log(content);
  await browser.close();
})();
```
Save the script as `scrape.js` and execute it:
```bash
node scrape.js
```

To use Geonode proxies with Puppeteer, install the library:

```bash
npm install puppeteer
```

Then pass the proxy server as a launch argument and authenticate on the page:

```javascript
const puppeteer = require('puppeteer');

(async () => {
  const browser = await puppeteer.launch({
    headless: false,
    args: ['--proxy-server=premium-residential.geonode.com:10000']
  });
  const page = await browser.newPage();
  await page.authenticate({
    username: 'YOUR_USERNAME',
    password: 'YOUR_PASSWORD'
  });
  await page.goto('http://example.com');
  await page.screenshot({ path: 'example.png' });
  await browser.close();
})();
```
Save the script (e.g., `scrape.js`) and run it:
```bash
node scrape.js
```

The following end-to-end example uses Geonode's country targeting to compare an Amazon product listing from Spanish and Portuguese IPs:

```python
import requests
from bs4 import BeautifulSoup
from dataclasses import dataclass
from typing import Dict

# Proxy configuration
PROXY_CONFIG = {
    "host": "premium-residential.geonode.com",
    "port": "10000",
    "username": "YOUR_USERNAME",  # Replace with your Geonode username
    "password": "YOUR_PASSWORD"   # Replace with your Geonode password
}

# Product URL
PRODUCT_URL = 'https://www.amazon.es/Taurus-WC12T-termoel%C3%A9ctrica-Aislamiento-Temperatura/dp/B093GXXKRL/ref=lp_14565165031_1_2'

@dataclass
class ProductInfo:
    title: str
    price: str
    availability: str

def get_proxy_url(country: str) -> str:
    # Generate Geonode proxy URL with country targeting
    return f"http://{PROXY_CONFIG['username']}-country-{country}:{PROXY_CONFIG['password']}@{PROXY_CONFIG['host']}:{PROXY_CONFIG['port']}"

def get_headers() -> Dict[str, str]:
    # HTTP headers to simulate a browser request
    return {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36',
        'Accept-Language': 'es-ES,es;q=0.9',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer': 'https://www.amazon.es/'
    }

def scrape_amazon(country: str) -> ProductInfo:
    proxy_url = get_proxy_url(country)
    proxies = {"http": proxy_url, "https": proxy_url}

    try:
        response = requests.get(PRODUCT_URL, proxies=proxies, headers=get_headers(), verify=False, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        title = soup.select_one('#productTitle').text.strip() if soup.select_one('#productTitle') else "Title not found"
        price = soup.select_one('.a-price-whole').text.strip() if soup.select_one('.a-price-whole') else "Price not found"
        availability = soup.select_one('#availability span').text.strip() if soup.select_one('#availability span') else "Availability not found"

        return ProductInfo(title, price, availability)
    except requests.RequestException as e:
        print(f"An error occurred while scraping with {country} IP: {str(e)}")
        return ProductInfo("Error", "Error", "Error")

def main():
    countries = {
        "SPAIN": "es",
        "PORTUGAL": "pt"
    }

    results = {country_code: scrape_amazon(country_code) for country_code in countries.values()}

    for country_name, country_code in countries.items():
        info = results[country_code]
        print(f"\n{country_name} IP Results:")
        print(f"Title: {info.title}")
        print(f"Price: {info.price}€")
        print(f"Availability: {info.availability}")

if __name__ == "__main__":
    main()
```
For comparison, the same `requests` pattern works with other providers; here the request is routed through the ScrapeOps residential proxy:

```python
import requests

api_key = 'YOUR_API_KEY'
target_url = 'https://httpbin.org/ip'
proxy_url = f'http://scrapeops:{api_key}@residential-proxy.scrapeops.io:8181'

proxies = {
    'http': proxy_url,
    'https': proxy_url,
}

response = requests.get(
    url=target_url,
    proxies=proxies,
    timeout=120,
)

print('Body:', response.content)
```