#!/usr/bin/env python3
import requests
import json
from typing import Dict, Any, Optional
# Endpoint that run_scraper() sends its POST request to
API_URL = "https://openapi.coreclaw.com/api/v1/scraper/run"
# API key that main() passes to run_scraper(), which sends it in the
# "api-key" request header; replace the placeholder with a real key
API_KEY = "<YOUR_API_KEY>"
# HTTP request timeout in seconds (passed as `timeout` to requests.post)
TIMEOUT = 30
def _failure_result(message: str) -> Dict[str, Any]:
    """Build the failure-shaped result dict returned by run_scraper()."""
    return {
        "success": False,
        "run_slug": None,
        "error": message,
    }


def run_scraper(params: Dict[str, Any], api_key: str) -> Dict[str, Any]:
    """POST a scraper run request to API_URL and summarize the outcome.

    Failures are reported through the returned dict rather than raised:
    transport errors, non-200 responses, undecodable bodies and responses
    whose "code" field is not 0 all produce a failure result.

    Args:
        params: Request payload; sent as the JSON body of the POST.
        api_key: Value sent in the "api-key" request header.

    Returns:
        A dict with three keys:
            "success": True when the HTTP status is 200 and the response
                "code" field equals 0, otherwise False.
            "run_slug": The "run_slug" found under the response "data"
                object on success (None if absent), otherwise None.
            "error": None on success, otherwise a human-readable message.
    """
    headers = {
        "api-key": api_key,
        "Content-Type": "application/json",
    }

    # Only the network call sits in this try block, so transport failures
    # are not confused with problems in the response body.
    try:
        response = requests.post(
            API_URL,
            headers=headers,
            json=params,
            timeout=TIMEOUT,
        )
    except requests.exceptions.Timeout:
        # Timeout is a RequestException subclass, so it must be caught first.
        return _failure_result(f"Request timeout after {TIMEOUT} seconds")
    except requests.exceptions.RequestException as e:
        return _failure_result(f"Request error: {str(e)}")

    # Check HTTP status code
    if response.status_code != 200:
        return _failure_result(
            f"HTTP error: {response.status_code} - {response.text}"
        )

    # Parse response. ValueError covers json.JSONDecodeError as well as
    # requests.exceptions.JSONDecodeError (which is also a RequestException
    # and would otherwise be reported as a generic request error).
    try:
        result = response.json()
    except ValueError as e:
        return _failure_result(f"JSON decode error: {str(e)}")

    # Valid JSON that is not an object (e.g. a list or null) has no .get().
    if not isinstance(result, dict):
        return _failure_result(
            "Unexpected response format: expected a JSON object, "
            f"got {type(result).__name__}"
        )

    # Check business error code. The values are bound to locals first so the
    # f-string has no nested double quotes (a SyntaxError before Python 3.12).
    code = result.get("code")
    if code != 0:
        message = result.get("message", "Unknown error")
        return _failure_result(f"Business error: {message} (code: {code})")

    # "data" may be missing or null; both yield run_slug=None, not a crash.
    data = result.get("data")
    run_slug = data.get("run_slug") if isinstance(data, dict) else None

    return {
        "success": True,
        "run_slug": run_slug,
        "error": None,
    }
def main():
    """Submit one example scraper run and print the outcome to stdout."""
    # Resource and billing settings for the run
    system_settings = {
        "proxy_region": "",
        "cpus": 0.125,
        "memory": 512,
        "execute_limit_time_seconds": 1800,
        "max_total_charge": 0,
        "max_total_traffic": 0,
    }
    # Scraper-specific input: the list of target URLs
    custom_settings = {
        "url": [
            {
                "url": "https://www.glassdoor.com/job-listing/staff-product-analyst-intuit-JV_IC4508422_KO0,21_KE22,28.htm?jl=1008980804695&cs=1_d6b1b697&s=58&t=SR&pos=104&src=GD_JOB_AD&cpc=44CD5376B8534B8F&guid=0000018c0ae77315b0674c403cf266a5&jobListingId=1008980804695&ao=1110586&vt=w&jrtk=5-yul1-0-1hg5eesq3ir3v800-4dda97f9dab7b990---6NYlbfkN0BKYl3zWsktiTMfhnn8eMRsNrNhPXFqHgxANdB9sZgO2gg8MIfiMoKrpC4RrjRFuq5cN1FayxkUzXNpGKe4khcoosOdCsWKdjNUQlszKRvhnVCv-3GxQD0UURkmC2SRM5G4PDS-csRoploh14dWMXKmKSHFODefNWscELEdL9st3xF8QQKpVtPfaY0ycbd-ETjhmsqIBjkCxzKMwE-vSoMbWao2wahtIcpefclgD97rRwH69BkyKCUkw6_dAAZrs2ADvu3bHDeyOTLSIlwBZqRxVe05yri9f7rkilGsITPCgvv3Cfg9tReQUvDznUmY5TAQO1ykaDDCaaDZNjow2bO8henmAFyasHjvhLiubxbzO_JweUKeGcMqwwy80Wgmp5Xcz5rtTc-1vuQXmy3hK-lo-60GgtgLcZbRdKat1Z8tQww1fcL-tfB4nfNQiZmhXUNKNp-K-ChnjJ8FFwNL48I5GC9uunA74XRNacCIRa-mQAw991VyPZoA4xpzc-zfhk12BJvfbAKDgmBIOLa13A9Yw7poZBIKu1yvAsV1a2ejboFEtXzdxtOfmooRjrOTHx67cpRbppk-KmwyuMvjHWJrz_3XSepcuUm7zp9tGBliAyeRX01IPuVMoGsN7CBoTn3zS9kEwLVU6ejV0l1hF5437iDfn61jqnywAzCrmOCaKouMCFJBj2Yage5R4dIdFcm53lB9xSqWy9zGdJ31N5Xj&cb=1700989989992&ctt=1700990085479"
            }
        ]
    }
    # Assemble the full request body
    payload = {
        "scraper_slug": "01KG2DV66JTCN65ZBTRX3M456E",
        "version": "v1.0.8",
        "input": {
            "parameters": {
                "system": system_settings,
                "custom": custom_settings,
            }
        },
        "callback_url": "https://your-domain.com/callback",
    }

    # Submit the run
    print("Sending request to API...")
    outcome = run_scraper(payload, API_KEY)

    # Report a failure first and bail out
    if not outcome["success"]:
        print("Request failed!")
        print(f"Error message: {outcome['error']}")
        return

    # Success: show the run identifier
    print("Worker run successful!")
    print(f"Run ID: {outcome['run_slug']}")
    print("You can use this ID to query run status and results")
# Run main() only when this file is executed as a script, not when imported
if __name__ == "__main__":
    main()