const axios = require("axios");
// URL of the scraper "run" API endpoint.
const API_URL = "https://openapi.coreclaw.com/api/v1/scraper/run";

// API key for the request; replace the placeholder with your own key.
const API_KEY = "<YOUR_API_KEY>";

// Callback URL: the endpoint that will receive the scraping results.
const CALLBACK_URL = "https://your-domain.com/callback";

// Values sent under input.parameters.system.
const systemParameters = {
  proxy_region: "",
  cpus: 0.125,
  memory: 512,
  execute_limit_time_seconds: 1800,
  max_total_charge: 0,
  max_total_traffic: 0,
};

// Values sent under input.parameters.custom.seoParams.
const seoParams = {
  brokenStatusCodes: [404, 410],
  checkExternalImages: false,
  checkExternalLinks: false,
  ignoredLinkStatusCodes: [0, 401, 403, 429],
  maxLinksCount: 3000,
  maxMetaDescriptionLength: 140,
  maxTitleLength: 70,
  maxWordsCount: 350,
  minTitleLength: 10,
  outputLinks: false,
  workingStatusCodes: [200, 301, 302, 304],
};

// Values sent under input.parameters.custom.
const customParameters = {
  // Each start URL is sent as an object of the form { url: "..." }.
  startUrl: ["https://coreclaw.com", "https://docs.coreclaw.com"].map(
    (url) => ({ url })
  ),
  maxRequestsPerCrawl: 10,
  maxDepth: null,
  userAgent:
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.125 Safari/537.36",
  pageTimeout: 60000,
  maxRequestRetries: 2,
  handlePageTimeoutSecs: 3600,
  seoParams,
};

// Complete request payload. Key order matches the serialized JSON that is sent.
const requestBody = {
  scraper_slug: "01KG2DV66JTCN65ZBTRX3M456E",
  version: "v1.0.8",
  input: {
    parameters: {
      system: systemParameters,
      custom: customParameters,
    },
  },
  callback_url: CALLBACK_URL,
};
/**
 * Submits the scraper job described by `requestBody` to `API_URL` and logs
 * the outcome.
 *
 * The request is a JSON POST carrying `API_KEY` in the `api-key` header,
 * with the axios `timeout` option set to 30000. The response body is expected
 * to contain `code`, `message` and `data`; `code === 0` is treated as success.
 *
 * This function never rejects: every failure is logged to stderr and
 * reported through the return value instead.
 *
 * @returns {Promise<*>} The `data` field of the response body when the API
 *   answers with `code === 0`; `null` when the API reports any other code or
 *   the request itself fails.
 */
async function runScraper() {
  try {
    const response = await axios.post(API_URL, requestBody, {
      headers: {
        "api-key": API_KEY,
        "Content-Type": "application/json",
      },
      timeout: 30000,
    });
    const { code, message, data } = response.data;

    // The API signals success or failure through `code` in the body.
    if (code !== 0) {
      console.error("Error Status Code:", code);
      console.error("Error Message:", message);
      return null;
    }

    console.log("Status Code:", code);
    console.log("Response Message:", message);
    console.log("Response Body:", JSON.stringify(data));
    return data;
  } catch (error) {
    console.error("Request failed:", error.message);
    // When the server answered with an error status, axios attaches the
    // response to the error; its status and body usually explain the failure.
    if (error.response) {
      console.error("HTTP Status:", error.response.status);
      console.error("Response Body:", JSON.stringify(error.response.data));
    }
    return null;
  }
}
// Start the run. The returned promise is deliberately not awaited:
// runScraper() catches and logs its own failures.
void runScraper();