<?php
// Endpoint that starts a scraper run; runScraper() sends its POST request to this URL
const API_URL = "https://openapi.coreclaw.com/api/v1/scraper/run";
// API key passed to runScraper() and sent in the "api-key" request header.
// Replace the placeholder with your own key before running the script.
const API_KEY = "<YOUR_API_KEY>";
// Maximum time the whole cURL request may take, in seconds (used as CURLOPT_TIMEOUT)
const TIMEOUT = 30;
/**
 * Start a scraper run by POSTing the JSON-encoded parameters to API_URL.
 *
 * Failures (encoding, transport, HTTP status, malformed response, business
 * error code) are reported through the returned array rather than raised.
 *
 * @param array $params Request parameters; JSON-encoded and sent as the request body
 * @param string $apiKey API key, sent in the "api-key" request header
 * @return array Result ["success" => bool, "run_slug" => string|null, "error" => string|null]
 */
function runScraper(array $params, string $apiKey): array
{
    // Every failure path returns the same shape; build it in one place
    $fail = static function (string $message): array {
        return [
            "success" => false,
            "run_slug" => null,
            "error" => $message,
        ];
    };

    // Encode before touching the network: json_encode() returns false on
    // unencodable input (e.g. malformed UTF-8) and must not be sent as a body
    $payload = json_encode($params);
    if ($payload === false) {
        return $fail("JSON encode error: " . json_last_error_msg());
    }

    // Initialize cURL
    $ch = curl_init();
    if ($ch === false) {
        return $fail("cURL error: failed to initialize cURL handle");
    }

    // Set cURL options
    curl_setopt_array($ch, [
        CURLOPT_URL => API_URL,
        CURLOPT_RETURNTRANSFER => true,
        // Empty string lets cURL advertise every encoding it supports
        CURLOPT_ENCODING => "",
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_TIMEOUT => TIMEOUT,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CUSTOMREQUEST => "POST",
        CURLOPT_POSTFIELDS => $payload,
        CURLOPT_HTTPHEADER => [
            "api-key: " . $apiKey,
            "Content-Type: application/json",
        ],
    ]);

    // Execute request and collect everything needed before releasing the handle
    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $curlError = curl_error($ch);

    // curl_close() has no effect since PHP 8.0; only call it where it still releases the resource
    if (PHP_VERSION_ID < 80000) {
        curl_close($ch);
    }

    // Check cURL error: a false response is a transport failure even if no message was recorded
    if ($response === false || $curlError !== "") {
        return $fail("cURL error: " . ($curlError !== "" ? $curlError : "request failed without an error message"));
    }

    // Check HTTP status code
    if ($httpCode !== 200) {
        return $fail("HTTP error: " . $httpCode . " - " . $response);
    }

    // Parse response
    $result = json_decode($response, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        return $fail("JSON decode error: " . json_last_error_msg());
    }

    // Valid JSON that is not an object/array (null, a number, a bare string)
    // cannot carry "code" or "data", so it must not be reported as success
    if (!is_array($result)) {
        return $fail("Unexpected response format: " . $response);
    }

    // Check business error code
    if (isset($result["code"]) && $result["code"] !== 0) {
        $message = $result["message"] ?? "Unknown error";
        return $fail("Business error: " . $message . " (code: " . $result["code"] . ")");
    }

    // Return success result; run_slug stays null when the response does not include one
    return [
        "success" => true,
        "run_slug" => $result["data"]["run_slug"] ?? null,
        "error" => null,
    ];
}
/**
 * Main function
 *
 * Builds an example request for the scraper, sends it with runScraper()
 * using API_KEY, and prints either the run record ID or the error message.
 */
function main()
{
    // Build request parameters
    $requestParams = [
        "scraper_slug" => "01KG2DV66JTCN65ZBTRX3M456E",
        "version" => "v1.0.8",
        "input" => [
            "parameters" => [
                "system" => [
                    "proxy_region" => "",
                    "cpus" => 0.125,
                    "memory" => 512,
                    "execute_limit_time_seconds" => 1800,
                    "max_total_charge" => 0,
                    "max_total_traffic" => 0,
                ],
                // PHP has no {...} object literal; nested associative arrays
                // are what json_encode() turns into JSON objects
                "custom" => [
                    "runUnits" => [
                        [
                            "url" => "https://coreclaw.local/__single_run__",
                        ],
                    ],
                    "scenario" => "ecommerce-products",
                    "fields" => [
                        [
                            "string" => "productId",
                        ],
                        [
                            "string" => "sku",
                        ],
                    ],
                    "mergeStrategy" => "keep-newest",
                    "timestampField" => "updatedAt",
                    "dataSourceType" => "direct-input",
                    // NOTE(review): this payload uses single-quoted keys/values, which is not
                    // strict JSON even though inputFormat is "json" — confirm what the scraper expects
                    "inputData" => '[{\'productId\': \'P001\', \'sku\': \'SKU-A-BLACK\', \'name\': \'无线蓝牙耳机 Pro\', \'price\': 299.00, \'stock\': 156, \'source\': \'京东旗舰店\', \'updatedAt\': \'2024-01-20T10:30:00\'}, {\'productId\': \'P001\', \'sku\': \'SKU-A-BLACK\', \'name\': \'无线蓝牙耳机 Pro (黑)\', \'price\': 279.00, \'stock\': 200, \'source\': \'天猫旗舰店\', \'updatedAt\': \'2024-01-22T14:20:00\'}, {\'productId\': \'P001\', \'sku\': \'SKU-A-WHITE\', \'name\': \'无线蓝牙耳机 Pro\', \'price\': 299.00, \'stock\': 88, \'source\': \'京东旗舰店\', \'updatedAt\': \'2024-01-20T10:30:00\'}, {\'productId\': \'P002\', \'sku\': \'SKU-B\', \'name\': \'智能手表 Ultra\', \'price\': 1299.00, \'stock\': 45, \'source\': \'官网\', \'updatedAt\': \'2024-01-18T09:00:00\'}]',
                    "inputUrls" => [
                        [
                            "url" => "https://raw.githubusercontent.com/kael-odin/worker-dedup-datasets/main/test/data1.json",
                        ],
                    ],
                    "datasetIds" => [],
                    "inputFormat" => "json",
                    "output" => "unique-items",
                    "generateReport" => true,
                    "mode" => "dedup-after-load",
                    "fieldsToLoad" => [],
                    "nullAsUnique" => false,
                    "parallelLoads" => 10,
                    "parallelPushes" => 5,
                    "batchSize" => 5000,
                    "appendFileSource" => false,
                    "verboseLog" => false,
                ],
            ],
        ],
        "callback_url" => "https://your-domain.com/callback",
    ];

    // Send request
    echo "Sending request to API...\n";
    $result = runScraper($requestParams, API_KEY);

    // Handle result
    if ($result["success"]) {
        echo "Worker run successful!\n";
        echo "Run record ID: " . $result["run_slug"] . "\n";
        echo "You can use this ID to query run status and results\n";
    } else {
        echo "Request failed!\n";
        echo "Error message: " . $result["error"] . "\n";
    }
}
// Script entry point: run the example request whenever this file is executed
main();