package main
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
)
// Connection settings for this example. Replace the placeholder values with
// your own before running.
const (
	// API_URL is the endpoint that the scraper run request is posted to.
	API_URL = "https://openapi.coreclaw.com/api/v1/scraper/run"

	// API_KEY is your API key.
	API_KEY = "<YOUR_API_KEY>"

	// CALLBACK_URL is the endpoint that will receive the scraping results.
	CALLBACK_URL = "https://your-domain.com/callback"
)
// ScraperRunRequest is the JSON payload describing a scraper run to start.
type ScraperRunRequest struct {
	// ScraperSlug is the unique identifier for the scraper.
	ScraperSlug string `json:"scraper_slug"`
	// Version is the worker version number.
	Version string `json:"version"`
	// Input carries the input parameters as already-encoded JSON.
	Input json.RawMessage `json:"input"`
	// CallbackURL is the callback URL.
	CallbackURL string `json:"callback_url"`
}
// ScraperRunResponse is the JSON envelope decoded from the scraper run reply.
type ScraperRunResponse struct {
	// Code is the error code.
	Code int `json:"code"`
	// Message is the error message.
	Message string `json:"message"`
	// Data is the response data.
	Data Data `json:"data"`
}
// Data is the payload section of a scraper run response.
type Data struct {
	// RunSlug is the unique identifier for the run record.
	RunSlug string `json:"run_slug"`
}
func main() {
// Build request parameters
req := ScraperRunRequest{
ScraperSlug: "01KG2DV66JTCN65ZBTRX3M456E",
Version: "v1.0.8",
Input: json.RawMessage(`{
"system": {
"proxy_region": "",
"cpus": 0.125,
"memory": 512,
"execute_limit_time_seconds": 1800,
"max_total_charge": 0,
"max_total_traffic": 0
},
"custom": {
"runUnits": [
{
"url": "https://coreclaw.local/__single_run__"
}
],
"scenario": "ecommerce-products",
"fields": [
{
"string": "productId"
},
{
"string": "sku"
}
],
"mergeStrategy": "keep-newest",
"timestampField": "updatedAt",
"dataSourceType": "direct-input",
"inputData": "[{\"productId\": \"P001\", \"sku\": \"SKU-A-BLACK\", \"name\": \"无线蓝牙耳机 Pro\", \"price\": 299.00, \"stock\": 156, \"source\": \"京东旗舰店\", \"updatedAt\": \"2024-01-20T10:30:00\"}, {\"productId\": \"P001\", \"sku\": \"SKU-A-BLACK\", \"name\": \"无线蓝牙耳机 Pro (黑)\", \"price\": 279.00, \"stock\": 200, \"source\": \"天猫旗舰店\", \"updatedAt\": \"2024-01-22T14:20:00\"}, {\"productId\": \"P001\", \"sku\": \"SKU-A-WHITE\", \"name\": \"无线蓝牙耳机 Pro\", \"price\": 299.00, \"stock\": 88, \"source\": \"京东旗舰店\", \"updatedAt\": \"2024-01-20T10:30:00\"}, {\"productId\": \"P002\", \"sku\": \"SKU-B\", \"name\": \"智能手表 Ultra\", \"price\": 1299.00, \"stock\": 45, \"source\": \"官网\", \"updatedAt\": \"2024-01-18T09:00:00\"}]",
"inputUrls": [
{
"url": "https://raw.githubusercontent.com/kael-odin/worker-dedup-datasets/main/test/data1.json"
}
],
"datasetIds": [],
"inputFormat": "json",
"output": "unique-items",
"generateReport": true,
"mode": "dedup-after-load",
"fieldsToLoad": [],
"nullAsUnique": false,
"parallelLoads": 10,
"parallelPushes": 5,
"batchSize": 5000,
"appendFileSource": false,
"verboseLog": false
}
}`),
CallbackURL: CALLBACK_URL,
}
// Send request
runSlug, err := runScraper(req, API_KEY)
if err != nil {
fmt.Printf("Request failed: %v
", err)
return
}
fmt.Printf("Worker run successful!")
fmt.Printf("Run record ID: %s
", runSlug)
fmt.Printf("You can use this ID to query run status and results
")
}
// runScraper posts req as JSON to API_URL, authenticating with apiKey in the
// "api-key" header, and returns the run slug from the decoded response.
//
// It returns an error when the request cannot be serialized, created, or
// sent, when the response body cannot be read or parsed, when the HTTP status
// is not 200 OK, or when the response carries a non-zero code.
func runScraper(req ScraperRunRequest, apiKey string) (string, error) {
	payload, err := json.Marshal(req)
	if err != nil {
		return "", fmt.Errorf("failed to serialize request data: %w", err)
	}

	request, err := http.NewRequest(http.MethodPost, API_URL, bytes.NewBuffer(payload))
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}
	request.Header.Set("api-key", apiKey)
	request.Header.Set("Content-Type", "application/json")

	// A dedicated client with a 30 second timeout is used for the call.
	httpClient := &http.Client{Timeout: 30 * time.Second}
	response, err := httpClient.Do(request)
	if err != nil {
		return "", fmt.Errorf("failed to send request: %w", err)
	}
	defer response.Body.Close()

	// The body is read before the status check so that a non-200 reply can
	// be included in the returned error.
	raw, err := io.ReadAll(response.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read response: %w", err)
	}
	if response.StatusCode != http.StatusOK {
		return "", fmt.Errorf("request failed, status code: %d, response: %s", response.StatusCode, string(raw))
	}

	var parsed ScraperRunResponse
	if err = json.Unmarshal(raw, &parsed); err != nil {
		return "", fmt.Errorf("failed to parse response: %w", err)
	}
	// A non-zero code is reported as a business error even though the HTTP
	// status was 200.
	if parsed.Code != 0 {
		return "", fmt.Errorf("business error: %s (error code: %d)", parsed.Message, parsed.Code)
	}

	return parsed.Data.RunSlug, nil
}