CoreClaw
商店开发者分成
定价
开始免费试用
返回商店
CoreClaw

Indeed 抓取

01KNXWM1882RNQ2R8J9BN2VQWH

通过公司URLs批量提取 Indeed 公开企业数据,包含规模、行业、地址、评论及薪资信息,导出为 CSV/JSON 格式,用于招聘调研与竞品分析,一键输出结构化数据。

by CoreClaw
4.8
4 次运行
免费试用
免费 2,000 条结果
最后更新:2026-04-11
当前版本:v1.0.1

代码示例

使用我们的 REST API 将采集工具集成到您的应用程序中。以下是常用编程语言的示例。

🔑 需要 API 密钥

将 YOUR_API_KEY 替换为您的实际 API 密钥。您可以在账户设置中找到您的 API 密钥。

Node.js / JavaScript
const axios = require("axios");

// API endpoint for starting a scraper run
const API_URL = "https://openapi.coreclaw.com/api/v1/scraper/run";

// Your API KEY (find it in your account settings)
const API_KEY = "<YOUR_API_KEY>";

// Callback URL, the endpoint that will receive the scraping results
const CALLBACK_URL = "https://your-domain.com/callback";

// Build request parameters.
// NOTE(review): slug/version must match this scraper's page
// (slug 01KNXWM1882RNQ2R8J9BN2VQWH, current version v1.0.1); the input
// is a list of Indeed company-page URLs, per the product description.
const requestBody = {
    "scraper_slug": "01KNXWM1882RNQ2R8J9BN2VQWH",
    "version": "v1.0.1",
    "input": {
        "parameters": {
            // Runtime resource limits for this run
            "system": {
                "proxy_region": "",
                "cpus": 0.125,
                "memory": 512,
                "execute_limit_time_seconds": 1800,
                "max_total_charge": 0,
                "max_total_traffic": 0,
            },
            // Scraper-specific input: Indeed company URLs to extract
            "custom": {
                "url": [
                    {
                        "url": "https://www.indeed.com/cmp/Amazon.com"
                    }
                ]
            }
        }
    },
    "callback_url": CALLBACK_URL
};

/**
 * Start a scraper run and log the API response.
 * A response code of 0 means the run was accepted; any other code is a
 * business error. Network failures are caught and logged, never rethrown.
 */
async function runScraper() {
    try {
        const response = await axios.post(API_URL, requestBody, {
            headers: {
                "api-key": API_KEY,
                "Content-Type": "application/json"
            },
            timeout: 30000
        });

        const { code, message, data } = response.data;

        // Check response status
        if (code === 0) {
            console.log("Status Code:", code);
            console.log("Response Message:", message);
            console.log("Response Body:", JSON.stringify(data));
        } else {
            console.log("Error Status Code:", code);
            console.log("Error Message:", message);
        }
    } catch (error) {
        console.error("Request failed:", error.message);
    }
}

// Execute
runScraper();
Python
#!/usr/bin/env python3
"""Start a CoreClaw scraper run for the Indeed company-URL scraper."""
import requests
import json
from typing import Dict, Any, Optional

# API endpoint for starting a scraper run
API_URL = "https://openapi.coreclaw.com/api/v1/scraper/run"

# Your API KEY (find it in your account settings)
API_KEY = "<YOUR_API_KEY>"

# Request timeout (seconds)
TIMEOUT = 30

def run_scraper(params: Dict[str, Any], api_key: str) -> Dict[str, Any]:
    """Send a run request to the API and report the outcome.

    Args:
        params: Full request body (scraper_slug, version, input, callback_url).
        api_key: CoreClaw API key, sent in the ``api-key`` header.

    Returns:
        Dict with keys ``success`` (bool), ``run_slug`` (Optional[str]) and
        ``error`` (Optional[str]). Never raises: all transport and parsing
        failures are converted into an error result.
    """
    headers = {
        "api-key": api_key,
        "Content-Type": "application/json"
    }

    try:
        # Send POST request
        response = requests.post(
            API_URL,
            headers=headers,
            json=params,
            timeout=TIMEOUT
        )

        # Check HTTP status code
        if response.status_code != 200:
            return {
                "success": False,
                "run_slug": None,
                "error": f"HTTP error: {response.status_code} - {response.text}"
            }

        # Parse response
        result = response.json()

        # Check business error code (0 means the run was accepted).
        # Single quotes inside the f-string: nesting the same quote type
        # is a syntax error on Python < 3.12.
        if result.get("code") != 0:
            message = result.get("message", "Unknown error")
            return {
                "success": False,
                "run_slug": None,
                "error": f"Business error: {message} (code: {result.get('code')})"
            }

        # Return success result
        return {
            "success": True,
            "run_slug": result.get("data", {}).get("run_slug"),
            "error": None
        }

    except requests.exceptions.Timeout:
        return {
            "success": False,
            "run_slug": None,
            "error": f"Request timeout after {TIMEOUT} seconds"
        }
    except json.JSONDecodeError as e:
        # Must precede RequestException: in modern requests the JSON decode
        # error subclasses both, and would otherwise be caught below with a
        # less specific message.
        return {
            "success": False,
            "run_slug": None,
            "error": f"JSON decode error: {str(e)}"
        }
    except requests.exceptions.RequestException as e:
        return {
            "success": False,
            "run_slug": None,
            "error": f"Request error: {str(e)}"
        }

def main():
    # Build request parameters.
    # NOTE(review): slug/version must match this scraper's page
    # (slug 01KNXWM1882RNQ2R8J9BN2VQWH, current version v1.0.1); the input
    # is a list of Indeed company-page URLs, per the product description.
    request_params = {
        "scraper_slug": "01KNXWM1882RNQ2R8J9BN2VQWH",
        "version": "v1.0.1",
        "input": {
            "parameters": {
                # Runtime resource limits for this run
                "system": {
                    "proxy_region": "",
                    "cpus": 0.125,
                    "memory": 512,
                    "execute_limit_time_seconds": 1800,
                    "max_total_charge": 0,
                    "max_total_traffic": 0
                },
                # Scraper-specific input: Indeed company URLs to extract
                "custom": {
                    "url": [
                        {
                            "url": "https://www.indeed.com/cmp/Amazon.com"
                        }
                    ]
                }
            }
        },
        "callback_url": "https://your-domain.com/callback"
    }

    # Send request
    print("Sending request to API...")
    result = run_scraper(request_params, API_KEY)

    # Handle result
    if result["success"]:
        print("Worker run successful!")
        print(f"Run ID: {result['run_slug']}")
        print("You can use this ID to query run status and results")
    else:
        print("Request failed!")
        print(f"Error message: {result['error']}")

if __name__ == "__main__":
    main()
Java
import java.io.IOException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;

public class ScraperRunSimple {
    // API endpoint for starting a scraper run
    private static final String API_URL = "https://openapi.coreclaw.com/api/v1/scraper/run";

    // Your API KEY (find it in your account settings)
    private static final String API_KEY = "<YOUR_API_KEY>";

    // Request timeout (seconds)
    private static final int TIMEOUT = 30;

    public static void main(String[] args) {
        // Build request JSON
        String jsonBody = buildRequestBody();

        // Create HttpClient
        HttpClient client = HttpClient.newBuilder()
            .connectTimeout(Duration.ofSeconds(TIMEOUT))
            .build();

        // Create HttpRequest
        HttpRequest request = HttpRequest.newBuilder()
            .uri(URI.create(API_URL))
            .timeout(Duration.ofSeconds(TIMEOUT))
            .header("api-key", API_KEY)
            .header("Content-Type", "application/json")
            .POST(HttpRequest.BodyPublishers.ofString(jsonBody))
            .build();

        System.out.println("Sending request to API...");

        try {
            // Send request
            HttpResponse<String> response = client.send(request, HttpResponse.BodyHandlers.ofString());

            // Check HTTP status code
            int statusCode = response.statusCode();
            if (statusCode != 200) {
                System.out.println("Request failed!");
                System.out.println("HTTP error: " + statusCode + " - " + response.body());
                return;
            }

            // Parse response (simple string handling, no external libraries needed)
            String responseBody = response.body();
            System.out.println("Response content: " + responseBody);

            // Extract run_slug (simple parsing)
            String runSlug = extractRunSlug(responseBody);
            if (runSlug != null) {
                System.out.println("Worker run successful!");
                System.out.println("Run ID: " + runSlug);
                System.out.println("You can use this ID to query run status and results");
            } else {
                System.out.println("Request failed!");
                System.out.println("Unable to parse run_slug");
            }
        } catch (IOException e) {
            System.out.println("Request failed!");
            System.out.println("IO error: " + e.getMessage());
        } catch (InterruptedException e) {
            System.out.println("Request failed!");
            System.out.println("Request interrupted: " + e.getMessage());
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Build the request JSON body.
     *
     * NOTE(review): slug/version must match this scraper's page
     * (slug 01KNXWM1882RNQ2R8J9BN2VQWH, current version v1.0.1); the input
     * is a list of Indeed company-page URLs, per the product description.
     */
    private static String buildRequestBody() {
        return """
            {
                "scraper_slug": "01KNXWM1882RNQ2R8J9BN2VQWH",
                "version": "v1.0.1",
                "input": {
                    "parameters": {
                        "system": {
                            "proxy_region": "",
                            "cpus": 0.125,
                            "memory": 512,
                            "execute_limit_time_seconds": 1800,
                            "max_total_charge": 0,
                            "max_total_traffic": 0
                        },
                        "custom": {
                            "url": [
                                {
                                    "url": "https://www.indeed.com/cmp/Amazon.com"
                                }
                            ]
                        }
                    }
                },
                "callback_url": "https://your-domain.com/callback"
            }
            """;
    }

    /**
     * Extract the value of "run_slug" from the response JSON without a JSON
     * library. Unlike the previous version, this tolerates whitespace around
     * the colon (the exact-match search for {@code "run_slug":"} returned
     * null on pretty-printed JSON).
     *
     * @param json raw response body
     * @return the run_slug value, or null if it cannot be found
     */
    private static String extractRunSlug(String json) {
        String key = "\"run_slug\"";
        int keyIndex = json.indexOf(key);
        if (keyIndex == -1) {
            return null;
        }
        int colonIndex = json.indexOf(':', keyIndex + key.length());
        if (colonIndex == -1) {
            return null;
        }
        int openQuote = json.indexOf('"', colonIndex + 1);
        if (openQuote == -1) {
            return null;
        }
        int closeQuote = json.indexOf('"', openQuote + 1);
        if (closeQuote == -1) {
            return null;
        }
        return json.substring(openQuote + 1, closeQuote);
    }
}
PHP
<?php

// API endpoint for starting a scraper run
const API_URL = "https://openapi.coreclaw.com/api/v1/scraper/run";

// Your API KEY (find it in your account settings)
const API_KEY = "<YOUR_API_KEY>";

// cURL timeout (seconds)
const TIMEOUT = 30;

/**
 * Run scraper
 *
 * @param array $params Request parameters
 * @param string $apiKey API Key
 * @return array Return result ["success" => bool, "run_slug" => string|null, "error" => string|null]
 */
function runScraper(array $params, string $apiKey): array
{
    // Initialize cURL
    $ch = curl_init();

    // Set cURL options
    curl_setopt_array($ch, [
        CURLOPT_URL => API_URL,
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_ENCODING => "",
        CURLOPT_MAXREDIRS => 10,
        CURLOPT_TIMEOUT => TIMEOUT,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_CUSTOMREQUEST => "POST",
        CURLOPT_POSTFIELDS => json_encode($params),
        CURLOPT_HTTPHEADER => [
            "api-key: " . $apiKey,
            "Content-Type: application/json"
        ],
    ]);

    // Execute request
    $response = curl_exec($ch);
    $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);
    $error = curl_error($ch);

    // Close cURL
    curl_close($ch);

    // Check cURL error
    if ($error) {
        return [
            "success" => false,
            "run_slug" => null,
            "error" => "cURL error: " . $error
        ];
    }

    // Check HTTP status code
    if ($httpCode !== 200) {
        return [
            "success" => false,
            "run_slug" => null,
            "error" => "HTTP error: " . $httpCode . " - " . $response
        ];
    }

    // Parse response
    $result = json_decode($response, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        return [
            "success" => false,
            "run_slug" => null,
            "error" => "JSON decode error: " . json_last_error_msg()
        ];
    }

    // Check business error code (0 means the run was accepted)
    if (isset($result["code"]) && $result["code"] !== 0) {
        return [
            "success" => false,
            "run_slug" => null,
            "error" => "Business error: " . (isset($result["message"]) ? $result["message"] : "Unknown error") . " (code: " . $result["code"] . ")"
        ];
    }

    // Return success result
    return [
        "success" => true,
        "run_slug" => isset($result["data"]["run_slug"]) ? $result["data"]["run_slug"] : null,
        "error" => null
    ];
}

/**
 * Main function
 *
 * NOTE(review): slug/version must match this scraper's page
 * (slug 01KNXWM1882RNQ2R8J9BN2VQWH, current version v1.0.1); the input
 * is a list of Indeed company-page URLs, per the product description.
 */
function main()
{
    // Build request parameters (PHP array syntax — the previous version used
    // JavaScript-style braces here, which is a parse error in PHP)
    $requestParams = [
        "scraper_slug" => "01KNXWM1882RNQ2R8J9BN2VQWH",
        "version" => "v1.0.1",
        "input" => [
            "parameters" => [
                // Runtime resource limits for this run
                "system" => [
                    "proxy_region" => "",
                    "cpus" => 0.125,
                    "memory" => 512,
                    "execute_limit_time_seconds" => 1800,
                    "max_total_charge" => 0,
                    "max_total_traffic" => 0
                ],
                // Scraper-specific input: Indeed company URLs to extract
                "custom" => [
                    "url" => [
                        [
                            "url" => "https://www.indeed.com/cmp/Amazon.com"
                        ]
                    ]
                ]
            ]
        ],
        "callback_url" => "https://your-domain.com/callback"
    ];

    // Send request
    echo "Sending request to API...\n";
    $result = runScraper($requestParams, API_KEY);

    // Handle result
    if ($result["success"]) {
        echo "Worker run successful!\n";
        echo "Run record ID: " . $result["run_slug"] . "\n";
        echo "You can use this ID to query run status and results\n";
    } else {
        echo "Request failed!\n";
        echo "Error message: " . $result["error"] . "\n";
    }
}

// Execute main function
main();
Go
package main

import (
    "bytes"
    "encoding/json"
    "fmt"
    "io"
    "net/http"
    "time"
)

// API URL
const API_URL = "https://openapi.coreclaw.com/api/v1/scraper/run"

// Your API KEY (find it in your account settings)
const API_KEY = "<YOUR_API_KEY>"

// Callback URL, the endpoint that will receive the scraping results
const CALLBACK_URL = "https://your-domain.com/callback"

// ScraperRunRequest represents the structure for running a scraper request
type ScraperRunRequest struct {
    ScraperSlug string          `json:"scraper_slug"` // Unique identifier for the scraper
    Version     string          `json:"version"`      // Worker version number
    Input       json.RawMessage `json:"input"`        // Input parameters
    CallbackURL string          `json:"callback_url"` // Callback URL
}

// ScraperRunResponse represents the structure for the scraper run response
type ScraperRunResponse struct {
    Code    int    `json:"code"`    // Error code (0 means the run was accepted)
    Message string `json:"message"` // Error message
    Data    Data   `json:"data"`    // Response data
}

// Data represents the structure for response data
type Data struct {
    RunSlug string `json:"run_slug"` // Unique identifier for the run record
}

func main() {
    // Build request parameters.
    // NOTE(review): "input" wraps its fields in a "parameters" object, matching
    // every other client example for this API (the previous version omitted the
    // wrapper); slug/version must match this scraper's page
    // (slug 01KNXWM1882RNQ2R8J9BN2VQWH, current version v1.0.1).
    req := ScraperRunRequest{
        ScraperSlug: "01KNXWM1882RNQ2R8J9BN2VQWH",
        Version:     "v1.0.1",
        Input: json.RawMessage(`{
          "parameters": {
            "system": {
                "proxy_region": "",
                "cpus": 0.125,
                "memory": 512,
                "execute_limit_time_seconds": 1800,
                "max_total_charge": 0,
                "max_total_traffic": 0
            },
            "custom": {
                "url": [
                    {
                        "url": "https://www.indeed.com/cmp/Amazon.com"
                    }
                ]
            }
          }
        }`),
        CallbackURL: CALLBACK_URL,
    }

    // Send request. Escaped \n is required here: raw newlines inside
    // interpreted string literals do not compile in Go.
    runSlug, err := runScraper(req, API_KEY)
    if err != nil {
        fmt.Printf("Request failed: %v\n", err)
        return
    }

    fmt.Println("Worker run successful!")
    fmt.Printf("Run record ID: %s\n", runSlug)
    fmt.Println("You can use this ID to query run status and results")
}

// runScraper executes the scraper: it POSTs the request to the API and
// returns the run_slug of the new run record, or an error describing the
// transport, HTTP, decoding, or business failure.
func runScraper(req ScraperRunRequest, apiKey string) (string, error) {
    // Serialize request data
    body, err := json.Marshal(req)
    if err != nil {
        return "", fmt.Errorf("failed to serialize request data: %w", err)
    }

    // Create HTTP client with an overall request timeout
    client := &http.Client{
        Timeout: 30 * time.Second,
    }

    httpReq, err := http.NewRequest(
        "POST",
        API_URL,
        bytes.NewBuffer(body),
    )
    if err != nil {
        return "", fmt.Errorf("failed to create request: %w", err)
    }

    // Set request headers
    httpReq.Header.Set("api-key", apiKey)
    httpReq.Header.Set("Content-Type", "application/json")

    // Send request
    resp, err := client.Do(httpReq)
    if err != nil {
        return "", fmt.Errorf("failed to send request: %w", err)
    }
    defer resp.Body.Close()

    // Read response
    respBody, err := io.ReadAll(resp.Body)
    if err != nil {
        return "", fmt.Errorf("failed to read response: %w", err)
    }

    // Check response status code
    if resp.StatusCode != http.StatusOK {
        return "", fmt.Errorf("request failed, status code: %d, response: %s", resp.StatusCode, string(respBody))
    }

    // Parse response
    var result ScraperRunResponse
    if err := json.Unmarshal(respBody, &result); err != nil {
        return "", fmt.Errorf("failed to parse response: %w", err)
    }

    // Check business error code
    if result.Code != 0 {
        return "", fmt.Errorf("business error: %s (error code: %d)", result.Message, result.Code)
    }

    return result.Data.RunSlug, nil
}

更多资源

API 参考文档
包含所有端点和参数的完整 API 文档

价格预估

100 条结果
预估:
~$0.75

100 条结果 × $0.0075。仅为成功结果付费。

立即运行
立即购买
快捷提示
  • 新用户可获得 2,000 个免费结果
  • 失败请求免费
  • 支持导出 JSON 或 CSV 结果

你可能也喜欢

探索商店中更多热门采集工具

查看全部采集工具
Indeed 求职信息抓取

Indeed 求职信息抓取

by CoreClaw

按行业与地区批量提取 Indeed 公开企业信息,包含公司规模、行业、办公地址、员工评价与薪资数据,支持 CSV/JSON 导出,用于精准行业与区域市场分析,一键导出结构化数据。

5.0
26 次运行
低至 $7.5/结果
Indeed 抓取API

Indeed 抓取API

by CoreClaw

通过列表链接批量提取 Indeed 公开企业数据,包含规模、行业、办公地址、员工评价与薪资信息,可导出为 CSV/JSON 格式,用于招聘调研、竞品分析与商业情报收集,一键导出结构化数据。

4.7
6 次运行
低至 $7.5/结果
Indeed 抓取工具

Indeed 抓取工具

by CoreClaw

通过关键词批量提取 Indeed 公开企业资料,包含公司规模、行业、办公地址、员工评价及薪资详情,支持 CSV/JSON 格式导出,用于精准企业检索与竞品监测,一键导出结构化数据。

4.9
6 次运行
低至 $7.5/结果
Indeed 职位抓取工具

Indeed 职位抓取工具

by CoreClaw

通过关键词批量提取 Indeed 公开职位数据,包含职位名称、公司、地点、薪资范围、岗位说明及发布日期,导出为 CSV/JSON 格式,用于招聘调研、人才分析与薪资基准研究,一键导出结构化数据。

4.7
5 次运行
低至 $7.5/结果
查看全部采集工具
CoreClaw

通过开箱即用的 Worker,快速搭建您的数据采集工作流。

邮箱:support@coreclaw.com

资源中心

  • 快速开始
  • API 参考

推荐

  • 商店
  • 定价

頂點數創有限公司

香港九龍大角咀通州街111號雲之端1樓9室