Skip to content

Commit 89d30ff

Browse files
authored
Merge pull request #18 from ScrapeGraphAI/js-localScraper-implementation
Added localScraper functionality
2 parents cca2d8c + 671161d commit 89d30ff

File tree

6 files changed

+166
-7
lines changed

6 files changed

+166
-7
lines changed

scrapegraph-js/README.md

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ yarn add scrapegraph-js
3535

3636
```javascript
3737
import { smartScraper } from 'scrapegraph-js';
38+
import 'dotenv/config';
3839

3940
// Initialize variables
4041
const apiKey = process.env.SGAI_APIKEY; // Set your API key as an environment variable
@@ -105,12 +106,43 @@ const schema = z.object({
105106
})();
106107
```
107108

109+
### Scraping local HTML
110+
111+
Extract structured data from local HTML content
112+
113+
```javascript
114+
import { localScraper } from 'scrapegraph-js';
115+
116+
const apiKey = 'your_api_key';
117+
const prompt = 'What does the company do?';
118+
119+
const websiteHtml = `<html>
120+
<body>
121+
<h1>Company Name</h1>
122+
<p>We are a technology company focused on AI solutions.</p>
123+
<div class="contact">
124+
<p>Email: [email protected]</p>
125+
</div>
126+
</body>
127+
</html>`;
128+
(async () => {
129+
try {
130+
const response = await localScraper(apiKey, websiteHtml, prompt);
131+
console.log(response);
132+
} catch (error) {
133+
console.error(error);
134+
}
135+
})();
136+
```
137+
108138
### Markdownify
139+
109140
Converts a webpage into clean, well-structured markdown format.
141+
110142
```javascript
111143
import { smartScraper } from 'scrapegraph-js';
112144

113-
const apiKey = "your_api_key";
145+
const apiKey = 'your_api_key';
114146
const url = 'https://scrapegraphai.com/';
115147

116148
(async () => {
@@ -123,7 +155,6 @@ const url = 'https://scrapegraphai.com/';
123155
})();
124156
```
125157

126-
127158
### Checking API Credits
128159

129160
```javascript
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
import { localScraper, getLocalScraperRequest } from 'scrapegraph-js';
2+
import 'dotenv/config';
3+
4+
// Example 1: send a local HTML snippet to localScraper and print the result.
// The API key is read from the SGAI_APIKEY environment variable (loaded by dotenv).
const apiKey = process.env.SGAI_APIKEY;
const prompt = 'What does the company do?';

// Minimal in-memory page used as the scraping target.
const websiteHtml = `<html>
  <body>
    <h1>Company Name</h1>
    <p>We are a technology company focused on AI solutions.</p>
    <div class="contact">
      <p>Email: contact@example.com</p>
    </div>
  </body>
</html>`;

try {
  const response = await localScraper(apiKey, websiteHtml, prompt);
  console.log(response);
} catch (error) {
  console.error(error);
}

// Example 2: look up a previously submitted request with getLocalScraperRequest.
// The ID below is a fixed sample value; replace it with an ID from your own request.
const requestId = 'b8d97545-9ed3-441b-a01f-4b661b4f0b4c';

try {
  const response = await getLocalScraperRequest(apiKey, requestId);
  console.log(response);
} catch (error) {
  // Report failures on stderr, matching the first example.
  console.error(error);
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import { localScraper } from 'scrapegraph-js';
2+
import { z } from 'zod';
3+
import 'dotenv/config';
4+
5+
// Example: call localScraper with a Zod schema describing the expected output.
// The API key is read from the SGAI_APIKEY environment variable (loaded by dotenv).
const apiKey = process.env.SGAI_APIKEY;
const prompt = 'extract contact';

// Minimal in-memory page used as the scraping target.
const websiteHtml = `<html>
  <body>
    <h1>Company Name</h1>
    <p>We are a technology company focused on AI solutions.</p>
    <div class="contact">
      <p>Email: contact@example.com</p>
    </div>
  </body>
</html>`;

// Shape of the data requested from the API; passed as the fourth argument below.
const schema = z.object({
  contact: z.string().describe('email contact'),
});

try {
  const response = await localScraper(apiKey, websiteHtml, prompt, schema);
  console.log(response);
} catch (error) {
  console.error(error);
}

scrapegraph-js/index.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
export { smartScraper, getSmartScraperRequest } from './src/smartScraper.js';
22
export { markdownify, getMarkdownifyRequest } from './src/markdownify.js';
3+
export { localScraper, getLocalScraperRequest } from './src/localScraper.js';
34
export { getCredits } from './src/credits.js';
45
export { sendFeedback } from './src/feedback.js';

scrapegraph-js/src/localScraper.js

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import axios from 'axios';
2+
import handleError from './utils/handleError.js';
3+
import { ZodType } from 'zod';
4+
import { zodToJsonSchema } from 'zod-to-json-schema';
5+
6+
/**
 * Extract structured data from local HTML content using ScrapeGraph AI.
 *
 * Sends `websiteHtml` and `prompt` in a POST request to the
 * `/v1/localscraper` endpoint. When a Zod schema is supplied, it is converted
 * with `zodToJsonSchema` and sent as `output_schema`.
 *
 * @param {string} apiKey - The API key for ScrapeGraph AI (sent in the `SGAI-APIKEY` header).
 * @param {string} websiteHtml - HTML content, as a string, of the local web page to scrape.
 * @param {string} prompt - A natural language description of the data to extract.
 * @param {import('zod').ZodType|null} [schema=null] - (Optional) Zod schema describing the
 *   desired output. Any falsy value means "no schema".
 * @returns {Promise<*>} The response body (`response.data`) as delivered by axios. axios
 *   parses JSON response bodies by default, so this is normally an object, not a JSON string.
 * @throws {Error} If `schema` is truthy but is not a Zod schema instance. Request failures
 *   are forwarded to `handleError`.
 */
export async function localScraper(apiKey, websiteHtml, prompt, schema = null) {
  // Reject an unusable schema up front, before any request is built or sent.
  if (schema && !(schema instanceof ZodType)) {
    throw new Error('The schema must be an instance of a valid Zod schema');
  }

  const endpoint = 'https://api.scrapegraphai.com/v1/localscraper';
  const headers = {
    'accept': 'application/json',
    'SGAI-APIKEY': apiKey,
    'Content-Type': 'application/json',
  };

  const payload = {
    website_html: websiteHtml,
    user_prompt: prompt,
  };
  if (schema) {
    // The API receives JSON Schema, not a Zod object.
    payload.output_schema = zodToJsonSchema(schema);
  }

  try {
    const response = await axios.post(endpoint, payload, { headers });
    return response.data;
  } catch (error) {
    // NOTE(review): handleError lives in ./utils/handleError.js and is not visible here;
    // presumably it rethrows. If it returns normally, this function resolves to undefined.
    handleError(error);
  }
}
44+
45+
/**
 * Retrieve the status or result of a localScraper request, including results of previous requests.
 *
 * Issues a GET request to `/v1/localscraper/<requestId>`.
 *
 * @param {string} apiKey - The API key for ScrapeGraph AI (sent in the `SGAI-APIKEY` header).
 * @param {string} requestId - The unique ID associated with the localScraper request.
 * @returns {Promise<*>} The response body (`response.data`) as delivered by axios. axios
 *   parses JSON response bodies by default, so this is normally an object, not a JSON string.
 * @throws {Error} Request failures are forwarded to `handleError`.
 */
export async function getLocalScraperRequest(apiKey, requestId) {
  // Percent-encode the ID so characters such as "/", "?" or "#" cannot alter the
  // request path or query. IDs made only of letters, digits and "-" pass through unchanged.
  const endpoint = `https://api.scrapegraphai.com/v1/localscraper/${encodeURIComponent(requestId)}`;
  const headers = {
    'accept': 'application/json',
    'SGAI-APIKEY': apiKey,
  };

  try {
    const response = await axios.get(endpoint, { headers });
    return response.data;
  } catch (error) {
    // NOTE(review): handleError lives in ./utils/handleError.js and is not visible here;
    // presumably it rethrows. If it returns normally, this function resolves to undefined.
    handleError(error);
  }
}

scrapegraph-js/src/markdownify.js

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import handleError from './utils/handleError.js';
99
* @returns {Promise<string>} A promise that resolves to the markdown representation of the webpage.
1010
* @throws {Error} Throws an error if the HTTP request fails.
1111
*/
12-
export async function markdownify(apiKey, url){
12+
export async function markdownify(apiKey, url) {
1313
const endpoint = 'https://api.scrapegraphai.com/v1/markdownify';
1414
const headers = {
1515
'accept': 'application/json',
@@ -24,7 +24,7 @@ export async function markdownify(apiKey, url){
2424
const response = await axios.post(endpoint, payload, { headers });
2525
return response.data;
2626
} catch (error) {
27-
handleError(error)
27+
handleError(error);
2828
}
2929
}
3030

@@ -36,7 +36,7 @@ export async function markdownify(apiKey, url){
3636
* @returns {Promise<string>} A promise that resolves with details about the status or outcome of the specified request.
3737
* @throws {Error} Throws an error if the HTTP request fails.
3838
*/
39-
export async function getMarkdownifyRequest(apiKey, requestId){
39+
export async function getMarkdownifyRequest(apiKey, requestId) {
4040
const endpoint = 'https://api.scrapegraphai.com/v1/markdownify/' + requestId;
4141
const headers = {
4242
'accept': 'application/json',
@@ -47,6 +47,6 @@ export async function getMarkdownifyRequest(apiKey, requestId){
4747
const response = await axios.get(endpoint, { headers });
4848
return response.data;
4949
} catch (error) {
50-
handleError(error)
50+
handleError(error);
5151
}
52-
}
52+
}

0 commit comments

Comments
 (0)