Golem.ai Core is the no-training-required artificial intelligence solution for building high-performance, robust, frugal, and unbiased NLP projects.
Build your project from A to Z with our versatile NLP platform.
We confirm the arrival of the cargo ship Louis Blériot containing operational equipment for hospitals at the port of Le Havre from the port of 香港. A two-hour delay in the unloading operation is expected.
Easily transform your documents into usable texts using our Extractor technology.
Several OCR engines and extraction libraries are available via API.
package main
import (
	"fmt"
	"io/ioutil"
	"net/http"
	"strings"
	"time"
)
// main posts a document URL to the Golem.ai Extractor analyse endpoint and
// prints the response body.
//
// The Authorization value ("Basic XXX") and the "file" URL in the payload are
// placeholders to be replaced with real values. Any failure (building the
// request, sending it, reading the body, or a non-2xx status) is printed and
// ends the program.
func main() {
	url := "https://extractor.golem.ai/v3/analyse"
	payload := strings.NewReader(`{
"file": "https://www.yourfile.pdf"
}`)

	// The zero Timeout means "no limit"; bound the whole exchange so a
	// stalled server cannot hang the program indefinitely.
	client := &http.Client{Timeout: 200 * time.Second}

	req, err := http.NewRequest(http.MethodPost, url, payload)
	if err != nil {
		fmt.Println(err)
		return
	}
	req.Header.Add("Authorization", "Basic XXX")
	req.Header.Add("Content-Type", "application/json")

	res, err := client.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}

	// client.Do only returns an error for transport-level failures, so HTTP
	// errors (401, 500, ...) must be detected from the status code. The body
	// is still shown because it may carry the server's error details.
	if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices {
		fmt.Printf("unexpected status %s: %s\n", res.Status, body)
		return
	}
	fmt.Println(string(body))
}
'"https://extractor.golem.ai/scan"',
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => '',
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 200,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => 'POST',
CURLOPT_POSTFIELDS =>'{
"file": "https://www.yourfile.pdf",
"useCache": true,
"parsers": {
"document": {
"extractImages": false,
"ocr": {
"name": "tesseract",
"mode": "auto"
},
"PDF": {
"extractImages": false,
"ocr": {
"name": "ida",
"mode": "on"
}
}
},
"image": {
"minimumHeight": 500,
"minimumWidth": 500,
"ocr": {
"name": "ida",
"mode": "off"
},
"png": {
"minimumWidth": 100,
"ocr": {
"name": "ida"
}
}
},
"spreadsheet": {
"readVertically": false,
"unmergeCells": false,
"splitPerBlock": false,
"splitPerBlockRowLimit": 10,
"splitPerBlockColumnLimit": 10,
"parseHiddenSheets": false
},
"email": {
"extractAttachments": false,
"ignoredAttachments": [
"xlsb",
"eml"
],
"msg": {
"extractAttachments": true
}
}
}
}',
CURLOPT_HTTPHEADER => array(
'Authorization: Basic XXX',
'Content-Type: application/json'
),
));
// Run the request configured by the options above. Because
// CURLOPT_RETURNTRANSFER is true, curl_exec() returns the response body as a
// string instead of printing it (and returns false on failure).
// NOTE(review): $curl is presumably created by curl_init() in the part of the
// sample that is not shown here — confirm.
$response = curl_exec($curl);
// Release the cURL handle now that the transfer is finished.
curl_close($curl);
// Output the raw response body.
echo $response;
import requests
import json
if __name__ == "__main__":
    # Sends a document URL, together with per-file-type parser options, to the
    # Extractor scan endpoint and prints the raw response text.
    # "Basic XXX" and the "file" URL are placeholders to replace with real values.
    URL: str = "https://extractor.golem.ai/scan"

    # json.dumps returns the serialized request body as a str, not a dict.
    payload: str = json.dumps(
        {
            "file": "https://www.yourfile.pdf",
            "parsers": {
                "document": {
                    "extractImages": False,
                    "ocr": {"name": "tesseract", "mode": "auto"},
                    "PDF": {
                        "extractImages": False,
                        "ocr": {"name": "ida", "mode": "on"},
                    },
                },
                "image": {
                    "minimumHeight": 500,
                    "minimumWidth": 500,
                    "ocr": {"name": "ida", "mode": "off"},
                    "png": {"minimumWidth": 100, "ocr": {"name": "ida"}},
                },
                "spreadsheet": {
                    "readVertically": False,
                    "unmergeCells": False,
                    "splitPerBlock": False,
                    "splitPerBlockRowLimit": 10,
                    "splitPerBlockColumnLimit": 10,
                    "parseHiddenSheets": False,
                },
                "email": {
                    "extractAttachments": False,
                    "ignoredAttachments": ["xlsb", "eml"],
                    "msg": {"extractAttachments": True},
                },
            },
        }
    )
    headers: dict = {"Authorization": "Basic XXX", "Content-Type": "application/json"}

    # requests applies no timeout by default, so an unresponsive server would
    # block this script forever; bound the wait explicitly (in seconds).
    response: requests.Response = requests.request(
        "POST", URL, headers=headers, data=payload, timeout=200
    )
    print(response.text)
// Request settings for the Extractor analyse endpoint.
// "Basic XXX" and the "file" URL are placeholders to replace with real values.
var settings = {
  "url": "https://extractor.golem.ai/v3/analyse",
  "method": "POST",
  // In jQuery a timeout of 0 means the request never times out.
  "timeout": 0,
  "headers": {
    "Authorization": "Basic XXX",
    "Content-Type": "application/json"
  },
  "data": JSON.stringify({
    "file": "https://www.yourfile.pdf"
  }),
};

// Send the request: log the response on success, and report the failure
// (instead of silently dropping it) when the request errors out.
$.ajax(settings)
  .done(function (response) {
    console.log(response);
  })
  .fail(function (jqXHR, textStatus, errorThrown) {
    console.error("Extractor request failed:", textStatus, errorThrown, jqXHR.responseText);
  });
Do you have an NLP project? Try our Core technology by signing up for the waiting list.