Golem.ai Core est la solution d’intelligence artificielle sans entrainement pour construire des projets NLP performants, robustes, frugaux et sans biais.
Construisez votre projet de A à Z avec notre plateforme NLP polyvalente.
“Nous confirmons l’arrivée du cargo Louis Blériot contenant le matriel d’opérations pour les hôpitaux au port du Havre depuis le port de 香港, un retard de deux heures sur l’opération de déchargment est prévu”
Transformez simplement vos documents en textes exploitables en utilisant notre technologie Extractor
Plusieurs OCR et bibliothèques d’extraction accessibles par API
package main
import (
"fmt"
"strings"
"net/http"
"io/ioutil"
)
// main posts a JSON document reference to the Extractor analyse endpoint and
// prints the response body to standard output.
//
// Any error while building the request, sending it, or reading the response
// is printed and ends the program. A non-2xx HTTP status is reported before
// the body so that an API error payload is not mistaken for a successful
// result; the body is still printed because it may carry the error details.
func main() {
	url := "https://extractor.golem.ai/v3/analyse"
	method := "POST"

	// Request body: a JSON object whose "file" field is a placeholder
	// document URL.
	payload := strings.NewReader(`{
"file": "https://www.yourfile.pdf"
}`)

	// NOTE(review): this client has no timeout, so a stalled server blocks
	// the program indefinitely. Setting http.Client.Timeout would require
	// importing the time package.
	client := &http.Client{}

	req, err := http.NewRequest(method, url, payload)
	if err != nil {
		fmt.Println(err)
		return
	}
	// "Basic XXX" is a placeholder credential.
	req.Header.Add("Authorization", "Basic XXX")
	req.Header.Add("Content-Type", "application/json")

	res, err := client.Do(req)
	if err != nil {
		fmt.Println(err)
		return
	}
	defer res.Body.Close()

	// client.Do only fails on transport errors; HTTP-level failures (4xx,
	// 5xx) arrive as a normal response and must be detected explicitly.
	if res.StatusCode < http.StatusOK || res.StatusCode >= http.StatusMultipleChoices {
		fmt.Println("unexpected HTTP status:", res.Status)
	}

	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Println(string(body))
}
'"https://extractor.golem.ai/scan"',
CURLOPT_RETURNTRANSFER => true,
CURLOPT_ENCODING => '',
CURLOPT_MAXREDIRS => 10,
CURLOPT_TIMEOUT => 200,
CURLOPT_FOLLOWLOCATION => true,
CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
CURLOPT_CUSTOMREQUEST => 'POST',
CURLOPT_POSTFIELDS =>'{
"file": "https://www.yourfile.pdf",
"useCache": true,
"parsers": {
"document": {
"extractImages": false,
"ocr": {
"name": "tesseract",
"mode": "auto"
},
"PDF": {
"extractImages": false,
"ocr": {
"name": "ida",
"mode": "on"
}
}
},
"image": {
"minimumHeight": 500,
"minimumWidth": 500,
"ocr": {
"name": "ida",
"mode": "off"
},
"png": {
"minimumWidth": 100,
"ocr": {
"name": "ida"
}
}
},
"spreadsheet": {
"readVertically": false,
"unmergeCells": false,
"splitPerBlock": false,
"splitPerBlockRowLimit": 10,
"splitPerBlockColumnLimit": 10,
"parseHiddenSheets": false
},
"email": {
"extractAttachments": false,
"ignoredAttachments": [
"xlsb",
"eml"
],
"msg": {
"extractAttachments": true
}
}
}
}',
CURLOPT_HTTPHEADER => array(
'Authorization: Basic XXX',
'Content-Type: application/json'
),
));
// Run the configured request. curl_exec() returns false when the transfer
// itself fails (DNS, connection, timeout, ...); echoing false would print an
// empty string and hide the failure, so report the cURL error text instead.
$response = curl_exec($curl);
if ($response === false) {
    // curl_error() must be read before the handle is closed.
    $response = 'cURL error: ' . curl_error($curl);
}
curl_close($curl);
echo $response;
import requests
import json
if __name__ == "__main__":
    # Example client: POST a document URL together with per-format parser
    # options to the Extractor scan endpoint and print the raw response text.
    URL: str = "https://extractor.golem.ai/scan"

    # json.dumps returns the serialized request body as a str, not a dict.
    payload: str = json.dumps(
        {
            "file": "https://www.yourfile.pdf",
            "parsers": {
                "document": {
                    "extractImages": False,
                    "ocr": {"name": "tesseract", "mode": "auto"},
                    "PDF": {
                        "extractImages": False,
                        "ocr": {"name": "ida", "mode": "on"},
                    },
                },
                "image": {
                    "minimumHeight": 500,
                    "minimumWidth": 500,
                    "ocr": {"name": "ida", "mode": "off"},
                    "png": {"minimumWidth": 100, "ocr": {"name": "ida"}},
                },
                "spreadsheet": {
                    "readVertically": False,
                    "unmergeCells": False,
                    "splitPerBlock": False,
                    "splitPerBlockRowLimit": 10,
                    "splitPerBlockColumnLimit": 10,
                    "parseHiddenSheets": False,
                },
                "email": {
                    "extractAttachments": False,
                    "ignoredAttachments": ["xlsb", "eml"],
                    "msg": {"extractAttachments": True},
                },
            },
        }
    )

    # "Basic XXX" is a placeholder credential.
    headers: dict = {"Authorization": "Basic XXX", "Content-Type": "application/json"}

    # requests applies no timeout unless one is passed, so an unresponsive
    # server would block forever; bound the wait to 200 seconds.
    response: requests.Response = requests.request(
        "POST", URL, headers=headers, data=payload, timeout=200
    )
    print(response.text)
// Settings for jQuery.ajax: POST a JSON body containing a placeholder
// document URL to the Extractor analyse endpoint.
var settings = {
  "url": "https://extractor.golem.ai/v3/analyse",
  "method": "POST",
  // Per the jQuery.ajax documentation, a timeout of 0 means no timeout.
  "timeout": 0,
  "headers": {
    // "Basic XXX" is a placeholder credential.
    "Authorization": "Basic XXX",
    "Content-Type": "application/json"
  },
  "data": JSON.stringify({
    "file": "https://www.yourfile.pdf"
  }),
};

$.ajax(settings)
  .done(function (response) {
    console.log(response);
  })
  // Without a failure handler, network and HTTP errors are dropped silently.
  .fail(function (jqXHR, textStatus, errorThrown) {
    console.error("Extractor request failed:", textStatus, errorThrown);
  });
Vous avez un projet NLP ? Essayez notre technologie Core en vous inscrivant à la waiting list.