Skip to main content

Datasets

Creating a dataset

You can use the API to create a dataset. First you need to create a public-private key pair for Encord.

You also need to decide where your data will be hosted so that you can choose the appropriate dataset type.


const crypto = require('crypto');
const sshpk = require('sshpk');

/**
 * Builds the value sent in the `Authorization` header of a user-level request.
 *
 * The request body is hashed with SHA-256, and that digest is signed with the
 * given private key using the `sha512` signing algorithm.
 *
 * @param {string|Buffer} data - Serialised request body to sign.
 * @param {string} privateKey - Private key text, parsed in `openssh` format.
 * @returns {string} `<hex of public key part 0>:<hex of signature part 0>`.
 */
const generateAuthHeader = (data, privateKey) => {
  const key = sshpk.parsePrivateKey(privateKey, 'openssh');
  const bodyDigest = crypto.createHash('sha256').update(data).digest();

  // The signature covers the digest of the body, not the raw body.
  const signer = key.createSign('sha512');
  signer.update(bodyDigest);
  const signature = signer.sign();

  const publicKeyHex = key.toPublic().parts[0].data.toString('hex');
  const signatureHex = signature.parts[0].data.toString('hex');
  return `${publicKeyHex}:${signatureHex}`;
};

var axios = require('axios');
// Details of the dataset to create; replace the placeholders before running.
const datasetPayload = {
  title: '<Dataset title>',
  type: '<0: CORD Storage, 1: AWS, 2: GCP, 3: AZURE>',
  description: '<Dataset description>',
};

// Serialised once so the exact same string is both signed and sent.
const requestBody = JSON.stringify({
  query_type: 'dataset',
  query_method: 'POST',
  values: {
    uid: null,
    payload: datasetPayload,
  },
});

const requestConfig = {
  method: 'post',
  url: 'https://api.encord.com/public/user',
  headers: {
    'Content-Type': 'application/json',
    Authorization: generateAuthHeader(requestBody, '<Private key>'),
    Accept: 'application/json',
  },
  data: requestBody,
};

// Print the response body on success, or the error on failure.
axios(requestConfig)
  .then((response) => {
    console.log(JSON.stringify(response.data));
  })
  .catch((error) => {
    console.log(error);
  });

Creating a dataset API key

Via the API you can create a dataset API key. The API key is required to interact with the dataset. You also need to provide the dataset_hash, which uniquely identifies a dataset. The ResourceID of a dataset is the same as its dataset_hash. This capability is available only to the Admin of a dataset.

const crypto = require('crypto');
const sshpk = require('sshpk');

/**
 * Builds the value sent in the `Authorization` header of a user-level request.
 *
 * The request body is hashed with SHA-256, and that digest is signed with the
 * given private key using the `sha512` signing algorithm.
 *
 * @param {string|Buffer} data - Serialised request body to sign.
 * @param {string} privateKey - Private key text, parsed in `openssh` format.
 * @returns {string} `<hex of public key part 0>:<hex of signature part 0>`.
 */
const generateAuthHeader = (data, privateKey) => {
  const key = sshpk.parsePrivateKey(privateKey, 'openssh');
  const bodyDigest = crypto.createHash('sha256').update(data).digest();

  // The signature covers the digest of the body, not the raw body.
  const signer = key.createSign('sha512');
  signer.update(bodyDigest);
  const signature = signer.sign();

  const publicKeyHex = key.toPublic().parts[0].data.toString('hex');
  const signatureHex = signature.parts[0].data.toString('hex');
  return `${publicKeyHex}:${signatureHex}`;
};


var axios = require('axios');
// Describes the API key to create; replace the placeholders before running.
const apiKeyPayload = {
  dataset_hash: '<dataset_id>',
  title: '<Dataset title>',
  scopes: '["dataset.read", "dataset.write"]',
};

// Serialised once so the exact same string is both signed and sent.
const requestBody = JSON.stringify({
  query_type: 'datasetapikey',
  query_method: 'POST',
  values: {
    uid: null,
    payload: apiKeyPayload,
  },
});

const requestConfig = {
  method: 'post',
  url: 'https://api.encord.com/public/user',
  headers: {
    'Content-Type': 'application/json',
    Authorization: generateAuthHeader(requestBody, '<Private key>'),
    Accept: 'application/json',
  },
  data: requestBody,
};

// Print the response body on success, or the error on failure.
axios(requestConfig)
  .then((response) => {
    console.log(JSON.stringify(response.data));
  })
  .catch((error) => {
    console.log(error);
  });

Fetching dataset API keys

Via the API you can get all API keys for an existing dataset. You need to provide the dataset_hash, which uniquely identifies a dataset. The ResourceID of a dataset is the same as its dataset_hash. This capability is available only to the Admin of a dataset.

Equivalently using NodeJS with Axios:

const crypto = require('crypto');
const sshpk = require('sshpk');

/**
 * Builds the value sent in the `Authorization` header of a user-level request.
 *
 * The request body is hashed with SHA-256, and that digest is signed with the
 * given private key using the `sha512` signing algorithm.
 *
 * @param {string|Buffer} data - Serialised request body to sign.
 * @param {string} privateKey - Private key text, parsed in `openssh` format.
 * @returns {string} `<hex of public key part 0>:<hex of signature part 0>`.
 */
const generateAuthHeader = (data, privateKey) => {
  const key = sshpk.parsePrivateKey(privateKey, 'openssh');
  const bodyDigest = crypto.createHash('sha256').update(data).digest();

  // The signature covers the digest of the body, not the raw body.
  const signer = key.createSign('sha512');
  signer.update(bodyDigest);
  const signature = signer.sign();

  const publicKeyHex = key.toPublic().parts[0].data.toString('hex');
  const signatureHex = signature.parts[0].data.toString('hex');
  return `${publicKeyHex}:${signatureHex}`;
};


var axios = require('axios');
// Serialised once so the exact same string is both signed and sent.
const requestBody = JSON.stringify({
  query_type: 'datasetapikey',
  query_method: 'GET',
  values: {
    uid: null,
    // Identifies the dataset whose API keys are requested.
    payload: { dataset_hash: '<dataset_id>' },
  },
});

const requestConfig = {
  method: 'post',
  url: 'https://api.encord.com/public/user',
  headers: {
    'Content-Type': 'application/json',
    Authorization: generateAuthHeader(requestBody, '<Private key>'),
    Accept: 'application/json',
  },
  data: requestBody,
};

// Print the response body on success, or the error on failure.
axios(requestConfig)
  .then((response) => {
    console.log(JSON.stringify(response.data));
  })
  .catch((error) => {
    console.log(error);
  });

Fetching dataset information

Fetch information associated with a given dataset.


var axios = require('axios');
// Query body: a GET on the `dataset` resource with no uid and no payload.
const requestBody = JSON.stringify({
  query_type: 'dataset',
  query_method: 'GET',
  values: { uid: null, payload: null },
});

// The dataset is selected by the ResourceID header and authorised by its API key.
const requestConfig = {
  method: 'post',
  url: 'https://api.encord.com/public',
  headers: {
    'Content-Type': 'application/json',
    ResourceID: '<dataset_id>',
    Authorization: '<dataset_api_key>',
    Accept: 'application/json',
  },
  data: requestBody,
};

// Print the response body on success, or the error on failure.
axios(requestConfig)
  .then((response) => {
    console.log(JSON.stringify(response.data));
  })
  .catch((error) => {
    console.log(error);
  });

Adding data

Adding data to Encord-hosted storage

Uploading videos

To upload a video to a dataset using Encord storage, call the uploadVideo function with the file path of the video you want to upload.

var axios = require('axios');
var fs = require('fs');
var path = require('path');

/**
 * Uploads a local video file to a dataset that uses Encord storage.
 *
 * First requests a signed URL for the file via `getSignedVideoUrl`, then hands
 * the file and the signed-URL data to `uploadToSignedUrl`.
 *
 * Any failure in either step is caught and logged with `console.log`; the
 * function never rejects.
 *
 * @param {string} filePath - Path of the video file on disk.
 * @param {string} datasetId - Dataset ID, passed through to both helpers.
 * @param {string} datasetApiKey - Dataset API key, passed through to both helpers.
 * @returns {Promise<*>} The value resolved by `uploadToSignedUrl`, or
 *   `undefined` if an error was caught and logged.
 */
const uploadVideo = async (filePath, datasetId, datasetApiKey) => {
  try {
    // GET signed url
    const { response: signedUrlData } = await getSignedVideoUrl(filePath, datasetId, datasetApiKey);
    const { signed_url } = signedUrlData;

    // Upload to signed url. This must be awaited: otherwise a failed upload
    // rejects outside this try/catch (an unhandled rejection) and the caller
    // sees the function finish before the upload has completed.
    return await uploadToSignedUrl(filePath, signed_url, signedUrlData, datasetId, datasetApiKey);
  } catch (e) {
    console.log('Error', e);
    return undefined;
  }
};

/**
 * Requests a signed upload URL for a video.
 *
 * Sends a `signedvideourl` GET query whose `uid` is the base name of
 * `fileName`, authenticated with the dataset ID and dataset API key.
 *
 * @param {string} fileName - File name or path; only its base name is sent.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The body of the API response.
 */
const getSignedVideoUrl = async (fileName, datasetId, datasetApiKey) => {
  const requestBody = JSON.stringify({
    query_type: 'signedvideourl',
    query_method: 'GET',
    values: {
      uid: path.basename(fileName),
      payload: null,
    },
  });

  const { data: reply } = await axios({
    method: 'post',
    url: 'https://api.encord.com/public',
    headers: {
      'Content-Type': 'application/json',
      ResourceID: datasetId,
      Authorization: datasetApiKey,
      Accept: 'application/json',
    },
    data: requestBody,
  });
  return reply;
};

/**
 * Uploads a file to a signed URL and then registers it as a video.
 *
 * The file is read fully into memory, PUT to `signedUrl` as
 * `application/octet-stream`, and afterwards a `video` PUT query is sent to
 * the Encord API with `signedUrlData` as its payload.
 *
 * @param {string} filePath - Path of the file to upload.
 * @param {string} signedUrl - Destination of the PUT upload.
 * @param {Object} signedUrlData - Signed-URL record; its `data_hash` is used
 *   as the query `uid` and the whole object as the query `payload`.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The body of the registration response.
 */
const uploadToSignedUrl = async (filePath, signedUrl, signedUrlData, datasetId, datasetApiKey) => {
  const fileBytes = fs.readFileSync(filePath);

  // Size limits are lifted so axios does not reject large request/response bodies.
  await axios({
    method: 'put',
    url: signedUrl,
    headers: {
      'Content-Type': 'application/octet-stream',
    },
    data: fileBytes,
    maxContentLength: Infinity,
    maxBodyLength: Infinity,
  });

  const registrationBody = JSON.stringify({
    query_type: 'video',
    query_method: 'PUT',
    values: {
      uid: signedUrlData.data_hash,
      payload: signedUrlData,
    },
  });

  const { data: reply } = await axios({
    method: 'post',
    url: 'https://api.encord.com/public',
    headers: {
      'Content-Type': 'application/json',
      ResourceID: datasetId,
      Authorization: datasetApiKey,
      Accept: 'application/json',
    },
    data: registrationBody,
  });
  return reply;
};

The following code uploads example_video.mp4 from the desktop.

// Example credentials; substitute your own dataset ID and dataset API key.
const datasetId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee';
const datasetApiKey = 'lCuoabcdefabcdefabcdefabcdefabcdefabc-jlan8';

// Location of the video on the local machine.
const videoPath = '/Users/name/Desktop/example_video.mp4';

uploadVideo(videoPath, datasetId, datasetApiKey);

Creating image groups

Use the function createImageGroup to upload images and create an image group using Encord storage.

var axios = require('axios');
var fs = require('fs');
var path = require('path');

/**
 * Uploads a set of images and creates an image group from them.
 *
 * Steps: request signed URLs for the base names of `filePaths`, upload the
 * files via `uploadToSignedUrlList`, then create the image group from the
 * resulting data hashes via `createImageGroupApiCall`.
 *
 * Errors from any step reject the returned promise; nothing is caught here.
 *
 * @param {string[]} filePaths - Paths of the image files on disk.
 * @param {string} datasetId - Dataset ID, passed through to every helper.
 * @param {string} datasetApiKey - Dataset API key, passed through to every helper.
 * @returns {Promise<*>} The value resolved by `createImageGroupApiCall`.
 */
const createImageGroup = async (filePaths, datasetId, datasetApiKey) => {
  const shortNames = filePaths.map((filePath) => path.basename(filePath));

  const signedImagesReply = await signedImagesUrl(shortNames, datasetId, datasetApiKey);

  const dataHashes = await uploadToSignedUrlList(
    filePaths,
    signedImagesReply.response,
    datasetId,
    datasetApiKey,
  );

  // Return the final call instead of leaving it floating, so the caller can
  // wait for the group to be created and a failure rejects this promise
  // rather than surfacing as an unhandled rejection.
  return createImageGroupApiCall(dataHashes, datasetId, datasetApiKey);
};

/**
 * Requests signed upload URLs for a list of image names.
 *
 * Sends a `signedimagesurl` GET query with `shortNames` as the `uid`.
 *
 * @param {string[]} shortNames - Image file names to request URLs for.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The body of the API response.
 */
const signedImagesUrl = async (shortNames, datasetId, datasetApiKey) => {
  const requestBody = JSON.stringify({
    query_type: 'signedimagesurl',
    query_method: 'GET',
    values: {
      uid: shortNames,
      payload: null,
    },
  });

  const { data: reply } = await axios({
    method: 'post',
    url: 'https://api.encord.com/public',
    headers: {
      'Content-Type': 'application/json',
      ResourceID: datasetId,
      Authorization: datasetApiKey,
      Accept: 'application/json',
    },
    data: requestBody,
  });
  return reply;
};


/**
 * Uploads image files to their signed URLs and registers each one.
 *
 * Files are handled one at a time, in order. The file at index `i` is paired
 * with `signedUrls[i]`. Every file is read from disk, but it is uploaded and
 * registered only when its base name equals the `title` of the paired entry;
 * otherwise it is skipped without an error.
 *
 * @param {string[]} filePaths - Paths of the image files on disk.
 * @param {Object[]} signedUrls - Entries providing `signed_url`, `title` and
 *   `data_hash`, index-aligned with `filePaths`.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<Array>} The `response.data_hash` of each registration
 *   reply, in upload order.
 */
const uploadToSignedUrlList = async (filePaths, signedUrls, datasetId, datasetApiKey) => {
  const registeredHashes = [];

  for (let position = 0; position < filePaths.length; position += 1) {
    const currentPath = filePaths[position];
    const baseName = path.basename(currentPath);

    const urlEntry = signedUrls[position];
    const { signed_url, title, data_hash } = urlEntry;

    const imageBytes = fs.readFileSync(currentPath);

    // Only upload when the signed-URL entry really belongs to this file.
    if (baseName !== title) {
      continue;
    }

    // Size limits are lifted so axios does not reject large bodies.
    await axios({
      method: 'put',
      url: signed_url,
      headers: {
        'Content-Type': 'application/octet-stream',
      },
      data: imageBytes,
      maxContentLength: Infinity,
      maxBodyLength: Infinity,
    });

    const registrationBody = JSON.stringify({
      query_type: 'image',
      query_method: 'PUT',
      values: {
        uid: data_hash,
        payload: urlEntry,
      },
    });

    const { data: reply } = await axios({
      method: 'post',
      url: 'https://api.encord.com/public',
      headers: {
        'Content-Type': 'application/json',
        ResourceID: datasetId,
        Authorization: datasetApiKey,
        Accept: 'application/json',
      },
      data: registrationBody,
    });
    registeredHashes.push(reply.response.data_hash);
  }

  return registeredHashes;
};

/**
 * Creates an image group from already-uploaded images.
 *
 * Sends an `imagegroup` POST query with `dataHashes` as the `uid` and an
 * empty object as the payload.
 *
 * @param {Array} dataHashes - Data hashes of the images to group.
 * @param {string} datasetId - Sent as the `ResourceID` header.
 * @param {string} datasetApiKey - Sent as the `Authorization` header.
 * @returns {Promise<*>} The body of the API response.
 */
const createImageGroupApiCall = async (dataHashes, datasetId, datasetApiKey) => {
  const requestBody = JSON.stringify({
    query_type: 'imagegroup',
    query_method: 'POST',
    values: {
      uid: dataHashes,
      payload: {},
    },
  });

  const { data: reply } = await axios({
    method: 'post',
    url: 'https://api.encord.com/public',
    headers: {
      'Content-Type': 'application/json',
      ResourceID: datasetId,
      Authorization: datasetApiKey,
      Accept: 'application/json',
    },
    data: requestBody,
  });
  return reply;
};

The following code uploads an image group consisting of three images.

// Example credentials; substitute your own dataset ID and dataset API key.
const datasetId = 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee';
const datasetApiKey = 'lCuoabcdefabcdefabcdefabcdefabcdefabc-jlan8';

// The images that will make up the group, in order.
const imagePaths = [
  '/Users/name/Desktop/Image_Group_Folder/image_one.jpeg',
  '/Users/name/Desktop/Image_Group_Folder/image_two.jpeg',
  '/Users/name/Desktop/Image_Group_Folder/image_three.jpg',
];

createImageGroup(imagePaths, datasetId, datasetApiKey);

Adding data from private cloud

  1. Use the API to retrieve a list of available Cloud Integrations

var axios = require('axios');
// Query body: a GET on the `cloudintegration` resource with no uid and no payload.
const requestBody = JSON.stringify({
  query_type: 'cloudintegration',
  query_method: 'GET',
  values: { uid: null, payload: null },
});

const requestConfig = {
  method: 'post',
  url: 'https://api.encord.com/public',
  headers: {
    'Content-Type': 'application/json',
    ResourceID: '<dataset_id>',
    Authorization: '<dataset_api_key>',
    Accept: 'application/json',
  },
  data: requestBody,
};

// Print the response body on success, or the error on failure.
axios(requestConfig)
  .then((response) => {
    console.log(JSON.stringify(response.data));
  })
  .catch((error) => {
    console.log(error);
  });

  2. Grab the id of the integration of your choice and call the API to add the data, supplied as a JSON file in the format specified in the private cloud section of the datasets documentation.

var axios = require('axios');
var fs = require('fs');
// The npm package is named `form-data`; requiring 'form-datasets' fails with
// MODULE_NOT_FOUND. Note that this snippet does not use formData itself.
var formData = require('form-data');

// The JSON file listing the private-cloud files to add; parsed so that it is
// embedded in the request as an object rather than as a string.
const privateCloudJsonFile = JSON.parse(fs.readFileSync('<Path to your JSON>'));

// Query body: a POST on the `datasetdata` resource for the given dataset.
// Replace the placeholders before running.
var data = JSON.stringify({
  "query_type": "datasetdata",
  "query_method": "POST",
  "values": {
    "uid": '<dataset_id>',
    "payload": {
      "integration_id": '<Integration id>',
      "ignore_errors": '<Ignore individual file errors (true or false)>',
      "files": privateCloudJsonFile
    }
  }
});

var config = {
  method: 'post',
  url: 'https://api.encord.com/public',
  headers: {
    'Content-Type': 'application/json',
    'ResourceID': '<dataset_id>',
    'Authorization': '<dataset_api_key>',
    'Accept': 'application/json'
  },
  data: data
};

// Print the response body on success, or the error on failure.
axios(config)
  .then(function (response) {
    console.log(JSON.stringify(response.data));
  })
  .catch(function (error) {
    console.log(error);
  });

Deleting data from a dataset

This works for videos, image groups, images, and DICOM series.

var axios = require('axios');
// Data hashes of the items to remove from the dataset.
const dataHashesToDelete = ["<data_hash_1>", "<data_hash_2>"];

// Query body: a DELETE on the `video` resource for the listed data hashes.
const requestBody = JSON.stringify({
  query_type: 'video',
  query_method: 'DELETE',
  values: {
    uid: dataHashesToDelete,
    payload: null,
  },
});

const requestConfig = {
  method: 'post',
  url: 'https://api.encord.com/public',
  headers: {
    'Content-Type': 'application/json',
    ResourceID: '<dataset_id>',
    Authorization: '<dataset_api_key>',
    Accept: 'application/json',
  },
  data: requestBody,
};

// Print the response body on success, or the error on failure.
axios(requestConfig)
  .then((response) => {
    console.log(JSON.stringify(response.data));
  })
  .catch((error) => {
    console.log(error);
  });