Hello, I am using the MAL pipeline to upload labels from an ML model. My pipeline is:
- Find project with queued data rows and pull them
- download any extra images
- create annotation payload for each image
- upload each label
When I upload (setting the dataRow.id value for each annotation to the id pulled from the project), I get an "invalid dataRow.id" error. However, when I search Labelbox for the id belonging to that project, I can find it. Any advice on what the correct id format should be?
Here is the code:
import os
import requests
import queries as q
import psycopg2
import pandas as pd
from google.cloud import storage
import torch
from PIL import Image
import cv2
import labelbox as lb
import labelbox.data.annotation_types as lb_types
import uuid
import numpy as np
# Google Cloud Storage client (used elsewhere in the pipeline for assets).
storage_client = storage.Client()
# Labelbox credentials come from the environment; raises KeyError if unset.
API_KEY = os.environ['LABELBOX_API_KEY']
client = lb.Client(API_KEY)
# Local cache directory for the images referenced by the queued data rows.
os.makedirs('images', exist_ok=True)
out_paths = []
# Fetch the project and its queued (not-yet-labeled) data rows.
# NOTE(review): 'project id' is a placeholder — substitute the real project uid.
project = client.get_project('project id')
data_rows = project.export_queued_data_rows()
print(data_rows)
# Download any queued images that are not already cached locally.
# Each exported row carries 'rowData' (a URL to the asset) and
# 'externalId' (the file name the row was created with).
for image_dict in data_rows:
    image_url = image_dict['rowData']
    image_name = image_dict['externalId']
    local_path = os.path.join('images', image_name)
    if not os.path.exists(local_path):
        # Bound the request so one dead URL cannot hang the whole pipeline.
        response = requests.get(image_url, timeout=30)
        if response.status_code == 200:
            with open(local_path, 'wb') as f:
                f.write(response.content)
            print(f"Successfully downloaded image {image_name}")
        else:
            print(f"Failed to download image {image_name}")
# Load the pre-trained YOLOv5s model from the Ultralytics hub.
# force_reload=True bypasses a possibly stale local hub cache.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, force_reload=True)

# Accumulates one NDJSON-style prediction dict per detected box across
# all images; uploaded in a single MAL import job below.
# (Removed unused `datarows` list and `i` counter — dead variables.)
label_export = []
# build dataset to upload to labelbox
# Build the MAL prediction payload: one dict per detected person box.
# Map externalId -> data row id ONCE instead of scanning data_rows for
# every file (the original inner loop was O(files * rows)).
id_by_external = {row['externalId']: row['id'] for row in data_rows}

for file in os.listdir('images'):
    data_row_id = id_by_external.get(file)
    if data_row_id is None:
        # Image on disk that is not part of this project's queue — skip it.
        continue

    # Run object detection on the image.
    image = Image.open('images/' + file)
    results = model(image)

    # results.xyxy[0] rows are [x1, y1, x2, y2, confidence, class].
    boxes = results.xyxy[0].cpu().numpy()
    for box in boxes:
        if int(box[5]) != 0:
            # COCO class 0 is 'person'; only export human detections
            # instead of labeling every class as 'Human'.
            continue
        x1, y1, x2, y2 = box[:4].astype(float)
        label_export.append({
            # Each NDJSON annotation needs its own uuid for status tracking.
            'uuid': str(uuid.uuid4()),
            'name': 'Human',
            'classifications': [],
            # BUG FIX: 'top' is the y-coordinate and 'left' the x-coordinate;
            # the original payload had them swapped ('top': x1, 'left': y1).
            'bbox': {'top': y1, 'left': x1, 'height': y2 - y1, 'width': x2 - x1},
            # Report the model's actual confidence, not a hard-coded 0.5.
            'confidence': float(box[4]),
            'dataRow': {'id': data_row_id}
        })
# Upload MAL label for this data row in project
# Upload the MAL predictions. Use the uid of the project object fetched
# above so the data row ids and the target project are guaranteed to
# match — a mismatched or placeholder project_id is a common cause of
# 'DataRowNotFound: dataRow.id ... invalid' errors, because the ids are
# validated against the project the import targets.
upload_job = lb.MALPredictionImport.create_from_objects(
    client=client,
    project_id=project.uid,
    name="mal_job" + str(uuid.uuid4()),
    predictions=label_export)
upload_job.wait_until_done()
print(upload_job)
print("Errors:", upload_job.errors)
print("Status of uploads: ", upload_job.statuses)
and here is the error:
Errors: [{'uuid': 'f45d6b84-622c-4051-a487-a7e463903398', 'dataRow': {'id': 'clex13pci049207b52ejg4mkm', 'globalKey': None}, 'status': 'FAILURE', 'errors': [{'name': 'DataRowNotFound', 'message': 'dataRow.id clex13pci049207b52ejg4mkm invalid', 'additionalInfo': None}]},
I'm using the nd_json method. Confused as to why the id I pull from the project itself would not be found/invalid — unless it's just not the same id.