Hello all,
Trying to use ArcGIS's arcgis.learn for preparing and loading the JSON data.
I am following the example from this page:
https://developers.arcgis.com/python/guide/how-named-entity-recognition-works/
And while following the example I am getting the error shown below:
json file that is being imported:
000445E4.json:
{"id": 4, "data": "Sir Chandrashekhara Venkata Raman was born in India.", "label": [[0, 33, "Person"], [46, 51, "Location"]]}
I have used doccano for the above annotation.
Code up until now:
import os
import spacy
import random
from arcgis.learn import prepare_data
from arcgis.learn.text import EntityRecognizer
json_path = os.path.join('Entity_labels','Deed','annotations','000445E4.json')
data = prepare_data(path=json_path, dataset_type='ner_json')
And the error I am getting while running the last line:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-23-35d6ce63514b> in <module>
----> 1 data = prepare_data(path=json_path, dataset_type='ner_json')
~\Anaconda3\envs\docuchief\lib\site-packages\arcgis\learn\_data.py in prepare_data(path, class_mapping, chip_size, val_split_pct, batch_size, transforms, collate_fn, seed, dataset_type, resize_to, working_dir, **kwargs)
2283 val_split_pct=val_split_pct,
2284 batch_size=batch_size,
-> 2285 encoding=encoding,
2286 )
2287 if working_dir is not None:
~\Anaconda3\envs\docuchief\lib\site-packages\arcgis\learn\_utils\_ner_utils.py in __init__(self, dataset_type, path, batch_size, class_mapping, seed, val_split_pct, encoding)
192 self.val_split_pct = val_split_pct
193 self.encoding = encoding
--> 194 self.prepare_data_for_spacy()
195
196 def show_batch(self):
~\Anaconda3\envs\docuchief\lib\site-packages\arcgis\learn\_utils\_ner_utils.py in prepare_data_for_spacy(self)
361 batch_size=self.batch_size,
362 address_tag=address_tag,
--> 363 test_ds=None,
364 )
365 self.data.path = path
~\Anaconda3\envs\docuchief\lib\site-packages\arcgis\learn\_utils\_ner_utils.py in __init__(self, ds, val_split_pct, batch_size, test_ds, address_tag)
470 # creating an _spaCyNERItemlist with training dataset
471 self.train_ds = _spaCyNERItemlist(
--> 472 batch_size, data=ds[: int(len(ds) * (1 - val_split_pct))]
473 )
474 # creating an _spaCyNERItemlist with validation dataset
~\Anaconda3\envs\docuchief\lib\site-packages\arcgis\learn\_utils\_ner_utils.py in __init__(self, batch_size, data)
388 i[2]
389 for i in pd.concat(
--> 390 [pd.Series(i["entities"]) for i in [o[1] for o in data]]
391 )
392 }
~\Anaconda3\envs\docuchief\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
309 stacklevel=stacklevel,
310 )
--> 311 return func(*args, **kwargs)
312
313 return wrapper
~\Anaconda3\envs\docuchief\lib\site-packages\pandas\core\reshape\concat.py in concat(objs, axis, join, ignore_index, keys, levels, names, verify_integrity, sort, copy)
302 verify_integrity=verify_integrity,
303 copy=copy,
--> 304 sort=sort,
305 )
306
~\Anaconda3\envs\docuchief\lib\site-packages\pandas\core\reshape\concat.py in __init__(self, objs, axis, join, keys, levels, names, ignore_index, verify_integrity, copy, sort)
349
350 if len(objs) == 0:
--> 351 raise ValueError("No objects to concatenate")
352
353 if keys is None:
ValueError: No objects to concatenate
What can I do to solve it?
@AviparnaBiswas Could you provide sample data for reproducing this issue? The guide https://developers.arcgis.com/python/guide/how-named-entity-recognition-works/ has been updated and says that you need to use the prepare_textdata function instead of prepare_data. You could follow the sample notebook available here to run the NER model workflow end-to-end. This notebook includes the JSON data as a downloadable GIS item.