0% found this document useful (0 votes)
2 views

json_processor

The LEO JSON Processor module is designed to process JSON files for intent generation by analyzing their structure, extracting keys, and identifying potential entities. It includes methods for reading JSON data, analyzing its structure, extracting nested keys, and identifying entities related to names, locations, and dates. The module provides progress and status updates throughout the processing steps and handles errors with logging.

Uploaded by

raynyx77
Copyright
© © All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
2 views

json_processor

The LEO JSON Processor module is designed to process JSON files for intent generation by analyzing their structure, extracting keys, and identifying potential entities. It includes methods for reading JSON data, analyzing its structure, extracting nested keys, and identifying entities related to names, locations, and dates. The module provides progress and status updates throughout the processing steps and handles errors with logging.

Uploaded by

raynyx77
Copyright
© © All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 4

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
LEO JSON Processor

This module processes JSON files for intent generation.


"""

import os
import json
import logging
from collections import Counter

class JSONProcessor:
    """Processes JSON files for intent generation.

    Reads a JSON file, analyzes its top-level structure, extracts all
    (nested) key paths, and heuristically identifies entity-like values
    (names, locations, dates) by matching substrings of key names.
    Progress/status reporting is done through the replaceable
    ``on_progress`` / ``on_status`` callback attributes.
    """

    def __init__(self):
        """Initialize the JSON processor with no-op callbacks."""
        # Callers may overwrite these; the defaults do nothing.
        self.on_progress = lambda p: None  # receives an int percentage (0-100)
        self.on_status = lambda s: None    # receives a status message string

    def process(self, file_path):
        """
        Process a JSON file.

        Args:
            file_path (str): Path to the JSON file

        Returns:
            dict: Processed data with keys 'structure', 'keys',
                'entities', and 'data' (the original parsed JSON).

        Raises:
            Exception: any error from reading/parsing the file is logged
                with its traceback and re-raised unchanged.
        """
        try:
            self.on_status(f"Processing JSON file: {os.path.basename(file_path)}")
            self.on_progress(10)

            # Read file
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            self.on_progress(30)

            # Analyze structure
            self.on_status("Analyzing JSON structure...")
            structure = self._analyze_structure(data)

            self.on_progress(50)

            # Extract key information
            self.on_status("Extracting key information...")
            keys = self._extract_keys(data)

            self.on_progress(70)

            # Identify potential entities
            self.on_status("Identifying potential entities...")
            entities = self._identify_entities(data)

            self.on_progress(90)

            # Combine results
            result = {
                'structure': structure,
                'keys': keys,
                'entities': entities,
                'data': data  # Include the original data
            }

            self.on_progress(100)
            self.on_status("JSON processing complete")

            return result

        except Exception as e:
            # Log with full traceback, then propagate to the caller.
            logging.error("Error processing JSON file: %s", e, exc_info=True)
            raise

    def _analyze_structure(self, data):
        """
        Analyze the structure of JSON data.

        Args:
            data: JSON data (any type produced by ``json.load``)

        Returns:
            dict: Structure information. Always contains 'type'; for
                dicts adds 'keys', 'num_keys', 'nested_types'; for lists
                adds 'length' plus either 'common_keys'/'sample_item'
                (all-dict lists) or 'item_types' (mixed lists).
        """
        structure = {
            'type': type(data).__name__
        }

        if isinstance(data, dict):
            structure['keys'] = list(data.keys())
            structure['num_keys'] = len(data)
            # Record the type of each value (first level only).
            structure['nested_types'] = {
                key: type(value).__name__ for key, value in data.items()
            }

        elif isinstance(data, list):
            structure['length'] = len(data)

            if data:
                if all(isinstance(item, dict) for item in data):
                    # All items are dictionaries: record the keys they share.
                    common_keys = set.intersection(
                        *(set(item.keys()) for item in data)
                    )
                    structure['common_keys'] = list(common_keys)
                    # Keep the first item as a representative sample.
                    structure['sample_item'] = data[0]
                else:
                    # Mixed content: just note the types of the first 10 items.
                    structure['item_types'] = [
                        type(item).__name__ for item in data[:10]
                    ]

        return structure

    def _extract_keys(self, data, prefix=''):
        """
        Extract all keys from nested JSON data.

        Nested dict keys are reported as dotted paths (``parent.child``);
        keys found inside a list are discovered via the list's first item
        only, with an ``[0]`` suffix appended to the prefix.

        Args:
            data: JSON data
            prefix (str): Prefix for nested keys

        Returns:
            list: List of (possibly dotted) key paths
        """
        keys = []

        if isinstance(data, dict):
            for key, value in data.items():
                full_key = f"{prefix}.{key}" if prefix else key
                keys.append(full_key)

                if isinstance(value, (dict, list)):
                    keys.extend(self._extract_keys(value, full_key))

        elif isinstance(data, list) and data:
            # For lists, check the first item only (assumed representative).
            if isinstance(data[0], (dict, list)):
                keys.extend(self._extract_keys(data[0], prefix + '[0]'))

        return keys

    def _identify_entities(self, data):
        """
        Identify potential entities in the JSON data.

        Classification is purely by key-name substring matching; no value
        parsing is attempted.

        Args:
            data: JSON data

        Returns:
            dict: 'names' and 'locations' map to lists of
                ``(path, value)`` tuples (string values only); 'dates'
                maps to a list of paths. Categories with no matches are
                absent from the result.
        """
        entities = {}

        def process_dict(d, path=''):
            # Walk one dict level, classifying each key and recursing
            # into nested containers.
            for key, value in d.items():
                key_lower = key.lower()
                current_path = f"{path}.{key}" if path else key

                # Check for name-related keys
                if any(name_term in key_lower for name_term in
                       ['name', 'user', 'person', 'customer', 'client']):
                    if isinstance(value, str):
                        entities.setdefault('names', []).append(
                            (current_path, value))
                # Check for location-related keys
                elif any(loc_term in key_lower for loc_term in
                         ['city', 'state', 'country', 'address', 'location']):
                    if isinstance(value, str):
                        entities.setdefault('locations', []).append(
                            (current_path, value))
                # Check for date-related keys (only the path is recorded;
                # the value type is deliberately not checked)
                elif any(date_term in key_lower for date_term in
                         ['date', 'time', 'day', 'year', 'month']):
                    entities.setdefault('dates', []).append(current_path)

                # Recursively process nested dictionaries and lists
                if isinstance(value, dict):
                    process_dict(value, current_path)
                elif isinstance(value, list):
                    for i, item in enumerate(value[:5]):  # Limit to first 5 items
                        if isinstance(item, dict):
                            process_dict(item, f"{current_path}[{i}]")

        # Start processing from the root container.
        if isinstance(data, dict):
            process_dict(data)
        elif isinstance(data, list) and data and isinstance(data[0], dict):
            for i, item in enumerate(data[:5]):  # Limit to first 5 items
                process_dict(item, f"[{i}]")

        return entities

You might also like