0% found this document useful (0 votes)
2 views

json_processor

The LEO JSON Processor module is designed to process JSON files for intent generation by analyzing their structure, extracting keys, and identifying potential entities. It includes methods for reading JSON data, analyzing its structure, extracting nested keys, and identifying entities related to names, locations, and dates. The module provides progress and status updates throughout the processing steps and handles errors with logging.

Uploaded by

raynyx77
Copyright
© © All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
0% found this document useful (0 votes)
2 views

json_processor

The LEO JSON Processor module is designed to process JSON files for intent generation by analyzing their structure, extracting keys, and identifying potential entities. It includes methods for reading JSON data, analyzing its structure, extracting nested keys, and identifying entities related to names, locations, and dates. The module provides progress and status updates throughout the processing steps and handles errors with logging.

Uploaded by

raynyx77
Copyright
© © All Rights Reserved
Available Formats
Download as TXT or PDF, or read online on Scribd
You are on page 1/ 4

#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
LEO JSON Processor

This module processes JSON files for intent generation.


"""

import os
import json
import logging
from collections import Counter

class JSONProcessor:
    """Processes JSON files for intent generation.

    Reads a JSON file, analyzes its top-level structure, extracts all
    (nested) key paths, and heuristically identifies entity-like values
    (names, locations, dates) by matching substrings of key names.
    Progress/status reporting is done through the replaceable
    ``on_progress`` / ``on_status`` callback attributes.
    """

    def __init__(self):
        """Initialize the JSON processor with no-op callbacks."""
        # Callers may overwrite these; the defaults do nothing.
        self.on_progress = lambda p: None  # receives an int percentage (0-100)
        self.on_status = lambda s: None    # receives a status message string

    def process(self, file_path):
        """
        Process a JSON file.

        Args:
            file_path (str): Path to the JSON file

        Returns:
            dict: Processed data with keys 'structure', 'keys',
                'entities', and 'data' (the original parsed JSON).

        Raises:
            Exception: any error from reading/parsing the file is logged
                with its traceback and re-raised unchanged.
        """
        try:
            self.on_status(f"Processing JSON file: {os.path.basename(file_path)}")
            self.on_progress(10)

            # Read file
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)

            self.on_progress(30)

            # Analyze structure
            self.on_status("Analyzing JSON structure...")
            structure = self._analyze_structure(data)

            self.on_progress(50)

            # Extract key information
            self.on_status("Extracting key information...")
            keys = self._extract_keys(data)

            self.on_progress(70)

            # Identify potential entities
            self.on_status("Identifying potential entities...")
            entities = self._identify_entities(data)

            self.on_progress(90)

            # Combine results
            result = {
                'structure': structure,
                'keys': keys,
                'entities': entities,
                'data': data  # Include the original data
            }

            self.on_progress(100)
            self.on_status("JSON processing complete")

            return result

        except Exception as e:
            # Log with full traceback, then propagate to the caller.
            logging.error("Error processing JSON file: %s", e, exc_info=True)
            raise

    def _analyze_structure(self, data):
        """
        Analyze the structure of JSON data.

        Args:
            data: JSON data (any type produced by ``json.load``)

        Returns:
            dict: Structure information. Always contains 'type'; for
                dicts adds 'keys', 'num_keys', 'nested_types'; for lists
                adds 'length' plus either 'common_keys'/'sample_item'
                (all-dict lists) or 'item_types' (mixed lists).
        """
        structure = {
            'type': type(data).__name__
        }

        if isinstance(data, dict):
            structure['keys'] = list(data.keys())
            structure['num_keys'] = len(data)
            # Record the type of each value (first level only).
            structure['nested_types'] = {
                key: type(value).__name__ for key, value in data.items()
            }

        elif isinstance(data, list):
            structure['length'] = len(data)

            if data:
                if all(isinstance(item, dict) for item in data):
                    # All items are dictionaries: record the keys they share.
                    common_keys = set.intersection(
                        *(set(item.keys()) for item in data)
                    )
                    structure['common_keys'] = list(common_keys)
                    # Keep the first item as a representative sample.
                    structure['sample_item'] = data[0]
                else:
                    # Mixed content: just note the types of the first 10 items.
                    structure['item_types'] = [
                        type(item).__name__ for item in data[:10]
                    ]

        return structure

    def _extract_keys(self, data, prefix=''):
        """
        Extract all keys from nested JSON data.

        Nested dict keys are reported as dotted paths (``parent.child``);
        keys found inside a list are discovered via the list's first item
        only, with an ``[0]`` suffix appended to the prefix.

        Args:
            data: JSON data
            prefix (str): Prefix for nested keys

        Returns:
            list: List of (possibly dotted) key paths
        """
        keys = []

        if isinstance(data, dict):
            for key, value in data.items():
                full_key = f"{prefix}.{key}" if prefix else key
                keys.append(full_key)

                if isinstance(value, (dict, list)):
                    keys.extend(self._extract_keys(value, full_key))

        elif isinstance(data, list) and data:
            # For lists, check the first item only (assumed representative).
            if isinstance(data[0], (dict, list)):
                keys.extend(self._extract_keys(data[0], prefix + '[0]'))

        return keys

    def _identify_entities(self, data):
        """
        Identify potential entities in the JSON data.

        Classification is purely by key-name substring matching; no value
        parsing is attempted.

        Args:
            data: JSON data

        Returns:
            dict: 'names' and 'locations' map to lists of
                ``(path, value)`` tuples (string values only); 'dates'
                maps to a list of paths. Categories with no matches are
                absent from the result.
        """
        entities = {}

        def process_dict(d, path=''):
            # Walk one dict level, classifying each key and recursing
            # into nested containers.
            for key, value in d.items():
                key_lower = key.lower()
                current_path = f"{path}.{key}" if path else key

                # Check for name-related keys
                if any(name_term in key_lower for name_term in
                       ['name', 'user', 'person', 'customer', 'client']):
                    if isinstance(value, str):
                        entities.setdefault('names', []).append(
                            (current_path, value))
                # Check for location-related keys
                elif any(loc_term in key_lower for loc_term in
                         ['city', 'state', 'country', 'address', 'location']):
                    if isinstance(value, str):
                        entities.setdefault('locations', []).append(
                            (current_path, value))
                # Check for date-related keys (only the path is recorded;
                # the value type is deliberately not checked)
                elif any(date_term in key_lower for date_term in
                         ['date', 'time', 'day', 'year', 'month']):
                    entities.setdefault('dates', []).append(current_path)

                # Recursively process nested dictionaries and lists
                if isinstance(value, dict):
                    process_dict(value, current_path)
                elif isinstance(value, list):
                    for i, item in enumerate(value[:5]):  # Limit to first 5 items
                        if isinstance(item, dict):
                            process_dict(item, f"{current_path}[{i}]")

        # Start processing from the root container.
        if isinstance(data, dict):
            process_dict(data)
        elif isinstance(data, list) and data and isinstance(data[0], dict):
            for i, item in enumerate(data[:5]):  # Limit to first 5 items
                process_dict(item, f"[{i}]")

        return entities

You might also like