0% found this document useful (0 votes)

146 views5 pages

Coding Python Servel

This Python code extracts text from PDF documents and structures the extracted data. It takes in a PDF file from Servel, the Chilean electoral service, extracts fields like names, IDs, addresses from each page, and geocodes the addresses using the Google Maps API. The structured data is returned in a list of dictionaries containing information for each voter, along with their mapped latitude and longitude.

Uploaded by

Jose Manuel Pino Jauregui

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

146 views5 pages

Coding Python Servel

Uploaded by

Jose Manuel Pino Jauregui

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

# -*- coding: utf-8 -*-
"""
Created on Sun Jul 24 [Link] 2016

@author: Javier Palma-Espinosa

"""

'''
info extracted from
[Link]
from-document
[Link]
using-pdfminer-in-python
'''
import csv
import sys
import json
import googlemaps

# Python 2 only: sys.setdefaultencoding is not reachable on the module as
# first imported, so it is reloaded before forcing UTF-8 as the default
# encoding for implicit str/unicode conversions.
reload(sys)
sys.setdefaultencoding("utf-8")

from cStringIO import StringIO

from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from pyPdf import PdfFileReader

def convert(fname, pages=None):
    """Convert pages of a PDF file into a list of text lines.

    Args:
        fname: Path of the PDF file to read.
        pages: Optional iterable of page numbers, forwarded to
            ``PDFPage.get_pages``.  When omitted or empty, an empty set is
            forwarded instead.
            NOTE(review): pdfminer presumably treats an empty set as
            "every page" -- confirm against the installed version.

    Returns:
        The extracted text, split on newline characters into a list of
        strings.
    """
    pagenums = set(pages) if pages else set()

    output = StringIO()
    manager = PDFResourceManager()
    converter = TextConverter(manager, output, laparams=LAParams())
    interpreter = PDFPageInterpreter(manager, converter)

    try:
        try:
            # The context manager closes the input file even when a page
            # fails to parse.
            with open(fname, 'rb') as infile:
                for page in PDFPage.get_pages(infile, pagenums):
                    interpreter.process_page(page)
        finally:
            converter.close()
        # Read the buffer before it is closed below.
        text = output.getvalue().split('\n')
    finally:
        output.close()
    return text

def _parsePage(lineas):
    """Slice the text lines of one page into its header and voter columns.

    Args:
        lineas: List of text lines for a single page, as returned by
            ``convert``.

    Returns:
        A dict with the per-voter lists ``nombres``, ``rut``,
        ``circunscripcion``, ``direccion`` and ``mesa`` plus the page-wide
        strings ``comuna``, ``provincia`` and ``region``.

    Raises:
        ValueError: If one of the expected header labels is not present.
    """
    indexNombre = lineas.index('NOMBRE')
    indexRegion = lineas.index('REGION')
    indexComuna = lineas.index('COMUNA:')
    # NOTE(review): this literal reached us redacted as '[Link] SEXO';
    # 'C.IDENTIDAD SEXO' is the presumed original header -- confirm against
    # the text extracted from a real Servel page.
    indexCI = lineas.index('C.IDENTIDAD SEXO')
    indexCirc = lineas.index('CIRCUNSCRIPCIÓN')
    indexDomEle = lineas.index('DOMICILIO ELECTORAL')
    indexMesa = lineas.index('MESA')

    nombres = lineas[indexNombre + 1:indexCI - 1]
    cantidad = len(nombres)

    # When the two headers are adjacent, the values of both columns are
    # read from every second line; otherwise each column is a contiguous
    # run of lines.
    paso = 2 if indexCirc == indexDomEle - 1 else 1

    return {
        'nombres': nombres,
        'rut': lineas[indexCI + 2:indexCI + 2 + cantidad],
        'circunscripcion':
            lineas[indexCirc + 1:indexCirc + 1 + paso * cantidad:paso],
        'direccion':
            lineas[indexDomEle + 1:indexDomEle + 1 + paso * cantidad:paso],
        'comuna': lineas[indexComuna + 2],
        # Parsed for completeness; not included in the output records.
        'provincia': str(lineas[indexRegion + 4]).replace(":", "").lstrip(),
        'region': str(lineas[indexRegion + 3]).replace(":", "").lstrip(),
        'mesa': lineas[indexMesa + 1:len(lineas) - 1],
    }


def servelParser(rutaPdf, myKey='AIz...'):
    """Parse a Servel PDF and geocode the address of every listed person.

    Every page of the PDF is converted to text and sliced into columns,
    then each person's address is looked up through the Google Maps
    geocoding client.  Fetching all pages can take a long time.

    Args:
        rutaPdf: Path of the PDF file to parse.
        myKey: Google API key handed to ``googlemaps.Client``.  The default
            is a placeholder and must be replaced with a real key.

    Returns:
        A list with one dict per person, holding the keys ``Nombre``,
        ``Rut``, ``Circunscripcion``, ``Mesa``, ``Sexo``, ``direccion``,
        ``Comuna``, ``Region``, ``Lat`` and ``Lng``.  ``Lat`` and ``Lng``
        are ``None`` when the geocoder returns no match.  The original
        author's note says these records are meant to be sent to MongoDB.

    Raises:
        ValueError: If a page lacks one of the expected header labels.
    """
    # Only the page count is needed from pyPdf, so the handle is closed
    # right away instead of being leaked.
    with open(rutaPdf, 'rb') as archivoPdf:
        paginas = PdfFileReader(archivoPdf).getNumPages()

    hojas = []
    for i in range(paginas):
        lineas = convert(rutaPdf, [i])
        # Announce the page before parsing so a failure can be attributed
        # to it.  "%s %s" reproduces the space the Python 2 print statement
        # puts between its arguments.
        print("%s %s" % ("Procesando la hoja ", i))
        print('\n')
        hojas.append(_parsePage(lineas))

    # One client serves every lookup; it does not depend on the voter.
    gmaps = googlemaps.Client(myKey)

    datosServel = []
    for hoja in hojas:
        for j, nombre in enumerate(hoja['nombres']):
            rutSexo = hoja['rut'][j].split(" ")

            # Search for the address that appears in the Servel record of
            # EACH person.
            queryAddr = (hoja['direccion'][j] + ", " + hoja['comuna'] +
                         ", " + hoja['region'] + ", Chile")
            geocode_result = gmaps.geocode(queryAddr)
            if geocode_result:
                ubicacion = geocode_result[0]["geometry"]["location"]
                lat, lng = ubicacion["lat"], ubicacion["lng"]
            else:
                # No match for this address: keep the record, without
                # coordinates, instead of aborting the whole run.
                lat = lng = None

            print("%s %s" % ("Guardando datos número ", len(datosServel) + 1))
            datosServel.append({
                'Nombre': str(nombre),
                'Rut': str(rutSexo[0].replace(".", "")),
                'Circunscripcion': hoja['circunscripcion'][j],
                'Mesa': hoja['mesa'][j],
                # First character of the sex column, with "V" rewritten as
                # "H" (presumably varon -> hombre; confirm).
                'Sexo': rutSexo[1][0].replace("V", "H"),
                'direccion': hoja['direccion'][j],
                'Comuna': hoja['comuna'],
                'Region': hoja['region'],
                'Lat': lat,
                'Lng': lng,
            })

    return datosServel

Integrating APIs in Python Applications
No ratings yet
Integrating APIs in Python Applications
42 pages
OpenStreetMap API for Geocoding
No ratings yet
OpenStreetMap API for Geocoding
3 pages
04 Spatial Data
No ratings yet
04 Spatial Data
4 pages
Geopandas 50 Exercises
No ratings yet
Geopandas 50 Exercises
2 pages
1.3.3.1 Lab - Parsing JSON With A Python Application
No ratings yet
1.3.3.1 Lab - Parsing JSON With A Python Application
13 pages
Midterm Dennis
No ratings yet
Midterm Dennis
5 pages
Untitled 4
No ratings yet
Untitled 4
4 pages
PY4E - Python For Everybody
No ratings yet
PY4E - Python For Everybody
2 pages
Mapbox Python SDK Guide
No ratings yet
Mapbox Python SDK Guide
63 pages
And Longitude: - Algori THM
No ratings yet
And Longitude: - Algori THM
4 pages
DEV Manual
No ratings yet
DEV Manual
23 pages
Update Version of Code in Asep Project
No ratings yet
Update Version of Code in Asep Project
6 pages
Flask Folium
No ratings yet
Flask Folium
2 pages
1.3.3.1 Lab - Parsing JSON With A Python Application
0% (1)
1.3.3.1 Lab - Parsing JSON With A Python Application
14 pages
Estructura de Datos en Pandas
No ratings yet
Estructura de Datos en Pandas
120 pages
Week 1 To Week 9
No ratings yet
Week 1 To Week 9
30 pages
CS3361 - Data Science University Question Paper Answers
No ratings yet
CS3361 - Data Science University Question Paper Answers
46 pages
Point Query & Tiles Over Kerla: Manish Modani: Ts Timeslice Fts Forecasted Time Slice
No ratings yet
Point Query & Tiles Over Kerla: Manish Modani: Ts Timeslice Fts Forecasted Time Slice
8 pages
Data Analysis for Beginners
No ratings yet
Data Analysis for Beginners
201 pages
Notes 11 Json Pandas 31oct2023
No ratings yet
Notes 11 Json Pandas 31oct2023
5 pages
Lab 3-FELIX
No ratings yet
Lab 3-FELIX
3 pages
Reanalysis Data Example - Ipynb
No ratings yet
Reanalysis Data Example - Ipynb
363 pages
Wa0000
No ratings yet
Wa0000
26 pages
Data Visualization Final
No ratings yet
Data Visualization Final
10 pages
Optimizing Pandas for Performance
No ratings yet
Optimizing Pandas for Performance
48 pages
Matplotlib for Data Visualization
No ratings yet
Matplotlib for Data Visualization
33 pages
AI Journal
No ratings yet
AI Journal
56 pages
Bitcoin Wallet Script Guide
No ratings yet
Bitcoin Wallet Script Guide
16 pages
DS Journal
No ratings yet
DS Journal
46 pages
DFS Algorithm in Python
No ratings yet
DFS Algorithm in Python
15 pages
4.9.2 Lab - Integrate A REST API in A Python Application - ILM
No ratings yet
4.9.2 Lab - Integrate A REST API in A Python Application - ILM
17 pages
An Introduction To GeoPandas. Everything You Need To Get You Started - by Mark Friese - Aug, 2022 - Medium
No ratings yet
An Introduction To GeoPandas. Everything You Need To Get You Started - by Mark Friese - Aug, 2022 - Medium
9 pages
4.9.2 Lab - Integrate A Rest Api in A Python Application
No ratings yet
4.9.2 Lab - Integrate A Rest Api in A Python Application
16 pages
Lab - Integrate A REST API in A Python Application: Objectives
No ratings yet
Lab - Integrate A REST API in A Python Application: Objectives
16 pages
Project
No ratings yet
Project
17 pages
Uber Fare Prediction Analysis
No ratings yet
Uber Fare Prediction Analysis
6 pages
PYTHONa 7
No ratings yet
PYTHONa 7
15 pages
AI Search Algorithms in Python
No ratings yet
AI Search Algorithms in Python
19 pages
21AD71 Module 4 Textbook
No ratings yet
21AD71 Module 4 Textbook
107 pages
Chapter 9
No ratings yet
Chapter 9
5 pages
Lab Manual: Spring 2021
No ratings yet
Lab Manual: Spring 2021
33 pages
1.3.3.1 Lab - Parsing JSON With A Python Application - ILM
No ratings yet
1.3.3.1 Lab - Parsing JSON With A Python Application - ILM
13 pages
01 - Worked Example Geodata Chapter 16.en
No ratings yet
01 - Worked Example Geodata Chapter 16.en
3 pages
GeoPy 1.18.0 Documentation Guide
No ratings yet
GeoPy 1.18.0 Documentation Guide
77 pages
Lecture Note - GIS 821 - Programming in Geoinformatics - Weeks 3
No ratings yet
Lecture Note - GIS 821 - Programming in Geoinformatics - Weeks 3
5 pages
Car Price Prediction
No ratings yet
Car Price Prediction
67 pages
Practica 9
No ratings yet
Practica 9
24 pages
Shirin Dalvi Fundraiser Analysis: Getting The Donor Info
No ratings yet
Shirin Dalvi Fundraiser Analysis: Getting The Donor Info
6 pages
Folium Choropleth App with GeoJSON
No ratings yet
Folium Choropleth App with GeoJSON
13 pages
Pyqgis The Comfortable Way Mobile Qgis
No ratings yet
Pyqgis The Comfortable Way Mobile Qgis
57 pages
Collections
No ratings yet
Collections
7 pages
Colab Updates: Features, Fixes, and Package Upgrades
No ratings yet
Colab Updates: Features, Fixes, and Package Upgrades
20 pages
Google Places API Search Script
No ratings yet
Google Places API Search Script
1 page
JSON Parser & Generator for R
No ratings yet
JSON Parser & Generator for R
15 pages
Python Lists, Tuples, and Dictionaries Guide
No ratings yet
Python Lists, Tuples, and Dictionaries Guide
15 pages
Lecture 3.1.3
No ratings yet
Lecture 3.1.3
7 pages
Perfect Home
No ratings yet
Perfect Home
12 pages
Project 3 INfiniteScrollBrewery
No ratings yet
Project 3 INfiniteScrollBrewery
11 pages
DBDM Model QP
No ratings yet
DBDM Model QP
2 pages
Data Engineering Overview and Tools
No ratings yet
Data Engineering Overview and Tools
34 pages
HMIS
No ratings yet
HMIS
13 pages
Text Classification Based On Random Forest Algorithm
No ratings yet
Text Classification Based On Random Forest Algorithm
4 pages
AI Sample Paper for Class X
No ratings yet
AI Sample Paper for Class X
6 pages
LGM Maintenance & Logistics Services
No ratings yet
LGM Maintenance & Logistics Services
4 pages
CISA Certified Information Systems Auditor
No ratings yet
CISA Certified Information Systems Auditor
13 pages
Data Analyst Resume: Skills & Experience
No ratings yet
Data Analyst Resume: Skills & Experience
2 pages
E-Library 2011 Spec
No ratings yet
E-Library 2011 Spec
8 pages
4th International Conference On Computing and Information Technology Trends (CCITT 2025)
No ratings yet
4th International Conference On Computing and Information Technology Trends (CCITT 2025)
2 pages
05b.BDA (18CS72) Module-5 Text Mining
No ratings yet
05b.BDA (18CS72) Module-5 Text Mining
23 pages
Sa 36
No ratings yet
Sa 36
8 pages
1Speech&LangProc - Ipynb - Colab
No ratings yet
1Speech&LangProc - Ipynb - Colab
3 pages
Database Locking Types Explained
No ratings yet
Database Locking Types Explained
8 pages
Big Data Analytics-Digital Notes
No ratings yet
Big Data Analytics-Digital Notes
86 pages
2024 - 2025 TEST - Attempt Review - LAUTECH
No ratings yet
2024 - 2025 TEST - Attempt Review - LAUTECH
13 pages
RT-IoT2022 Dataset for IoT Security
No ratings yet
RT-IoT2022 Dataset for IoT Security
4 pages
AI 102 Notes
100% (1)
AI 102 Notes
41 pages
DM C6 AssociationRule Apriori
No ratings yet
DM C6 AssociationRule Apriori
33 pages
MCQ - Chapter 4 - 6
No ratings yet
MCQ - Chapter 4 - 6
14 pages
Prequiz 13 (Reading 5.1, 5.2, 5.3) - Attempt Review
No ratings yet
Prequiz 13 (Reading 5.1, 5.2, 5.3) - Attempt Review
11 pages
Intro To Human Computer Interaction
No ratings yet
Intro To Human Computer Interaction
53 pages
e-Panchayat Project Report 2013
0% (1)
e-Panchayat Project Report 2013
86 pages
HTCS701
No ratings yet
HTCS701
1 page
Artificial Intelligence Presentation II
No ratings yet
Artificial Intelligence Presentation II
22 pages
Report On Design of Management Information System
No ratings yet
Report On Design of Management Information System
18 pages
Cybersecurity Knowledge Graph Survey
No ratings yet
Cybersecurity Knowledge Graph Survey
4 pages
Basic Concepts On Teradata
No ratings yet
Basic Concepts On Teradata
16 pages
Identity Verification API Services
No ratings yet
Identity Verification API Services
4 pages
Practice and Progress New Concept English by L G Alexander PDF
60% (5)
Practice and Progress New Concept English by L G Alexander PDF
6 pages

Coding Python Servel

Uploaded by

Coding Python Servel

Uploaded by

# -*-

@author: Javier Palma-Espinosa

from cStringIO import StringIO

def convert(fname, pages=None):

infile = file(fname, 'rb')

print "Procesando la hoja ", i

#Extract the data for each field in "pages" and sort in an

myKey = 'AIz...' #your google API key

datosServel = [dict() for x in range(votantes)] #diccionario se mandará

#datosServel will be a data structure, containing the

You might also like