Merge Python Dictionaries Based on Common Values

2479
5
Jump to solution
10-17-2018 04:03 PM
MicahBabinski
Regular Contributor

Greetings,

I have a list of Python dictionary objects. Each of the dictionaries has the same six keys. I'd like to be able to 'collapse' these dictionaries, combining them where five of the keys have identical values, and then separate the values for the sixth key with a comma.

For instance, if I was starting with:

myDictionaries = [{'Subdivision': u'NENW', 'Twp': u'026S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'}, 
{'Subdivision': u'NWNE', 'Twp': u'026S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'}]

Note that all of the key/value pairs in the two dictionaries are identical, except for Subdivision. So, what I would like to get is:

[{'Subdivision': u'NENW,NWNE', 'Twp': u'026S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'}]

Thanks for any help you can provide!

Micah

0 Kudos
1 Solution

Accepted Solutions
JoshuaBixby
MVP Esteemed Contributor

I think the following will work for you:

>>> myDictionaries = [
... {'Subdivision': u'NENW', 'Twp': u'026S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
... {'Subdivision': u'NWNE', 'Twp': u'026S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
... {'Subdivision': u'NENW', 'Twp': u'025S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
... {'Subdivision': u'NWNE', 'Twp': u'025S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'}
... ]
...
>>>
>>> from itertools import groupby
>>> from operator import itemgetter
>>> from pprint import pprint
>>>
>>> keys = ['Twp', 'Range', 'Sec', 'Sur Type', 'Meridian']
>>> ig = itemgetter(*keys)
>>>
>>> sorted_myDict = sorted(myDictionaries, key=ig)
>>> new_myDict = []
>>> for k,g in groupby(sorted_myDict, key=ig):
...     d = dict(zip(keys,k))
...     d['Subdivision'] = ','.join(i['Subdivision'] for i in g)
...     new_myDict.append(d)
...    
>>> for d in new_myDict:
...     pprint(d)
...     print
...    
{'Meridian': u'33',
'Range': u'033E',
'Sec': u'13',
'Subdivision': u'NENW,NWNE',
'Sur Type': u'A',
'Twp': u'025S'}

{'Meridian': u'33',
'Range': u'033E',
'Sec': u'13',
'Subdivision': u'NENW,NWNE',
'Sur Type': u'A',
'Twp': u'026S'}

>>>

View solution in original post

0 Kudos
5 Replies
DanPatterson_Retired
MVP Esteemed Contributor

python 3?

ds  # ---- your dictionaries

[{'Subdivision': 'NENW', 'Twp': '026S', 'Range': '033E', 'Sec': '13', 'Sur Type': 'A', 'Meridian': '33'},

{'Subdivision': 'NWNE', 'Twp': '026S', 'Range': '033E', 'Sec': '13', 'Sur Type': 'A', 'Meridian': '33'}]

# ---- your values can be pulled in a variety of ways, but pulling in all the values,
#      then using 'set' to get the unique ones works

vls = list(set([i for j in ds for i in list(j.values())]))

vls
['033E', 'NWNE', '026S', '33', '13', 'NENW', 'A']

vls.sort() # ---- if you need them sorted
0 Kudos
PeteCrosier
Occasional Contributor III
myDictionaries = [{'Subdivision': u'NENW', 'Twp': u'026S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
{'Subdivision': u'NWNE', 'Twp': u'026S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'}]


def mergeDictionaries(dictionaries):
    """Collapse a list of dictionaries into one dictionary.

    For every key, the distinct values found across ``dictionaries`` are
    joined with a comma, in the order they were first seen.

    Args:
        dictionaries: iterable of dictionaries whose values are strings.

    Returns:
        A dictionary mapping each key to its comma-separated distinct values.
        An empty input yields an empty dictionary.
    """
    valuesByKey = {}
    for dictionary in dictionaries:
        for k, v in dictionary.items():
            seen = valuesByKey.setdefault(k, [])
            # Compare whole values against a list. A substring test on the
            # joined string would treat 'NE' as already present in 'NENW',
            # and overwriting on a repeat would discard earlier values.
            if v not in seen:
                seen.append(v)
    return {k: ",".join(vals) for k, vals in valuesByKey.items()}


newDictionary = mergeDictionaries(myDictionaries)

print([newDictionary])
JoshuaBixby
MVP Esteemed Contributor

The code snippet works for the example the OP gave, but I suspect the OP's actual list has many dictionaries and those dictionaries do not all have key-value pairs in common.  For example, what if the list of dictionaries looks like:

myDictionaries = [
{'Subdivision': u'NENW', 'Twp': u'026S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
{'Subdivision': u'NWNE', 'Twp': u'026S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
{'Subdivision': u'NENW', 'Twp': u'025S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
{'Subdivision': u'NWNE', 'Twp': u'025S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'}
]
0 Kudos
JoshuaBixby
MVP Esteemed Contributor

I think the following will work for you:

>>> myDictionaries = [
... {'Subdivision': u'NENW', 'Twp': u'026S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
... {'Subdivision': u'NWNE', 'Twp': u'026S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
... {'Subdivision': u'NENW', 'Twp': u'025S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
... {'Subdivision': u'NWNE', 'Twp': u'025S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'}
... ]
...
>>>
>>> from itertools import groupby
>>> from operator import itemgetter
>>> from pprint import pprint
>>>
>>> keys = ['Twp', 'Range', 'Sec', 'Sur Type', 'Meridian']
>>> ig = itemgetter(*keys)
>>>
>>> sorted_myDict = sorted(myDictionaries, key=ig)
>>> new_myDict = []
>>> for k,g in groupby(sorted_myDict, key=ig):
...     d = dict(zip(keys,k))
...     d['Subdivision'] = ','.join(i['Subdivision'] for i in g)
...     new_myDict.append(d)
...    
>>> for d in new_myDict:
...     pprint(d)
...     print
...    
{'Meridian': u'33',
'Range': u'033E',
'Sec': u'13',
'Subdivision': u'NENW,NWNE',
'Sur Type': u'A',
'Twp': u'025S'}

{'Meridian': u'33',
'Range': u'033E',
'Sec': u'13',
'Subdivision': u'NENW,NWNE',
'Sur Type': u'A',
'Twp': u'026S'}

>>>

View solution in original post

0 Kudos
DanPatterson_Retired
MVP Esteemed Contributor

Of course, here is a NumPy example, since dictionaries are a pain:

import numpy as np
def dict_vals(ds):
    """Combine the values of several dictionaries, key by key.

    For every key that appears in any dictionary of ``ds``, the distinct
    values are sorted and joined with a single space.

    Requires: import numpy as np   so include the line above

    Parameters
    ----------
    ds : iterable of dict
        Dictionaries to combine. Keys and values are placed in one NumPy
        array, so they are expected to be strings.

    Returns
    -------
    dict
        A new dictionary, keys in sorted order, mapping each key to a
        space-separated string of its sorted unique values. An empty
        dictionary is returned when ``ds`` holds no key/value pairs.
    """
    pairs = [(k, v)  # key, value pairs
             for d in ds  # dict in dictionaries
             for k, v in d.items()  # get the key, values from items
             ]
    if not pairs:
        # np.array([]) is 1-D, so the column indexing below would fail.
        return {}

    a = np.array(pairs)
    ks = np.unique(a[:, 0])
    # Take the unique values from the value column only. Running unique over
    # both columns and dropping the last item is only right when the key name
    # happens to sort after every one of its values.
    return {str(k): " ".join(np.unique(a[a[:, 0] == k, 1]).tolist())
            for k in ks}

results

ds = [
     {'Subdivision': u'NENW', 'Twp': u'026S', 'Range': u'030E', 'Sec': u'14', 'Sur Type': u'B', 'Meridian': u'33'},
     {'Subdivision': u'NWNE', 'Twp': u'020S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
     {'Subdivision': u'SENW', 'Twp': u'021S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'},
     {'Subdivision': u'SWNE', 'Twp': u'025S', 'Range': u'033E', 'Sec': u'13', 'Sur Type': u'A', 'Meridian': u'33'}
     ]


dict_vals(ds)

{'Meridian': '33',
'Range': '030E 033E',
'Sec': '13 14',
'Subdivision': 'NENW NWNE SENW SWNE',
'Sur Type': 'A B',
'Twp': '020S 021S 025S 026S'}