Skip to content

Commit 9b44aa4

Browse files
delimited names, lowercase emails
1 parent d8c8150 commit 9b44aa4

File tree

3 files changed

+12
-7
lines changed

3 files changed

+12
-7
lines changed

src/server/alembic/versions/45a668fa6325_postgres_matching.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,19 +85,19 @@ def upgrade():
8585
sa.PrimaryKeyConstraint('_id')
8686
)
8787
op.create_index('idx_pdp_contacts_source_type_and_id', 'pdp_contacts', ['source_type', 'source_id'], unique=False)
88-
op.create_index(op.f('ix_pdp_contacts_email'), 'pdp_contacts', ['email'], unique=False)
8988
op.create_index(op.f('ix_pdp_contacts_mobile'), 'pdp_contacts', ['mobile'], unique=False)
9089
op.create_index(op.f('idx_pdp_contacts_lower_first_name'), 'pdp_contacts', [sa.text('lower(first_name)')], unique=False)
9190
op.create_index(op.f('idx_pdp_contacts_lower_last_name'), 'pdp_contacts', [sa.text('lower(last_name)')], unique=False)
91+
op.create_index(op.f('idx_pdp_contacts_lower_email'), 'pdp_contacts', [sa.text('lower(email)')], unique=False)
9292
# ### end Alembic commands ###
9393

9494

9595
def downgrade():
9696
# ### commands auto generated by Alembic - please adjust! ###
97+
# The name must match the one upgrade() creates ('idx_' prefix); op.f() passes it through unchanged.
op.drop_index(op.f('idx_pdp_contacts_lower_email'), table_name='pdp_contacts')
9798
# The name must match the one upgrade() creates ('idx_' prefix); op.f() passes it through unchanged.
op.drop_index(op.f('idx_pdp_contacts_lower_last_name'), table_name='pdp_contacts')
9899
# The name must match the one upgrade() creates ('idx_' prefix); op.f() passes it through unchanged.
op.drop_index(op.f('idx_pdp_contacts_lower_first_name'), table_name='pdp_contacts')
99100
op.drop_index(op.f('ix_pdp_contacts_mobile'), table_name='pdp_contacts')
100-
op.drop_index(op.f('ix_pdp_contacts_email'), table_name='pdp_contacts')
101101
op.drop_index('idx_pdp_contacts_source_type_and_id', table_name='pdp_contacts')
102102
op.drop_table('volgistics')
103103
op.drop_table('shelterluvpeople')

src/server/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ class PdpContacts(Base):
113113
__table_args__ = (
114114
Index("idx_pdp_contacts_lower_first_name", text("lower(first_name)")),
115115
Index("idx_pdp_contacts_lower_last_name", text("lower(last_name)")),
116+
Index("idx_pdp_contacts_lower_email", text("lower(email)")),
116117
Index("idx_pdp_contacts_source_type_and_id", "source_type", "source_id"),
117118
)
118119

src/server/pipeline/flow_script.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
insert,
2424
or_,
2525
select,
26+
text,
2627
update,
2728
)
2829

@@ -112,6 +113,9 @@ def reset_pdp_contacts_with_unmatched(conn):
112113
conn.execute(Volgistics.insert_into_pdp_contacts())
113114
conn.execute(ShelterluvPeople.insert_into_pdp_contacts())
114115

116+
def compare_names(n1, n2):
    """Build a SQL predicate that is true when two name expressions share a token.

    Each argument is lower-cased and split into an array with
    ``regexp_split_to_array`` on the delimiters " and ", " & ", ", " and a
    single space. The two arrays are then compared with the ``&&`` operator.

    Args:
        n1: SQL column/expression holding the first name value.
        n2: SQL column/expression holding the second name value.

    Returns:
        A boolean SQL expression usable inside ``and_`` / ``or_`` / ``where``.
    """
    # NOTE(review): consecutive or leading/trailing delimiters presumably
    # yield empty-string tokens, which would make otherwise unrelated names
    # overlap on '' -- confirm against the data and strip them if so.
    separator_pattern = text("'( and | & |, | )'")
    left_tokens = func.regexp_split_to_array(func.lower(n1), separator_pattern)
    right_tokens = func.regexp_split_to_array(func.lower(n2), separator_pattern)
    return left_tokens.bool_op("&&")(right_tokens)
115119

116120
def get_automatic_matches(conn):
117121
pc1 = PdpContacts.__table__.alias()
@@ -121,16 +125,16 @@ def get_automatic_matches(conn):
121125
and_(
122126
or_(
123127
and_(
124-
func.lower(pc1.c.first_name) == func.lower(pc2.c.first_name),
125-
func.lower(pc1.c.last_name) == func.lower(pc2.c.last_name),
128+
compare_names(pc1.c.first_name, pc2.c.first_name),
129+
compare_names(pc1.c.last_name, pc2.c.last_name),
126130
),
127131
and_(
128-
func.lower(pc1.c.first_name) == func.lower(pc2.c.last_name),
129-
func.lower(pc1.c.last_name) == func.lower(pc2.c.first_name),
132+
compare_names(pc1.c.first_name, pc2.c.last_name),
133+
compare_names(pc1.c.last_name, pc2.c.first_name),
130134
),
131135
),
132136
or_(
133-
pc1.c.email == pc2.c.email,
137+
func.lower(pc1.c.email) == func.lower(pc2.c.email),
134138
pc1.c.mobile == pc2.c.mobile,
135139
),
136140
# This ensures we don't get e.g. every row matching itself

0 commit comments

Comments
 (0)