Skip to content

Commit

Permalink
fix: fix space in header name
Browse files Browse the repository at this point in the history
Co-authored-by: Gaurav Goyal <[email protected]>
work done in #133
  • Loading branch information
milafrerichs committed Nov 13, 2020
1 parent 35f4301 commit 1862ad6
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 10 deletions.
20 changes: 12 additions & 8 deletions tests/datasets/tasks/test_process_uploaded_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,21 @@
from wazimap_ng.datasets.tasks.process_uploaded_file import process_csv, detect_encoding
from tests.datasets.factories import DatasetFactory, GeographyFactory, GeographyHierarchyFactory, DatasetFileFactory

def generate_file(data, header, encoding="utf8"):
    """Build an in-memory CSV file and return it rewound to the start.

    Args:
        data: Iterable of rows written after the header row.
        header: Sequence of column names written as the first row. It is
            supplied by the caller (it was previously hard-coded to
            ["Geography", "field1", "field2", "count"]) so that tests can
            also produce malformed headers, e.g. one with a trailing space.
        encoding: Codec name used to encode the CSV text into bytes.

    Returns:
        The BytesIO holding the encoded CSV, positioned at offset 0.
    """
    buffer = BytesIO()
    # Wrap the byte buffer so csv.writer can write text in the chosen codec.
    StreamWriter = codecs.getwriter(encoding)
    text_buffer = StreamWriter(buffer)

    writer = csv.writer(text_buffer)
    writer.writerow(header)
    writer.writerows(data)

    # Rewind so the caller can read the content from the beginning.
    buffer.seek(0)
    return buffer


def create_datasetfile(csv_data, encoding, header):
    """Create a DatasetFile whose document contains the given CSV content.

    Args:
        csv_data: Rows to write after the header row.
        encoding: Codec name used to encode the CSV bytes.
        header: Column names written as the first CSV row.

    Returns:
        Whatever DatasetFileFactory returns for the generated document bytes.

    Note that the argument order here (encoding, header) differs from
    generate_file (header, encoding).
    """
    buffer = generate_file(csv_data, header, encoding)
    return DatasetFileFactory(document__data=buffer.read())


Expand Down Expand Up @@ -57,21 +57,25 @@ def dataset(geography_hierarchy):
("GEOCODE_2", "€ŠF1_value_2", "F2_value_2®®", 222),
]

# Header with the column names exactly as written by the other test cases.
good_header = ["Geography", "field1", "field2", "count"]

# Same header, but the last column name carries a trailing space.
# process_csv normalises column names with .str.lower().str.strip(), so this
# case checks that such a header is still accepted.
to_be_fixed_header = ["Geography", "field1", "field2", "count "]


# Each param is a (rows, header, encoding) triple.
@pytest.fixture(params=[
    (good_data, good_header, "utf8"),
    (good_data, to_be_fixed_header, "utf8"),
    (data_with_different_case, good_header, "utf8"),
    (data_with_different_encodings, good_header, "Windows-1252"),
])
def data(request):
    """Return the current (rows, header, encoding) parametrisation."""
    return request.param

def test_detect_encoding():
    """detect_encoding reports Windows-1252 for a CSV written in that codec."""
    # generate_file takes the header as its second argument; the codec name
    # must be passed third, otherwise it would be written as the header row.
    buffer = generate_file(data_with_different_encodings, good_header, "Windows-1252")
    encoding = detect_encoding(buffer)
    assert encoding == "Windows-1252"

@pytest.mark.django_db
class TestUploadFile:

def test_process_csv(self, dataset, data, geographies):
csv_data, encoding = data
datasetfile = create_datasetfile(csv_data, encoding)
csv_data, header, encoding = data
datasetfile = create_datasetfile(csv_data, encoding, header)

process_csv(dataset, datasetfile.document.open("rb"))
datasetdata = dataset.datasetdata_set.all()
Expand Down
4 changes: 2 additions & 2 deletions wazimap_ng/datasets/tasks/process_uploaded_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def process_csv(dataset, buffer, chunksize=1000000):
row_number = 1
df = pd.read_csv(wrapper_file, nrows=1, dtype=str, sep=",", encoding=encoding)
df.dropna(how='all', axis='columns', inplace=True)
columns = df.columns.str.lower()
columns = df.columns.str.lower().str.strip()
error_logs = [];
warning_logs = [];

Expand Down Expand Up @@ -90,7 +90,7 @@ def process_uploaded_file(dataset_file, dataset, **kwargs):
i_chunk = 0
df = pd.read_excel(dataset_file.document.open(), nrows=1, dtype=str)
df.dropna(how='any', axis='columns', inplace=True)
columns = df.columns.str.lower()
columns = df.columns.str.lower().str.strip()
while True:
df = pd.read_excel(
dataset_file.document.open(), nrows=chunksize, skiprows=skiprows, header=None
Expand Down

0 comments on commit 1862ad6

Please sign in to comment.