Skip to content

Commit 3fb7edf

Browse files
committed
Updated the requirements to resolve security issues and also install mysqlclient. This resulted in the following changes having to be made
# Django 3.2 -> 4.2 changes ## ./datamad2/models/users.py from django.utils.translation import ugettext_lazy as _ --> from django.utils.translation import gettext_lazy as _ ## ./datamad2/admin.py admin.site.register(User, UserAdmin) etc. at eof changed to an @admin.register(User) decorator above the class definition of each admin type. The eof registrations were all deleted. Slightly concerned this does not give the same functionality, TODO check ## ./datamad2/utils.py from django.utils.encoding import force_text --> from django.utils.encoding import force_str ## ./datamadsite/settings.py USE_L10N = True --> line deleted; crispy_bootstrap4 added to INSTALLED_APPS # ./datamadsite/settings_local.py.tmpl Added the additional allowed template packs: CRISPY_ALLOWED_TEMPLATE_PACKS = "bootstrap4" CRISPY_TEMPLATE_PACK = "bootstrap4" # bootstrap_datepicker_plus updates ## grants.py and data_products.py from bootstrap_datepicker_plus import DatePickerInput --> from bootstrap_datepicker_plus.widgets import DatePickerInput ## Changed format='' for bootstrap_datepicker_plus.widgets.DatePickerInput from format='%d/%m/%Y' to options={'format':'%d/%m/%Y'} to fix the following error: ./app_datamad_new/Lib/site-packages/bootstrap_datepicker_plus/_base.py:38: FutureWarning: The 'format' parameter is ignored, set 'format' in options instead. see https://github.com/monim67/django-bootstrap-datepicker-plus bootstrap_datepicker_plus.widgets Removed ifequal from pagination.html as it was removed in Django 4.0, and replaced it with "if x == condition" instead # djangorestframework updating to 3.15.2 from 3.12.4 # Updating to drf 3.15.2 from 3.12.4, had to specify basename for datacentres/ datacenter in datamad2_api/urls.py due to importing name from "DataCentre" model queryset leading to identical routings for the "datacentre" and "datacenter" base URLs. 
This is due to a change from drf 3.14 -> 3.15 enforcing the following: encode/django-rest-framework#8438. Added new script DataMad_csv_create.py to import data from UKRI's DataBank database into a .csv file. Created two example Docker files for local debugging, should the user want them. Updated setup.py to Python 3.12 from Python 3.6. Updated the README.md file.
1 parent 8bd56c8 commit 3fb7edf

File tree

17 files changed

+312
-112
lines changed

17 files changed

+312
-112
lines changed

.gitignore

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,8 @@ vendor
1717
/static
1818
# scripts
1919
.vscode
20+
Databank.conf
21+
sql_alchemy_mysql_conn_string.txt
22+
odbc_conn_scratch.py
23+
/local_temp
24+
/import_csvs

DataMad_csv_create.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
import pandas as pd
2+
import sqlalchemy as sa
3+
import numpy as np
4+
5+
# TODO, try and get both parent and child grants in the SQL query,
6+
# for now only parent grants are present (i.e. lead PI grants)
7+
8+
# Load connection string for SQLAlchemy
9+
conn_str = []
10+
11+
with open('./local_temp/sql_alchemy_mysql_conn_string.txt', 'r') as fn:
12+
for line in fn:
13+
conn_str.append(str(line))
14+
15+
# Setup SQLAlchemy to use mysqlclient as MySQL driver
16+
# (need to use SQLAlchemy as pandas now only accepts an SQLAlchemy engine + SQL query as input, not pyodbc or similar)
17+
18+
engine = sa.create_engine(conn_str[0]) # mysqlclient connection for Docker
19+
20+
# engine = sa.create_engine('mysql+pyodbc://DataBank?charset=utf8mb4') # pyodbc connection (used for Windows testing)
21+
saconn = engine.connect()
22+
23+
# SQL query to pull information needed by DataMAD, also renames to DataMad names.
24+
sql_datamad_renamed = "SELECT \
25+
fact_application.ApplicationID AS GRANTREFERENCE, \
26+
fact_application.ApplicationTitle AS PROJECT_TITLE, \
27+
dim_scheme.SchemeName AS SCHEME, \
28+
dim_opportunity.OpportunityName AS 'CALL', \
29+
dim_scheme.SchemeType AS GRANT_TYPE, \
30+
dim_person.FullName AS GRANT_HOLDER, \
31+
dim_person.Email AS EMAIL, \
32+
dim_organisation.OrganisationName AS RESEARCH_ORG, \
33+
dim_department.DepartmentName AS DEPARTMENT, \
34+
dim_application_date.ActualStartDate AS ACTUAL_START_DATE, \
35+
dim_application_date.ActualEndDate AS ACTUAL_END_DATE, \
36+
fact_application.AdministratingCouncil AS NEW_ADMINISTRATING_COUNCIL, \
37+
dim_application_date.ProposedStartDate AS PROPOSED_ST_DT, \
38+
dim_application_date.ProposedEndDate AS PROPOSED_END_DT, \
39+
fact_application.ApplicationStatus AS GRANT_STATUS, \
40+
dim_organisation.AddressLine1 AS ADDRESS1, \
41+
dim_organisation.TownOrCity AS CITY, \
42+
dim_organisation.PostCode AS POSTCODE, \
43+
fact_application.AwardedAmount AS 'AMOUNT', \
44+
dim_application_ext.RoutingClassification AS ROUTING_CLASSIFICATION, \
45+
dim_classification_area.SubjectArea AS SCIENCE_AREA, \
46+
dim_organisation.region AS GEOGRAPHIC_AREA, \
47+
dim_classification_area.ResearchTopic AS SECONDARY_CLASSIFICATION, \
48+
dim_application_ext.ApplicationSummary AS ABSTRACT \
49+
FROM fact_application \
50+
LEFT OUTER JOIN dim_scheme \
51+
ON fact_application.SchemeSKey = dim_scheme.SchemeSKey \
52+
LEFT OUTER JOIN dim_opportunity \
53+
ON fact_application.OpportunitySKey = dim_opportunity.OpportunitySKey \
54+
LEFT OUTER JOIN dim_person \
55+
ON fact_application.ApplicantPersonSKey = dim_person.PersonSKey \
56+
LEFT OUTER JOIN dim_department \
57+
ON fact_application.OrganisationDepartmentSKey = dim_department.OrganisationDepartmentSKey \
58+
LEFT OUTER JOIN dim_application_date \
59+
ON fact_application.ApplicationSKey = dim_application_date.ApplicationSKey \
60+
LEFT OUTER JOIN dim_organisation\
61+
ON fact_application.LeadOrganisationSKey = dim_organisation.OrganisationSKey \
62+
LEFT OUTER JOIN dim_application_ext\
63+
ON fact_application.ApplicationSKey = dim_application_ext.ApplicationSKey \
64+
LEFT OUTER JOIN dim_classification_area \
65+
ON fact_application.PrimaryClassificationAreaSKey = dim_classification_area.ClassificationAreaSKey \
66+
WHERE fact_application.AdministratingCouncil = 'NERC' AND fact_application.ApplicationStatus = 'ACCEPTED' \
67+
GROUP BY fact_application.ApplicationID \
68+
LIMIT 200"
69+
70+
# TODO May potentially need to add this back in (trying to join on fact_application_team.LeadApplicantPersonSKey instead)
71+
"""
72+
LEFT OUTER JOIN dim_person \
73+
ON fact_application.ApplicantPersonSKey = dim_person.PersonSKey \
74+
"""
75+
76+
77+
# Query data from Databank
78+
data_renamed = pd.read_sql(sql_datamad_renamed, engine)
79+
80+
# Add in blank columns to cover fields missing in Databank that were in Siebel
81+
# TODO, should delete some of these once app_datamad is updated to remove these fields from the models
82+
# Definitely don't need WORK_NUMBER, NCAS, NCEO, ADDRESS2, OVERALL_SCORE, PROPOSED_ST_DT_ORG or PROPOSED_END_DT_ORG
83+
# Try to find (or extract from another field): FACILITY, LEAD_GRANT, PARENT_GRANT and OBJECTIVES
84+
85+
no_longer_needed_cols = ['WORK_NUMBER', 'NCAS', 'NCEO', 'ADDRESS2', 'OVERALL_SCORE',
86+
'PROPOSED_ST_DT_ORG', 'PROPOSED_END_DT_ORG']
87+
data_renamed = data_renamed.reindex(columns=[*data_renamed.columns.tolist(), *no_longer_needed_cols], fill_value=np.nan)
88+
89+
needed_cols = ['FACILITY', 'LEAD_GRANT', 'PARENT_GRANT', 'OBJECTIVES']
90+
data_renamed = data_renamed.reindex(columns=[*data_renamed.columns.tolist(), *needed_cols], fill_value=np.nan)
91+
92+
# Reorder columns to Siebel order (easier to read for user, not needed for import via Django)
93+
col_order = ['GRANTREFERENCE', 'PROJECT_TITLE', 'SCHEME', 'CALL', 'GRANT_TYPE',
94+
'GRANT_HOLDER', 'WORK_NUMBER', 'EMAIL', 'RESEARCH_ORG',
95+
'DEPARTMENT', 'ACTUAL_START_DATE', 'ACTUAL_END_DATE',
96+
'NCAS', 'NCEO', 'PROPOSED_ST_DT', 'PROPOSED_END_DT',
97+
'GRANT_STATUS', 'ADDRESS1', 'ADDRESS2', 'CITY',
98+
'POSTCODE', 'LEAD_GRANT', 'PARENT_GRANT', 'AMOUNT',
99+
'ROUTING_CLASSIFICATION', 'SCIENCE_AREA',
100+
'GEOGRAPHIC_AREA', 'SECONDARY_CLASSIFICATION',
101+
'ABSTRACT', 'OBJECTIVES', 'FACILITY', 'OVERALL_SCORE',
102+
'PROPOSED_ST_DT_ORG', 'PROPOSED_END_DT_ORG']
103+
104+
data_renamed = data_renamed[col_order]
105+
106+
# Save .csv file
107+
data_renamed.to_csv("./import_csvs/datamad_databank_debug.csv")
108+
109+
pause = 1

Dockerfile_debug

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
FROM rockylinux:9.3
2+
3+
WORKDIR /app
4+
5+
# Installing Python in the image
6+
RUN dnf install python3 -y
7+
8+
# RUN wget https://www.python.org/ftp/python/3.10.0/Python-3.10.0.tar.xz
9+
# RUN tar -xf Python-3.10.0.tar.xz
10+
# WORKDIR /Python-3.10.0
11+
# RUN ls
12+
# RUN ./configure --enable-optimizations
13+
# RUN make -j 2
14+
# RUN nproc
15+
# RUN make altinstall
16+
17+
# Ensure pip is installed
18+
RUN python3 -m ensurepip --upgrade
19+
20+
# Install MySQL and required drivers for Django to use MySQL
21+
# RUN dnf config-manager --set-enabled crb
22+
RUN dnf install epel-release -y
23+
RUN crb enable
24+
RUN dnf install python3-devel -y
25+
RUN dnf install mysql-server -y
26+
RUN dnf install pkgconf pkgconf-pkg-config -y
27+
RUN dnf install mysql-devel -y
28+
29+
RUN dnf install gcc -y
30+
RUN pip3 install mysqlclient
31+
32+
# Install git to clone ceda-elasticsearch-tools into image (cloned in requirements.txt)
33+
RUN dnf -y install git
34+
35+
COPY requirements.txt requirements.txt
36+
RUN pip install --no-cache-dir -r requirements.txt
37+
38+
# Add SQLite support for existing DataMad data (imported from Siebel via UKCEH)
39+
RUN dnf install sqlite -y
40+
41+
# TODO needed for debug only
42+
RUN pip install debugpy
43+
44+
# Copy the project code into the container
45+
COPY ./ /app/
46+
47+
# ENV DJANGO_SUPERUSER_PASSWORD = Password
48+
# RUN python3 manage.py createsuperuser [email protected] --first_name=Test --last_name=Person --noinput
49+
50+
# RUN python3 manage.py import_database --file ./import_csvs/datamad_databank_debug.csv
51+
# RUN yes | python3 manage.py rebuild_index
52+
53+
# EXPOSE 8000
54+
55+
# CMD [ "python3", "manage.py"]

README.md

Lines changed: 98 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ See https://docs.google.com/document/d/1Lhu0ALzroLaPMXYi1x3lUFO-UgkpnZ0P8DDgp1ys
88

99
## Editing docs
1010

11-
The documentation is written using Jekyll. A usefull getting started guide
11+
The documentation is written using Jekyll. A useful getting started guide
1212
can be found [here](https://jekyllrb.com/docs/step-by-step/01-setup/)
1313

1414
Once you have the pre-requisites installed, navigate to the docs directory and run:
@@ -17,16 +17,100 @@ Once you have the pre-requisites installed, navigate to the docs directory and r
1717

1818
This will serve the docs locally and will update as you change the source.
1919

20-
## Set up Guide
20+
## Setup guide Python environment
21+
- Clone the datamad2 repository to your local machine in a directory of your choosing `git clone https://github.com/cedadev/datamad2.git`
22+
- Install Python if you haven't already (see datamad2 requirements.txt for Python version)
2123

22-
Process to set up Datamad2
24+
### Docker
25+
If you have Docker installed (for example via Rancher Desktop) then the simplest install method is to build the debug image. On Windows:
26+
```
27+
docker build -f .\Dockerfile_debug -t datamad-debug .
28+
```
2329

24-
- Install the datamad2 repository `git clone https://github.com/cedadev/datamad2.git`
25-
30+
or on Linux (tested on Rocky Linux 9):
31+
```
32+
docker build -f ./Dockerfile_debug -t datamad-debug .
33+
```
2634

27-
- Install the required packages found in the requirements.txt file within the datamad repository in a virtual
28-
environment `pip install -r requirements.txt`
29-
35+
This will install a Rocky9 image with everything needed for Python mysqlclient, the Databank download script and the Django web application to run.
36+
37+
Note one, Rancher Desktop needs to be running for the docker command to work (at least in Windows).
38+
Note two, this does not use any CEDA specific Docker images and should not be used in deployment
39+
40+
### Windows
41+
These instructions may not be exhaustive.
42+
- Download and Install MySQL server (mysql.exe is required for mysqlclient to run)
43+
- Create a Python virtual environment
44+
```
45+
python -m venv app_datamad
46+
```
47+
- Install the required packages found in the requirements.txt file within the datamad repository into the virtual
48+
environment you created `pip install -r requirements.txt`
49+
50+
If there are errors such as mysql.exe not found then you might need to add MySQL Server\bin to your user path environment variable. For example for MySQL 8.0 add:
51+
```
52+
C:\Program Files\MySQL\MySQL Server 8.0\bin
53+
```
54+
55+
Note, you might need to check where mysql.exe has been installed on your system as the above might not be the install path for later (or earlier) versions of MySQL server.
56+
57+
Alternatively, you could try MariaDB (untested) or Microsoft SQL Server instead; in that case, set up an ODBC connection in Windows and then change the connection settings in "DataMad_csv_create.py".
58+
59+
60+
### Linux (Rocky9 example)
61+
Follow the steps below in the order given so that each dependency is installed before the packages that need it.
62+
63+
- Install python
64+
```
65+
66+
dnf install python3 -y
67+
```
68+
69+
Install all dependencies needed for mysqlclient to get access to DataBank through the "DataMad_csv_create.py" script
70+
```
71+
dnf install epel-release -y
72+
crb enable
73+
dnf install python3-devel -y
74+
dnf install mysql-server -y
75+
dnf install pkgconf pkgconf-pkg-config -y
76+
dnf install mysql-devel -y
77+
dnf install gcc -y
78+
pip3 install mysqlclient
79+
```
80+
81+
- Install SQLite support for the local database
82+
dnf install sqlite -y
83+
84+
- If git is not already installed (for example, if you downloaded the datamad repo as a .zip file from the web), install it so that pip can clone ceda-elasticsearch-tools (and other git-hosted packages) listed in requirements.txt
85+
```
86+
dnf -y install git
87+
```
88+
- Create a Python virtual environment in the directory of your choice, then activate it (depending on how Python was installed you may need to replace "python3" with "python")
89+
```
90+
python3 -m venv app_datamad
91+
source app_datamad/bin/activate
92+
```
93+
94+
- Then install DataMad requirements into the virtual environment
95+
```
96+
pip install --no-cache-dir -r requirements.txt
97+
pip install debugpy
98+
```
99+
100+
## Setup guide DataBank Access
101+
DataBank is a UKRI service containing grant information which is pulled from "The Funding Service" grant application system. The Django application DataMad uses a Python script "DataMad_csv_create.py" to generate a .csv file, which Django then uses to import data from DataBank and save it into a local SQLite file for debugging, or a PostgreSQL database in production.
102+
103+
To use it:
104+
- a local_temp directory must be created
105+
- Within it create a .txt file with the name "sql_alchemy_mysql_conn_string.txt" and place the following into it:
106+
```
107+
mysql+mysqldb://<User>:<Password>@<host>/databank
108+
109+
<User>, <Password> and <host> should be replaced with the DataBank login details.
110+
```
111+
112+
## Setup Guide Django application
113+
Process to set up Datamad2 web application
30114

31115
- In the datamadsite folder you should see a settings_local.py.tmpl file; copy said file and paste it in the same
32116
location but remove the `.tmpl` extension.
@@ -64,23 +148,20 @@ JIRA_CONSUMER_KEY = 'OAuthKey'
64148
...
65149
```
66150

67-
- Within the terminal, run `python manage.py migrate`.
151+
In the instructions below you might, depending on your setup, need to replace the string "python" with the string "python3".
68152

153+
If using Docker, then start an interactive Docker shell using `docker run -it -p 8000:8000 datamad-debug` before running the instructions below.
154+
155+
- Within the terminal, run `python manage.py migrate`.
69156

70157
- With the .csv containing the database, save the file in the same folder as manage.py and run
71-
`python manage.py import_database --f datamad_csv.csv`
158+
`python manage.py import_database --file datamad.csv`
72159

73-
74160
- You will want to create a superuser to log in to the site, to do this run `python manage.py createsuperuser`, enter
75161
in a username and password, this will be local, so you can keep it simple.
76162

77-
78163
- Within the terminal run `python manage.py rebuild_index`, this process may take some time.
79164

80-
81-
82-
83-
84165
- Lastly, if all was successful, run `python manage.py runserver` and a local server of the site should be running. The
85166
address should be shown in the terminal. Open the address in your browser to visit the site.
86-
167+

datamad2/admin.py

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ class PreservationPlanInline(admin.TabularInline):
6363
# #
6464
################################################################################
6565

66+
@admin.register(User)
6667
class UserAdmin(BaseUserAdmin):
6768
list_display = ( 'first_name', 'last_name', 'email','data_centre', 'is_admin')
6869
search_fields = ('email',)
@@ -81,6 +82,7 @@ class UserAdmin(BaseUserAdmin):
8182
)
8283

8384

85+
@admin.register(ImportedGrant)
8486
class ImportedGrantAdmin(admin.ModelAdmin):
8587
search_fields = ['grant_ref', 'title']
8688
list_display = ('grant_ref', 'title', 'creation_date')
@@ -95,6 +97,7 @@ def has_delete_permission(self, request, obj=None):
9597
return False
9698

9799

100+
@admin.register(Grant)
98101
class GrantAdmin(admin.ModelAdmin):
99102
readonly_fields = ['updated_imported_grant', 'science_area']
100103
search_fields = ['grant_ref', 'importedgrant__title']
@@ -110,17 +113,20 @@ def has_delete_permission(self, request, obj=None):
110113
return False
111114

112115

116+
@admin.register(DataProduct)
113117
class DataProductAdmin(admin.ModelAdmin):
114118
pass
115119

116120

121+
@admin.register(Document)
117122
class DocumentAdmin(admin.ModelAdmin):
118123
list_display = ('title', 'grant')
119124

120125
search_fields = ['title', 'grant__grant_ref']
121126
autocomplete_fields = ['grant']
122127

123128

129+
@admin.register(DataCentre)
124130
class DataCentreAdmin(admin.ModelAdmin):
125131
inlines = [
126132
JIRAIssueTypeInline,
@@ -129,12 +135,3 @@ class DataCentreAdmin(admin.ModelAdmin):
129135
DataFormatInline,
130136
PreservationPlanInline,
131137
]
132-
133-
134-
# Register the Admin classes
135-
admin.site.register(User, UserAdmin)
136-
admin.site.register(ImportedGrant, ImportedGrantAdmin)
137-
admin.site.register(Grant, GrantAdmin)
138-
admin.site.register(DataProduct, DataProductAdmin)
139-
admin.site.register(Document, DocumentAdmin)
140-
admin.site.register(DataCentre, DataCentreAdmin)

0 commit comments

Comments
 (0)