From 556c7bdb283a4622564d950097c64f877b15eec7 Mon Sep 17 00:00:00 2001 From: ^ Date: Thu, 25 Apr 2024 10:45:23 +0100 Subject: [PATCH 1/3] feat: mp_stripdiffs macro - closes #373 --- all.sas | 247 +++++++++++++++++++++++++++++- base/mp_stackdiffs.sas | 1 + base/mp_storediffs.sas | 3 +- base/mp_stripdiffs.sas | 224 +++++++++++++++++++++++++++ meta/mm_getgroups.sas | 9 +- tests/base/mp_stripdiffs.test.sas | 106 +++++++++++++ 6 files changed, 579 insertions(+), 11 deletions(-) create mode 100644 base/mp_stripdiffs.sas create mode 100644 tests/base/mp_stripdiffs.test.sas diff --git a/all.sas b/all.sas index 87abdc53..da2460e1 100644 --- a/all.sas +++ b/all.sas @@ -5541,13 +5541,21 @@ data _null_; header = cats(coalescec(varlabel(dsid,i),varnm),dlm); %end; %else %if &headerformat=SASJS %then %do; - if vartype(dsid,i)='C' then header=cats(varnm,':$char',varlen(dsid,i),'.'); + vlen=varlen(dsid,i); + if vartype(dsid,i)='C' then header=cats(varnm,':$char',vlen,'.'); else do; vfmt=coalescec(varfmt(dsid,i),'0'); fmttype=mcf_getfmttype(vfmt); if fmttype='DATE' then header=cats(varnm,':date9.'); else if fmttype='DATETIME' then header=cats(varnm,':E8601DT26.6'); else if fmttype='TIME' then header=cats(varnm,':TIME12.'); + /** + * there is not much point importing a short length numeric like this, + * eg with best4., as the resulting variable will still be stored as + * length 8. We need a length or format statement to ensure variable + * is creatd with the smaller length... 
+ **/ + else if vlen<8 then header=cats(varnm,':best',vlen,'.'); else header=cats(varnm,':best.'); end; %end; @@ -5574,6 +5582,7 @@ data _null_; set &ds end=last; %do i=1 %to &vcnt; %let var=%scan(&varlist,&i); + %local vlen&i; %if %mf_getvartype(&ds,&var)=C %then %do; %let dsv1=%mf_getuniquename(prefix=csvcol1_); %let dsv2=%mf_getuniquename(prefix=csvcol2_); @@ -12466,6 +12475,7 @@ run; @li mp_coretable.sas @li mp_stackdiffs.test.sas @li mp_storediffs.sas + @li mp_stripdiffs.sas @todo The current approach assumes that a variable called KEY_HASH is not on the base table. This part will need to be refactored (eg using @@ -12924,6 +12934,7 @@ select distinct tgtvar_nm into: missvars separated by ' '

Related Macros

@li mp_stackdiffs.sas @li mp_storediffs.test.sas + @li mp_stripdiffs.sas @version 9.2 @author Allan Bowe @@ -13043,7 +13054,7 @@ data &ds4; run; %if "&loadref"="0" %then %let loadref=%sysfunc(uuidgen()); -%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime()); +%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime(),8.6); %let libds=%upcase(&libds); /* join orig vals for modified & deleted */ @@ -13373,6 +13384,229 @@ run; %mend mp_streamfile; /** + @file + @brief Generates a stage dataset to revert diffs tracked in an audit table + @details A big benefit of tracking data changes in an audit table is that + those changes can be subsequently reverted if necessary! + + This macro prepares a staging dataset containing those differences - eg for: + + @li deleted rows - these are re-inserted + @li changed rows - differences are reverted + @li added rows - these are marked with `_____DELETE_THIS_RECORD_____="Yes"` + + These changes are NOT applied to the base table - a staging dataset is + simply prepared for an ETL process to action. In Data Controller, this + dataset is used directly as an input to the APPROVE process (so that the + reversion diffs can be reviewed prior to being applied). + + + @param [in] libds Base library.dataset (will not be modified). The library + must be assigned. + @param [in] loadref Unique identifier for the version to be reverted. This + change, plus ALL SUBSEQUENT CHANGES, will be reverted in the output table. + @param [in] difftable The dataset containing the diffs. Definition available + in mddl_dc_difftable.sas + @param [out] outds= (work.mp_stripdiffs) Output table containing the diffs. + Has the same format as the base dataset, plus a + `_____DELETE_THIS_RECORD_____` variable. + @param [in] mdebug= (0) Set to 1 to enable DEBUG messages and preserve outputs + +

SAS Macros

+ @li mf_getuniquefileref.sas + @li mf_getuniquename.sas + @li mf_islibds.sas + @li mp_abort.sas + @li mp_ds2squeeze.sas + +

Related Macros

+ @li mddl_dc_difftable.sas + @li mp_stackdiffs.sas + @li mp_storediffs.sas + @li mp_stripdiffs.test.sas + + @version 9.2 + @author Allan Bowe +**/ +/** @cond */ + +%macro mp_stripdiffs(libds + ,loadref + ,difftable + ,outds=work.mp_stripdiffs + ,mdebug=0 +)/*/STORE SOURCE*/; +%local dbg; +%if &mdebug=1 %then %do; + %put &sysmacroname entry vars:; + %put _local_; +%end; +%else %let dbg=*; + +%let libds=%upcase(&libds); + +/* safety checks - abort if SYSCC is non-zero on entry, or if libds fails the mf_islibds check */ +%mp_abort(iftrue= (&syscc ne 0) + ,mac=&sysmacroname + ,msg=%str(SYSCC=&syscc on entry. Clean session required!) +) +%let libds=%upcase(&libds); +%mp_abort(iftrue= (%mf_islibds(&libds)=0) + ,mac=&sysmacroname + ,msg=%str(Invalid library.dataset reference - %superq(libds)) +) + + + +/* set up unique and temporary vars (x is the loop index used to build the PK join, kept local so a caller variable named x is never overwritten) */ +%local ds1 ds2 ds3 ds4 ds5 fref1 x; +%let fref1=%mf_getuniquefileref(); + +/* get timestamp of the diff to be reverted (abort if the load ref is not in the diff table) */ +%local ts; +proc sql noprint; +select put(processed_dttm,datetime19.6) into: ts + from &difftable where load_ref="&loadref"; +%mp_abort(iftrue= (&sqlobs=0) + ,mac=&sysmacroname + ,msg=%str(Load ref %superq(loadref) not found!) 
+) + /* extract diffs for this base table from this timestamp onwards */ +%let ds1=%upcase(work.%mf_getuniquename(prefix=mpsd_diffs)); +create table &ds1 (drop=libref dsn) as + select * from &difftable + where upcase(cats(libref))="%scan(&libds,1,.)" + and upcase(cats(dsn))="%scan(&libds,2,.)" + and processed_dttm ge "&ts"dt + order by processed_dttm desc, key_hash, is_pk; + +/* extract key values only */ +%let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_pks)); +create table &ds2 as + select key_hash, + tgtvar_nm, + tgtvar_type, + coalescec(oldval_char,newval_char) as charval, + coalesce(oldval_num, newval_num) as numval, + processed_dttm + from &ds1 + where is_pk=1 + order by key_hash, processed_dttm; + +/* grab pk variable names from the first key_hash group. NOTE(review): assumes one row per pk variable in that group - confirm for records that appear in several loads */ +%local pk; +data _null_; + set &ds2; + by key_hash; + call symputx('pk',catx(' ',symget('pk'),tgtvar_nm),'l'); + if last.key_hash then stop; +run; + +%let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_keychar)); +proc transpose data=&ds2(where=(tgtvar_type='C')) + out=&ds3(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var charval; +run; + +%let ds4=%upcase(work.%mf_getuniquename(prefix=mpsd_keynum)); +proc transpose data=&ds2(where=(tgtvar_type='N')) + out=&ds4(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var numval; +run; +/* shorten the lengths */ +%mp_ds2squeeze(&ds3,outds=&ds3) +%mp_ds2squeeze(&ds4,outds=&ds4) + +%let ds5=%upcase(work.%mf_getuniquename(prefix=mpsd_merged)); +data &ds5; + merge &ds3 &ds4; + by key_hash; + if not missing(key_hash); +run; + +/* join to base table (on every pk variable) for preliminary stage DS */ +proc sql; +create table &outds as select "No " as _____DELETE_THIS_RECORD_____, + b.* + from &ds5 a + inner join &libds b + on 1=1 +%do x=1 %to %sysfunc(countw(&pk,%str( ))); + and a.%scan(&pk,&x,%str( ))=b.%scan(&pk,&x,%str( )) +%end; +; + +/* create SQL update and insert statements (written to the temporary fileref) to apply to the stage dataset */ +data _null_; + set &ds1; + file &fref1; + if _n_=1 then put 'proc sql noprint;'; + by descending processed_dttm key_hash is_pk; + if move_type='M' 
then do; + if first.key_hash then do; + put "update &outds set " @@; + end; + if IS_PK=0 then do; + put " " tgtvar_nm '=' @@; + charval=quote(cats(oldval_char)); + if tgtvar_type='C' then put charval @@; + else put oldval_num @@; + if not last.is_pk then put ','; + end; + else do; + if first.is_pk then put " where 1=1 " @@; + put " and " tgtvar_nm '=' @@; + charval=quote(cats(oldval_char)); + if tgtvar_type='C' then put charval @@; + else put oldval_num @@; + end; + end; + else if move_type='A' then do; + if first.key_hash then do; + put "update &outds set _____DELETE_THIS_RECORD_____='Yes' where 1=1 " @@; + end; + /* gating if - as only need PK now */ + if is_pk=1; + put ' AND ' tgtvar_nm '=' @@; + charval=quote(cats(newval_char)); + if tgtvar_type='C' then put charval @@; + else put newval_num @@; + end; + else if move_type='D' then do; + if first.key_hash then do; + put "insert into &outds set _____DELETE_THIS_RECORD_____='No' " @@; + end; + put " ," tgtvar_nm '=' @@; + charval=quote(cats(oldval_char)); + if tgtvar_type='C' then put charval @@; + else put oldval_num @@; + end; + if last.key_hash then put ';'; +run; + +/* apply the modification statements, then drop the work tables and release the fileref (or print the generated code when mdebug=1) */ +%inc &fref1/source2; + +%if &mdebug=0 %then %do; + proc sql; + drop table &ds1, &ds2, &ds3, &ds4, &ds5; + filename &fref1 clear; +%end; +%else %do; + data _null_; + infile &fref1; + input; + if _n_=1 then putlog "Contents of SQL adjustments"; + putlog _infile_; + run; +%end; + +%mend mp_stripdiffs; +/** @endcond *//** @file @brief Runs arbitrary code for a specified amount of time @details Executes a series of procs and data steps to enable performance @@ -17964,10 +18198,11 @@ run; @param [in] user= the metadata user to return groups for. Leave blank for all groups. 
- @param [in] repo= the metadata repository that contains the user/group - information - @param [in] mDebug= set to 1 to show debug messages in the log - @param [out] outds= the dataset to create that contains the list of groups + @param [in] repo= (foundation) the metadata repository that contains the + user/group information + @param [in] mDebug= (0) set to 1 to show debug messages in the log + @param [out] outds= (work.mm_getgroups) The dataset to create that contains + the list of groups @returns outds dataset containing all groups in a column named "metagroup" - groupuri diff --git a/base/mp_stackdiffs.sas b/base/mp_stackdiffs.sas index ce167009..65aa4df2 100644 --- a/base/mp_stackdiffs.sas +++ b/base/mp_stackdiffs.sas @@ -197,6 +197,7 @@ @li mp_coretable.sas @li mp_stackdiffs.test.sas @li mp_storediffs.sas + @li mp_stripdiffs.sas @todo The current approach assumes that a variable called KEY_HASH is not on the base table. This part will need to be refactored (eg using diff --git a/base/mp_storediffs.sas b/base/mp_storediffs.sas index f185b0cc..452c5797 100644 --- a/base/mp_storediffs.sas +++ b/base/mp_storediffs.sas @@ -64,6 +64,7 @@

Related Macros

@li mp_stackdiffs.sas @li mp_storediffs.test.sas + @li mp_stripdiffs.sas @version 9.2 @author Allan Bowe @@ -183,7 +184,7 @@ data &ds4; run; %if "&loadref"="0" %then %let loadref=%sysfunc(uuidgen()); -%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime()); +%if &processed_dttm=0 %then %let processed_dttm=%sysfunc(datetime(),8.6); %let libds=%upcase(&libds); /* join orig vals for modified & deleted */ diff --git a/base/mp_stripdiffs.sas b/base/mp_stripdiffs.sas new file mode 100644 index 00000000..50028187 --- /dev/null +++ b/base/mp_stripdiffs.sas @@ -0,0 +1,224 @@ +/** + @file + @brief Generates a stage dataset to revert diffs tracked in an audit table + @details A big benefit of tracking data changes in an audit table is that + those changes can be subsequently reverted if necessary! + + This macro prepares a staging dataset containing those differences - eg for: + + @li deleted rows - these are re-inserted + @li changed rows - differences are reverted + @li added rows - these are marked with `_____DELETE_THIS_RECORD_____="Yes"` + + These changes are NOT applied to the base table - a staging dataset is + simply prepared for an ETL process to action. In Data Controller, this + dataset is used directly as an input to the APPROVE process (so that the + reversion diffs can be reviewed prior to being applied). + + + @param [in] libds Base library.dataset (will not be modified). The library + must be assigned. + @param [in] loadref Unique identifier for the version to be reverted. This + change, plus ALL SUBSEQUENT CHANGES, will be reverted in the output table. + @param [in] difftable The dataset containing the diffs. Definition available + in mddl_dc_difftable.sas + @param [out] outds= (work.mp_stripdiffs) Output table containing the diffs. + Has the same format as the base dataset, plus a + `_____DELETE_THIS_RECORD_____` variable. + @param [in] mdebug= (0) Set to 1 to enable DEBUG messages and preserve outputs + +

SAS Macros

+ @li mf_getuniquefileref.sas + @li mf_getuniquename.sas + @li mf_islibds.sas + @li mp_abort.sas + @li mp_ds2squeeze.sas + +

Related Macros

+ @li mddl_dc_difftable.sas + @li mp_stackdiffs.sas + @li mp_storediffs.sas + @li mp_stripdiffs.test.sas + + @version 9.2 + @author Allan Bowe +**/ +/** @cond */ + +%macro mp_stripdiffs(libds + ,loadref + ,difftable + ,outds=work.mp_stripdiffs + ,mdebug=0 +)/*/STORE SOURCE*/; +%local dbg; +%if &mdebug=1 %then %do; + %put &sysmacroname entry vars:; + %put _local_; +%end; +%else %let dbg=*; + +%let libds=%upcase(&libds); + +/* safety checks - abort if SYSCC is non-zero on entry, or if libds fails the mf_islibds check */ +%mp_abort(iftrue= (&syscc ne 0) + ,mac=&sysmacroname + ,msg=%str(SYSCC=&syscc on entry. Clean session required!) +) +%let libds=%upcase(&libds); +%mp_abort(iftrue= (%mf_islibds(&libds)=0) + ,mac=&sysmacroname + ,msg=%str(Invalid library.dataset reference - %superq(libds)) +) + + + +/* set up unique and temporary vars (x is the loop index used to build the PK join, kept local so a caller variable named x is never overwritten) */ +%local ds1 ds2 ds3 ds4 ds5 fref1 x; +%let fref1=%mf_getuniquefileref(); + +/* get timestamp of the diff to be reverted (abort if the load ref is not in the diff table) */ +%local ts; +proc sql noprint; +select put(processed_dttm,datetime19.6) into: ts + from &difftable where load_ref="&loadref"; +%mp_abort(iftrue= (&sqlobs=0) + ,mac=&sysmacroname + ,msg=%str(Load ref %superq(loadref) not found!) 
+) + /* extract diffs for this base table from this timestamp onwards */ +%let ds1=%upcase(work.%mf_getuniquename(prefix=mpsd_diffs)); +create table &ds1 (drop=libref dsn) as + select * from &difftable + where upcase(cats(libref))="%scan(&libds,1,.)" + and upcase(cats(dsn))="%scan(&libds,2,.)" + and processed_dttm ge "&ts"dt + order by processed_dttm desc, key_hash, is_pk; + +/* extract key values only */ +%let ds2=%upcase(work.%mf_getuniquename(prefix=mpsd_pks)); +create table &ds2 as + select key_hash, + tgtvar_nm, + tgtvar_type, + coalescec(oldval_char,newval_char) as charval, + coalesce(oldval_num, newval_num) as numval, + processed_dttm + from &ds1 + where is_pk=1 + order by key_hash, processed_dttm; + +/* grab pk variable names from the first key_hash group. NOTE(review): assumes one row per pk variable in that group - confirm for records that appear in several loads */ +%local pk; +data _null_; + set &ds2; + by key_hash; + call symputx('pk',catx(' ',symget('pk'),tgtvar_nm),'l'); + if last.key_hash then stop; +run; + +%let ds3=%upcase(work.%mf_getuniquename(prefix=mpsd_keychar)); +proc transpose data=&ds2(where=(tgtvar_type='C')) + out=&ds3(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var charval; +run; + +%let ds4=%upcase(work.%mf_getuniquename(prefix=mpsd_keynum)); +proc transpose data=&ds2(where=(tgtvar_type='N')) + out=&ds4(drop=_name_); + by KEY_HASH; + id TGTVAR_NM; + var numval; +run; +/* shorten the lengths */ +%mp_ds2squeeze(&ds3,outds=&ds3) +%mp_ds2squeeze(&ds4,outds=&ds4) + +%let ds5=%upcase(work.%mf_getuniquename(prefix=mpsd_merged)); +data &ds5; + merge &ds3 &ds4; + by key_hash; + if not missing(key_hash); +run; + +/* join to base table (on every pk variable) for preliminary stage DS */ +proc sql; +create table &outds as select "No " as _____DELETE_THIS_RECORD_____, + b.* + from &ds5 a + inner join &libds b + on 1=1 +%do x=1 %to %sysfunc(countw(&pk,%str( ))); + and a.%scan(&pk,&x,%str( ))=b.%scan(&pk,&x,%str( )) +%end; +; + +/* create SQL update and insert statements (written to the temporary fileref) to apply to the stage dataset */ +data _null_; + set &ds1; + file &fref1; + if _n_=1 then put 'proc sql noprint;'; + by descending processed_dttm key_hash is_pk; + if move_type='M' 
then do; + if first.key_hash then do; + put "update &outds set " @@; + end; + if IS_PK=0 then do; + put " " tgtvar_nm '=' @@; + charval=quote(cats(oldval_char)); + if tgtvar_type='C' then put charval @@; + else put oldval_num @@; + if not last.is_pk then put ','; + end; + else do; + if first.is_pk then put " where 1=1 " @@; + put " and " tgtvar_nm '=' @@; + charval=quote(cats(oldval_char)); + if tgtvar_type='C' then put charval @@; + else put oldval_num @@; + end; + end; + else if move_type='A' then do; + if first.key_hash then do; + put "update &outds set _____DELETE_THIS_RECORD_____='Yes' where 1=1 " @@; + end; + /* gating if - as only need PK now */ + if is_pk=1; + put ' AND ' tgtvar_nm '=' @@; + charval=quote(cats(newval_char)); + if tgtvar_type='C' then put charval @@; + else put newval_num @@; + end; + else if move_type='D' then do; + if first.key_hash then do; + put "insert into &outds set _____DELETE_THIS_RECORD_____='No' " @@; + end; + put " ," tgtvar_nm '=' @@; + charval=quote(cats(oldval_char)); + if tgtvar_type='C' then put charval @@; + else put oldval_num @@; + end; + if last.key_hash then put ';'; +run; + +/* apply the modification statements, then drop the work tables and release the fileref (or print the generated code when mdebug=1) */ +%inc &fref1/source2; + +%if &mdebug=0 %then %do; + proc sql; + drop table &ds1, &ds2, &ds3, &ds4, &ds5; + filename &fref1 clear; +%end; +%else %do; + data _null_; + infile &fref1; + input; + if _n_=1 then putlog "Contents of SQL adjustments"; + putlog _infile_; + run; +%end; + +%mend mp_stripdiffs; +/** @endcond */ \ No newline at end of file diff --git a/meta/mm_getgroups.sas b/meta/mm_getgroups.sas index d63104e6..f61a1e3b 100755 --- a/meta/mm_getgroups.sas +++ b/meta/mm_getgroups.sas @@ -11,10 +11,11 @@ @param [in] user= the metadata user to return groups for. Leave blank for all groups. 
- @param [in] repo= the metadata repository that contains the user/group - information - @param [in] mDebug= set to 1 to show debug messages in the log - @param [out] outds= the dataset to create that contains the list of groups + @param [in] repo= (foundation) the metadata repository that contains the + user/group information + @param [in] mDebug= (0) set to 1 to show debug messages in the log + @param [out] outds= (work.mm_getgroups) The dataset to create that contains + the list of groups @returns outds dataset containing all groups in a column named "metagroup" - groupuri diff --git a/tests/base/mp_stripdiffs.test.sas b/tests/base/mp_stripdiffs.test.sas new file mode 100644 index 00000000..e0ccd677 --- /dev/null +++ b/tests/base/mp_stripdiffs.test.sas @@ -0,0 +1,106 @@ +/** + @file + @brief Testing mp_stripdiffs.sas macro + @details + +

SAS Macros

+ @li mp_assert.sas + @li mp_assertscope.sas + @li mp_ds2md.sas + @li mp_storediffs.sas + @li mp_stripdiffs.sas + +**/ + +/* make an adjustable base dataset (libby is a libref pointing at WORK) */ +/* use a composite key also (name weight) */ +libname libby (work); +data libby.class; + set sashelp.class; +run; + +/* first, store some diffs: row 1 is deleted, row 2 is modified (age=99) and row 3 is appended under the name Newbie */ +data work.orig work.deleted work.changed work.appended; + set libby.class; + if _n_=1 then do; + call symputx('delname',name); + output work.orig work.deleted; + end; + else if _n_=2 then do; + output work.orig; + call symputx('modname',name); + call symputx('modval',age); + age=99; + output work.changed; + end; + else do; + name='Newbie'; + output work.appended; + stop; + end; +run; +%mp_storediffs(libby.class,work.orig,NAME WEIGHT + ,delds=work.deleted + ,modds=work.changed + ,appds=work.appended + ,outds=work.audit + ,loadref=UPLOAD1 + ,mdebug=0 +) +%mp_ds2md(work.audit) +%mp_assert( + iftrue=(&syscc=0), + desc=Checking preparation case, + outds=work.test_results +) + +/* apply the changes to the base table, so that it reflects the diffs stored above */ +proc sql; +delete from libby.class where name in ("&delname","&modname"); +proc append base=libby.class data=work.appended; +proc append base=libby.class data=work.changed; +run; + +/* now, prepare the revert dataset. NOTE(review): only the mdebug=1 path is exercised, the mdebug=0 cleanup branch is not covered by this test */ +%mp_assertscope(SNAPSHOT) +%mp_stripdiffs(libby.class + ,UPLOAD1 + ,work.audit + ,outds=work.mp_stripdiffs + ,mdebug=1 +) +%mp_ds2md(work.mp_stripdiffs) +%mp_assertscope(COMPARE) + +%mp_assert( + iftrue=(&syscc=0), + desc=Checking error condition, + outds=work.test_results +) + +%let delpass=0; +%let modpass=0; +%let addpass=0; +data _null_; + set work.mp_stripdiffs; + if upcase(_____DELETE_THIS_RECORD_____)='NO' and name="&delname" + then call symputx('delpass',1); + if name="&modname" and age=&modval then call symputx('modpass',1); + if upcase(_____DELETE_THIS_RECORD_____)='YES' and name="Newbie" + then call symputx('addpass',1); +run; + +%mp_assert( + iftrue=(&delpass=1), + desc=Ensuring deleted record is back in the dataset, + outds=work.test_results +) +%mp_assert( + 
iftrue=(&modpass=1), + desc=Ensuring modified record now has old value, + outds=work.test_results +) +%mp_assert( + iftrue=(&addpass=1), + desc=Ensuring added record is now marked for deletion, + outds=work.test_results +) \ No newline at end of file From 015749a9b24c61db931ed652400032b1e809e574 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 25 Apr 2024 09:45:46 +0000 Subject: [PATCH 2/3] chore: updating all.sas From 16a3b63161bc6de718751bbbd4abf48e7d328325 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 25 Apr 2024 09:49:00 +0000 Subject: [PATCH 3/3] chore: updating all.sas