*! 11jan2011
// mergel: merge selected variables from a "merge file" into the data in memory.
//
// Usage, as parsed below:
//     mergel key1 [(name1)] [key2 [(name2)] ...] using filename [, options]
//
//   keyN        match variable in the data in memory (abbreviations are expanded)
//   (nameN)     optional: name of the corresponding variable in the merge file,
//               when it differs from keyN; it is renamed to keyN before merging
//   filename    the merge file (resolved through the external Mata helper pcanon())
//
// Options:
//   keep(string)     variables to bring in from the merge file; an entry of the
//                    form old->new (no spaces) renames old to new on the way in
//   all              also add merge-file records that match nothing in memory
//                    (without it, -nokeep- is passed to -merge-)
//   multi            skip the test for multiple matches in the merge file
//   express          print a one-line summary instead of tabulating _merge,
//                    then drop _merge
//   only / onlynot   (mutually exclusive; applied in the express branch only)
//                    keep only matched / only unmatched records
//   perfect          error out unless every record matched; drop _merge on success
//   choose(first|last [sortvars])
//                    when the merge file has duplicate keys, keep the first or
//                    last record per key, ordered by sortvars
//   casesensitive    match string keys exactly; by default string keys are
//                    compared in lower case
//   replace, notes   passed through to -merge- (without -notes-, -nonotes- is passed)
//   stconfig()       passed through to the external Mata helper usel()
//
// External helpers used but not defined in this file: errel(), pcanon(), usel(),
// columnize(), concat(), subvec(), recentmax() and class prefs. Judging by the
// calls, errel() reports an error and stops, and usel() loads a dataset;
// NOTE(review): confirm against their definitions.
program mergel
	version 10.1

	// ---- split the command line at "using": everything before it is the key list
	local upos: list posof "using" in 0
	if (~`upos') mata: errel("Where's the merge file?")
	forval i=1/`=`upos'-1' {
		gettoken one 0: 0
		local vars `vars' `one'
	}
	gettoken one 0: 0	// discard the word "using" itself
	syntax [anything] [, keep(string) All Multi Express Only onlynot Perfect CHoose(string) CASEsensitive replace Notes stconfig(passthru)]
	opts_exclusive "`only'`onlynot'"
	mata: st_local("tomerge",pcanon("`anything'","file","dta"))
	*mata: pathback("`anything'","file","dta")
	*pathutil `anything', defext(.dta) pfe(tomerge)
	*local tomerge `pbpfe'
	confirm file "`tomerge'"

	// ---- translate option flags into the words -merge- expects.
	// After this point the local all is EMPTY when the user asked for -all-
	// and "nokeep" otherwise; likewise notes is empty when -notes- was given.
	local all= cond(mi("`all'"),"nokeep","")
	local notes= cond(mi("`notes'"),"nonotes","")
	gettoken fl choose: choose	// fl = first|last, remainder = sort variables
	if ~inlist("`fl'","","first","last") mata: errel(`"Option choose() must be specified as "first" or "last""')
	local first="`fl'"=="first"
	local totn=_N	// size of the data in memory before merging

	// ---- parse the key list: f# = key name in memory, r# = optional name in
	// the merge file, s# = 1 when the key is a string variable in memory
	local n 0
	while ~mi("`vars'") {
		local n `++n'
		gettoken f`n' vars :vars
		unab f`n': `f`n''
		local vlist `vlist' `f`n''
		capture confirm string var `f`n''
		local s`n'=~_rc
		local r`n' /*why is this line here?*/
		if (substr("`vars'",1,2)==" (") gettoken r`n' vars :vars, match(paren)
	}

	// Are the keys non-unique in the data in memory? (assert fails with rc 9)
	capture bysort `vlist': assert _N==1
	local multimulti= _rc==9

	tempfile mergit dupcount testm
	preserve
	if mi("`multi'") {
		// keep one copy of each key combination, used later to test whether
		// the merge file matches any of them more than once
		keep `vlist'
		qui duplicates drop
		qui save `testm'
	}

	// ---- load and prepare the merge file
	mata: usel("`tomerge'","","","`stconfig'")
	*usel `tomerge', `xconfig' quiet
	capture confirm var _merge
	if (~_rc) drop _merge
	forvalues a=1/`n' {
		// bring a differently-named merge-file key under the in-memory name
		if (~mi("`r`a''") & "`r`a''"~="`f`a''") {
			capture confirm new variable `f`a''
			if (_rc) drop `f`a''
			rename `r`a'' `f`a''
		}
		capture confirm string var `f`a''
		// string here but numeric in memory: convert the merge-file copy
		if (~_rc & ~`s`a'') destring `f`a'', replace
		else if (~_rc & mi("`casesensitive'")) {
			tempvar tm`a' th`a' /*theoretically, the tempvars might not be available in the main data...*/
			/*tm=tempMatch, lowercase in both files for the merge; th=tempHold, hold the original data in the merge file, to be added to the main file so that any added records can have their original data copied into the original field in the main data*/
			gen `tm`a''=lower(`f`a'')
			// -word- restricts the substitution to the whole key name; a plain
			// substring replace would also rewrite an earlier key whose name
			// merely contains this one (e.g. key "id" inside "xid")
			local vlist: subinstr local vlist "`f`a''" "`tm`a''", word
			rename `f`a'' `th`a''
			local keep `keep' `th`a''
		}
		// numeric here: treat the key as numeric on both sides
		else if (_rc) local s`a' 0
	}

	// choose(): keep the first/last merge-file record per key
	if ~mi("`fl'") qui bysort `vlist' (`choose'):keep if _n==cond(`first',1,_N)

	// Are the keys non-unique in the merge file?
	capture bysort `vlist': assert _N==1
	local dotest=_rc==9
	// many-to-many only when BOTH sides have duplicate keys
	local multimulti= `multimulti' & `dotest'
	if (`multimulti') {
		tempvar useq mseq
		bysort `vlist': gen `useq'=_n	// sequence number within key, merge-file side
		sort `vlist' `useq'
	}

	// validate keep() entries: an arrow may not start or end an entry
	foreach one of local keep {
		if (strpos("`one'","->") & inlist(strpos(trim("`one'"),"->"),1,strlen(trim("`one'"))-1)) mata: errel("Arrows ({hi:->}) must be embedded between two variable names with no spaces")
	}
	mata: RenameVars()

	quietly {
		save `mergit'
		if (`multimulti') {
			// per key, the number of merge-file records (last record holds the max sequence)
			keep `vlist' `useq'
			by `vlist': drop if _n<_N
			save `dupcount'
		}
		if (mi("`multi'") & `dotest') {
			// test: merging the de-duplicated in-memory keys must not add rows
			mata: usel("`testm'")
			* usel `testm', quiet
			forvalues a=1/`n' { //duplicated below
				capture confirm string var `f`a''
				if (~_rc & ~`s`a'') destring `f`a'', replace
				else if (~_rc & mi("`casesensitive'")) gen `tm`a''=lower(`f`a'') /*above tempvar note*/
			}
			sort `vlist'
			local orign=_N
			merge `vlist' using `mergit', nokeep
			if (_N>`orign') {
				di as error "`=_N-`orign'' extra (multiple) matches in the Merge File"
				error 7701
			}
		}
		restore

		// ---- prepare the keys in the data in memory the same way
		forvalues a=1/`n' { //duplicated above
			capture confirm string var `f`a''
			if (~_rc & ~`s`a'') destring `f`a'', replace
			else if (~_rc & mi("`casesensitive'")) gen `tm`a''=lower(`f`a'') /*above tempvar note*/
		}
		sort `vlist'
		if (`multimulti') {
			// many-to-many: replicate each in-memory record once per
			// merge-file record sharing its key, numbering the copies
			by `vlist': gen `mseq'=_n
			merge `vlist' using `dupcount', nokeep
			drop _merge
			expand `useq'
			bysort `vlist' `mseq': replace `useq'=_n
			sort `vlist' `useq'
		}

		// ---- the merge itself
		merge `vlist' `useq' using `mergit', `all' update `replace' `notes'
		recode _merge 4=3
		forval a=1/`n' {
			// th# exists only for case-insensitive string keys; skip the others
			// explicitly rather than handing an empty varlist to -confirm-/-replace-
			if (mi("`th`a''")) continue
			capture confirm var `th`a''
			// records added from the merge file get the key's original (un-lowered) value
			if (~_rc) replace `f`a''=`th`a'' if _merge==2
		}
	}

	label define mergel_lab 1 "not in merging file" 2 "not in main data" 3 "matched" 5 "matched, mismatch", replace
	if !mi("`replace'") label define mergel_lab 5 "matched, updated", modify
	label values _merge mergel_lab

	// use the command -t- for the report when it is installed, -ta- otherwise
	capture which t //fix this kluge
	if (!_rc) local mcom t
	else local mcom ta

	// ---- reporting
	if ~mi("`perfect'") {
		qui count if inlist(_merge,1,2,5)
		if r(N) {
			`mcom' _merge
			mata: errel("not all records matched")
		}
		else drop _merge
	}
	else if ~mi("`express'") {
		qui count if _merge==5
		if (r(N) &mi("`replace'")) {
			`mcom' _merge
			mata: errel("some records had mismatched data")
		}
		qui count if inlist(_merge,3,5)
		if (r(N)==_N) di "{txt: All `r(N)' matched}" _continue
		else di "{txt: `r(N)' of `=_N' matched}" _continue
		qui count if _merge==5
		if (r(N)>0) di "{txt:, " r(N) " were updated}"
		else di
		qui count if _merge==2
		// the local all is empty exactly when the -all- option was specified
		if mi("`all'") dis "{txt:`r(N)' added from merge file}" _continue
		if ~mi("`multi'") dis "{txt:, `=_N-`totn'-r(N)' added through multiple matches}"
		else display
		if ~mi("`only'") keep if inlist(_merge,2,3,5) /* matches from main, plus any from "all" option*/
		else if ~mi("`onlynot'") keep if inlist(_merge,1,2)
		drop _merge
	}
	else {
		`mcom' _merge
		if ~mi("`multi'") {
			qui count if _merge==2
			display "{txt: `=_N-`totn'-r(N)' records added through multiple matches}"
		}
	}

	mata: DoRecents("`tomerge'")
end

version 10.1
mata:

// RenameVars(): apply the keep() list to the merge file currently in memory.
// Reads the Stata locals keep, vlist and useq from the calling program.
// Each keep entry is split at "->" into (old name, new name); the local keep
// is rewritten to hold only the old names, every variable outside
// vlist + keep + useq is dropped, and then each old name that has a new name
// is renamed. columnize() and concat() are defined outside this file;
// presumably they split a string into a vector and join one back - confirm.
void RenameVars() {
	vars=strtrim(columnize(columnize(st_local("keep"))',"->"))
	vars=vars,J(rows(vars),1,"")	// pad with an empty column so column 2 always exists
	st_local("keep",concat(vars[,1]," "))
	stata(sprintf("keep %s %s %s",st_local("vlist"),st_local("keep"),st_local("useq")))
	for (i=1;i<=rows(vars);i++) {
		if (vars[i,2]>"") stata("rename "+vars[i,1]+" "+vars[i,2])
	}
}

// DoRecents(path): record path in the "recentmerged" preference list.
// When path differs from the last stored entry it is appended; the start
// index passed to subvec() becomes nonzero only once pr.r exceeds
// recentmax(), which looks like a cap on the list length
// (NOTE(review): confirm against subvec() and recentmax(), defined elsewhere).
// The list is then written back through pr.write().
void DoRecents(path) {
	class prefs scalar pr
	pr.make("recentmerged","path")
	if (pr.body[pr.r]!=path) pr.body=subvec(pr.body\path,((pr.r-recentmax()+2)*(pr.r>recentmax()),.)) // r should not be 0 now!
	//if (pr.r==0) pr.body=path //just a simple way to deal with 0
	//else if (pr.body[pr.r]!=path) pr.body=subvec(pr.body\path,(pr.r==recentmax(),.))
	pr.write()
}
end