From 59da47fa9c6a18cc9250376127ad0d2c071ec6fb Mon Sep 17 00:00:00 2001 From: Dominic Ricottone Date: Sat, 16 Nov 2024 15:45:49 -0600 Subject: [PATCH] ANES updates Converted the generated Stata data preparation syntax into working scripts. Passed all files through `dos2unix`. Added a comparison script. --- data/anes/.gitignore | 9 + data/anes/README.md | 67 +++-- data/anes/anes 2004 2012 2020 comparison.do | 117 ++++++++ data/anes/anes 2004 cumulative dictionary.do | 44 +++ ...ata.txt => anes 2004 cumulative infile.do} | 260 +++++++----------- 5 files changed, 317 insertions(+), 180 deletions(-) create mode 100644 data/anes/.gitignore create mode 100644 data/anes/anes 2004 2012 2020 comparison.do create mode 100644 data/anes/anes 2004 cumulative dictionary.do rename data/anes/{sub-stata.txt => anes 2004 cumulative infile.do} (64%) diff --git a/data/anes/.gitignore b/data/anes/.gitignore new file mode 100644 index 0000000..a721241 --- /dev/null +++ b/data/anes/.gitignore @@ -0,0 +1,9 @@ +# 2004 +sub-data.txt + +# 2012 +08475-0001-Data.dta + +# 2020 +anes_timeseries_cdf_stata_20220916.dta + diff --git a/data/anes/README.md b/data/anes/README.md index 30197bc..55407fe 100644 --- a/data/anes/README.md +++ b/data/anes/README.md @@ -1,27 +1,40 @@ -File origins. - -## sub-data.txt - -Sourced from ANES 2004 cumulative data file. -Raw data in delimited format. - -## sub-stata.txt - -Sourced from ANES 2004 cumulative data file. -Supplemental data preparation that is appropriate only for Stata. - -## 08475-0001-Data.dta - -Sourced from ANES 2012 cumulative data file. -Data prepared in Stata format. - -## 08475-0001-Supplemental_syntax.do - -Sourced from ANES 2012 cumulative data file. -Supplemental data preparation that is appropriate only for Stata. - -## anes_timeseries_cdf_stata_20220916.dta - -Sourced from ANES 2020 cumulative data file. -Data prepared in Stata format. - +File origins. + +## sub-data.txt + +Sourced from ANES 2004 cumulative data file. 
+Raw data in delimited format. + +## anes 2004 cumulative dictionary.do + +Sourced from ANES 2004 cumulative data file. +Raw data import routine. +Prepared from original 'sub-stata.txt' file. + +## anes 2004 cumulative infile.do + +Sourced from ANES 2004 cumulative data file. +Supplemental data preparation that is appropriate only for Stata. +Prepared from original 'sub-stata.txt' file. + +## 08475-0001-Data.dta + +Sourced from ANES 2012 cumulative data file. +Data prepared in Stata format. + +## 08475-0001-Supplemental_syntax.do + +Sourced from ANES 2012 cumulative data file. +Supplemental data preparation that is appropriate only for Stata. + +## anes_timeseries_cdf_stata_20220916.dta + +Sourced from ANES 2020 cumulative data file. +Data prepared in Stata format. + +## anes 2004 2012 2020 comparison.do + +My work. +Compares the 2004, 2012, and 2020 cumulative data files in a simple way. +Demonstrates that data was adjusted in non-trivial ways over time. + diff --git a/data/anes/anes 2004 2012 2020 comparison.do b/data/anes/anes 2004 2012 2020 comparison.do new file mode 100644 index 0000000..03b48e7 --- /dev/null +++ b/data/anes/anes 2004 2012 2020 comparison.do @@ -0,0 +1,117 @@ +// ANES 2004 cumulative file +clear all + +do "anes 2004 cumulative infile.do" +svyset VCF0006A [pweight=VCF0009A], vce(linearized) + +keep if inlist(VCF0004,1982,1984,1998,1992,1996,2000,2002,2004) +recode VCF0803 (1/3=1 "Lib") (4 9=2 "Mod") (5/7=3 "Cons") (else=.), gen(conserv) +svy: tab conserv VCF0004, col +/* +(running tabulate on estimation sample) + +Number of strata = 1 Number of obs = 12,528 +Number of PSUs = 11,368 Population size = 12,501.836 + Design df = 11,367 + +------------------------------------------------------------------------- +RECODE of | +VCF0803 | +(Liberal- | +Conservat | +ive 7pt | Year of Study +Scale) | 1982 1984 1992 1996 1998 2000 2002 2004 Total +----------+-------------------------------------------------------------- + Lib | .1464 .1786 .2022 .1813 .1837 .1963 .2199 .2221 .1904 
+ Mod | .5829 .5357 .4975 .4929 .5134 .5016 .4362 .4591 .5046 + Cons | .2707 .2858 .3003 .3258 .3029 .3021 .3439 .3188 .305 + | + Total | 1 1 1 1 1 1 1 1 1 +------------------------------------------------------------------------- +Key: Column proportion + + Pearson: + Uncorrected chi2(14) = 90.6470 + Design-based F(13.60, 1.5e+05)= 5.6565 P = 0.0000 +*/ + +******************************************************************************* + +// ANES 2012 cumulative file +clear all + +use "08475-0001-Data.dta" +do "08475-0001-Supplemental_syntax.do" +svyset VCF0006A [pweight=VCF0009Z], vce(linearized) + +keep if inlist(VCF0004,1982,1984,1998,1992,1996,2000,2002,2004) +recode VCF0803 (1/3=1 "Lib") (4 9=2 "Mod") (5/7=3 "Cons") (else=.), gen(conserv) +svy: tab conserv VCF0004, col + +/* +(running tabulate on estimation sample) + +Number of strata = 1 Number of obs = 12,675 +Number of PSUs = 11,515 Population size = 12,653.846 + Design df = 11,514 + +------------------------------------------------------------------------- +RECODE of | +VCF0803 | +(IDEOLOGY | +: | +Liberal-C | +onservati | STUDY VARIABLE: Year of Study +ve Scale) | 1982 1984 1992 1996 1998 2000 2002 2004 Total +----------+-------------------------------------------------------------- + Lib | .1464 .1786 .2022 .1813 .1837 .1963 .2199 .1877 .1875 + Mod | .5829 .5357 .4975 .4929 .5134 .5016 .4362 .5017 .5081 + Cons | .2707 .2858 .3003 .3258 .3029 .3021 .3439 .3106 .3044 + | + Total | 1 1 1 1 1 1 1 1 1 +------------------------------------------------------------------------- +Key: Column proportion + + Pearson: + Uncorrected chi2(14) = 79.4757 + Design-based F(13.60, 1.6e+05)= 4.9729 P = 0.0000 +*/ + +******************************************************************************* + +//ANES 2020 cumulative file +clear all +use "anes_timeseries_cdf_stata_20220916.dta" +svyset VCF0006a [pweight=VCF0009z], vce(linearized) + +keep if inlist(VCF0004,1982,1984,1998,1992,1996,2000,2002,2004) +recode VCF0803 (1/3=1 
"Lib") (4 9=2 "Mod") (5/7=3 "Cons") (else=.), gen(conserv) +svy: tab conserv VCF0004, col + +/* +(running tabulate on estimation sample) + +Number of strata = 1 Number of obs = 12,675 +Number of PSUs = 11,515 Population size = 12,653.846 + Design df = 11,514 + +------------------------------------------------------------------------- +RECODE of | +VCF0803 | +(Liberal- | +Conservat | +ive | Year of Study +Scale) | 1982 1984 1992 1996 1998 2000 2002 2004 Total +----------+-------------------------------------------------------------- + Lib | .1464 .1786 .2022 .1813 .1837 .1963 .2199 .1877 .1875 + Mod | .5829 .5357 .4975 .4929 .5134 .5016 .4362 .5017 .5081 + Cons | .2707 .2858 .3003 .3258 .3029 .3021 .3439 .3106 .3044 + | + Total | 1 1 1 1 1 1 1 1 1 +------------------------------------------------------------------------- +Key: Column proportion + + Pearson: + Uncorrected chi2(14) = 79.4757 + Design-based F(13.60, 1.6e+05)= 4.9729 P = 0.0000 +*/ diff --git a/data/anes/anes 2004 cumulative dictionary.do b/data/anes/anes 2004 cumulative dictionary.do new file mode 100644 index 0000000..51e0feb --- /dev/null +++ b/data/anes/anes 2004 cumulative dictionary.do @@ -0,0 +1,44 @@ +dictionary using "sub-data.txt" { +***************************************************************** +* Replace 'Y' with the name of the data file. +* The default suffix is '.raw'. +* +* Put this dictionary into a separate file (with the suffix .dct). +* (The first line of the dictionary file must contain the +* 'dictionary' command.) +* +* Then edit the name of that dictionary file into the 'do-file' +* portion of the STATA definitions. 
+******************************************************************* +* Records per case: +_lines(1) + +_line(1) + +_column(1) long CASEID %8f "Case ID" +_column(11) str28 VERSION %28s "ANES Version Number" +_column(41) int VCF0004 %4f "Year of Study" +_column(46) int VCF0006 %4f "Study Respondent Number" +_column(51) long VCF0006A %8f "Unique Respondent Number" +_column(60) float VCF0009 %6.4f "Type 0 General Weight Variable" +_column(67) float VCF0009A %6.4f "Type 0 Weight - Post-Stratified Pre" +_column(74) float VCF0010 %6.4f "Type 1 General Weight Variable" +_column(81) float VCF0010A %6.4f "Type 1 Weight - Post-stratified Pre" +_column(88) float VCF0011 %6.4f "Type 2 General Weight Variable" +_column(95) float VCF0011A %6.4f "Type 2 Weight - Post-Stratified Pre" +_column(102) int VCF0140A :VCF0140A %1f "R Education 7-category" +_column(104) int VCF0302 :VCF0302 %1f "Initial Party ID Response" +_column(106) int VCF0702 :VCF0702 %1f "Did R Vote in Election" +_column(108) int VCF0717 :VCF0717 %1f "Did R Try to Influence Others Vote" +_column(110) int VCF0718 :VCF0718 %1f "Did R Attend Political Meetings" +_column(112) int VCF0719 :VCF0719 %1f "Did R Work for Party or Candidate" +_column(114) int VCF0720 :VCF0720 %1f "Did R Display Candidate Button/Stic" +_column(116) int VCF0721 :VCF0721 %1f "Did R Donate Money to Party/Candida" +_column(118) int VCF0803 :VCF0803 %1f "Liberal-Conservative 7pt Scale" +_column(120) int VCF0806 :VCF0806 %1f "R Position 7pt Govt Health Insuranc" +_column(122) int VCF0809 :VCF0809 %1f "R Position 7pt Govt Guaranteed Jobs" +_column(124) int VCF0830 :VCF0830 %1f "R Position 7pt Aid to Blacks Scale" +_column(126) int VCF0838 :VCF0838 %1f "When Should Abortion Be Allowed by" +_column(128) int VCF0839 :VCF0839 %1f "R Position 7pt Govt Services/Spendi" +_column(130) int VCF0843 :VCF0843 %1f "R Position 7pt Defense Spending Sca" +} diff --git a/data/anes/sub-stata.txt b/data/anes/anes 2004 cumulative infile.do similarity index 64% rename 
from data/anes/sub-stata.txt rename to data/anes/anes 2004 cumulative infile.do index 00b1870..6dcb208 100644 --- a/data/anes/sub-stata.txt +++ b/data/anes/anes 2004 cumulative infile.do @@ -1,153 +1,107 @@ -******************************************************************* -* Stata "do-file" file with labels and missing data specifications -* Created by ddltox on Nov 15, 2024 (Fri 10:03 AM PST) -* DDL source file: "/var/www/sda/tmpdir/ddl3640268546116940221.txt". -* -* Note that the data dictionary is given at the end of this file. -* Put the dictionary into a separate file (by editing this file). -* Then specify below the name of the dictionary file. -* -* DDL file gives the following dataset description: -* Records per case: 1 -* Record length: 130 -******************************************************************* - - -label data "American National Election Study 1948-2004 - Cumulative" - -#delimit ; -label define VCF0140A 1 "8 grades or less ('grade school')" - 2 "9-12 grades ('high school'), no diploma/equivalency" - 3 "12 grades, diploma or equivalency" - 4 "12 grades, diploma or equivalency, plus non-academic" - 5 "Some college, no degree;junior/community college level" - 6 "BA level degree" 7 "Advanced degrees incl. LLB" - 8 "DK" 9 "NA;no Pre IW;short-form 'new' Cross Section" ; -label define VCF0302 1 "Republican" 2 "Independent" - 3 "No preference;none;neither" 4 "Other" 5 "Democrat" - 8 "DK" 9 "NA;refused" ; -label define VCF0702 0 "DK;NA;no Post IW;refused to say if voted;" - 1 "No, did not vote" 2 "Yes, voted" ; -label define VCF0717 0 "DK;NA;no Post IW;form III,IV (1972);abbrev." 1 "No" - 2 "Yes" ; -label define VCF0718 0 "DK;NA;no Post IW;form III,IV (1972);abbrev." 1 "No" - 2 "Yes" ; -label define VCF0719 0 "DK;NA;no Post IW;form III,IV (1972);abbrev." 1 "No" - 2 "Yes" ; -label define VCF0720 0 "DK;NA;no Post IW;form III,IV (1972);abbrev." 1 "No" - 2 "Yes" ; -label define VCF0721 0 "DK;NA;no Post IW;form III,IV (1972);abbrev." 
- 1 "No (includes 'not asked for money' in 1966,1968)" - 2 "Yes (includes 'tax check-off' in 1976)" ; -label define VCF0803 - 0 "NA;no Post IW;form III,IV (1972);R not administered" - 1 "Extremely liberal" 2 "Liberal" 3 "Slightly liberal" - 4 "Moderate, middle of the road" - 5 "Slightly conservative" 6 "Conservative" - 7 "Extremely conservative" - 9 "DK;haven't thought much about it" ; -label define VCF0806 - 0 "NA;form II (1972);no Post IW;telephone IW (1984: see" - 1 "Government insurance plan" - 7 "Private insurance plan" - 9 "DK;haven't thought much about it" ; -label define VCF0809 0 "NA;no Post IW;form A (1986);telephone IW (2000)" - 1 "Government see to job and good standard of living" - 7 "Government let each person get ahead on his own" - 9 "DK;haven't thought much about it" ; -label define VCF0830 0 "NA;telephone IW (2000);no Post IW" - 1 "Government should help minority groups/blacks" - 7 "Minority groups/blacks should help themselves" - 9 "DK;haven't thought much about it" ; -label define VCF0838 0 "NA;no Post IW" - 1 "By law, abortion should never be permitted" - 2 "The law should permit abortion only in case of rape," - 3 "The law should permit abortion for reasons other than" - 4 "By law, a woman should always be able to obtain an" - 9 "DK;other" ; -label define VCF0839 0 "NA;telephone IW (2000)" - 1 "Government should provide many fewer services: reduce" - 7 "Government should provide many more services: increase" - 9 "DK;haven't thought much about it" ; -label define VCF0843 0 "NA;telephone IW (2000)" - 1 "Greatly decrease defense spending" - 7 "Greatly increase defense spending" - 9 "DK;haven't thought much about it" ; - - -#delimit cr - -******************************************************************* -infile using X -* Replace 'X' with the name of the dictionary file. -* -* The contents of the dictionary are given at the end of this file. -* Put the dictionary into a separate file (by editing this file). 
-* Then specify here the name of the dictionary file. -******************************************************************* -* The md, min and max specifications were translated -* into the following "REPLACE...IF" statements: - -replace VCF0140A = . if (VCF0140A == 0) -replace VCF0140A = . if (VCF0140A >= 8 ) -replace VCF0302 = . if (VCF0302 == 8) -replace VCF0302 = . if (VCF0302 >= 9 ) -replace VCF0702 = . if (VCF0702 == 0) -replace VCF0717 = . if (VCF0717 == 0) -replace VCF0718 = . if (VCF0718 == 0) -replace VCF0719 = . if (VCF0719 == 0) -replace VCF0720 = . if (VCF0720 == 0) -replace VCF0721 = . if (VCF0721 == 0) -replace VCF0803 = . if (VCF0803 == 0) -replace VCF0806 = . if (VCF0806 == 0) -replace VCF0809 = . if (VCF0809 == 0) -replace VCF0830 = . if (VCF0830 == 0) -replace VCF0838 = . if (VCF0838 == 0) -replace VCF0839 = . if (VCF0839 == 0) -replace VCF0843 = . if (VCF0843 == 0) - - -dictionary using Y { -***************************************************************** -* Replace 'Y' with the name of the data file. -* The default suffix is '.raw'. -* -* Put this dictionary into a separate file (with the suffix .dct). -* (The first line of the dictionary file must contain the -* 'dictionary' command.) -* -* Then edit the name of that dictionary file into the 'do-file' -* portion of the STATA definitions. 
-******************************************************************* -* Records per case: -_lines(1) - -_line(1) - -_column(1) long CASEID %8f "Case ID" -_column(11) str28 VERSION %28s "ANES Version Number" -_column(41) int VCF0004 %4f "Year of Study" -_column(46) int VCF0006 %4f "Study Respondent Number" -_column(51) long VCF0006A %8f "Unique Respondent Number" -_column(60) float VCF0009 %6.4f "Type 0 General Weight Variable" -_column(67) float VCF0009A %6.4f "Type 0 Weight - Post-Stratified Pre" -_column(74) float VCF0010 %6.4f "Type 1 General Weight Variable" -_column(81) float VCF0010A %6.4f "Type 1 Weight - Post-stratified Pre" -_column(88) float VCF0011 %6.4f "Type 2 General Weight Variable" -_column(95) float VCF0011A %6.4f "Type 2 Weight - Post-Stratified Pre" -_column(102) int VCF0140A :VCF0140A %1f "R Education 7-category" -_column(104) int VCF0302 :VCF0302 %1f "Initial Party ID Response" -_column(106) int VCF0702 :VCF0702 %1f "Did R Vote in Election" -_column(108) int VCF0717 :VCF0717 %1f "Did R Try to Influence Others Vote" -_column(110) int VCF0718 :VCF0718 %1f "Did R Attend Political Meetings" -_column(112) int VCF0719 :VCF0719 %1f "Did R Work for Party or Candidate" -_column(114) int VCF0720 :VCF0720 %1f "Did R Display Candidate Button/Stic" -_column(116) int VCF0721 :VCF0721 %1f "Did R Donate Money to Party/Candida" -_column(118) int VCF0803 :VCF0803 %1f "Liberal-Conservative 7pt Scale" -_column(120) int VCF0806 :VCF0806 %1f "R Position 7pt Govt Health Insuranc" -_column(122) int VCF0809 :VCF0809 %1f "R Position 7pt Govt Guaranteed Jobs" -_column(124) int VCF0830 :VCF0830 %1f "R Position 7pt Aid to Blacks Scale" -_column(126) int VCF0838 :VCF0838 %1f "When Should Abortion Be Allowed by" -_column(128) int VCF0839 :VCF0839 %1f "R Position 7pt Govt Services/Spendi" -_column(130) int VCF0843 :VCF0843 %1f "R Position 7pt Defense Spending Sca" -} +******************************************************************* +* Stata "do-file" file with labels and 
missing data specifications +* Created by ddltox on Nov 13, 2024 (Wed 07:10 PM PST) +* DDL source file: "/var/www/sda/tmpdir/ddl489112400220024910.txt". +* +* Note that the data dictionary is no longer given at the end of this file. +* It has been put into the separate file 'anes 2004 cumulative dictionary.do'. +* The name of that dictionary file is specified below (see 'infile using'). +* +* DDL file gives the following dataset description: +* Records per case: 1 +* Record length: 130 +******************************************************************* + + +label data "American National Election Study 1948-2004 - Cumulative" + +#delimit ; +label define VCF0140A 1 "8 grades or less ('grade school')" + 2 "9-12 grades ('high school'), no diploma/equivalency" + 3 "12 grades, diploma or equivalency" + 4 "12 grades, diploma or equivalency, plus non-academic" + 5 "Some college, no degree;junior/community college level" + 6 "BA level degree" 7 "Advanced degrees incl. LLB" + 8 "DK" 9 "NA;no Pre IW;short-form 'new' Cross Section" ; +label define VCF0302 1 "Republican" 2 "Independent" + 3 "No preference;none;neither" 4 "Other" 5 "Democrat" + 8 "DK" 9 "NA;refused" ; +label define VCF0702 0 "DK;NA;no Post IW;refused to say if voted;" + 1 "No, did not vote" 2 "Yes, voted" ; +label define VCF0717 0 "DK;NA;no Post IW;form III,IV (1972);abbrev." 1 "No" + 2 "Yes" ; +label define VCF0718 0 "DK;NA;no Post IW;form III,IV (1972);abbrev." 1 "No" + 2 "Yes" ; +label define VCF0719 0 "DK;NA;no Post IW;form III,IV (1972);abbrev." 1 "No" + 2 "Yes" ; +label define VCF0720 0 "DK;NA;no Post IW;form III,IV (1972);abbrev." 1 "No" + 2 "Yes" ; +label define VCF0721 0 "DK;NA;no Post IW;form III,IV (1972);abbrev." 
+ 1 "No (includes 'not asked for money' in 1966,1968)" + 2 "Yes (includes 'tax check-off' in 1976)" ; +label define VCF0803 + 0 "NA;no Post IW;form III,IV (1972);R not administered" + 1 "Extremely liberal" 2 "Liberal" 3 "Slightly liberal" + 4 "Moderate, middle of the road" + 5 "Slightly conservative" 6 "Conservative" + 7 "Extremely conservative" + 9 "DK;haven't thought much about it" ; +label define VCF0806 + 0 "NA;form II (1972);no Post IW;telephone IW (1984: see" + 1 "Government insurance plan" + 7 "Private insurance plan" + 9 "DK;haven't thought much about it" ; +label define VCF0809 0 "NA;no Post IW;form A (1986);telephone IW (2000)" + 1 "Government see to job and good standard of living" + 7 "Government let each person get ahead on his own" + 9 "DK;haven't thought much about it" ; +label define VCF0830 0 "NA;telephone IW (2000);no Post IW" + 1 "Government should help minority groups/blacks" + 7 "Minority groups/blacks should help themselves" + 9 "DK;haven't thought much about it" ; +label define VCF0838 0 "NA;no Post IW" + 1 "By law, abortion should never be permitted" + 2 "The law should permit abortion only in case of rape," + 3 "The law should permit abortion for reasons other than" + 4 "By law, a woman should always be able to obtain an" + 9 "DK;other" ; +label define VCF0839 0 "NA;telephone IW (2000)" + 1 "Government should provide many fewer services: reduce" + 7 "Government should provide many more services: increase" + 9 "DK;haven't thought much about it" ; +label define VCF0843 0 "NA;telephone IW (2000)" + 1 "Greatly decrease defense spending" + 7 "Greatly increase defense spending" + 9 "DK;haven't thought much about it" ; + + +#delimit cr + +******************************************************************* +infile using "anes 2004 cumulative dictionary.do" +* The placeholder 'X' has been replaced with the name of the dictionary file. +* +* The contents of the dictionary are no longer given at the end of this file. +* The dictionary has been put into that separate file, 
+* whose name (including its .do suffix) is specified in 'infile using' above. +******************************************************************* +* The md, min and max specifications were translated +* into the following "REPLACE...IF" statements: + +replace VCF0140A = . if (VCF0140A == 0) +replace VCF0140A = . if (VCF0140A >= 8 ) +replace VCF0302 = . if (VCF0302 == 8) +replace VCF0302 = . if (VCF0302 >= 9 ) +replace VCF0702 = . if (VCF0702 == 0) +replace VCF0717 = . if (VCF0717 == 0) +replace VCF0718 = . if (VCF0718 == 0) +replace VCF0719 = . if (VCF0719 == 0) +replace VCF0720 = . if (VCF0720 == 0) +replace VCF0721 = . if (VCF0721 == 0) +replace VCF0803 = . if (VCF0803 == 0) +replace VCF0806 = . if (VCF0806 == 0) +replace VCF0809 = . if (VCF0809 == 0) +replace VCF0830 = . if (VCF0830 == 0) +replace VCF0838 = . if (VCF0838 == 0) +replace VCF0839 = . if (VCF0839 == 0) +replace VCF0843 = . if (VCF0843 == 0) -- 2.45.2