<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
		<id>http://wiki.c2b2.columbia.edu/workbench/index.php?action=history&amp;feed=atom&amp;title=MakePED</id>
		<title>MakePED - Revision history</title>
		<link rel="self" type="application/atom+xml" href="http://wiki.c2b2.columbia.edu/workbench/index.php?action=history&amp;feed=atom&amp;title=MakePED"/>
		<link rel="alternate" type="text/html" href="http://wiki.c2b2.columbia.edu/workbench/index.php?title=MakePED&amp;action=history"/>
		<updated>2026-05-01T00:11:27Z</updated>
		<subtitle>Revision history for this page on the wiki</subtitle>
		<generator>MediaWiki 1.27.7</generator>

	<entry>
		<id>http://wiki.c2b2.columbia.edu/workbench/index.php?title=MakePED&amp;diff=4309&amp;oldid=prev</id>
		<title>Bjagla at 19:34, 18 October 2007</title>
		<link rel="alternate" type="text/html" href="http://wiki.c2b2.columbia.edu/workbench/index.php?title=MakePED&amp;diff=4309&amp;oldid=prev"/>
				<updated>2007-10-18T19:34:47Z</updated>
		
		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;{{BJNav}}&lt;br /&gt;
 #!/usr/bin/perl&lt;br /&gt;
 ## Create PED files for PLINK &lt;br /&gt;
 # C:\SAEC\scripts&amp;gt;perl makePED.pl -mapfile mapping_info.txt -snpfile snp_ids.txt -dir ../data -phenofile phenotypes.txt -outfile makePED.out &lt;br /&gt;
  &lt;br /&gt;
 use Getopt::Long;&lt;br /&gt;
 use Data::Dumper;&lt;br /&gt;
 use strict;&lt;br /&gt;
 &lt;br /&gt;
 my $VERSION = '1_00';&lt;br /&gt;
 my $mappingFile = 'C:/SAEC/SJS Delivery from GSK/PGX40001_Illumina1M/Documents/Locus_Annotation_Files/illumina1M.map';&lt;br /&gt;
 # Check if programm called correctly&lt;br /&gt;
 &lt;br /&gt;
 my ($snpfp, $mapfp, $datadir, $phefp, $outfp);&lt;br /&gt;
 my $result = GetOptions (&amp;quot;snpfile=s&amp;quot;	=&amp;gt; \$snpfp,&lt;br /&gt;
 						 &amp;quot;mapfile=s&amp;quot;	=&amp;gt; \$mapfp,&lt;br /&gt;
 						 &amp;quot;dir=s&amp;quot;		=&amp;gt; \$datadir,&lt;br /&gt;
 						 &amp;quot;phenofile=s&amp;quot;	=&amp;gt; \$phefp,&lt;br /&gt;
 						 &amp;quot;outfile=s&amp;quot;		=&amp;gt; \$outfp);&lt;br /&gt;
 if($snpfp eq &amp;quot;&amp;quot; || $mapfp eq &amp;quot;&amp;quot; || $datadir eq &amp;quot;&amp;quot; || $phefp eq &amp;quot;&amp;quot; || $outfp eq &amp;quot;&amp;quot; ){&lt;br /&gt;
 	print &amp;lt;&amp;lt;EOF;&lt;br /&gt;
 usage: perl makePED.pl &lt;br /&gt;
 		-snpfile &amp;lt;SNP ids (1st column)&amp;gt; &lt;br /&gt;
 		-mapfile &amp;lt;individual numbers (2nd column)&amp;gt; &lt;br /&gt;
 		-dir &amp;lt;directory with snp data files&amp;gt;  &lt;br /&gt;
 		-phenofile &amp;lt;phenotype file (1st col Individual ID, 2nd col sex, 3rd col phenotype)&amp;gt;&lt;br /&gt;
 		-outfile &amp;lt;output file&amp;gt;\n&lt;br /&gt;
 EOF&lt;br /&gt;
 	exit(-1);&lt;br /&gt;
 }&lt;br /&gt;
 &lt;br /&gt;
 open(OUTPUT2, '&amp;gt;', $outfp) || die(&amp;quot;Could not open file $outfp!&amp;quot;);&lt;br /&gt;
 print2(&amp;quot;#perl makePED.pl -snpfile $snpfp -mapfile $mapfp -dir $datadir -phenofile $phefp -outfile $outfp&amp;quot;);&lt;br /&gt;
 print2(&amp;quot;#Version: $VERSION\n&amp;quot;);&lt;br /&gt;
 &lt;br /&gt;
 &lt;br /&gt;
 &lt;br /&gt;
 #read in information about mapping between Illumina individuals and GSK individuals&lt;br /&gt;
 # file has been created from &amp;quot;PGX40001_GSK_SJS_B137_29Aug2007_DNAReport.xls&amp;quot;&lt;br /&gt;
 # we are only interested in the second column&lt;br /&gt;
 print2(&amp;quot;#Reading Mapfile $mapfp\n&amp;quot;);&lt;br /&gt;
 open(DAT, $mapfp) || die(&amp;quot;Could not open file  $mapfp!&amp;quot;);&lt;br /&gt;
 my @map_data=&amp;lt;DAT&amp;gt;;&lt;br /&gt;
 my %indi; #individual IDs&lt;br /&gt;
 foreach my $m (@map_data) {&lt;br /&gt;
 	if ($m =~ !/^#/) {#ignore comment lines&lt;br /&gt;
 		$m =~ s/[\r\n]//g; #remove end of line characters&lt;br /&gt;
 		my @a = split(/\t/, $m);&lt;br /&gt;
 		$indi{lc $a[1]} = 1;&lt;br /&gt;
 	}&lt;br /&gt;
 }&lt;br /&gt;
 close(DAT);&lt;br /&gt;
 if (!defined %indi) {&lt;br /&gt;
 	print2(&amp;quot;#%indi not defined&amp;quot;);&lt;br /&gt;
 	die(&amp;quot;%indi not defined&amp;quot;);&lt;br /&gt;
 }&lt;br /&gt;
 #print Dumper(\%indi);&lt;br /&gt;
 &lt;br /&gt;
 #read in information about which SNPs should be there&lt;br /&gt;
 print2(&amp;quot;#Reading SNP file $mappingFile\n&amp;quot;);&lt;br /&gt;
 open(DAT, $mappingFile) || die(&amp;quot;Could not open file  $mappingFile!&amp;quot;);&lt;br /&gt;
 my @snp_data=&amp;lt;DAT&amp;gt;;&lt;br /&gt;
 my %snps; #SNP names&lt;br /&gt;
 foreach my $m (@snp_data) {&lt;br /&gt;
 	if ($m =~ /^#/) { #ignore comment lines&lt;br /&gt;
 		next;&lt;br /&gt;
 	}&lt;br /&gt;
 	$m =~ s/[\r\n]//g; #remove end of line characters&lt;br /&gt;
 	my @a = split(/\t/, $m);&lt;br /&gt;
 	$snps{$a[1]} = &amp;quot; 0 0&amp;quot;; #missing value&lt;br /&gt;
 }&lt;br /&gt;
 close(DAT);&lt;br /&gt;
 &lt;br /&gt;
 if (!defined %snps) {&lt;br /&gt;
 	print2(&amp;quot;#%snps not defined&amp;quot;);&lt;br /&gt;
 	die(&amp;quot;%snps not defined&amp;quot;);&lt;br /&gt;
 }&lt;br /&gt;
 &lt;br /&gt;
 &lt;br /&gt;
 #read in information phenotypes&lt;br /&gt;
 print2(&amp;quot;#Reading Phenotype file $phefp\n&amp;quot;);&lt;br /&gt;
 open(DAT, $phefp) || die(&amp;quot;Could not open file  $phefp!&amp;quot;);&lt;br /&gt;
 my @phe_data=&amp;lt;DAT&amp;gt;;&lt;br /&gt;
 my %phes; #phenotypic data plus SNP data&lt;br /&gt;
 &lt;br /&gt;
 #print Dumper(\@phe_data);&lt;br /&gt;
 #initialize, we are only interested in things that have been declared in $mapfp&lt;br /&gt;
 foreach my $key (keys %indi) {&lt;br /&gt;
 	$phes{$key, &amp;quot;S&amp;quot;} = &amp;quot;&amp;quot;; # sex&lt;br /&gt;
 	$phes{$key, &amp;quot;P&amp;quot;} = &amp;quot;&amp;quot;; # phenotype&lt;br /&gt;
 }&lt;br /&gt;
 foreach my $m (@phe_data) {&lt;br /&gt;
 	#print $m ;&lt;br /&gt;
 	if ($m =~ /^#/) { #ignore comment lines&lt;br /&gt;
 		next;&lt;br /&gt;
 	}&lt;br /&gt;
 	$m =~ s/[\r\n]//g; #remove end of line characters&lt;br /&gt;
 	my @a = split(/\t/, $m);&lt;br /&gt;
 	#print &amp;quot;D1:&amp;quot; . Dumper(\@a);&lt;br /&gt;
 	if (exists $phes{$a[0], &amp;quot;S&amp;quot;}) {&lt;br /&gt;
 		$phes{$a[0], &amp;quot;S&amp;quot;} = $a[1];&lt;br /&gt;
 		$phes{$a[0], &amp;quot;P&amp;quot;} = $a[2];&lt;br /&gt;
 	}else {&lt;br /&gt;
 		print2(&amp;quot;#Unknown phenotype &amp;quot; . $a[0] . &amp;quot;\n&amp;quot;);&lt;br /&gt;
 	}&lt;br /&gt;
 &lt;br /&gt;
 }&lt;br /&gt;
 close(DAT);&lt;br /&gt;
 #print Dumper(\%phes);&lt;br /&gt;
 #exit(-1);&lt;br /&gt;
 &lt;br /&gt;
 foreach my $key (keys %indi) {&lt;br /&gt;
 	if ($phes{$key, &amp;quot;S&amp;quot;} eq &amp;quot;&amp;quot;) {&lt;br /&gt;
 		print2(&amp;quot;#No Information for Individual $key\n&amp;quot;);&lt;br /&gt;
 		next;&lt;br /&gt;
 	}&lt;br /&gt;
 	&lt;br /&gt;
 	if (!open(DAT, $datadir . &amp;quot;/&amp;quot; . $key . &amp;quot;.txt&amp;quot;)) {&lt;br /&gt;
 		print2(&amp;quot;#No data file for $key\n&amp;quot;);&lt;br /&gt;
 		next;&lt;br /&gt;
 	}&lt;br /&gt;
 	#initialize SNP hash&lt;br /&gt;
 	foreach my $m (keys %snps) {&lt;br /&gt;
 		$snps{$m} = &amp;quot; 0 0&amp;quot;; #missing value&lt;br /&gt;
 	}&lt;br /&gt;
 	print2( &amp;quot;#working on $key &amp;quot;);&lt;br /&gt;
 	# read the data&lt;br /&gt;
 	my $co=0;&lt;br /&gt;
 	while (&amp;lt;DAT&amp;gt;) {&lt;br /&gt;
 		#print &amp;quot;$_&amp;quot;;&lt;br /&gt;
 		if (/^#/){next;} #ignore comment lines&lt;br /&gt;
 		s/[\r\n]//g; #remove end of line characters&lt;br /&gt;
 		my @line = split(/\t/);&lt;br /&gt;
 		if (!exists $snps{$line[0]}) {&lt;br /&gt;
 			print2(&amp;quot;#Unknown SNP in Individual SNP file &amp;quot; . $line[0] . &amp;quot;\n&amp;quot;);&lt;br /&gt;
 			next;&lt;br /&gt;
 		}&lt;br /&gt;
 		$co ++;&lt;br /&gt;
 		$line[1] =~ s/-/0/;&lt;br /&gt;
 		$line[2] =~ s/-/0/;&lt;br /&gt;
 		if ($line[1] !~ /[DIACGT0]/ || $line[2] !~ /[DIACGT0]/) {&lt;br /&gt;
 			print (&amp;quot;#$line[0] $line[1] $line[2]\n&amp;quot;);&lt;br /&gt;
 		}&lt;br /&gt;
 		$snps{$line[0]} = &amp;quot; &amp;quot; . $line[1] . &amp;quot; &amp;quot; . $line[2];&lt;br /&gt;
 		#print Dumper(\@line) . &amp;quot;\n&amp;quot;;&lt;br /&gt;
 	}&lt;br /&gt;
 	close(DAT);&lt;br /&gt;
 	print2(&amp;quot;found $co SNPs\n&amp;quot;);&lt;br /&gt;
 &lt;br /&gt;
 	#print out data&lt;br /&gt;
 	my $sex = '0';&lt;br /&gt;
 	my $phenotype = &amp;quot;-9&amp;quot;;&lt;br /&gt;
 	if (uc $phes{$key, &amp;quot;S&amp;quot;} eq 'M') {&lt;br /&gt;
 		$sex = '1';&lt;br /&gt;
 	}&lt;br /&gt;
 	if (uc $phes{$key, &amp;quot;S&amp;quot;} eq 'F') {&lt;br /&gt;
 		$sex = '2';&lt;br /&gt;
 	}&lt;br /&gt;
 	if (uc $phes{$key, &amp;quot;P&amp;quot;} eq 'CO') {&lt;br /&gt;
 		$phenotype = '1';&lt;br /&gt;
 	}&lt;br /&gt;
 	if (uc $phes{$key, &amp;quot;P&amp;quot;} eq 'CA') {&lt;br /&gt;
 		$phenotype = '2';&lt;br /&gt;
 	}&lt;br /&gt;
 	my $co2=0;&lt;br /&gt;
 	foreach my $m (keys %snps) {&lt;br /&gt;
 		if ($snps{$m} eq &amp;quot; 0 0&amp;quot;) {&lt;br /&gt;
 			$co2++;&lt;br /&gt;
 		}&lt;br /&gt;
 	}&lt;br /&gt;
 	print2(&amp;quot;#found no info for $co2 SNPs\n&amp;quot;);&lt;br /&gt;
 	print2(&amp;quot;$key 1 0 0 $sex $phenotype &amp;quot;);&lt;br /&gt;
 	foreach my $m (sort keys %snps) {&lt;br /&gt;
 		#print this only to the outputfile&lt;br /&gt;
 #		print( OUTPUT2 &amp;quot;#$m&amp;quot;);&lt;br /&gt;
 		print( OUTPUT2 $snps{$m});&lt;br /&gt;
 	}&lt;br /&gt;
 	print2 (&amp;quot;\n&amp;quot;);&lt;br /&gt;
 &lt;br /&gt;
 }&lt;br /&gt;
 &lt;br /&gt;
 &lt;br /&gt;
 &lt;br /&gt;
 if (!defined %snps) {&lt;br /&gt;
 	print2(&amp;quot;#%snps not defined&amp;quot;);&lt;br /&gt;
 	die(&amp;quot;%snps not defined&amp;quot;);&lt;br /&gt;
 }&lt;br /&gt;
 &lt;br /&gt;
 #===============================&lt;br /&gt;
 &lt;br /&gt;
 sub print2 {&lt;br /&gt;
 	&lt;br /&gt;
 	my ($str) = @_;&lt;br /&gt;
 	print $str ;&lt;br /&gt;
 	print(OUTPUT2 $str);&lt;br /&gt;
 }&lt;/div&gt;</summary>
		<author><name>Bjagla</name></author>	</entry>

	</feed>