Archive

Posts Tagged ‘Amazon AWS EC2’

Sync A Large Directory Structure to S3

October 23rd, 2012

There are a handful of tools out there for working with S3 from the command line. The most popular (I think) is s3tools’ s3cmd. However, we have a filesystem that we would like to keep in sync with S3 while we are working on migrating. s3cmd has a sync command that works really well for filesystems with a small to medium number of files (not total file size… total file count). We have a filesystem that contains many millions of files, which can be problematic for programs like s3cmd (even rsync has issues with this many files). The problem (or feature) is that they tend to calculate the changes for everything recursively, all at once, and only then start performing operations.

If you do not need this feature, it takes a lot less memory to calculate all the changes on a directory by directory basis. Of course, if you’re syncing a single directory with millions of files, you have bigger problems anyway and this won’t help. Luckily, we tend to split up the files into categorized directories.

So, I wrote this very simple little PHP script that keeps S3 in sync with a local directory structure. It shouldn’t be too hard to rewrite this in just about any language. It’s not complicated at all.

IMPORTANT NOTES:

  • This WILL dereference symlinks. So make sure you do not have recursive symlinks in your directory structure. For example: “ln -s . recurseme” would be bad
  • The local filesystem is always authoritative. If it doesn’t exist locally, it will get deleted from S3
  • It does not compare MD5 sums (even though you can see that I thought about it in the code)
  • It does not update the S3-side timestamp with the local timestamp. A file is only synced if its size differs from the S3 copy or its local timestamp is later than the S3 timestamp
#!/usr/bin/php
<?php
// Use the full "<?php" open tag: the short "<?" form is only parsed when the
// short_open_tag ini setting is enabled, otherwise the script is echoed as text.
require_once('AWSSDKforPHP/sdk.class.php');

// Shared S3 client; the functions below reach it through "global $s3".
$s3 = new AmazonS3();
// Local directory tree to mirror (no trailing slash).
$basepath = '/path/to/sync';
// Destination bucket; object keys are paths relative to $basepath.
$bucket = 'your-bucket-name';

/**
 * Recursively collect every subdirectory found beneath $localdir.
 *
 * Each directory is appended, as "$localdir/<name>", to the global
 * $directoryList array (parents before their children). $localdir itself is
 * not added. is_dir() follows symlinks, so a symlink that points back up the
 * tree will make this recurse without end.
 *
 * @param string $localdir Directory to scan.
 * @return void
 */
function getDirectoryList($localdir) {
    global $directoryList;

    /*
    // this is useful for testing
    if (substr_count($localdir, '/') > 2) {
        return;
    }
    */
    $d = opendir($localdir);
    if ($d === false) {
        // Unreadable directory: report it and keep going with the rest of the tree.
        echo "error: failed to open directory $localdir\n";
        return;
    }
    // Compare against false explicitly: an entry literally named "0" is falsy
    // and would otherwise end the loop early, hiding the remaining entries.
    while (($ent = readdir($d)) !== false) {
        if ($ent == '.' || $ent == '..') {
            continue;
        }
        $path = $localdir . '/' . $ent;
        if (is_dir($path)) {
            $directoryList[] = $path;
            getDirectoryList($path);
        }
    }
    closedir($d);
}

/**
 * Make the S3 objects for one directory match that local directory
 * (files directly inside it only; subdirectories are handled separately).
 *
 * A local file is uploaded when it has no S3 object, when its mtime is later
 * than the object's LastModified, or when the sizes differ. An S3 object with
 * no matching local file is deleted.
 *
 * @param string $basepath Root of the local tree; stripped from paths to form S3 keys.
 * @param string $localdir Directory to sync ($basepath or a directory below it).
 * @return void
 */
function syncDirectory($basepath, $localdir) {
    // $basepath is a literal path, not a pattern: quote it so characters such
    // as "." or "+" in the path cannot change what gets stripped.
    $remotedir = preg_replace('%^' . preg_quote($basepath, '%') . '/?%', '', $localdir);
    echo "getting s3 file list for $remotedir\n";
    $s3filelist = getRemoteDirectory($remotedir);
    if (! is_array($s3filelist)) {
        // Without a reliable remote listing we cannot tell what changed.
        echo "error: skipping $localdir, could not list s3 objects\n";
        return;
    }
    echo "getting local file list for $localdir\n";
    $localfilelist = getLocalDirectory($basepath, $localdir);
    if (! is_array($localfilelist)) {
        // Never treat an unreadable directory as empty: that would make every
        // remote object look orphaned and queue all of them for deletion.
        echo "error: skipping $localdir, could not read local directory\n";
        return;
    }
    echo "calculating differences\n";
    foreach ($localfilelist as $key => $linfo) {
        $rinfo = array_key_exists($key, $s3filelist) ? $s3filelist[$key] : null;
        if ($rinfo === null
                || $linfo['lastmodified'] > $rinfo['lastmodified']
                || $linfo['size'] != $rinfo['size']) {
            syncFile($basepath . '/' . $key, $key);
        }
    }
    // The local filesystem is authoritative: remove anything it no longer has.
    foreach ($s3filelist as $key => $rinfo) {
        if (! array_key_exists($key, $localfilelist)) {
            deleteFile($key);
        }
    }
}

/**
 * List the S3 objects that sit directly inside one "directory" of the bucket.
 *
 * Requests use "/" as the delimiter so keys in deeper "subdirectories" are
 * not returned, and follow truncated responses until the listing is complete.
 *
 * @param string $remotedir Key prefix without a trailing slash; '' lists the bucket root.
 * @return array|false Map of object key => array('md5' => ETag without its quotes,
 *                     'size' => size as a string, 'lastmodified' => Unix timestamp),
 *                     or false when a request fails or the listing cannot advance.
 */
function getRemoteDirectory($remotedir) {
    global $s3, $bucket;

    $s3filelist = array();
    $args = array('delimiter' => '/');
    if (strlen($remotedir)) {
        $args['prefix'] = $remotedir . '/';
    }
    $marker = null;
    do {
        if ($marker !== null) {
            $args['marker'] = $marker;
        }
        $response = $s3->list_objects($bucket, $args);
        if (! $response->isOK()) {
            echo "error: failed to get S3 object list for static $remotedir\n";
            return false;
        }
        $lastkey = null;
        foreach ($response->body->Contents as $s3object) {
            $key = (string)$s3object->Key;
            $s3filelist[$key] = array(
                    'md5' => preg_replace('/^\"(.*)\"$/', '$1',
                        (string)$s3object->ETag),
                    'size' => (string)$s3object->Size,
                    'lastmodified' => strtotime((string)$s3object->LastModified),
                    );
            $lastkey = $key;
        }
        $isTruncated = ((string)$response->body->IsTruncated == 'true');
        if ($isTruncated) {
            // With a delimiter, a page may end on (or consist only of) common
            // prefixes, so the last object key is not a safe resume point.
            // Prefer the NextMarker S3 supplies and fall back to the last key.
            $next = $lastkey;
            if (isset($response->body->NextMarker)
                    && strlen((string)$response->body->NextMarker)) {
                $next = (string)$response->body->NextMarker;
            }
            if ($next === null || $next === $marker) {
                // No way to advance: fail instead of repeating the request forever.
                echo "error: failed to get S3 object list for static $remotedir\n";
                return false;
            }
            $marker = $next;
        }
        unset($response);
    } while ($isTruncated);
    return $s3filelist;
}

/**
 * List the files that sit directly inside one local directory.
 *
 * Anything is_dir() reports as a directory is skipped. Keys are the file
 * paths with the leading $basepath (and its slash) removed, matching the
 * S3 keys produced for the same files.
 *
 * @param string $basepath Root of the local tree, stripped from each path.
 * @param string $localdir Directory to read.
 * @return array|false Map of key => array('md5' => md5_file() result or null,
 *                     'size' => filesize() result, 'lastmodified' => filemtime() result),
 *                     or false when the directory cannot be opened.
 */
function getLocalDirectory($basepath, $localdir) {
    $d = opendir($localdir);
    if (! $d) {
        return false;
    }
    // MD5 hashing is opt-in through the global $checkmd5 flag. The superglobal
    // is $GLOBALS; the former "$GLOBAL" was an undefined variable, so the flag
    // could never be honoured.
    $checkmd5 = ! empty($GLOBALS['checkmd5']);
    // Quote $basepath so regex metacharacters in the path are matched literally.
    $stripbase = '%^' . preg_quote($basepath, '%') . '/?%';
    $localfilelist = array();
    // Compare against false explicitly: a file literally named "0" is falsy
    // and would otherwise end the loop early, hiding the remaining files.
    while (($ent = readdir($d)) !== false) {
        if ($ent == '.' || $ent == '..') {
            continue;
        }
        $localfile = $localdir . '/' . $ent;
        if (is_dir($localfile)) {
            continue;
        }
        $key = preg_replace($stripbase, '', $localfile);
        $localfilelist[$key] = array(
                'md5' => $checkmd5 ? md5_file($localfile) : null,
                'size' => filesize($localfile),
                'lastmodified' => filemtime($localfile),
                );
    }
    closedir($d);
    return $localfilelist;
}

/**
 * Upload one local file to the bucket under the given key.
 *
 * Failures are reported on stdout and otherwise ignored so the overall sync
 * can carry on with the next file.
 *
 * @param string $localfile  Path of the file to upload.
 * @param string $remotefile S3 object key to write.
 * @return void
 */
function syncFile($localfile, $remotefile) {
    global $s3, $bucket;

    echo "     sync  : $localfile -> s3://$bucket/$remotefile\n";
    try {
        $response = $s3->create_object($bucket, $remotefile,
                array('fileUpload' => $localfile));
        if (! $response->isOK()) {
            echo "error: failed to sync $localfile\n";
            echo $response->body->Code . ": " . $response->body->Message . "\n";
        }
    } catch (Exception $e) {
        echo "error: failed to sync $localfile\n";
        // getMessage is a method; without the call parentheses PHP reads a
        // non-existent property and the reason for the failure is never shown.
        echo $e->getMessage() . "\n";
    }
}

/**
 * Delete one object from the bucket.
 *
 * Failures are reported on stdout and otherwise ignored so the overall sync
 * can carry on with the next object.
 *
 * @param string $remotefile S3 object key to delete.
 * @return void
 */
function deleteFile($remotefile) {
    global $s3, $bucket;

    echo "     delete: s3://$bucket/$remotefile\n";
    try {
        // Pass the parameter itself; the previous code used an undefined $key,
        // so the requested object was never the one sent to S3.
        $response = $s3->delete_object($bucket, $remotefile);
        if (! $response->isOK()) {
            echo "error: failed to delete s3://$bucket/$remotefile:\n";
            echo $response->body->Code . ": " . $response->body->Message . "\n";
        }
    } catch (Exception $e) {
        echo "error: failed to delete s3://$bucket/$remotefile:\n";
        echo $e->getMessage() . "\n";
    }
}

// Build the full list of directories up front, then sync them one at a time so
// only a single directory's file lists are held in memory at once.
// getDirectoryList() only reports subdirectories, so seed the list with
// $basepath itself; otherwise files sitting directly in the root are never
// uploaded (they map to keys at the top level of the bucket).
$directoryList = array($basepath);
getDirectoryList($basepath);
foreach ($directoryList as $localdir) {
    syncDirectory($basepath, $localdir);
}

?>

General , , ,

Connecting a Fortinet VPN to Amazon AWS VPC

May 5th, 2012

There is a lot of spotty information out there on the Internet on how to connect a Fortinet VPN router to an Amazon AWS VPC VPN, but a lot of it is confusing, wants you to use the GUI, is outdated, or simply doesn’t work that well. It took me a bit to get all of the pieces put together, but here are the basic steps involved:

  1. Enable asymmetric routing – this allows packets to go out through one of the tunnels and come back through the other
  2. Create interface based VPN tunnels (phase1 and phase2)
  3. Configure the wan1 sub-interfaces automatically created in step 2
  4. Configure BGP
  5. Configure firewall rules

So here’s a generic configuration that does this. If you right click on the VPN gateway in the AWS Console and download the “Generic” configuration, you can easily change the values in this config.

Also, you need to make sure that the policy numbers I put in for the firewall configuration (policies 200-203) do not conflict with any existing policy numbers you have configured. If they do, just pick a different number; the number doesn’t matter. Note that these policies allow all traffic in and out of your internal network and the VPC. After you get it working, you’ll probably want to tighten those policies up quite a bit.

So without further ado:

config system settings
    set asymroute enable
end

config vpn ipsec phase1-interface
    edit "amazon1"
        set interface "wan1"
        set dpd enable
        set dhgrp 2
        set proposal aes128-sha1
        set remote-gw <CHANGE: Tunnel #1 Outside Virtual Private Gateway>
        set psksecret <CHANGE: Tunnel #1 Pre-Shared Key>
        set dpd-retryinterval 10
    next
    edit "amazon2"
        set interface "wan1"
        set dpd enable
        set dhgrp 2
        set proposal aes128-sha1
        set remote-gw <CHANGE: Tunnel #2 Outside Virtual Private Gateway>
        set psksecret <CHANGE: Tunnel #2 Pre-Shared Key>
        set dpd-retryinterval 10
    next
end

config vpn ipsec phase2-interface
    edit "amazon1"
        set dhgrp 2
        set pfs enable
        set phase1name "amazon1"
        set proposal aes128-sha1
        set replay enable
    next
    edit "amazon2"
        set dhgrp 2
        set pfs enable
        set phase1name "amazon2"
        set proposal aes128-sha1
        set replay enable
    next
end

config system interface
    edit "amazon1"
        set vdom "root"
        set ip <CHANGE: Tunnel #1 Inside Customer Gateway> 255.255.255.255
        set type tunnel
        set remote-ip <CHANGE: Tunnel #1 Inside Virtual Private Gateway>
        set interface "wan1"
    next
    edit "amazon2"
        set vdom "root"
        set ip <CHANGE: Tunnel #2 Inside Customer Gateway> 255.255.255.255
        set type tunnel
        set remote-ip <CHANGE: Tunnel #2 Inside Virtual Private Gateway>
        set interface "wan1"
    next
end

config router bgp
    set as <CHANGE: BGP Customer Gateway ASN>
        config neighbor
            edit <CHANGE: Tunnel #1 Inside Virtual Private Gateway>
                set remote-as <CHANGE: Tunnel #1 BGP Virtual Private Gateway ASN>
            next
            edit <CHANGE: Tunnel #2 Inside Virtual Private Gateway>
                set remote-as <CHANGE: Tunnel #2 BGP Virtual Private Gateway ASN>
            next
        end
        config network
            edit 1
                set prefix <CHANGE: Your Local Net> <CHANGE: Your Local netmask>
            next
        end
        config redistribute "connected"
        end
        config redistribute "rip"
        end
        config redistribute "ospf"
        end
        config redistribute "static"
        end
    set router-id <CHANGE: Tunnel #1 Inside Virtual Private Gateway>
end

config firewall policy
    edit 200
        set srcintf "internal"
        set dstintf "amazon1"
            set srcaddr "all"
            set dstaddr "all"
        set action accept
        set schedule "always"
            set service "ANY"
    next
    edit 201
        set srcintf "amazon1"
        set dstintf "internal"
            set srcaddr "all"
            set dstaddr "all"
        set action accept
        set schedule "always"
            set service "ANY"
    next
    edit 202
        set srcintf "internal"
        set dstintf "amazon2"
            set srcaddr "all"
            set dstaddr "all"
        set action accept
        set schedule "always"
            set service "ANY"
    next
    edit 203
        set srcintf "amazon2"
        set dstintf "internal"
            set srcaddr "all"
            set dstaddr "all"
        set action accept
        set schedule "always"
            set service "ANY"
    next
end

General